xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll (revision c83f23d6abb6f8d693c643bc1b43f9b9e06bc537)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
3
4; rdar://9428579
5
; Named struct types, each wrapping a single vector. No function visible in
; this part of the file references them.
6%type1 = type { <16 x i8> }
7%type2 = type { <8 x i8> }
8%type3 = type { <4 x i16> }
9
10
; t1: loads a pointer from %argtable and stores an all-zero <16 x i8> through
; it. Expected codegen: movi.2d materializes the zero and a single q-register
; str writes the whole vector.
11define hidden fastcc void @t1(ptr %argtable) nounwind {
12; CHECK-LABEL: t1:
13; CHECK:       // %bb.0: // %entry
14; CHECK-NEXT:    movi.2d v0, #0000000000000000
15; CHECK-NEXT:    ldr x8, [x0]
16; CHECK-NEXT:    str q0, [x8]
17; CHECK-NEXT:    ret
18entry:
19  %tmp1 = load ptr, ptr %argtable, align 8
20  store <16 x i8> zeroinitializer, ptr %tmp1, align 16
21  ret void
22}
23
; t2: 64-bit variant of t1. Loads a pointer from %argtable and stores an
; all-zero <8 x i8> through it. Expected codegen: the zero is still built with
; movi.2d, but only the d-register half is stored.
24define hidden fastcc void @t2(ptr %argtable) nounwind {
25; CHECK-LABEL: t2:
26; CHECK:       // %bb.0: // %entry
27; CHECK-NEXT:    movi.2d v0, #0000000000000000
28; CHECK-NEXT:    ldr x8, [x0]
29; CHECK-NEXT:    str d0, [x8]
30; CHECK-NEXT:    ret
31entry:
32  %tmp1 = load ptr, ptr %argtable, align 8
33  store <8 x i8> zeroinitializer, ptr %tmp1, align 8
34  ret void
35}
36
37; add a bunch of tests for rdar://11246289
38
; Pointer-typed common globals, each initialized to null. The fct1_*/fct2_*
; functions below load the destination array pointer from the global whose
; suffix matches their vector shape. The floatglobalArray* globals are not
; used by any function visible in this part of the file.
39@globalArray64x2 = common global ptr null, align 8
40@globalArray32x4 = common global ptr null, align 8
41@globalArray16x8 = common global ptr null, align 8
42@globalArray8x16 = common global ptr null, align 8
43@globalArray64x1 = common global ptr null, align 8
44@globalArray32x2 = common global ptr null, align 8
45@globalArray16x4 = common global ptr null, align 8
46@globalArray8x8 = common global ptr null, align 8
47@floatglobalArray64x2 = common global ptr null, align 8
48@floatglobalArray32x4 = common global ptr null, align 8
49@floatglobalArray64x1 = common global ptr null, align 8
50@floatglobalArray32x2 = common global ptr null, align 8
51
; fct1_64x2: copies element %offset of the <2 x i64> array at %array into the
; same index of the array whose pointer is loaded from @globalArray64x2.
; Expected codegen: the index is scaled once (lsl #4) into x9 and that
; register is reused as the offset of both the q-register load and store.
52define void @fct1_64x2(ptr nocapture %array, i64 %offset) nounwind ssp {
53; CHECK-LABEL: fct1_64x2:
54; CHECK:       // %bb.0: // %entry
55; CHECK-NEXT:    adrp x8, :got:globalArray64x2
56; CHECK-NEXT:    lsl x9, x1, #4
57; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray64x2]
58; CHECK-NEXT:    ldr q0, [x0, x9]
59; CHECK-NEXT:    ldr x8, [x8]
60; CHECK-NEXT:    str q0, [x8, x9]
61; CHECK-NEXT:    ret
62entry:
63  %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 %offset
64  %tmp = load <2 x i64>, ptr %arrayidx, align 16
65  %tmp1 = load ptr, ptr @globalArray64x2, align 8
66  %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %tmp1, i64 %offset
67  store <2 x i64> %tmp, ptr %arrayidx1, align 16
68  ret void
69}
70
; fct2_64x2: constant-index variant of fct1_64x2. Loads <2 x i64> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray64x2. Expected codegen: both indices fold into immediate byte
; offsets (#48 for the load, #80 for the store).
71define void @fct2_64x2(ptr nocapture %array) nounwind ssp {
72; CHECK-LABEL: fct2_64x2:
73; CHECK:       // %bb.0: // %entry
74; CHECK-NEXT:    adrp x8, :got:globalArray64x2
75; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray64x2]
76; CHECK-NEXT:    ldr q0, [x0, #48]
77; CHECK-NEXT:    ldr x8, [x8]
78; CHECK-NEXT:    str q0, [x8, #80]
79; CHECK-NEXT:    ret
80entry:
81  %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 3
82  %tmp = load <2 x i64>, ptr %arrayidx, align 16
83  %tmp1 = load ptr, ptr @globalArray64x2, align 8
84  %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %tmp1, i64 5
85  store <2 x i64> %tmp, ptr %arrayidx1, align 16
86  ret void
87}
88
; fct1_32x4: copies element %offset of the <4 x i32> array at %array into the
; same index of the array whose pointer is loaded from @globalArray32x4.
; Expected codegen: the index is scaled once (lsl #4) into x9 and that
; register is reused as the offset of both the q-register load and store.
89define void @fct1_32x4(ptr nocapture %array, i64 %offset) nounwind ssp {
90; CHECK-LABEL: fct1_32x4:
91; CHECK:       // %bb.0: // %entry
92; CHECK-NEXT:    adrp x8, :got:globalArray32x4
93; CHECK-NEXT:    lsl x9, x1, #4
94; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray32x4]
95; CHECK-NEXT:    ldr q0, [x0, x9]
96; CHECK-NEXT:    ldr x8, [x8]
97; CHECK-NEXT:    str q0, [x8, x9]
98; CHECK-NEXT:    ret
99entry:
100  %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 %offset
101  %tmp = load <4 x i32>, ptr %arrayidx, align 16
102  %tmp1 = load ptr, ptr @globalArray32x4, align 8
103  %arrayidx1 = getelementptr inbounds <4 x i32>, ptr %tmp1, i64 %offset
104  store <4 x i32> %tmp, ptr %arrayidx1, align 16
105  ret void
106}
107
; fct2_32x4: constant-index variant of fct1_32x4. Loads <4 x i32> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray32x4. Expected codegen: both indices fold into immediate byte
; offsets (#48 for the load, #80 for the store).
108define void @fct2_32x4(ptr nocapture %array) nounwind ssp {
109; CHECK-LABEL: fct2_32x4:
110; CHECK:       // %bb.0: // %entry
111; CHECK-NEXT:    adrp x8, :got:globalArray32x4
112; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray32x4]
113; CHECK-NEXT:    ldr q0, [x0, #48]
114; CHECK-NEXT:    ldr x8, [x8]
115; CHECK-NEXT:    str q0, [x8, #80]
116; CHECK-NEXT:    ret
117entry:
118  %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 3
119  %tmp = load <4 x i32>, ptr %arrayidx, align 16
120  %tmp1 = load ptr, ptr @globalArray32x4, align 8
121  %arrayidx1 = getelementptr inbounds <4 x i32>, ptr %tmp1, i64 5
122  store <4 x i32> %tmp, ptr %arrayidx1, align 16
123  ret void
124}
125
; fct1_16x8: copies element %offset of the <8 x i16> array at %array into the
; same index of the array whose pointer is loaded from @globalArray16x8.
; Expected codegen: the index is scaled once (lsl #4) into x9 and that
; register is reused as the offset of both the q-register load and store.
126define void @fct1_16x8(ptr nocapture %array, i64 %offset) nounwind ssp {
127; CHECK-LABEL: fct1_16x8:
128; CHECK:       // %bb.0: // %entry
129; CHECK-NEXT:    adrp x8, :got:globalArray16x8
130; CHECK-NEXT:    lsl x9, x1, #4
131; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray16x8]
132; CHECK-NEXT:    ldr q0, [x0, x9]
133; CHECK-NEXT:    ldr x8, [x8]
134; CHECK-NEXT:    str q0, [x8, x9]
135; CHECK-NEXT:    ret
136entry:
137  %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 %offset
138  %tmp = load <8 x i16>, ptr %arrayidx, align 16
139  %tmp1 = load ptr, ptr @globalArray16x8, align 8
140  %arrayidx1 = getelementptr inbounds <8 x i16>, ptr %tmp1, i64 %offset
141  store <8 x i16> %tmp, ptr %arrayidx1, align 16
142  ret void
143}
144
; fct2_16x8: constant-index variant of fct1_16x8. Loads <8 x i16> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray16x8. Expected codegen: both indices fold into immediate byte
; offsets (#48 for the load, #80 for the store).
145define void @fct2_16x8(ptr nocapture %array) nounwind ssp {
146; CHECK-LABEL: fct2_16x8:
147; CHECK:       // %bb.0: // %entry
148; CHECK-NEXT:    adrp x8, :got:globalArray16x8
149; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray16x8]
150; CHECK-NEXT:    ldr q0, [x0, #48]
151; CHECK-NEXT:    ldr x8, [x8]
152; CHECK-NEXT:    str q0, [x8, #80]
153; CHECK-NEXT:    ret
154entry:
155  %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 3
156  %tmp = load <8 x i16>, ptr %arrayidx, align 16
157  %tmp1 = load ptr, ptr @globalArray16x8, align 8
158  %arrayidx1 = getelementptr inbounds <8 x i16>, ptr %tmp1, i64 5
159  store <8 x i16> %tmp, ptr %arrayidx1, align 16
160  ret void
161}
162
; fct1_8x16: copies element %offset of the <16 x i8> array at %array into the
; same index of the array whose pointer is loaded from @globalArray8x16.
; Expected codegen: the index is scaled once (lsl #4) into x9 and that
; register is reused as the offset of both the q-register load and store.
163define void @fct1_8x16(ptr nocapture %array, i64 %offset) nounwind ssp {
164; CHECK-LABEL: fct1_8x16:
165; CHECK:       // %bb.0: // %entry
166; CHECK-NEXT:    adrp x8, :got:globalArray8x16
167; CHECK-NEXT:    lsl x9, x1, #4
168; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray8x16]
169; CHECK-NEXT:    ldr q0, [x0, x9]
170; CHECK-NEXT:    ldr x8, [x8]
171; CHECK-NEXT:    str q0, [x8, x9]
172; CHECK-NEXT:    ret
173entry:
174  %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 %offset
175  %tmp = load <16 x i8>, ptr %arrayidx, align 16
176  %tmp1 = load ptr, ptr @globalArray8x16, align 8
177  %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %tmp1, i64 %offset
178  store <16 x i8> %tmp, ptr %arrayidx1, align 16
179  ret void
180}
181
; fct2_8x16: constant-index variant of fct1_8x16. Loads <16 x i8> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray8x16. Expected codegen: both indices fold into immediate byte
; offsets (#48 for the load, #80 for the store).
182define void @fct2_8x16(ptr nocapture %array) nounwind ssp {
183; CHECK-LABEL: fct2_8x16:
184; CHECK:       // %bb.0: // %entry
185; CHECK-NEXT:    adrp x8, :got:globalArray8x16
186; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray8x16]
187; CHECK-NEXT:    ldr q0, [x0, #48]
188; CHECK-NEXT:    ldr x8, [x8]
189; CHECK-NEXT:    str q0, [x8, #80]
190; CHECK-NEXT:    ret
191entry:
192  %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 3
193  %tmp = load <16 x i8>, ptr %arrayidx, align 16
194  %tmp1 = load ptr, ptr @globalArray8x16, align 8
195  %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %tmp1, i64 5
196  store <16 x i8> %tmp, ptr %arrayidx1, align 16
197  ret void
198}
199
; fct1_64x1: copies element %offset of the <1 x i64> array at %array into the
; same index of the array whose pointer is loaded from @globalArray64x1.
; Expected codegen: no separate shift; x1 is used directly with "lsl #3" in
; the addressing mode of both the d-register load and store.
200define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp {
201; CHECK-LABEL: fct1_64x1:
202; CHECK:       // %bb.0: // %entry
203; CHECK-NEXT:    adrp x8, :got:globalArray64x1
204; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray64x1]
205; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
206; CHECK-NEXT:    ldr x8, [x8]
207; CHECK-NEXT:    str d0, [x8, x1, lsl #3]
208; CHECK-NEXT:    ret
209entry:
210  %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset
211  %tmp = load <1 x i64>, ptr %arrayidx, align 8
212  %tmp1 = load ptr, ptr @globalArray64x1, align 8
213  %arrayidx1 = getelementptr inbounds <1 x i64>, ptr %tmp1, i64 %offset
214  store <1 x i64> %tmp, ptr %arrayidx1, align 8
215  ret void
216}
217
; fct2_64x1: constant-index variant of fct1_64x1. Loads <1 x i64> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray64x1. Expected codegen: both indices fold into immediate byte
; offsets (#24 for the load, #40 for the store) on d-register accesses.
218define void @fct2_64x1(ptr nocapture %array) nounwind ssp {
219; CHECK-LABEL: fct2_64x1:
220; CHECK:       // %bb.0: // %entry
221; CHECK-NEXT:    adrp x8, :got:globalArray64x1
222; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray64x1]
223; CHECK-NEXT:    ldr d0, [x0, #24]
224; CHECK-NEXT:    ldr x8, [x8]
225; CHECK-NEXT:    str d0, [x8, #40]
226; CHECK-NEXT:    ret
227entry:
228  %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 3
229  %tmp = load <1 x i64>, ptr %arrayidx, align 8
230  %tmp1 = load ptr, ptr @globalArray64x1, align 8
231  %arrayidx1 = getelementptr inbounds <1 x i64>, ptr %tmp1, i64 5
232  store <1 x i64> %tmp, ptr %arrayidx1, align 8
233  ret void
234}
235
; fct1_32x2: copies element %offset of the <2 x i32> array at %array into the
; same index of the array whose pointer is loaded from @globalArray32x2.
; Expected codegen: no separate shift; x1 is used directly with "lsl #3" in
; the addressing mode of both the d-register load and store.
236define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp {
237; CHECK-LABEL: fct1_32x2:
238; CHECK:       // %bb.0: // %entry
239; CHECK-NEXT:    adrp x8, :got:globalArray32x2
240; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray32x2]
241; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
242; CHECK-NEXT:    ldr x8, [x8]
243; CHECK-NEXT:    str d0, [x8, x1, lsl #3]
244; CHECK-NEXT:    ret
245entry:
246  %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset
247  %tmp = load <2 x i32>, ptr %arrayidx, align 8
248  %tmp1 = load ptr, ptr @globalArray32x2, align 8
249  %arrayidx1 = getelementptr inbounds <2 x i32>, ptr %tmp1, i64 %offset
250  store <2 x i32> %tmp, ptr %arrayidx1, align 8
251  ret void
252}
253
; fct2_32x2: constant-index variant of fct1_32x2. Loads <2 x i32> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray32x2. Expected codegen: both indices fold into immediate byte
; offsets (#24 for the load, #40 for the store) on d-register accesses.
254define void @fct2_32x2(ptr nocapture %array) nounwind ssp {
255; CHECK-LABEL: fct2_32x2:
256; CHECK:       // %bb.0: // %entry
257; CHECK-NEXT:    adrp x8, :got:globalArray32x2
258; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray32x2]
259; CHECK-NEXT:    ldr d0, [x0, #24]
260; CHECK-NEXT:    ldr x8, [x8]
261; CHECK-NEXT:    str d0, [x8, #40]
262; CHECK-NEXT:    ret
263entry:
264  %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 3
265  %tmp = load <2 x i32>, ptr %arrayidx, align 8
266  %tmp1 = load ptr, ptr @globalArray32x2, align 8
267  %arrayidx1 = getelementptr inbounds <2 x i32>, ptr %tmp1, i64 5
268  store <2 x i32> %tmp, ptr %arrayidx1, align 8
269  ret void
270}
271
; fct1_16x4: copies element %offset of the <4 x i16> array at %array into the
; same index of the array whose pointer is loaded from @globalArray16x4.
; Expected codegen: no separate shift; x1 is used directly with "lsl #3" in
; the addressing mode of both the d-register load and store.
272define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp {
273; CHECK-LABEL: fct1_16x4:
274; CHECK:       // %bb.0: // %entry
275; CHECK-NEXT:    adrp x8, :got:globalArray16x4
276; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray16x4]
277; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
278; CHECK-NEXT:    ldr x8, [x8]
279; CHECK-NEXT:    str d0, [x8, x1, lsl #3]
280; CHECK-NEXT:    ret
281entry:
282  %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset
283  %tmp = load <4 x i16>, ptr %arrayidx, align 8
284  %tmp1 = load ptr, ptr @globalArray16x4, align 8
285  %arrayidx1 = getelementptr inbounds <4 x i16>, ptr %tmp1, i64 %offset
286  store <4 x i16> %tmp, ptr %arrayidx1, align 8
287  ret void
288}
289
; fct2_16x4: constant-index variant of fct1_16x4. Loads <4 x i16> element 3 of
; %array and stores it to element 5 of the array pointed to by
; @globalArray16x4. Expected codegen: both indices fold into immediate byte
; offsets (#24 for the load, #40 for the store) on d-register accesses.
290define void @fct2_16x4(ptr nocapture %array) nounwind ssp {
291; CHECK-LABEL: fct2_16x4:
292; CHECK:       // %bb.0: // %entry
293; CHECK-NEXT:    adrp x8, :got:globalArray16x4
294; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray16x4]
295; CHECK-NEXT:    ldr d0, [x0, #24]
296; CHECK-NEXT:    ldr x8, [x8]
297; CHECK-NEXT:    str d0, [x8, #40]
298; CHECK-NEXT:    ret
299entry:
300  %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 3
301  %tmp = load <4 x i16>, ptr %arrayidx, align 8
302  %tmp1 = load ptr, ptr @globalArray16x4, align 8
303  %arrayidx1 = getelementptr inbounds <4 x i16>, ptr %tmp1, i64 5
304  store <4 x i16> %tmp, ptr %arrayidx1, align 8
305  ret void
306}
307
; fct1_8x8: copies element %offset of the <8 x i8> array at %array into the
; same index of the array whose pointer is loaded from @globalArray8x8.
; Expected codegen: no separate shift; x1 is used directly with "lsl #3" in
; the addressing mode of both the d-register load and store.
308define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp {
309; CHECK-LABEL: fct1_8x8:
310; CHECK:       // %bb.0: // %entry
311; CHECK-NEXT:    adrp x8, :got:globalArray8x8
312; CHECK-NEXT:    ldr x8, [x8, :got_lo12:globalArray8x8]
313; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
314; CHECK-NEXT:    ldr x8, [x8]
315; CHECK-NEXT:    str d0, [x8, x1, lsl #3]
316; CHECK-NEXT:    ret
317entry:
318  %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset
319  %tmp = load <8 x i8>, ptr %arrayidx, align 8
320  %tmp1 = load ptr, ptr @globalArray8x8, align 8
321  %arrayidx1 = getelementptr inbounds <8 x i8>, ptr %tmp1, i64 %offset
322  store <8 x i8> %tmp, ptr %arrayidx1, align 8
323  ret void
324}
325
326; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
327; registers for unscaled vector accesses
328
; fct0: returns the <1 x i64> loaded from %str plus 3 bytes. The byte offset
; is not a multiple of the 8-byte access size, so the expected codegen is an
; unscaled ldur into d0 with offset #3.
329define <1 x i64> @fct0(ptr %str) nounwind readonly ssp {
330; CHECK-LABEL: fct0:
331; CHECK:       // %bb.0: // %entry
332; CHECK-NEXT:    ldur d0, [x0, #3]
333; CHECK-NEXT:    ret
334entry:
335  %p = getelementptr inbounds i8, ptr %str, i64 3
336  %0 = load <1 x i64>, ptr %p, align 8
337  ret <1 x i64> %0
338}
339
; fct1: returns the <2 x i32> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into d0 with offset #3.
340define <2 x i32> @fct1(ptr %str) nounwind readonly ssp {
341; CHECK-LABEL: fct1:
342; CHECK:       // %bb.0: // %entry
343; CHECK-NEXT:    ldur d0, [x0, #3]
344; CHECK-NEXT:    ret
345entry:
346  %p = getelementptr inbounds i8, ptr %str, i64 3
347  %0 = load <2 x i32>, ptr %p, align 8
348  ret <2 x i32> %0
349}
350
; fct2: returns the <4 x i16> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into d0 with offset #3.
351define <4 x i16> @fct2(ptr %str) nounwind readonly ssp {
352; CHECK-LABEL: fct2:
353; CHECK:       // %bb.0: // %entry
354; CHECK-NEXT:    ldur d0, [x0, #3]
355; CHECK-NEXT:    ret
356entry:
357  %p = getelementptr inbounds i8, ptr %str, i64 3
358  %0 = load <4 x i16>, ptr %p, align 8
359  ret <4 x i16> %0
360}
361
; fct3: returns the <8 x i8> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into d0 with offset #3.
362define <8 x i8> @fct3(ptr %str) nounwind readonly ssp {
363; CHECK-LABEL: fct3:
364; CHECK:       // %bb.0: // %entry
365; CHECK-NEXT:    ldur d0, [x0, #3]
366; CHECK-NEXT:    ret
367entry:
368  %p = getelementptr inbounds i8, ptr %str, i64 3
369  %0 = load <8 x i8>, ptr %p, align 8
370  ret <8 x i8> %0
371}
372
; fct4: returns the <2 x i64> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into q0 with offset #3.
373define <2 x i64> @fct4(ptr %str) nounwind readonly ssp {
374; CHECK-LABEL: fct4:
375; CHECK:       // %bb.0: // %entry
376; CHECK-NEXT:    ldur q0, [x0, #3]
377; CHECK-NEXT:    ret
378entry:
379  %p = getelementptr inbounds i8, ptr %str, i64 3
380  %0 = load <2 x i64>, ptr %p, align 16
381  ret <2 x i64> %0
382}
383
; fct5: returns the <4 x i32> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into q0 with offset #3.
384define <4 x i32> @fct5(ptr %str) nounwind readonly ssp {
385; CHECK-LABEL: fct5:
386; CHECK:       // %bb.0: // %entry
387; CHECK-NEXT:    ldur q0, [x0, #3]
388; CHECK-NEXT:    ret
389entry:
390  %p = getelementptr inbounds i8, ptr %str, i64 3
391  %0 = load <4 x i32>, ptr %p, align 16
392  ret <4 x i32> %0
393}
394
; fct6: returns the <8 x i16> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into q0 with offset #3.
395define <8 x i16> @fct6(ptr %str) nounwind readonly ssp {
396; CHECK-LABEL: fct6:
397; CHECK:       // %bb.0: // %entry
398; CHECK-NEXT:    ldur q0, [x0, #3]
399; CHECK-NEXT:    ret
400entry:
401  %p = getelementptr inbounds i8, ptr %str, i64 3
402  %0 = load <8 x i16>, ptr %p, align 16
403  ret <8 x i16> %0
404}
405
; fct7: returns the <16 x i8> loaded from %str plus 3 bytes. Expected codegen:
; an unscaled ldur into q0 with offset #3.
406define <16 x i8> @fct7(ptr %str) nounwind readonly ssp {
407; CHECK-LABEL: fct7:
408; CHECK:       // %bb.0: // %entry
409; CHECK-NEXT:    ldur q0, [x0, #3]
410; CHECK-NEXT:    ret
411entry:
412  %p = getelementptr inbounds i8, ptr %str, i64 3
413  %0 = load <16 x i8>, ptr %p, align 16
414  ret <16 x i8> %0
415}
416
; fct8: loads a <1 x i64> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on d0 with
; offsets #3 and #4.
417define void @fct8(ptr %str) nounwind ssp {
418; CHECK-LABEL: fct8:
419; CHECK:       // %bb.0: // %entry
420; CHECK-NEXT:    ldur d0, [x0, #3]
421; CHECK-NEXT:    stur d0, [x0, #4]
422; CHECK-NEXT:    ret
423entry:
424  %p = getelementptr inbounds i8, ptr %str, i64 3
425  %0 = load <1 x i64>, ptr %p, align 8
426  %p2 = getelementptr inbounds i8, ptr %str, i64 4
427  store <1 x i64> %0, ptr %p2, align 8
428  ret void
429}
430
; fct9: loads a <2 x i32> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on d0 with
; offsets #3 and #4.
431define void @fct9(ptr %str) nounwind ssp {
432; CHECK-LABEL: fct9:
433; CHECK:       // %bb.0: // %entry
434; CHECK-NEXT:    ldur d0, [x0, #3]
435; CHECK-NEXT:    stur d0, [x0, #4]
436; CHECK-NEXT:    ret
437entry:
438  %p = getelementptr inbounds i8, ptr %str, i64 3
439  %0 = load <2 x i32>, ptr %p, align 8
440  %p2 = getelementptr inbounds i8, ptr %str, i64 4
441  store <2 x i32> %0, ptr %p2, align 8
442  ret void
443}
444
; fct10: loads a <4 x i16> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on d0 with
; offsets #3 and #4.
445define void @fct10(ptr %str) nounwind ssp {
446; CHECK-LABEL: fct10:
447; CHECK:       // %bb.0: // %entry
448; CHECK-NEXT:    ldur d0, [x0, #3]
449; CHECK-NEXT:    stur d0, [x0, #4]
450; CHECK-NEXT:    ret
451entry:
452  %p = getelementptr inbounds i8, ptr %str, i64 3
453  %0 = load <4 x i16>, ptr %p, align 8
454  %p2 = getelementptr inbounds i8, ptr %str, i64 4
455  store <4 x i16> %0, ptr %p2, align 8
456  ret void
457}
458
; fct11: loads a <8 x i8> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on d0 with
; offsets #3 and #4.
459define void @fct11(ptr %str) nounwind ssp {
460; CHECK-LABEL: fct11:
461; CHECK:       // %bb.0: // %entry
462; CHECK-NEXT:    ldur d0, [x0, #3]
463; CHECK-NEXT:    stur d0, [x0, #4]
464; CHECK-NEXT:    ret
465entry:
466  %p = getelementptr inbounds i8, ptr %str, i64 3
467  %0 = load <8 x i8>, ptr %p, align 8
468  %p2 = getelementptr inbounds i8, ptr %str, i64 4
469  store <8 x i8> %0, ptr %p2, align 8
470  ret void
471}
472
; fct12: loads a <2 x i64> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on q0 with
; offsets #3 and #4.
473define void @fct12(ptr %str) nounwind ssp {
474; CHECK-LABEL: fct12:
475; CHECK:       // %bb.0: // %entry
476; CHECK-NEXT:    ldur q0, [x0, #3]
477; CHECK-NEXT:    stur q0, [x0, #4]
478; CHECK-NEXT:    ret
479entry:
480  %p = getelementptr inbounds i8, ptr %str, i64 3
481  %0 = load <2 x i64>, ptr %p, align 16
482  %p2 = getelementptr inbounds i8, ptr %str, i64 4
483  store <2 x i64> %0, ptr %p2, align 16
484  ret void
485}
486
; fct13: loads a <4 x i32> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on q0 with
; offsets #3 and #4.
487define void @fct13(ptr %str) nounwind ssp {
488; CHECK-LABEL: fct13:
489; CHECK:       // %bb.0: // %entry
490; CHECK-NEXT:    ldur q0, [x0, #3]
491; CHECK-NEXT:    stur q0, [x0, #4]
492; CHECK-NEXT:    ret
493entry:
494  %p = getelementptr inbounds i8, ptr %str, i64 3
495  %0 = load <4 x i32>, ptr %p, align 16
496  %p2 = getelementptr inbounds i8, ptr %str, i64 4
497  store <4 x i32> %0, ptr %p2, align 16
498  ret void
499}
500
; fct14: loads a <8 x i16> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on q0 with
; offsets #3 and #4.
501define void @fct14(ptr %str) nounwind ssp {
502; CHECK-LABEL: fct14:
503; CHECK:       // %bb.0: // %entry
504; CHECK-NEXT:    ldur q0, [x0, #3]
505; CHECK-NEXT:    stur q0, [x0, #4]
506; CHECK-NEXT:    ret
507entry:
508  %p = getelementptr inbounds i8, ptr %str, i64 3
509  %0 = load <8 x i16>, ptr %p, align 16
510  %p2 = getelementptr inbounds i8, ptr %str, i64 4
511  store <8 x i16> %0, ptr %p2, align 16
512  ret void
513}
514
; fct15: loads a <16 x i8> from %str plus 3 bytes and stores it back at %str
; plus 4 bytes. Expected codegen: an unscaled ldur/stur pair on q0 with
; offsets #3 and #4.
515define void @fct15(ptr %str) nounwind ssp {
516; CHECK-LABEL: fct15:
517; CHECK:       // %bb.0: // %entry
518; CHECK-NEXT:    ldur q0, [x0, #3]
519; CHECK-NEXT:    stur q0, [x0, #4]
520; CHECK-NEXT:    ret
521entry:
522  %p = getelementptr inbounds i8, ptr %str, i64 3
523  %0 = load <16 x i8>, ptr %p, align 16
524  %p2 = getelementptr inbounds i8, ptr %str, i64 4
525  store <16 x i8> %0, ptr %p2, align 16
526  ret void
527}
528
529; Check the building of vector from a single loaded value.
530; Part of <rdar://problem/14170854>
531;
532; Single loads with immediate offset.
; fct16: loads one i8 from %sp0 plus 1 byte, inserts it into lane 0 of an
; otherwise undef <8 x i8>, and returns that vector multiplied by itself.
; Expected codegen: the scalar is loaded straight into b0 with an immediate
; offset, followed by mul.8b.
533define <8 x i8> @fct16(ptr nocapture %sp0) {
534; CHECK-LABEL: fct16:
535; CHECK:       // %bb.0: // %entry
536; CHECK-NEXT:    ldr b0, [x0, #1]
537; CHECK-NEXT:    mul.8b v0, v0, v0
538; CHECK-NEXT:    ret
539entry:
540  %addr = getelementptr i8, ptr %sp0, i64 1
541  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
542  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
543  %vmull.i = mul <8 x i8> %vec, %vec
544  ret <8 x i8> %vmull.i
545}
546
; fct17: loads one i8 from %sp0 plus 1 byte, inserts it into lane 0 of an
; otherwise undef <16 x i8>, and returns that vector multiplied by itself.
; Expected codegen: the scalar is loaded straight into b0 with an immediate
; offset, followed by mul.16b.
547define <16 x i8> @fct17(ptr nocapture %sp0) {
548; CHECK-LABEL: fct17:
549; CHECK:       // %bb.0: // %entry
550; CHECK-NEXT:    ldr b0, [x0, #1]
551; CHECK-NEXT:    mul.16b v0, v0, v0
552; CHECK-NEXT:    ret
553entry:
554  %addr = getelementptr i8, ptr %sp0, i64 1
555  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
556  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
557  %vmull.i = mul <16 x i8> %vec, %vec
558  ret <16 x i8> %vmull.i
559}
560
; fct18: loads the i16 at element index 1 of %sp0, inserts it into lane 0 of
; an otherwise undef <4 x i16>, and returns that vector multiplied by itself.
; Expected codegen: the scalar is loaded straight into h0 at byte offset #2,
; followed by mul.4h.
561define <4 x i16> @fct18(ptr nocapture %sp0) {
562; CHECK-LABEL: fct18:
563; CHECK:       // %bb.0: // %entry
564; CHECK-NEXT:    ldr h0, [x0, #2]
565; CHECK-NEXT:    mul.4h v0, v0, v0
566; CHECK-NEXT:    ret
567entry:
568  %addr = getelementptr i16, ptr %sp0, i64 1
569  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
570  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
571  %vmull.i = mul <4 x i16> %vec, %vec
572  ret <4 x i16> %vmull.i
573}
574
; fct19: loads the i16 at element index 1 of %sp0, inserts it into lane 0 of
; an otherwise undef <8 x i16>, and returns that vector multiplied by itself.
; Expected codegen: the scalar is loaded straight into h0 at byte offset #2,
; followed by mul.8h.
575define <8 x i16> @fct19(ptr nocapture %sp0) {
576; CHECK-LABEL: fct19:
577; CHECK:       // %bb.0: // %entry
578; CHECK-NEXT:    ldr h0, [x0, #2]
579; CHECK-NEXT:    mul.8h v0, v0, v0
580; CHECK-NEXT:    ret
581entry:
582  %addr = getelementptr i16, ptr %sp0, i64 1
583  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
584  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
585  %vmull.i = mul <8 x i16> %vec, %vec
586  ret <8 x i16> %vmull.i
587}
588
; fct20: loads the i32 at element index 1 of %sp0, inserts it into lane 0 of
; an otherwise undef <2 x i32>, and returns that vector multiplied by itself.
; Expected codegen: the scalar is loaded straight into s0 at byte offset #4,
; followed by mul.2s.
589define <2 x i32> @fct20(ptr nocapture %sp0) {
590; CHECK-LABEL: fct20:
591; CHECK:       // %bb.0: // %entry
592; CHECK-NEXT:    ldr s0, [x0, #4]
593; CHECK-NEXT:    mul.2s v0, v0, v0
594; CHECK-NEXT:    ret
595entry:
596  %addr = getelementptr i32, ptr %sp0, i64 1
597  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
598  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
599  %vmull.i = mul <2 x i32> %vec, %vec
600  ret <2 x i32> %vmull.i
601}
602
; fct21: loads the i32 at element index 1 of %sp0, inserts it into lane 0 of
; an otherwise undef <4 x i32>, and returns that vector multiplied by itself.
; Expected codegen: the scalar is loaded straight into s0 at byte offset #4,
; followed by mul.4s.
603define <4 x i32> @fct21(ptr nocapture %sp0) {
604; CHECK-LABEL: fct21:
605; CHECK:       // %bb.0: // %entry
606; CHECK-NEXT:    ldr s0, [x0, #4]
607; CHECK-NEXT:    mul.4s v0, v0, v0
608; CHECK-NEXT:    ret
609entry:
610  %addr = getelementptr i32, ptr %sp0, i64 1
611  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
612  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
613  %vmull.i = mul <4 x i32> %vec, %vec
614  ret <4 x i32> %vmull.i
615}
616
; fct22: loads the i64 at element index 1 of %sp0 and returns it as the only
; lane of a <1 x i64>. No multiply is performed here. Expected codegen: a
; single ldr into d0 at byte offset #8.
617define <1 x i64> @fct22(ptr nocapture %sp0) {
618; CHECK-LABEL: fct22:
619; CHECK:       // %bb.0: // %entry
620; CHECK-NEXT:    ldr d0, [x0, #8]
621; CHECK-NEXT:    ret
622entry:
623  %addr = getelementptr i64, ptr %sp0, i64 1
624  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
625  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
626   ret <1 x i64> %vec
627}
628
; fct23: loads the i64 at element index 1 of %sp0 and returns it in lane 0 of
; a <2 x i64> whose other lane is undef. No multiply is performed here.
; Expected codegen: a single ldr into d0 at byte offset #8.
629define <2 x i64> @fct23(ptr nocapture %sp0) {
630; CHECK-LABEL: fct23:
631; CHECK:       // %bb.0: // %entry
632; CHECK-NEXT:    ldr d0, [x0, #8]
633; CHECK-NEXT:    ret
634entry:
635  %addr = getelementptr i64, ptr %sp0, i64 1
636  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
637  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
638  ret <2 x i64> %vec
639}
640
641;
642; Single loads with register offset.
; fct24: loads the i8 at element index %offset of %sp0, inserts it into lane 0
; of an otherwise undef <8 x i8>, and returns that vector multiplied by
; itself. Expected codegen: the scalar is loaded straight into b0 using x1 as
; an unshifted register offset, followed by mul.8b.
643define <8 x i8> @fct24(ptr nocapture %sp0, i64 %offset) {
644; CHECK-LABEL: fct24:
645; CHECK:       // %bb.0: // %entry
646; CHECK-NEXT:    ldr b0, [x0, x1]
647; CHECK-NEXT:    mul.8b v0, v0, v0
648; CHECK-NEXT:    ret
649entry:
650  %addr = getelementptr i8, ptr %sp0, i64 %offset
651  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
652  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
653  %vmull.i = mul <8 x i8> %vec, %vec
654  ret <8 x i8> %vmull.i
655}
656
; fct25: loads the i8 at element index %offset of %sp0, inserts it into lane 0
; of an otherwise undef <16 x i8>, and returns that vector multiplied by
; itself. Expected codegen: the scalar is loaded straight into b0 using x1 as
; an unshifted register offset, followed by mul.16b.
657define <16 x i8> @fct25(ptr nocapture %sp0, i64 %offset) {
658; CHECK-LABEL: fct25:
659; CHECK:       // %bb.0: // %entry
660; CHECK-NEXT:    ldr b0, [x0, x1]
661; CHECK-NEXT:    mul.16b v0, v0, v0
662; CHECK-NEXT:    ret
663entry:
664  %addr = getelementptr i8, ptr %sp0, i64 %offset
665  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
666  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
667  %vmull.i = mul <16 x i8> %vec, %vec
668  ret <16 x i8> %vmull.i
669}
670
; fct26: loads the i16 at element index %offset of %sp0, inserts it into lane
; 0 of an otherwise undef <4 x i16>, and returns that vector multiplied by
; itself. Expected codegen: the scalar is loaded straight into h0 with x1
; scaled by "lsl #1" in the addressing mode, followed by mul.4h.
671define <4 x i16> @fct26(ptr nocapture %sp0, i64 %offset) {
672; CHECK-LABEL: fct26:
673; CHECK:       // %bb.0: // %entry
674; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
675; CHECK-NEXT:    mul.4h v0, v0, v0
676; CHECK-NEXT:    ret
677entry:
678  %addr = getelementptr i16, ptr %sp0, i64 %offset
679  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
680  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
681  %vmull.i = mul <4 x i16> %vec, %vec
682  ret <4 x i16> %vmull.i
683}
684
; fct27: loads the i16 at element index %offset of %sp0, inserts it into lane
; 0 of an otherwise undef <8 x i16>, and returns that vector multiplied by
; itself. Expected codegen: the scalar is loaded straight into h0 with x1
; scaled by "lsl #1" in the addressing mode, followed by mul.8h.
685define <8 x i16> @fct27(ptr nocapture %sp0, i64 %offset) {
686; CHECK-LABEL: fct27:
687; CHECK:       // %bb.0: // %entry
688; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
689; CHECK-NEXT:    mul.8h v0, v0, v0
690; CHECK-NEXT:    ret
691entry:
692  %addr = getelementptr i16, ptr %sp0, i64 %offset
693  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
694  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
695  %vmull.i = mul <8 x i16> %vec, %vec
696  ret <8 x i16> %vmull.i
697}
698
; fct28: loads the i32 at element index %offset of %sp0, inserts it into lane
; 0 of an otherwise undef <2 x i32>, and returns that vector multiplied by
; itself. Expected codegen: the scalar is loaded straight into s0 with x1
; scaled by "lsl #2" in the addressing mode, followed by mul.2s.
699define <2 x i32> @fct28(ptr nocapture %sp0, i64 %offset) {
700; CHECK-LABEL: fct28:
701; CHECK:       // %bb.0: // %entry
702; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
703; CHECK-NEXT:    mul.2s v0, v0, v0
704; CHECK-NEXT:    ret
705entry:
706  %addr = getelementptr i32, ptr %sp0, i64 %offset
707  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
708  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
709  %vmull.i = mul <2 x i32> %vec, %vec
710  ret <2 x i32> %vmull.i
711}
712
; fct29: loads the i32 at element index %offset of %sp0, inserts it into lane
; 0 of an otherwise undef <4 x i32>, and returns that vector multiplied by
; itself. Expected codegen: the scalar is loaded straight into s0 with x1
; scaled by "lsl #2" in the addressing mode, followed by mul.4s.
713define <4 x i32> @fct29(ptr nocapture %sp0, i64 %offset) {
714; CHECK-LABEL: fct29:
715; CHECK:       // %bb.0: // %entry
716; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
717; CHECK-NEXT:    mul.4s v0, v0, v0
718; CHECK-NEXT:    ret
719entry:
720  %addr = getelementptr i32, ptr %sp0, i64 %offset
721  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
722  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
723  %vmull.i = mul <4 x i32> %vec, %vec
724  ret <4 x i32> %vmull.i
725}
726
; fct30: loads the i64 at element index %offset of %sp0 and returns it as the
; only lane of a <1 x i64>. No multiply is performed here. Expected codegen: a
; single ldr into d0 with x1 scaled by "lsl #3" in the addressing mode.
727define <1 x i64> @fct30(ptr nocapture %sp0, i64 %offset) {
728; CHECK-LABEL: fct30:
729; CHECK:       // %bb.0: // %entry
730; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
731; CHECK-NEXT:    ret
732entry:
733  %addr = getelementptr i64, ptr %sp0, i64 %offset
734  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
735  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
736   ret <1 x i64> %vec
737}
738
; fct31: loads the i64 at element index %offset of %sp0 and returns it in lane
; 0 of a <2 x i64> whose other lane is undef. No multiply is performed here.
; Expected codegen: a single ldr into d0 with x1 scaled by "lsl #3" in the
; addressing mode.
739define <2 x i64> @fct31(ptr nocapture %sp0, i64 %offset) {
740; CHECK-LABEL: fct31:
741; CHECK:       // %bb.0: // %entry
742; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
743; CHECK-NEXT:    ret
744entry:
745  %addr = getelementptr i64, ptr %sp0, i64 %offset
746  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
747  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
748  ret <2 x i64> %vec
749}
750