xref: /llvm-project/llvm/test/CodeGen/AArch64/nontemporal.ll (revision 1610311a95b1a98f47e9242d67141c5b3e44a138)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple aarch64 | FileCheck %s --check-prefixes=CHECK-LE
3; RUN: llc < %s -mtriple aarch64_be | FileCheck %s --check-prefixes=CHECK-BE
4
; 256-bit nontemporal store of <4 x i64> with align 1.
; The little-endian run expects a single `stnp` of the q0/q1 pair; the
; big-endian run expects an ordinary `stp` of the same pair.
5define void @test_stnp_v4i64(ptr %p, <4 x i64> %v) #0 {
6; CHECK-LE-LABEL: test_stnp_v4i64:
7; CHECK-LE:       // %bb.0:
8; CHECK-LE-NEXT:    stnp q0, q1, [x0]
9; CHECK-LE-NEXT:    ret
10;
11; CHECK-BE-LABEL: test_stnp_v4i64:
12; CHECK-BE:       // %bb.0:
13; CHECK-BE-NEXT:    stp q0, q1, [x0]
14; CHECK-BE-NEXT:    ret
15  store <4 x i64> %v, ptr %p, align 1, !nontemporal !0
16  ret void
17}
18
; 128-bit nontemporal store of <4 x i32> with align 1.
; The little-endian run expects the upper 64 bits of v0 to be copied into d1
; and the value written as a d0/d1 `stnp`; the big-endian run expects a plain
; `str q0`.
19define void @test_stnp_v4i32(ptr %p, <4 x i32> %v) #0 {
20; CHECK-LE-LABEL: test_stnp_v4i32:
21; CHECK-LE:       // %bb.0:
22; CHECK-LE-NEXT:    mov d1, v0.d[1]
23; CHECK-LE-NEXT:    stnp d0, d1, [x0]
24; CHECK-LE-NEXT:    ret
25;
26; CHECK-BE-LABEL: test_stnp_v4i32:
27; CHECK-BE:       // %bb.0:
28; CHECK-BE-NEXT:    str q0, [x0]
29; CHECK-BE-NEXT:    ret
30  store <4 x i32> %v, ptr %p, align 1, !nontemporal !0
31  ret void
32}
33
; 128-bit nontemporal store of <8 x i16> with align 1.
; Expected output matches the other 128-bit cases: little-endian splits v0 into
; d0/d1 and uses `stnp`; big-endian uses a plain `str q0`.
34define void @test_stnp_v8i16(ptr %p, <8 x i16> %v) #0 {
35; CHECK-LE-LABEL: test_stnp_v8i16:
36; CHECK-LE:       // %bb.0:
37; CHECK-LE-NEXT:    mov d1, v0.d[1]
38; CHECK-LE-NEXT:    stnp d0, d1, [x0]
39; CHECK-LE-NEXT:    ret
40;
41; CHECK-BE-LABEL: test_stnp_v8i16:
42; CHECK-BE:       // %bb.0:
43; CHECK-BE-NEXT:    str q0, [x0]
44; CHECK-BE-NEXT:    ret
45  store <8 x i16> %v, ptr %p, align 1, !nontemporal !0
46  ret void
47}
48
; 128-bit nontemporal store of <16 x i8> with align 1.
; Little-endian expects the d0/d1 split followed by `stnp`; big-endian expects
; a plain `str q0`.
49define void @test_stnp_v16i8(ptr %p, <16 x i8> %v) #0 {
50; CHECK-LE-LABEL: test_stnp_v16i8:
51; CHECK-LE:       // %bb.0:
52; CHECK-LE-NEXT:    mov d1, v0.d[1]
53; CHECK-LE-NEXT:    stnp d0, d1, [x0]
54; CHECK-LE-NEXT:    ret
55;
56; CHECK-BE-LABEL: test_stnp_v16i8:
57; CHECK-BE:       // %bb.0:
58; CHECK-BE-NEXT:    str q0, [x0]
59; CHECK-BE-NEXT:    ret
60  store <16 x i8> %v, ptr %p, align 1, !nontemporal !0
61  ret void
62}
63
; 64-bit nontemporal store of <2 x i32> with align 1.
; The little-endian run expects lane 1 to be moved into s1 and the value
; written as an s0/s1 `stnp`; the big-endian run expects a plain `str d0`.
64define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
65; CHECK-LE-LABEL: test_stnp_v2i32:
66; CHECK-LE:       // %bb.0:
67; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
68; CHECK-LE-NEXT:    mov s1, v0.s[1]
69; CHECK-LE-NEXT:    stnp s0, s1, [x0]
70; CHECK-LE-NEXT:    ret
71;
72; CHECK-BE-LABEL: test_stnp_v2i32:
73; CHECK-BE:       // %bb.0:
74; CHECK-BE-NEXT:    str d0, [x0]
75; CHECK-BE-NEXT:    ret
76  store <2 x i32> %v, ptr %p, align 1, !nontemporal !0
77  ret void
78}
79
; 64-bit nontemporal store of <4 x i16> with align 1.
; Little-endian expects the 32-bit halves in s0/s1 stored with `stnp`;
; big-endian expects a plain `str d0`.
80define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
81; CHECK-LE-LABEL: test_stnp_v4i16:
82; CHECK-LE:       // %bb.0:
83; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
84; CHECK-LE-NEXT:    mov s1, v0.s[1]
85; CHECK-LE-NEXT:    stnp s0, s1, [x0]
86; CHECK-LE-NEXT:    ret
87;
88; CHECK-BE-LABEL: test_stnp_v4i16:
89; CHECK-BE:       // %bb.0:
90; CHECK-BE-NEXT:    str d0, [x0]
91; CHECK-BE-NEXT:    ret
92  store <4 x i16> %v, ptr %p, align 1, !nontemporal !0
93  ret void
94}
95
; 64-bit nontemporal store of <8 x i8> with align 1.
; Little-endian expects the 32-bit halves in s0/s1 stored with `stnp`;
; big-endian expects a plain `str d0`.
96define void @test_stnp_v8i8(ptr %p, <8 x i8> %v) #0 {
97; CHECK-LE-LABEL: test_stnp_v8i8:
98; CHECK-LE:       // %bb.0:
99; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
100; CHECK-LE-NEXT:    mov s1, v0.s[1]
101; CHECK-LE-NEXT:    stnp s0, s1, [x0]
102; CHECK-LE-NEXT:    ret
103;
104; CHECK-BE-LABEL: test_stnp_v8i8:
105; CHECK-BE:       // %bb.0:
106; CHECK-BE-NEXT:    str d0, [x0]
107; CHECK-BE-NEXT:    ret
108  store <8 x i8> %v, ptr %p, align 1, !nontemporal !0
109  ret void
110}
111
; 128-bit nontemporal store of <2 x double> with align 1.
; Little-endian expects the second lane in d1 and a d0/d1 `stnp`; big-endian
; expects a plain `str q0`.
112define void @test_stnp_v2f64(ptr %p, <2 x double> %v) #0 {
113; CHECK-LE-LABEL: test_stnp_v2f64:
114; CHECK-LE:       // %bb.0:
115; CHECK-LE-NEXT:    mov d1, v0.d[1]
116; CHECK-LE-NEXT:    stnp d0, d1, [x0]
117; CHECK-LE-NEXT:    ret
118;
119; CHECK-BE-LABEL: test_stnp_v2f64:
120; CHECK-BE:       // %bb.0:
121; CHECK-BE-NEXT:    str q0, [x0]
122; CHECK-BE-NEXT:    ret
123  store <2 x double> %v, ptr %p, align 1, !nontemporal !0
124  ret void
125}
126
; 128-bit nontemporal store of <4 x float> with align 1.
; Little-endian expects the d0/d1 split followed by `stnp`; big-endian expects
; a plain `str q0`.
127define void @test_stnp_v4f32(ptr %p, <4 x float> %v) #0 {
128; CHECK-LE-LABEL: test_stnp_v4f32:
129; CHECK-LE:       // %bb.0:
130; CHECK-LE-NEXT:    mov d1, v0.d[1]
131; CHECK-LE-NEXT:    stnp d0, d1, [x0]
132; CHECK-LE-NEXT:    ret
133;
134; CHECK-BE-LABEL: test_stnp_v4f32:
135; CHECK-BE:       // %bb.0:
136; CHECK-BE-NEXT:    str q0, [x0]
137; CHECK-BE-NEXT:    ret
138  store <4 x float> %v, ptr %p, align 1, !nontemporal !0
139  ret void
140}
141
; 64-bit nontemporal store of <2 x float> with align 1.
; Little-endian expects lane 1 in s1 and an s0/s1 `stnp`; big-endian expects a
; plain `str d0`.
142define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
143; CHECK-LE-LABEL: test_stnp_v2f32:
144; CHECK-LE:       // %bb.0:
145; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
146; CHECK-LE-NEXT:    mov s1, v0.s[1]
147; CHECK-LE-NEXT:    stnp s0, s1, [x0]
148; CHECK-LE-NEXT:    ret
149;
150; CHECK-BE-LABEL: test_stnp_v2f32:
151; CHECK-BE:       // %bb.0:
152; CHECK-BE-NEXT:    str d0, [x0]
153; CHECK-BE-NEXT:    ret
154  store <2 x float> %v, ptr %p, align 1, !nontemporal !0
155  ret void
156}
157
; 64-bit nontemporal store of a single-element <1 x double> with align 1.
; Even with one element, the little-endian run expects the value to be split
; into two 32-bit halves (s0/s1) and stored with `stnp`; the big-endian run
; expects a plain `str d0`.
158define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
159; CHECK-LE-LABEL: test_stnp_v1f64:
160; CHECK-LE:       // %bb.0:
161; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
162; CHECK-LE-NEXT:    mov s1, v0.s[1]
163; CHECK-LE-NEXT:    stnp s0, s1, [x0]
164; CHECK-LE-NEXT:    ret
165;
166; CHECK-BE-LABEL: test_stnp_v1f64:
167; CHECK-BE:       // %bb.0:
168; CHECK-BE-NEXT:    str d0, [x0]
169; CHECK-BE-NEXT:    ret
170  store <1 x double> %v, ptr %p, align 1, !nontemporal !0
171  ret void
172}
173
; 64-bit nontemporal store of a single-element <1 x i64> with align 1.
; The value arrives in d0 (a vector register), so the little-endian run expects
; the same s0/s1 split and `stnp` as the other 64-bit vector cases; the
; big-endian run expects a plain `str d0`.
174define void @test_stnp_v1i64(ptr %p, <1 x i64> %v) #0 {
175; CHECK-LE-LABEL: test_stnp_v1i64:
176; CHECK-LE:       // %bb.0:
177; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
178; CHECK-LE-NEXT:    mov s1, v0.s[1]
179; CHECK-LE-NEXT:    stnp s0, s1, [x0]
180; CHECK-LE-NEXT:    ret
181;
182; CHECK-BE-LABEL: test_stnp_v1i64:
183; CHECK-BE:       // %bb.0:
184; CHECK-BE-NEXT:    str d0, [x0]
185; CHECK-BE-NEXT:    ret
186  store <1 x i64> %v, ptr %p, align 1, !nontemporal !0
187  ret void
188}
189
; Nontemporal store of a scalar i64 held in a general-purpose register (x1).
; The little-endian run expects the high 32 bits to be shifted down into w8 and
; the two halves written as a w1/w8 `stnp`; the big-endian run expects a plain
; `str x1`.
190define void @test_stnp_i64(ptr %p, i64 %v) #0 {
191; CHECK-LE-LABEL: test_stnp_i64:
192; CHECK-LE:       // %bb.0:
193; CHECK-LE-NEXT:    lsr x8, x1, #32
194; CHECK-LE-NEXT:    stnp w1, w8, [x0]
195; CHECK-LE-NEXT:    ret
196;
197; CHECK-BE-LABEL: test_stnp_i64:
198; CHECK-BE:       // %bb.0:
199; CHECK-BE-NEXT:    str x1, [x0]
200; CHECK-BE-NEXT:    ret
201  store i64 %v, ptr %p, align 1, !nontemporal !0
202  ret void
203}
204
205
; <2 x double> nontemporal store one element past %p (+16 bytes).
; The little-endian run expects the +16 offset to be folded into the `stnp`
; immediate; the big-endian run expects `str q0` with the same immediate.
206define void @test_stnp_v2f64_offset(ptr %p, <2 x double> %v) #0 {
207; CHECK-LE-LABEL: test_stnp_v2f64_offset:
208; CHECK-LE:       // %bb.0:
209; CHECK-LE-NEXT:    mov d1, v0.d[1]
210; CHECK-LE-NEXT:    stnp d0, d1, [x0, #16]
211; CHECK-LE-NEXT:    ret
212;
213; CHECK-BE-LABEL: test_stnp_v2f64_offset:
214; CHECK-BE:       // %bb.0:
215; CHECK-BE-NEXT:    str q0, [x0, #16]
216; CHECK-BE-NEXT:    ret
217  %tmp0 = getelementptr <2 x double>, ptr %p, i32 1
218  store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0
219  ret void
220}
221
; <2 x double> nontemporal store one element before %p (-16 bytes).
; The little-endian run expects the negative offset to be folded into the
; `stnp` immediate; the big-endian run expects the unscaled `stur q0` form.
222define void @test_stnp_v2f64_offset_neg(ptr %p, <2 x double> %v) #0 {
223; CHECK-LE-LABEL: test_stnp_v2f64_offset_neg:
224; CHECK-LE:       // %bb.0:
225; CHECK-LE-NEXT:    mov d1, v0.d[1]
226; CHECK-LE-NEXT:    stnp d0, d1, [x0, #-16]
227; CHECK-LE-NEXT:    ret
228;
229; CHECK-BE-LABEL: test_stnp_v2f64_offset_neg:
230; CHECK-BE:       // %bb.0:
231; CHECK-BE-NEXT:    stur q0, [x0, #-16]
232; CHECK-BE-NEXT:    ret
233  %tmp0 = getelementptr <2 x double>, ptr %p, i32 -1
234  store <2 x double> %v, ptr %tmp0, align 1, !nontemporal !0
235  ret void
236}
237
; <2 x float> nontemporal store one element past %p (+8 bytes).
; The little-endian run expects the +8 offset to be folded into the s0/s1
; `stnp`; the big-endian run expects `str d0` with the same immediate.
238define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
239; CHECK-LE-LABEL: test_stnp_v2f32_offset:
240; CHECK-LE:       // %bb.0:
241; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
242; CHECK-LE-NEXT:    mov s1, v0.s[1]
243; CHECK-LE-NEXT:    stnp s0, s1, [x0, #8]
244; CHECK-LE-NEXT:    ret
245;
246; CHECK-BE-LABEL: test_stnp_v2f32_offset:
247; CHECK-BE:       // %bb.0:
248; CHECK-BE-NEXT:    str d0, [x0, #8]
249; CHECK-BE-NEXT:    ret
250  %tmp0 = getelementptr <2 x float>, ptr %p, i32 1
251  store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
252  ret void
253}
254
; <2 x float> nontemporal store one element before %p (-8 bytes).
; The little-endian run expects the negative offset to be folded into the
; s0/s1 `stnp`; the big-endian run expects the unscaled `stur d0` form.
255define void @test_stnp_v2f32_offset_neg(ptr %p, <2 x float> %v) #0 {
256; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg:
257; CHECK-LE:       // %bb.0:
258; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
259; CHECK-LE-NEXT:    mov s1, v0.s[1]
260; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-8]
261; CHECK-LE-NEXT:    ret
262;
263; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg:
264; CHECK-BE:       // %bb.0:
265; CHECK-BE-NEXT:    stur d0, [x0, #-8]
266; CHECK-BE-NEXT:    ret
267  %tmp0 = getelementptr <2 x float>, ptr %p, i32 -1
268  store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
269  ret void
270}
271
; Scalar i64 nontemporal store one element past %p (+8 bytes).
; The little-endian run expects the value split into w1/w8 and the +8 offset
; folded into `stnp`; the big-endian run expects `str x1` with the same
; immediate.
272define void @test_stnp_i64_offset(ptr %p, i64 %v) #0 {
273; CHECK-LE-LABEL: test_stnp_i64_offset:
274; CHECK-LE:       // %bb.0:
275; CHECK-LE-NEXT:    lsr x8, x1, #32
276; CHECK-LE-NEXT:    stnp w1, w8, [x0, #8]
277; CHECK-LE-NEXT:    ret
278;
279; CHECK-BE-LABEL: test_stnp_i64_offset:
280; CHECK-BE:       // %bb.0:
281; CHECK-BE-NEXT:    str x1, [x0, #8]
282; CHECK-BE-NEXT:    ret
283  %tmp0 = getelementptr i64, ptr %p, i32 1
284  store i64 %v, ptr %tmp0, align 1, !nontemporal !0
285  ret void
286}
287
; Scalar i64 nontemporal store one element before %p (-8 bytes).
; The little-endian run expects the value split into w1/w8 and the -8 offset
; folded into `stnp`; the big-endian run expects the unscaled `stur x1` form.
288define void @test_stnp_i64_offset_neg(ptr %p, i64 %v) #0 {
289; CHECK-LE-LABEL: test_stnp_i64_offset_neg:
290; CHECK-LE:       // %bb.0:
291; CHECK-LE-NEXT:    lsr x8, x1, #32
292; CHECK-LE-NEXT:    stnp w1, w8, [x0, #-8]
293; CHECK-LE-NEXT:    ret
294;
295; CHECK-BE-LABEL: test_stnp_i64_offset_neg:
296; CHECK-BE:       // %bb.0:
297; CHECK-BE-NEXT:    stur x1, [x0, #-8]
298; CHECK-BE-NEXT:    ret
299  %tmp0 = getelementptr i64, ptr %p, i32 -1
300  store i64 %v, ptr %tmp0, align 1, !nontemporal !0
301  ret void
302}
303
; <4 x float> nontemporal store at byte offset +4 from %p.
; The little-endian run expects the offset NOT to be folded: the address is
; computed with a separate `add` and `stnp` uses no immediate.
; NOTE(review): presumably because the d-register `stnp` immediate is scaled by
; 8 and 4 is not a multiple of it -- confirm against the ISA reference.
; The big-endian run expects an unscaled `stur q0` with the offset folded.
304define void @test_stnp_v4f32_invalid_offset_4(ptr %p, <4 x float> %v) #0 {
305; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_4:
306; CHECK-LE:       // %bb.0:
307; CHECK-LE-NEXT:    mov d1, v0.d[1]
308; CHECK-LE-NEXT:    add x8, x0, #4
309; CHECK-LE-NEXT:    stnp d0, d1, [x8]
310; CHECK-LE-NEXT:    ret
311;
312; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_4:
313; CHECK-BE:       // %bb.0:
314; CHECK-BE-NEXT:    stur q0, [x0, #4]
315; CHECK-BE-NEXT:    ret
316  %tmp0 = getelementptr i8, ptr %p, i32 4
317  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
318  ret void
319}
320
; <4 x float> nontemporal store at byte offset -4 from %p.
; The little-endian run expects the address to be formed with a separate `sub`
; and `stnp` to use no immediate; the big-endian run expects an unscaled
; `stur q0` with the offset folded.
321define void @test_stnp_v4f32_invalid_offset_neg_4(ptr %p, <4 x float> %v) #0 {
322; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
323; CHECK-LE:       // %bb.0:
324; CHECK-LE-NEXT:    mov d1, v0.d[1]
325; CHECK-LE-NEXT:    sub x8, x0, #4
326; CHECK-LE-NEXT:    stnp d0, d1, [x8]
327; CHECK-LE-NEXT:    ret
328;
329; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
330; CHECK-BE:       // %bb.0:
331; CHECK-BE-NEXT:    stur q0, [x0, #-4]
332; CHECK-BE-NEXT:    ret
333  %tmp0 = getelementptr i8, ptr %p, i32 -4
334  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
335  ret void
336}
337
; <4 x float> nontemporal store at byte offset +512 from %p.
; The little-endian run expects the offset to be materialised with `add`
; rather than folded into the d0/d1 `stnp` (contrast with the +504 case, which
; is folded). The big-endian run expects `str q0` with the offset folded.
338define void @test_stnp_v4f32_invalid_offset_512(ptr %p, <4 x float> %v) #0 {
339; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_512:
340; CHECK-LE:       // %bb.0:
341; CHECK-LE-NEXT:    mov d1, v0.d[1]
342; CHECK-LE-NEXT:    add x8, x0, #512
343; CHECK-LE-NEXT:    stnp d0, d1, [x8]
344; CHECK-LE-NEXT:    ret
345;
346; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_512:
347; CHECK-BE:       // %bb.0:
348; CHECK-BE-NEXT:    str q0, [x0, #512]
349; CHECK-BE-NEXT:    ret
350  %tmp0 = getelementptr i8, ptr %p, i32 512
351  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
352  ret void
353}
354
; <4 x float> nontemporal store at byte offset +504 from %p.
; The little-endian run expects +504 to be folded directly into the d0/d1
; `stnp` immediate. The big-endian run expects the address to be formed with
; `add` before a plain `str q0`.
355define void @test_stnp_v4f32_offset_504(ptr %p, <4 x float> %v) #0 {
356; CHECK-LE-LABEL: test_stnp_v4f32_offset_504:
357; CHECK-LE:       // %bb.0:
358; CHECK-LE-NEXT:    mov d1, v0.d[1]
359; CHECK-LE-NEXT:    stnp d0, d1, [x0, #504]
360; CHECK-LE-NEXT:    ret
361;
362; CHECK-BE-LABEL: test_stnp_v4f32_offset_504:
363; CHECK-BE:       // %bb.0:
364; CHECK-BE-NEXT:    add x8, x0, #504
365; CHECK-BE-NEXT:    str q0, [x8]
366; CHECK-BE-NEXT:    ret
367  %tmp0 = getelementptr i8, ptr %p, i32 504
368  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
369  ret void
370}
371
; <4 x float> nontemporal store at byte offset +508 from %p.
; Neither run is expected to fold the offset: both form the address with `add`
; first. Little-endian then uses a d0/d1 `stnp`; big-endian a plain `str q0`.
372define void @test_stnp_v4f32_invalid_offset_508(ptr %p, <4 x float> %v) #0 {
373; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_508:
374; CHECK-LE:       // %bb.0:
375; CHECK-LE-NEXT:    mov d1, v0.d[1]
376; CHECK-LE-NEXT:    add x8, x0, #508
377; CHECK-LE-NEXT:    stnp d0, d1, [x8]
378; CHECK-LE-NEXT:    ret
379;
380; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_508:
381; CHECK-BE:       // %bb.0:
382; CHECK-BE-NEXT:    add x8, x0, #508
383; CHECK-BE-NEXT:    str q0, [x8]
384; CHECK-BE-NEXT:    ret
385  %tmp0 = getelementptr i8, ptr %p, i32 508
386  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
387  ret void
388}
389
; <4 x float> nontemporal store at byte offset -520 from %p.
; Neither run is expected to fold the offset: both form the address with `sub`
; first (contrast with the -512 case, which little-endian folds).
390define void @test_stnp_v4f32_invalid_offset_neg_520(ptr %p, <4 x float> %v) #0 {
391; CHECK-LE-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
392; CHECK-LE:       // %bb.0:
393; CHECK-LE-NEXT:    mov d1, v0.d[1]
394; CHECK-LE-NEXT:    sub x8, x0, #520
395; CHECK-LE-NEXT:    stnp d0, d1, [x8]
396; CHECK-LE-NEXT:    ret
397;
398; CHECK-BE-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
399; CHECK-BE:       // %bb.0:
400; CHECK-BE-NEXT:    sub x8, x0, #520
401; CHECK-BE-NEXT:    str q0, [x8]
402; CHECK-BE-NEXT:    ret
403  %tmp0 = getelementptr i8, ptr %p, i32 -520
404  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
405  ret void
406}
407
; <4 x float> nontemporal store at byte offset -512 from %p.
; The little-endian run expects -512 to be folded directly into the d0/d1
; `stnp` immediate. The big-endian run expects the address to be formed with
; `sub` before a plain `str q0`.
408define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
409; CHECK-LE-LABEL: test_stnp_v4f32_offset_neg_512:
410; CHECK-LE:       // %bb.0:
411; CHECK-LE-NEXT:    mov d1, v0.d[1]
412; CHECK-LE-NEXT:    stnp d0, d1, [x0, #-512]
413; CHECK-LE-NEXT:    ret
414;
415; CHECK-BE-LABEL: test_stnp_v4f32_offset_neg_512:
416; CHECK-BE:       // %bb.0:
417; CHECK-BE-NEXT:    sub x8, x0, #512
418; CHECK-BE-NEXT:    str q0, [x8]
419; CHECK-BE-NEXT:    ret
420  %tmp0 = getelementptr i8, ptr %p, i32 -512
421  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
422  ret void
423}
424
425
; <2 x float> nontemporal store at byte offset +256 from %p.
; The little-endian run expects the offset to be materialised with `add`
; rather than folded into the s0/s1 `stnp` (contrast with the +252 case, which
; is folded). The big-endian run expects `str d0` with the offset folded.
426define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
427; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_256:
428; CHECK-LE:       // %bb.0:
429; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
430; CHECK-LE-NEXT:    mov s1, v0.s[1]
431; CHECK-LE-NEXT:    add x8, x0, #256
432; CHECK-LE-NEXT:    stnp s0, s1, [x8]
433; CHECK-LE-NEXT:    ret
434;
435; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_256:
436; CHECK-BE:       // %bb.0:
437; CHECK-BE-NEXT:    str d0, [x0, #256]
438; CHECK-BE-NEXT:    ret
439  %tmp0 = getelementptr i8, ptr %p, i32 256
440  store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
441  ret void
442}
443
; <2 x float> nontemporal store at byte offset +252 from %p.
; The little-endian run expects +252 to be folded directly into the s0/s1
; `stnp` immediate; the big-endian run expects an unscaled `stur d0` with the
; offset folded.
444define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
445; CHECK-LE-LABEL: test_stnp_v2f32_offset_252:
446; CHECK-LE:       // %bb.0:
447; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
448; CHECK-LE-NEXT:    mov s1, v0.s[1]
449; CHECK-LE-NEXT:    stnp s0, s1, [x0, #252]
450; CHECK-LE-NEXT:    ret
451;
452; CHECK-BE-LABEL: test_stnp_v2f32_offset_252:
453; CHECK-BE:       // %bb.0:
454; CHECK-BE-NEXT:    stur d0, [x0, #252]
455; CHECK-BE-NEXT:    ret
456  %tmp0 = getelementptr i8, ptr %p, i32 252
457  store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
458  ret void
459}
460
; <2 x float> nontemporal store at byte offset -260 from %p.
; Neither run is expected to fold the offset: both form the address with `sub`
; first (contrast with the -256 case, which both runs fold).
461define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
462; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
463; CHECK-LE:       // %bb.0:
464; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
465; CHECK-LE-NEXT:    mov s1, v0.s[1]
466; CHECK-LE-NEXT:    sub x8, x0, #260
467; CHECK-LE-NEXT:    stnp s0, s1, [x8]
468; CHECK-LE-NEXT:    ret
469;
470; CHECK-BE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
471; CHECK-BE:       // %bb.0:
472; CHECK-BE-NEXT:    sub x8, x0, #260
473; CHECK-BE-NEXT:    str d0, [x8]
474; CHECK-BE-NEXT:    ret
475  %tmp0 = getelementptr i8, ptr %p, i32 -260
476  store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
477  ret void
478}
479
; <2 x float> nontemporal store at byte offset -256 from %p.
; The little-endian run expects -256 to be folded directly into the s0/s1
; `stnp` immediate; the big-endian run expects an unscaled `stur d0` with the
; offset folded.
480define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
481; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg_256:
482; CHECK-LE:       // %bb.0:
483; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
484; CHECK-LE-NEXT:    mov s1, v0.s[1]
485; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-256]
486; CHECK-LE-NEXT:    ret
487;
488; CHECK-BE-LABEL: test_stnp_v2f32_offset_neg_256:
489; CHECK-BE:       // %bb.0:
490; CHECK-BE-NEXT:    stur d0, [x0, #-256]
491; CHECK-BE-NEXT:    ret
492  %tmp0 = getelementptr i8, ptr %p, i32 -256
493  store <2 x float> %v, ptr %tmp0, align 1, !nontemporal !0
494  ret void
495}
496
; External function taking a pointer. The alloca tests in this file call it
; with the address of their stack slot after the nontemporal store.
497declare void @dummy(ptr)
498
; <4 x float> nontemporal store into a stack slot (alloca), whose address is
; then passed to @dummy.
; The little-endian run expects the store to address the slot directly off sp
; (`stnp d0, d1, [sp]`); the big-endian run expects a plain `str q0, [sp]`.
; Both runs also expect the x30 spill/reload around the call.
499define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
500; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca:
501; CHECK-LE:       // %bb.0:
502; CHECK-LE-NEXT:    sub sp, sp, #32
503; CHECK-LE-NEXT:    mov d1, v0.d[1]
504; CHECK-LE-NEXT:    mov x0, sp
505; CHECK-LE-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
506; CHECK-LE-NEXT:    stnp d0, d1, [sp]
507; CHECK-LE-NEXT:    bl dummy
508; CHECK-LE-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
509; CHECK-LE-NEXT:    add sp, sp, #32
510; CHECK-LE-NEXT:    ret
511;
512; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca:
513; CHECK-BE:       // %bb.0:
514; CHECK-BE-NEXT:    sub sp, sp, #32
515; CHECK-BE-NEXT:    mov x0, sp
516; CHECK-BE-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
517; CHECK-BE-NEXT:    str q0, [sp]
518; CHECK-BE-NEXT:    bl dummy
519; CHECK-BE-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
520; CHECK-BE-NEXT:    add sp, sp, #32
521; CHECK-BE-NEXT:    ret
522  %tmp0 = alloca <4 x float>
523  store <4 x float> %v, ptr %tmp0, align 1, !nontemporal !0
524  call void @dummy(ptr %tmp0)
525  ret void
526}
527
; <4 x float> nontemporal store into the second element of a two-element
; alloca; the base of the alloca is then passed to @dummy.
; The little-endian run expects the element offset to be folded into an
; sp-relative `stnp d0, d1, [sp, #16]`; the big-endian run expects a plain
; `str q0, [sp, #16]`.
528define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
529; CHECK-LE-LABEL: test_stnp_v4f32_offset_alloca_2:
530; CHECK-LE:       // %bb.0:
531; CHECK-LE-NEXT:    sub sp, sp, #48
532; CHECK-LE-NEXT:    mov d1, v0.d[1]
533; CHECK-LE-NEXT:    mov x0, sp
534; CHECK-LE-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
535; CHECK-LE-NEXT:    stnp d0, d1, [sp, #16]
536; CHECK-LE-NEXT:    bl dummy
537; CHECK-LE-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
538; CHECK-LE-NEXT:    add sp, sp, #48
539; CHECK-LE-NEXT:    ret
540;
541; CHECK-BE-LABEL: test_stnp_v4f32_offset_alloca_2:
542; CHECK-BE:       // %bb.0:
543; CHECK-BE-NEXT:    sub sp, sp, #48
544; CHECK-BE-NEXT:    mov x0, sp
545; CHECK-BE-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
546; CHECK-BE-NEXT:    str q0, [sp, #16]
547; CHECK-BE-NEXT:    bl dummy
548; CHECK-BE-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
549; CHECK-BE-NEXT:    add sp, sp, #48
550; CHECK-BE-NEXT:    ret
551  %tmp0 = alloca <4 x float>, i32 2
552  %tmp1 = getelementptr <4 x float>, ptr %tmp0, i32 1
553  store <4 x float> %v, ptr %tmp1, align 1, !nontemporal !0
554  call void @dummy(ptr %tmp0)
555  ret void
556}
557
; 256-bit nontemporal store of <32 x i8> with align 4. Unlike the earlier
; tests, the vector is the first parameter and the pointer the second.
; The little-endian run expects one `stnp` of the q0/q1 pair; the big-endian
; run expects an ordinary `stp`.
558define void @test_stnp_v32i8(<32 x i8> %v, ptr %ptr) {
559; CHECK-LE-LABEL: test_stnp_v32i8:
560; CHECK-LE:       // %bb.0: // %entry
561; CHECK-LE-NEXT:    stnp q0, q1, [x0]
562; CHECK-LE-NEXT:    ret
563;
564; CHECK-BE-LABEL: test_stnp_v32i8:
565; CHECK-BE:       // %bb.0: // %entry
566; CHECK-BE-NEXT:    stp q0, q1, [x0]
567; CHECK-BE-NEXT:    ret
568entry:
569  store <32 x i8> %v, ptr %ptr, align 4, !nontemporal !0
570  ret void
571}
572
; 512-bit nontemporal store of <32 x i16> with align 4, arriving in q0-q3.
; The little-endian run expects two q-pair `stnp` instructions (upper half at
; #32 first, then the lower half); the big-endian run expects two ordinary
; `stp` instructions in ascending address order.
573define void @test_stnp_v32i16(<32 x i16> %v, ptr %ptr) {
574; CHECK-LE-LABEL: test_stnp_v32i16:
575; CHECK-LE:       // %bb.0: // %entry
576; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
577; CHECK-LE-NEXT:    stnp q0, q1, [x0]
578; CHECK-LE-NEXT:    ret
579;
580; CHECK-BE-LABEL: test_stnp_v32i16:
581; CHECK-BE:       // %bb.0: // %entry
582; CHECK-BE-NEXT:    stp q0, q1, [x0]
583; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
584; CHECK-BE-NEXT:    ret
585entry:
586  store <32 x i16> %v, ptr %ptr, align 4, !nontemporal !0
587  ret void
588}
589
; 512-bit nontemporal store of <32 x half> with align 4, arriving in q0-q3.
; Expected output is the same as the <32 x i16> case: two q-pair `stnp`
; instructions for little-endian, two ordinary `stp` for big-endian.
590define void @test_stnp_v32f16(<32 x half> %v, ptr %ptr) {
591; CHECK-LE-LABEL: test_stnp_v32f16:
592; CHECK-LE:       // %bb.0: // %entry
593; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
594; CHECK-LE-NEXT:    stnp q0, q1, [x0]
595; CHECK-LE-NEXT:    ret
596;
597; CHECK-BE-LABEL: test_stnp_v32f16:
598; CHECK-BE:       // %bb.0: // %entry
599; CHECK-BE-NEXT:    stp q0, q1, [x0]
600; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
601; CHECK-BE-NEXT:    ret
602entry:
603  store <32 x half> %v, ptr %ptr, align 4, !nontemporal !0
604  ret void
605}
606
; 512-bit nontemporal store of <16 x i32> with align 4, arriving in q0-q3.
; The little-endian run expects two q-pair `stnp` instructions; the big-endian
; run expects two ordinary `stp` instructions.
607define void @test_stnp_v16i32(<16 x i32> %v, ptr %ptr) {
608; CHECK-LE-LABEL: test_stnp_v16i32:
609; CHECK-LE:       // %bb.0: // %entry
610; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
611; CHECK-LE-NEXT:    stnp q0, q1, [x0]
612; CHECK-LE-NEXT:    ret
613;
614; CHECK-BE-LABEL: test_stnp_v16i32:
615; CHECK-BE:       // %bb.0: // %entry
616; CHECK-BE-NEXT:    stp q0, q1, [x0]
617; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
618; CHECK-BE-NEXT:    ret
619entry:
620  store <16 x i32> %v, ptr %ptr, align 4, !nontemporal !0
621  ret void
622}
623
; 512-bit nontemporal store of <16 x float> with align 4, arriving in q0-q3.
; The little-endian run expects two q-pair `stnp` instructions; the big-endian
; run expects two ordinary `stp` instructions.
624define void @test_stnp_v16f32(<16 x float> %v, ptr %ptr) {
625; CHECK-LE-LABEL: test_stnp_v16f32:
626; CHECK-LE:       // %bb.0: // %entry
627; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
628; CHECK-LE-NEXT:    stnp q0, q1, [x0]
629; CHECK-LE-NEXT:    ret
630;
631; CHECK-BE-LABEL: test_stnp_v16f32:
632; CHECK-BE:       // %bb.0: // %entry
633; CHECK-BE-NEXT:    stp q0, q1, [x0]
634; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
635; CHECK-BE-NEXT:    ret
636entry:
637  store <16 x float> %v, ptr %ptr, align 4, !nontemporal !0
638  ret void
639}
640
; Nontemporal store of an odd-sized <17 x float> (68 bytes) with align 4.
; In the expected output the first eight lanes arrive in s0-s7 and the
; remaining lanes are loaded from the incoming stack area.
; Little-endian: the first 16 lanes are assembled into four 128-bit registers
; and each is written as a d-pair `stnp` (offsets #48, #32, #16, #0); the 17th
; lane is written with a plain `str s1` at #64.
; Big-endian: the four 128-bit pieces are written with `st1 { ... .4s }` and
; the 17th lane with a plain `str s1` at #64; no `stnp` is expected.
641define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
642; CHECK-LE-LABEL: test_stnp_v17f32:
643; CHECK-LE:       // %bb.0: // %entry
644; CHECK-LE-NEXT:    // kill: def $s4 killed $s4 def $q4
645; CHECK-LE-NEXT:    // kill: def $s0 killed $s0 def $q0
646; CHECK-LE-NEXT:    ldr s16, [sp, #32]
647; CHECK-LE-NEXT:    // kill: def $s5 killed $s5 def $q5
648; CHECK-LE-NEXT:    // kill: def $s1 killed $s1 def $q1
649; CHECK-LE-NEXT:    add x8, sp, #40
650; CHECK-LE-NEXT:    // kill: def $s6 killed $s6 def $q6
651; CHECK-LE-NEXT:    // kill: def $s2 killed $s2 def $q2
652; CHECK-LE-NEXT:    // kill: def $s7 killed $s7 def $q7
653; CHECK-LE-NEXT:    // kill: def $s3 killed $s3 def $q3
654; CHECK-LE-NEXT:    mov v4.s[1], v5.s[0]
655; CHECK-LE-NEXT:    mov v0.s[1], v1.s[0]
656; CHECK-LE-NEXT:    ldr s5, [sp]
657; CHECK-LE-NEXT:    ld1 { v16.s }[1], [x8]
658; CHECK-LE-NEXT:    add x8, sp, #8
659; CHECK-LE-NEXT:    ld1 { v5.s }[1], [x8]
660; CHECK-LE-NEXT:    add x8, sp, #48
661; CHECK-LE-NEXT:    mov v4.s[2], v6.s[0]
662; CHECK-LE-NEXT:    ld1 { v16.s }[2], [x8]
663; CHECK-LE-NEXT:    mov v0.s[2], v2.s[0]
664; CHECK-LE-NEXT:    add x8, sp, #16
665; CHECK-LE-NEXT:    ld1 { v5.s }[2], [x8]
666; CHECK-LE-NEXT:    add x8, sp, #56
667; CHECK-LE-NEXT:    ld1 { v16.s }[3], [x8]
668; CHECK-LE-NEXT:    add x8, sp, #24
669; CHECK-LE-NEXT:    mov v4.s[3], v7.s[0]
670; CHECK-LE-NEXT:    mov v0.s[3], v3.s[0]
671; CHECK-LE-NEXT:    ld1 { v5.s }[3], [x8]
672; CHECK-LE-NEXT:    mov d1, v16.d[1]
673; CHECK-LE-NEXT:    mov d2, v5.d[1]
674; CHECK-LE-NEXT:    mov d3, v4.d[1]
675; CHECK-LE-NEXT:    mov d6, v0.d[1]
676; CHECK-LE-NEXT:    stnp d16, d1, [x0, #48]
677; CHECK-LE-NEXT:    ldr s1, [sp, #64]
678; CHECK-LE-NEXT:    stnp d5, d2, [x0, #32]
679; CHECK-LE-NEXT:    stnp d4, d3, [x0, #16]
680; CHECK-LE-NEXT:    stnp d0, d6, [x0]
681; CHECK-LE-NEXT:    str s1, [x0, #64]
682; CHECK-LE-NEXT:    ret
683;
684; CHECK-BE-LABEL: test_stnp_v17f32:
685; CHECK-BE:       // %bb.0: // %entry
686; CHECK-BE-NEXT:    // kill: def $s4 killed $s4 def $q4
687; CHECK-BE-NEXT:    // kill: def $s0 killed $s0 def $q0
688; CHECK-BE-NEXT:    ldr s16, [sp, #36]
689; CHECK-BE-NEXT:    // kill: def $s5 killed $s5 def $q5
690; CHECK-BE-NEXT:    // kill: def $s1 killed $s1 def $q1
691; CHECK-BE-NEXT:    ldr s17, [sp, #4]
692; CHECK-BE-NEXT:    add x8, sp, #44
693; CHECK-BE-NEXT:    mov v4.s[1], v5.s[0]
694; CHECK-BE-NEXT:    mov v0.s[1], v1.s[0]
695; CHECK-BE-NEXT:    // kill: def $s6 killed $s6 def $q6
696; CHECK-BE-NEXT:    // kill: def $s2 killed $s2 def $q2
697; CHECK-BE-NEXT:    // kill: def $s7 killed $s7 def $q7
698; CHECK-BE-NEXT:    // kill: def $s3 killed $s3 def $q3
699; CHECK-BE-NEXT:    ldr s1, [sp, #68]
700; CHECK-BE-NEXT:    ld1 { v16.s }[1], [x8]
701; CHECK-BE-NEXT:    add x8, sp, #12
702; CHECK-BE-NEXT:    ld1 { v17.s }[1], [x8]
703; CHECK-BE-NEXT:    add x8, sp, #52
704; CHECK-BE-NEXT:    str s1, [x0, #64]
705; CHECK-BE-NEXT:    ld1 { v16.s }[2], [x8]
706; CHECK-BE-NEXT:    add x8, sp, #20
707; CHECK-BE-NEXT:    mov v4.s[2], v6.s[0]
708; CHECK-BE-NEXT:    mov v0.s[2], v2.s[0]
709; CHECK-BE-NEXT:    ld1 { v17.s }[2], [x8]
710; CHECK-BE-NEXT:    add x8, sp, #60
711; CHECK-BE-NEXT:    ld1 { v16.s }[3], [x8]
712; CHECK-BE-NEXT:    add x8, sp, #28
713; CHECK-BE-NEXT:    ld1 { v17.s }[3], [x8]
714; CHECK-BE-NEXT:    mov v4.s[3], v7.s[0]
715; CHECK-BE-NEXT:    add x8, x0, #48
716; CHECK-BE-NEXT:    mov v0.s[3], v3.s[0]
717; CHECK-BE-NEXT:    st1 { v16.4s }, [x8]
718; CHECK-BE-NEXT:    add x8, x0, #32
719; CHECK-BE-NEXT:    st1 { v17.4s }, [x8]
720; CHECK-BE-NEXT:    add x8, x0, #16
721; CHECK-BE-NEXT:    st1 { v4.4s }, [x8]
722; CHECK-BE-NEXT:    st1 { v0.4s }, [x0]
723; CHECK-BE-NEXT:    ret
724entry:
725  store <17 x float> %v, ptr %ptr, align 4, !nontemporal !0
726  ret void
727}
; <16 x i32> nontemporal store at element index 500 of %ptr, i.e. byte offset
; 32000 in the expected output.
; The little-endian run expects the two half addresses (+32000 and +32032) to
; be materialised in registers and each q-pair `stnp` to use no immediate.
; The big-endian run expects four plain `str q` stores with the offsets folded
; as immediates.
728define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, ptr %ptr) {
729; CHECK-LE-LABEL: test_stnp_v16i32_invalid_offset:
730; CHECK-LE:       // %bb.0: // %entry
731; CHECK-LE-NEXT:    mov w8, #32032 // =0x7d20
732; CHECK-LE-NEXT:    mov w9, #32000 // =0x7d00
733; CHECK-LE-NEXT:    add x8, x0, x8
734; CHECK-LE-NEXT:    add x9, x0, x9
735; CHECK-LE-NEXT:    stnp q2, q3, [x8]
736; CHECK-LE-NEXT:    stnp q0, q1, [x9]
737; CHECK-LE-NEXT:    ret
738;
739; CHECK-BE-LABEL: test_stnp_v16i32_invalid_offset:
740; CHECK-BE:       // %bb.0: // %entry
741; CHECK-BE-NEXT:    str q3, [x0, #32048]
742; CHECK-BE-NEXT:    str q2, [x0, #32032]
743; CHECK-BE-NEXT:    str q1, [x0, #32016]
744; CHECK-BE-NEXT:    str q0, [x0, #32000]
745; CHECK-BE-NEXT:    ret
746entry:
747  %gep = getelementptr <16 x i32>, ptr %ptr, i32 500
748  store <16 x i32> %v, ptr %gep, align 4, !nontemporal !0
749  ret void
750}
751
; 1024-bit nontemporal store of <16 x double> with align 4, arriving in q0-q7.
; The little-endian run expects four q-pair `stnp` instructions (highest
; address first); the big-endian run expects four ordinary `stp` instructions
; in ascending address order.
752define void @test_stnp_v16f64(<16 x double> %v, ptr %ptr) {
753; CHECK-LE-LABEL: test_stnp_v16f64:
754; CHECK-LE:       // %bb.0: // %entry
755; CHECK-LE-NEXT:    stnp q6, q7, [x0, #96]
756; CHECK-LE-NEXT:    stnp q4, q5, [x0, #64]
757; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
758; CHECK-LE-NEXT:    stnp q0, q1, [x0]
759; CHECK-LE-NEXT:    ret
760;
761; CHECK-BE-LABEL: test_stnp_v16f64:
762; CHECK-BE:       // %bb.0: // %entry
763; CHECK-BE-NEXT:    stp q0, q1, [x0]
764; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
765; CHECK-BE-NEXT:    stp q4, q5, [x0, #64]
766; CHECK-BE-NEXT:    stp q6, q7, [x0, #96]
767; CHECK-BE-NEXT:    ret
768entry:
769  store <16 x double> %v, ptr %ptr, align 4, !nontemporal !0
770  ret void
771}
772
; 1024-bit nontemporal store of <16 x i64> with align 4, arriving in q0-q7.
; Expected output is the same as the <16 x double> case: four q-pair `stnp`
; instructions for little-endian, four ordinary `stp` for big-endian.
773define void @test_stnp_v16i64(<16 x i64> %v, ptr %ptr) {
774; CHECK-LE-LABEL: test_stnp_v16i64:
775; CHECK-LE:       // %bb.0: // %entry
776; CHECK-LE-NEXT:    stnp q6, q7, [x0, #96]
777; CHECK-LE-NEXT:    stnp q4, q5, [x0, #64]
778; CHECK-LE-NEXT:    stnp q2, q3, [x0, #32]
779; CHECK-LE-NEXT:    stnp q0, q1, [x0]
780; CHECK-LE-NEXT:    ret
781;
782; CHECK-BE-LABEL: test_stnp_v16i64:
783; CHECK-BE:       // %bb.0: // %entry
784; CHECK-BE-NEXT:    stp q0, q1, [x0]
785; CHECK-BE-NEXT:    stp q2, q3, [x0, #32]
786; CHECK-BE-NEXT:    stp q4, q5, [x0, #64]
787; CHECK-BE-NEXT:    stp q6, q7, [x0, #96]
788; CHECK-BE-NEXT:    ret
789entry:
790  store <16 x i64> %v, ptr %ptr, align 4, !nontemporal !0
791  ret void
792}
793
; Metadata node referenced by the `!nontemporal !0` attachment on every store
; in this file.
794!0 = !{ i32 1 }
795
; Attribute group referenced as `#0` by the test functions that carry it.
796attributes #0 = { nounwind }
797