; xref: /llvm-project/llvm/test/CodeGen/ARM/vst4.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
2
; vst4 of four <8 x i8> operands with an alignment argument of 8.
; The expected output is a single vst4.8 over d16-d19 whose address operand
; carries the :64 alignment qualifier.
define void @vst4i8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4i8:
;Check the alignment value.  Max for this instruction is 256 bits:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
	%tmp1 = load <8 x i8>, ptr %B
	; The same loaded vector is passed as each of the four stored operands.
	call void @llvm.arm.neon.vst4.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
	ret void
}
11
;Check for a post-increment updating store with register increment.
; The destination pointer is loaded from %ptr, used for the vst4 (alignment
; argument 16), advanced by the runtime value %inc, and stored back to %ptr.
; The expected output is one vst4.8 with a :128 qualifier and r2 as the
; post-increment register.
define void @vst4i8_update(ptr %ptr, ptr %B, i32 %inc) nounwind {
;CHECK-LABEL: vst4i8_update:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r{{[0-9]+}}:128], r2
	%A = load ptr, ptr %ptr
	%tmp1 = load <8 x i8>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
	; Byte-granular advance by %inc; the updated address is written back to %ptr.
	%tmp2 = getelementptr i8, ptr %A, i32 %inc
	store ptr %tmp2, ptr %ptr
	ret void
}
23
; vst4 of four <4 x i16> operands with an alignment argument of 16.
; The expected output is a single vst4.16 over d16-d19 with a :128
; alignment qualifier on the address.
define void @vst4i16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4i16:
;Check the alignment value.  Max for this instruction is 256 bits:
;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
	%tmp1 = load <4 x i16>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v4i16(ptr %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
	ret void
}
32
; vst4 of four <2 x i32> operands with an alignment argument of 32.
; The expected output is a single vst4.32 over d16-d19 with a :256
; alignment qualifier on the address.
define void @vst4i32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4i32:
;Check the alignment value.  Max for this instruction is 256 bits:
;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
	%tmp1 = load <2 x i32>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v2i32(ptr %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
	ret void
}
41
; vst4 of four <2 x float> operands with an alignment argument of 1.
; Only the mnemonic (vst4.32) is checked; registers and addressing are not.
define void @vst4f(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4f:
;CHECK: vst4.32
	%tmp1 = load <2 x float>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v2f32(ptr %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
	ret void
}
49
; vst4 of four <1 x i64> operands with an alignment argument of 64.
; The expected instruction is vst1.64 (not a vst4 form) over d16-d19, and the
; expected qualifier is :256 even though the requested alignment is larger.
define void @vst4i64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4i64:
;Check the alignment value.  Max for this instruction is 256 bits:
;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
	%tmp1 = load <1 x i64>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v1i64(ptr %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
	ret void
}
58
; <1 x i64> vst4 followed by advancing the pointer by four i64 elements,
; which equals the four 64-bit vectors just stored. The expected output is
; vst1.64 in its writeback ("!") form with no alignment qualifier.
define void @vst4i64_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst4i64_update:
;CHECK: vst1.64	{d16, d17, d18, d19}, [r{{[0-9]+}}]!
	%A = load ptr, ptr %ptr
	%tmp1 = load <1 x i64>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v1i64(ptr %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
	; Advance by 4 x i64 and write the updated address back to %ptr.
	%tmp2 = getelementptr i64, ptr %A, i32 4
	store ptr %tmp2, ptr %ptr
	ret void
}
69
; <1 x i64> vst4 followed by advancing the pointer by a single i64 element,
; which is less than the four vectors stored. The expected output is vst1.64
; with a register post-increment operand rather than the "!" writeback form.
define void @vst4i64_reg_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst4i64_reg_update:
;CHECK: vst1.64	{d16, d17, d18, d19}, [r{{[0-9]+}}], r{{[0-9]+}}
	%A = load ptr, ptr %ptr
	%tmp1 = load <1 x i64>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v1i64(ptr %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
	; Advance by 1 x i64 and write the updated address back to %ptr.
	%tmp2 = getelementptr i64, ptr %A, i32 1
	store ptr %tmp2, ptr %ptr
	ret void
}
80
; vst4 of four <16 x i8> operands with an alignment argument of 64.
; The expected output is two vst4.8 instructions: the first over the
; even-numbered registers d16-d22 with writeback, the second over the
; odd-numbered registers d17-d23. Both carry the :256 qualifier.
define void @vst4Qi8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4Qi8:
;Check the alignment value.  Max for this instruction is 256 bits:
;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
	%tmp1 = load <16 x i8>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v16i8(ptr %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
	ret void
}
90
; vst4 of four <8 x i16> operands with an alignment argument of 1.
; The expected output is two vst4.16 instructions (even registers with
; writeback, then odd registers), neither with an alignment qualifier.
define void @vst4Qi16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4Qi16:
;Check for no alignment specifier.
;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
	%tmp1 = load <8 x i16>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v8i16(ptr %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
	ret void
}
100
; vst4 of four <4 x i32> operands with an alignment argument of 1.
; Only the presence of two vst4.32 instructions is checked.
define void @vst4Qi32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4Qi32:
;CHECK: vst4.32
;CHECK: vst4.32
	%tmp1 = load <4 x i32>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v4i32(ptr %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
	ret void
}
109
; vst4 of four <4 x float> operands with an alignment argument of 1.
; Only the presence of two vst4.32 instructions is checked.
define void @vst4Qf(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst4Qf:
;CHECK: vst4.32
;CHECK: vst4.32
	%tmp1 = load <4 x float>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v4f32(ptr %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
	ret void
}
118
;Check for a post-increment updating store.
; <4 x float> vst4 followed by advancing the pointer by 16 floats, which
; equals the four 128-bit vectors just stored. Both expected vst4.32
; instructions use the writeback ("!") form, and the second must use the same
; base register that the first one captured as REG.
define void @vst4Qf_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst4Qf_update:
;CHECK: vst4.32 {d16, d18, d20, d22}, [r[[REG:[0-9]+]]]!
;CHECK: vst4.32 {d17, d19, d21, d23}, [r[[REG]]]!
	%A = load ptr, ptr %ptr
	%tmp1 = load <4 x float>, ptr %B
	call void @llvm.arm.neon.vst4.p0.v4f32(ptr %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
	; Advance by 16 x float and write the updated address back to %ptr.
	%tmp2 = getelementptr float, ptr %A, i32 16
	store ptr %tmp2, ptr %ptr
	ret void
}
131
; llvm.arm.neon.vst4 overloads used by the tests in this file. Each takes the
; destination pointer, four vectors of the same type, and a trailing i32.
; Overloads on 64-bit vector types:
declare void @llvm.arm.neon.vst4.p0.v8i8(ptr, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v4i16(ptr, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v2i32(ptr, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v2f32(ptr, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v1i64(ptr, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind

; Overloads on 128-bit vector types:
declare void @llvm.arm.neon.vst4.p0.v16i8(ptr, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v8i16(ptr, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v4i32(ptr, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst4.p0.v4f32(ptr, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
142