xref: /llvm-project/llvm/test/CodeGen/ARM/vst2.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
2
; Two-register vst2 of <8 x i8>: one vector is loaded from %B and passed as
; both halves of the structure stored to %A.
; The intrinsic's trailing i32 operand is 8 here, and the expected address
; form carries a :64 qualifier (the instruction's maximum is 128 bits).
define void @vst2i8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2i8:
;CHECK: vst2.8 {d16, d17}, [r0:64]
  %vec = load <8 x i8>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v8i8(ptr %A, <8 x i8> %vec, <8 x i8> %vec, i32 8)
  ret void
}
11
; Post-increment updating store with a register increment: the destination
; pointer is read from %ptr, used for the vst2, advanced by %inc bytes, and
; the advanced pointer is written back to %ptr.
define void @vst2i8_update(ptr %ptr, ptr %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2i8_update:
;CHECK: vst2.8 {d16, d17}, [r1], r2
  %base = load ptr, ptr %ptr
  %vec = load <8 x i8>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v8i8(ptr %base, <8 x i8> %vec, <8 x i8> %vec, i32 4)
  %next = getelementptr i8, ptr %base, i32 %inc
  store ptr %next, ptr %ptr
  ret void
}
23
; Two-register vst2 of <4 x i16>.
; The trailing i32 operand is 32, while the expected address qualifier is
; :128, which is the maximum for this instruction.
define void @vst2i16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2i16:
;CHECK: vst2.16 {d16, d17}, [r0:128]
  %vec = load <4 x i16>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v4i16(ptr %A, <4 x i16> %vec, <4 x i16> %vec, i32 32)
  ret void
}
32
; Two-register vst2 of <2 x i32> with a trailing i32 operand of 1.
; Only the vst2.32 mnemonic is matched; registers and addressing form are
; left unconstrained.
define void @vst2i32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2i32:
;CHECK: vst2.32
  %vec = load <2 x i32>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v2i32(ptr %A, <2 x i32> %vec, <2 x i32> %vec, i32 1)
  ret void
}
40
; Floating-point counterpart of the <2 x i32> case: vst2 of <2 x float> with
; a trailing i32 operand of 1. Only the vst2.32 mnemonic is matched.
define void @vst2f(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2f:
;CHECK: vst2.32
  %vec = load <2 x float>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v2f32(ptr %A, <2 x float> %vec, <2 x float> %vec, i32 1)
  ret void
}
48
; vst2 intrinsic on <1 x i64>. The expected instruction is a vst1.64 of two
; D registers rather than a vst2.
; The trailing i32 operand is 32, while the expected address qualifier is
; :128, which is the maximum for this instruction.
define void @vst2i64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2i64:
;CHECK: vst1.64 {d16, d17}, [r0:128]
  %vec = load <1 x i64>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v1i64(ptr %A, <1 x i64> %vec, <1 x i64> %vec, i32 32)
  ret void
}
57
; Post-increment updating store: the destination pointer is read from %ptr,
; used for the store, advanced by two i64 elements, and written back to %ptr.
; The expected instruction uses the writeback ("!") addressing form.
define void @vst2i64_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst2i64_update:
;CHECK: vst1.64 {d16, d17}, [r1:64]!
  %base = load ptr, ptr %ptr
  %vec = load <1 x i64>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v1i64(ptr %base, <1 x i64> %vec, <1 x i64> %vec, i32 8)
  %next = getelementptr i64, ptr %base, i32 2
  store ptr %next, ptr %ptr
  ret void
}
69
; 128-bit vector variant: vst2 of <16 x i8>, expected to use four D registers.
; The trailing i32 operand is 8 and the expected address qualifier is :64
; (the instruction's maximum is 256 bits).
define void @vst2Qi8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2Qi8:
;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
  %vec = load <16 x i8>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v16i8(ptr %A, <16 x i8> %vec, <16 x i8> %vec, i32 8)
  ret void
}
78
; 128-bit vector variant: vst2 of <8 x i16>, expected to use four D registers.
; The trailing i32 operand is 16 and the expected address qualifier is :128
; (the instruction's maximum is 256 bits).
define void @vst2Qi16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2Qi16:
;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
  %vec = load <8 x i16>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v8i16(ptr %A, <8 x i16> %vec, <8 x i16> %vec, i32 16)
  ret void
}
87
; 128-bit vector variant: vst2 of <4 x i32>, expected to use four D registers.
; The trailing i32 operand is 64, while the expected address qualifier is
; :256, which is the maximum for this instruction.
define void @vst2Qi32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2Qi32:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
  %vec = load <4 x i32>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v4i32(ptr %A, <4 x i32> %vec, <4 x i32> %vec, i32 64)
  ret void
}
96
; 128-bit floating-point variant: vst2 of <4 x float> with a trailing i32
; operand of 1. Only the vst2.32 mnemonic is matched.
define void @vst2Qf(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst2Qf:
;CHECK: vst2.32
  %vec = load <4 x float>, ptr %B
  call void @llvm.arm.neon.vst2.p0.v4f32(ptr %A, <4 x float> %vec, <4 x float> %vec, i32 1)
  ret void
}
104
; Stores two <4 x i16> vectors through %out and returns %out advanced by
; 16 bytes. The expected instruction uses the writeback ("!") addressing form
; on the destination register.
define ptr @vst2update(ptr %out, ptr %B) nounwind {
;CHECK-LABEL: vst2update:
;CHECK: vst2.16 {d16, d17}, [r0]!
  %vec = load <4 x i16>, ptr %B
  tail call void @llvm.arm.neon.vst2.p0.v4i16(ptr %out, <4 x i16> %vec, <4 x i16> %vec, i32 2)
  %advanced = getelementptr inbounds i8, ptr %out, i32 16
  ret ptr %advanced
}
113
; Stores two <4 x float> vectors through %out and returns %out advanced by
; 32 bytes. The expected instruction uses four D registers and the writeback
; ("!") addressing form.
define ptr @vst2update2(ptr %out, ptr %this) nounwind optsize ssp align 2 {
;CHECK-LABEL: vst2update2:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
  %vec = load <4 x float>, ptr %this
  call void @llvm.arm.neon.vst2.p0.v4f32(ptr %out, <4 x float> %vec, <4 x float> %vec, i32 4) nounwind
  %advanced = getelementptr inbounds i8, ptr %out, i32 32
  ret ptr %advanced
}
122
; Declarations of the NEON vst2 intrinsics called by the functions in this file.
; Each takes a destination pointer, two vectors of the same type, and an i32.

; 64-bit vector element types.
declare void @llvm.arm.neon.vst2.p0.v8i8(ptr,  <8 x i8>,    <8 x i8>,    i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v4i16(ptr, <4 x i16>,   <4 x i16>,   i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v2i32(ptr, <2 x i32>,   <2 x i32>,   i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v2f32(ptr, <2 x float>, <2 x float>, i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v1i64(ptr, <1 x i64>,   <1 x i64>,   i32) nounwind

; 128-bit vector element types.
declare void @llvm.arm.neon.vst2.p0.v16i8(ptr, <16 x i8>,   <16 x i8>,   i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v8i16(ptr, <8 x i16>,   <8 x i16>,   i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v4i32(ptr, <4 x i32>,   <4 x i32>,   i32) nounwind
declare void @llvm.arm.neon.vst2.p0.v4f32(ptr, <4 x float>, <4 x float>, i32) nounwind
133