; xref: /llvm-project/llvm/test/CodeGen/Thumb2/cde-vec.ll (revision ab0c5cea0b1a9a1227fea840184dd7b5983c22a5)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Unpredicated CDE vector intrinsics exercised by the tests below.
; Each one returns <16 x i8> and takes an `i32 immarg` as both its first and
; its last operand; any operands in between are <16 x i8> vectors.

; One-immediate forms (plain, then the `a` variant with an extra vector operand).
declare <16 x i8> @llvm.arm.cde.vcx1q(i32 immarg, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx1qa(i32 immarg, <16 x i8>, i32 immarg)
; Forms taking one more vector operand than the vcx1 pair above.
declare <16 x i8> @llvm.arm.cde.vcx2q(i32 immarg, <16 x i8>, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx2qa(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
; Forms taking one more vector operand than the vcx2 pair above.
declare <16 x i8> @llvm.arm.cde.vcx3q(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg)
declare <16 x i8> @llvm.arm.cde.vcx3qa(i32 immarg, <16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)

; vcx1q called with first immediate 0 and last immediate 1111; the expected
; output is a single `vcx1 p0, q0, #1111` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vcx1q_u8() {
; CHECK-LABEL: test_vcx1q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx1 p0, q0, #1111
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.cde.vcx1q(i32 0, i32 1111)
  ret <16 x i8> %res
}

; vcx1qa on a <16 x i8> accumulator passed straight through, with first
; immediate 1 and last immediate 1112; expected as `vcx1a p1, q0, #1112`.
define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_1(<16 x i8> %acc) {
; CHECK-LABEL: test_vcx1qa_1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx1a p1, q0, #1112
; CHECK-NEXT:    bx lr
entry:
  %res = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 1, <16 x i8> %acc, i32 1112)
  ret <16 x i8> %res
}

; vcx1qa on a <4 x i32> accumulator: the value is bitcast to <16 x i8> for the
; call and the result bitcast back. The expected output contains only
; `vcx1a p0, q0, #1113`, i.e. no instructions for the bitcasts.
define arm_aapcs_vfpcc <4 x i32> @test_vcx1qa_2(<4 x i32> %acc) {
; CHECK-LABEL: test_vcx1qa_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx1a p0, q0, #1113
; CHECK-NEXT:    bx lr
entry:
  %acc.bytes = bitcast <4 x i32> %acc to <16 x i8>
  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 0, <16 x i8> %acc.bytes, i32 1113)
  %res = bitcast <16 x i8> %res.bytes to <4 x i32>
  ret <4 x i32> %res
}

; vcx2q fed from an <8 x half> argument (bitcast to <16 x i8>) and returning
; the intrinsic's <16 x i8> result directly; expected as
; `vcx2 p1, q0, q0, #111`.
define arm_aapcs_vfpcc <16 x i8> @test_vcx2q_u8(<8 x half> %n) {
; CHECK-LABEL: test_vcx2q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx2 p1, q0, q0, #111
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <8 x half> %n to <16 x i8>
  %res = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 111)
  ret <16 x i8> %res
}

; vcx2q with a <4 x float> input and a <4 x float> result, both routed through
; <16 x i8> bitcasts around the call; expected as `vcx2 p1, q0, q0, #112`.
define arm_aapcs_vfpcc <4 x float> @test_vcx2q(<4 x float> %n) {
; CHECK-LABEL: test_vcx2q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx2 p1, q0, q0, #112
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <4 x float> %n to <16 x i8>
  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 112)
  %res = bitcast <16 x i8> %res.bytes to <4 x float>
  ret <4 x float> %res
}

; vcx2qa with a <4 x float> accumulator and a <2 x i64> input, both bitcast to
; <16 x i8> for the call, and the result bitcast back to <4 x float>;
; expected as `vcx2a p0, q0, q1, #113`.
define arm_aapcs_vfpcc <4 x float> @test_vcx2qa(<4 x float> %acc, <2 x i64> %n) {
; CHECK-LABEL: test_vcx2qa:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx2a p0, q0, q1, #113
; CHECK-NEXT:    bx lr
entry:
  %acc.bytes = bitcast <4 x float> %acc to <16 x i8>
  %n.bytes = bitcast <2 x i64> %n to <16 x i8>
  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2qa(i32 0, <16 x i8> %acc.bytes, <16 x i8> %n.bytes, i32 113)
  %res = bitcast <16 x i8> %res.bytes to <4 x float>
  ret <4 x float> %res
}

; vcx3q with an <8 x i16> and a <4 x i32> input (each bitcast to <16 x i8>),
; returning the <16 x i8> result directly; expected as
; `vcx3 p0, q0, q0, q1, #11`.
define arm_aapcs_vfpcc <16 x i8> @test_vcx3q_u8(<8 x i16> %n, <4 x i32> %m) {
; CHECK-LABEL: test_vcx3q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx3 p0, q0, q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <8 x i16> %n to <16 x i8>
  %m.bytes = bitcast <4 x i32> %m to <16 x i8>
  %res = call <16 x i8> @llvm.arm.cde.vcx3q(i32 0, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 11)
  ret <16 x i8> %res
}

; vcx3q with a <2 x i64> and a <4 x float> input and a <2 x i64> result, all
; routed through <16 x i8> bitcasts; expected as `vcx3 p1, q0, q0, q1, #12`.
define arm_aapcs_vfpcc <2 x i64> @test_vcx3q(<2 x i64> %n, <4 x float> %m) {
; CHECK-LABEL: test_vcx3q:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx3 p1, q0, q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <2 x i64> %n to <16 x i8>
  %m.bytes = bitcast <4 x float> %m to <16 x i8>
  %res.bytes = call <16 x i8> @llvm.arm.cde.vcx3q(i32 1, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12)
  %res = bitcast <16 x i8> %res.bytes to <2 x i64>
  ret <2 x i64> %res
}

; vcx3qa with a <16 x i8> accumulator passed straight through plus an
; <8 x i16> and a <4 x float> input (each bitcast to <16 x i8>); expected as
; `vcx3a p1, q0, q1, q2, #13`.
define arm_aapcs_vfpcc <16 x i8> @test_vcx3qa(<16 x i8> %acc, <8 x i16> %n, <4 x float> %m) {
; CHECK-LABEL: test_vcx3qa:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcx3a p1, q0, q1, q2, #13
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <8 x i16> %n to <16 x i8>
  %m.bytes = bitcast <4 x float> %m to <16 x i8>
  %res = call <16 x i8> @llvm.arm.cde.vcx3qa(i32 1, <16 x i8> %acc, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 13)
  ret <16 x i8> %res
}

; Helpers that turn an i32 into an <N x i1> vector, one per lane count used
; by the predicated tests below.
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)

; Predicated CDE vector intrinsics. Each takes an `i32 immarg` first, then a
; vector of the return type, then zero or more <16 x i8> vectors, a second
; `i32 immarg`, and finally the <N x i1> vector named in the overload suffix.
declare <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 immarg, <8 x i16>, i32 immarg, <8 x i1>)
declare <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 immarg, <16 x i8>, i32 immarg, <16 x i1>)
declare <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 immarg, <4 x i32>, <16 x i8>, i32 immarg, <4 x i1>)
declare <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <2 x i1>)
declare <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)

; Predicated vcx1q on <8 x i16>: the i16 argument %p is zero-extended, turned
; into an <8 x i1> mask with pred.i2v, and passed as the trailing operand.
; Expected output: `vmsr p0, r0`, `vpst`, then `vcx1t p0, q0, #1111`.
define arm_aapcs_vfpcc <8 x i16> @test_vcx1q_m(<8 x i16> %inactive, i16 zeroext %p) {
; CHECK-LABEL: test_vcx1q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx1t p0, q0, #1111
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = call <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 0, <8 x i16> %inactive, i32 1111, <8 x i1> %mask)
  ret <8 x i16> %res
}

; Predicated vcx1qa on a <16 x i8> accumulator with a <16 x i1> mask built
; from the zero-extended i16 argument %p.
; Expected output: `vmsr p0, r0`, `vpst`, then `vcx1at p1, q0, #1112`.
define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_m(<16 x i8> %acc, i16 zeroext %p) {
; CHECK-LABEL: test_vcx1qa_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx1at p1, q0, #1112
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = call <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 1, <16 x i8> %acc, i32 1112, <16 x i1> %mask)
  ret <16 x i8> %res
}

; Predicated vcx2q producing <4 x i32>: %inactive is passed as the vector of
; the return type, %n is bitcast from <4 x float> to <16 x i8>, and the
; <4 x i1> mask is built from the zero-extended i16 argument %p.
; Expected output: `vmsr p0, r0`, `vpst`, then `vcx2t p0, q0, q1, #111`.
define arm_aapcs_vfpcc <4 x i32> @test_vcx2q_m(<4 x i32> %inactive, <4 x float> %n, i16 zeroext %p) {
; CHECK-LABEL: test_vcx2q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx2t p0, q0, q1, #111
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <4 x float> %n to <16 x i8>
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 0, <4 x i32> %inactive, <16 x i8> %n.bytes, i32 111, <4 x i1> %mask)
  ret <4 x i32> %res
}

; Predicated vcx2qa on a <4 x float> accumulator: %n is bitcast from
; <8 x half> to <16 x i8>, and the <4 x i1> mask is built from the
; zero-extended i16 argument %p.
; Expected output: `vmsr p0, r0`, `vpst`, then `vcx2at p0, q0, q1, #112`.
define arm_aapcs_vfpcc <4 x float> @test_vcx2qa_m(<4 x float> %acc, <8 x half> %n, i16 zeroext %p) {
; CHECK-LABEL: test_vcx2qa_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx2at p0, q0, q1, #112
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <8 x half> %n to <16 x i8>
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = call <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %n.bytes, i32 112, <4 x i1> %mask)
  ret <4 x float> %res
}

; Predicated vcx3q producing <2 x i64>: %inactive is passed as the vector of
; the return type, %n is bitcast from <4 x float> to <16 x i8>, %m is already
; <16 x i8>, and the <2 x i1> mask is built from the zero-extended i16 %p.
; Expected output: `vmsr p0, r0`, `vpst`, then `vcx3t p0, q0, q1, q2, #11`.
define arm_aapcs_vfpcc <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
; CHECK-LABEL: test_vcx3q_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx3t p0, q0, q1, q2, #11
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <4 x float> %n to <16 x i8>
  %p.wide = zext i16 %p to i32
  %mask = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %p.wide)
  %res = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> %inactive, <16 x i8> %n.bytes, <16 x i8> %m, i32 11, <2 x i1> %mask)
  ret <2 x i64> %res
}

; Predicated vcx3qa on a <4 x float> accumulator: %n (<8 x half>) and %m
; (<4 x i32>) are bitcast to <16 x i8>, the <4 x i1> mask is built from the
; zero-extended i16 argument %p, and the <4 x float> result is bitcast to
; <8 x half> for the return.
; Expected output: `vmsr p0, r0`, `vpst`, then `vcx3at p0, q0, q1, q2, #12`.
; The first vector argument is named %acc, as in the other `qa` tests in this
; file; %inactive is the name used by the non-accumulating `q` tests.
define arm_aapcs_vfpcc <8 x half> @test_vcx3qa_m(<4 x float> %acc, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
; CHECK-LABEL: test_vcx3qa_m:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcx3at p0, q0, q1, q2, #12
; CHECK-NEXT:    bx lr
entry:
  %n.bytes = bitcast <8 x half> %n to <16 x i8>
  %m.bytes = bitcast <4 x i32> %m to <16 x i8>
  %p.wide = zext i16 %p to i32
  %mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res.f32 = call <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12, <4 x i1> %mask)
  %res = bitcast <4 x float> %res.f32 to <8 x half>
  ret <8 x half> %res
}
