xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vctp.ll (revision b5b663aac17415625340eb29c8010832bfc4c21c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
3
; vctp8: the <16 x i1> result of @llvm.arm.mve.vctp8(%arg) is used as the
; condition of a select between a <16 x i8> loaded from %in and zero; the
; selected vector is stored to %out. The expected assembly below is a vctp.8
; on r0 followed by a vpst-predicated vmovt of the loaded value into a
; zero-initialised q0.
4define void @vctp8(i32 %arg, ptr %in, ptr %out) {
5; CHECK-LABEL: vctp8:
6; CHECK:       @ %bb.0:
7; CHECK-NEXT:    vldrw.u32 q1, [r1]
8; CHECK-NEXT:    vctp.8 r0
9; CHECK-NEXT:    vmov.i32 q0, #0x0
10; CHECK-NEXT:    vpst
11; CHECK-NEXT:    vmovt q0, q1
12; CHECK-NEXT:    vstrw.32 q0, [r2]
13; CHECK-NEXT:    bx lr
14  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %arg)
15  %ld = load <16 x i8>, ptr %in
; Lanes where %pred is false are replaced by zero.
16  %res = select <16 x i1> %pred, <16 x i8> %ld, <16 x i8> zeroinitializer
17  store <16 x i8> %res, ptr %out
18  ret void
19}
20
; vctp16: the <8 x i1> result of @llvm.arm.mve.vctp16(%arg) is used as the
; condition of a select between an <8 x i16> loaded from %in and zero; the
; selected vector is stored to %out. The expected assembly below is a vctp.16
; on r0 followed by a vpst-predicated vmovt of the loaded value into a
; zero-initialised q0.
21define void @vctp16(i32 %arg, ptr %in, ptr %out) {
22; CHECK-LABEL: vctp16:
23; CHECK:       @ %bb.0:
24; CHECK-NEXT:    vldrw.u32 q1, [r1]
25; CHECK-NEXT:    vctp.16 r0
26; CHECK-NEXT:    vmov.i32 q0, #0x0
27; CHECK-NEXT:    vpst
28; CHECK-NEXT:    vmovt q0, q1
29; CHECK-NEXT:    vstrw.32 q0, [r2]
30; CHECK-NEXT:    bx lr
31  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %arg)
32  %ld = load <8 x i16>, ptr %in
; Lanes where %pred is false are replaced by zero.
33  %res = select <8 x i1> %pred, <8 x i16> %ld, <8 x i16> zeroinitializer
34  store <8 x i16> %res, ptr %out
35  ret void
36}
37
; vctp32: the <4 x i1> result of @llvm.arm.mve.vctp32(%arg) is used as the
; condition of a select between a <4 x i32> loaded from %in and zero; the
; selected vector is stored to %out. The expected assembly below is a vctp.32
; on r0 followed by a vpst-predicated vmovt of the loaded value into a
; zero-initialised q0.
38define void @vctp32(i32 %arg, ptr %in, ptr %out) {
39; CHECK-LABEL: vctp32:
40; CHECK:       @ %bb.0:
41; CHECK-NEXT:    vldrw.u32 q1, [r1]
42; CHECK-NEXT:    vctp.32 r0
43; CHECK-NEXT:    vmov.i32 q0, #0x0
44; CHECK-NEXT:    vpst
45; CHECK-NEXT:    vmovt q0, q1
46; CHECK-NEXT:    vstrw.32 q0, [r2]
47; CHECK-NEXT:    bx lr
48  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %arg)
49  %ld = load <4 x i32>, ptr %in
; Lanes where %pred is false are replaced by zero.
50  %res = select <4 x i1> %pred, <4 x i32> %ld, <4 x i32> zeroinitializer
51  store <4 x i32> %res, ptr %out
52  ret void
53}
54
; vctp64: the <2 x i1> result of @llvm.arm.mve.vctp64(%arg) is used as the
; condition of a select between a <2 x i64> loaded from %in and zero; the
; selected vector is stored to %out. The expected assembly below is a vctp.64
; on r0 followed by a vpst-predicated vmovt of the loaded value into a
; zero-initialised q0.
55define void @vctp64(i32 %arg, ptr %in, ptr %out) {
56; CHECK-LABEL: vctp64:
57; CHECK:       @ %bb.0:
58; CHECK-NEXT:    vldrw.u32 q1, [r1]
59; CHECK-NEXT:    vctp.64 r0
60; CHECK-NEXT:    vmov.i32 q0, #0x0
61; CHECK-NEXT:    vpst
62; CHECK-NEXT:    vmovt q0, q1
63; CHECK-NEXT:    vstrw.32 q0, [r2]
64; CHECK-NEXT:    bx lr
65  %pred = call <2 x i1> @llvm.arm.mve.vctp64(i32 %arg)
66  %ld = load <2 x i64>, ptr %in
; Lanes where %pred is false are replaced by zero.
67  %res = select <2 x i1> %pred, <2 x i64> %ld, <2 x i64> zeroinitializer
68  store <2 x i64> %res, ptr %out
69  ret void
70}
71
; vcmp_ult_v4i32: compares the constant lane-index vector <0, 1, 2, 3> with a
; splat of %n using `icmp ult`, and uses the result to select between %a and
; %b. The expected assembly below contains a vctp.32 on r0 and a
; vpst-predicated vmovt, with no vector compare instruction.
72define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32(i32 %n, <4 x i32> %a, <4 x i32> %b) {
73; CHECK-LABEL: vcmp_ult_v4i32:
74; CHECK:       @ %bb.0: @ %entry
75; CHECK-NEXT:    vctp.32 r0
76; CHECK-NEXT:    vpst
77; CHECK-NEXT:    vmovt q1, q0
78; CHECK-NEXT:    vmov q0, q1
79; CHECK-NEXT:    bx lr
80entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
81  %i = insertelement <4 x i32> undef, i32 %n, i32 0
82  %ns = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Lane k is true when k < %n (unsigned).
83  %c = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %ns
84  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
85  ret <4 x i32> %s
86}
87
; vcmp_uge_v4i32: operand-swapped variant of the lane-index compare. A splat
; of %n is compared `icmp uge` against the constant vector <0, 1, 2, 3>, and
; the result selects between %a and %b. The expected assembly below contains
; a vctp.32 on r0 and a vpst-predicated vmovt, with no vector compare
; instruction.
;
; NOTE(review): lane 0 of `icmp uge %ns, <0, ...>` is true even when %n == 0
; (0 >= 0), whereas lane 0 of the `ult` form in vcmp_ult_v4i32 is false for
; %n == 0 (0 < 0 fails). The expected assembly for both tests is identical,
; so confirm that the vctp lowering is intended for this exact predicate and
; constant vector.
88define arm_aapcs_vfpcc <4 x i32> @vcmp_uge_v4i32(i32 %n, <4 x i32> %a, <4 x i32> %b) {
89; CHECK-LABEL: vcmp_uge_v4i32:
90; CHECK:       @ %bb.0: @ %entry
91; CHECK-NEXT:    vctp.32 r0
92; CHECK-NEXT:    vpst
93; CHECK-NEXT:    vmovt q1, q0
94; CHECK-NEXT:    vmov q0, q1
95; CHECK-NEXT:    bx lr
96entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
97  %i = insertelement <4 x i32> undef, i32 %n, i32 0
98  %ns = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Lane k is true when %n >= k (unsigned).
99  %c = icmp uge <4 x i32> %ns, <i32 0, i32 1, i32 2, i32 3>
100  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
101  ret <4 x i32> %s
102}
103
; vcmp_ult_v4i32_undef: same shape as the <0, 1, 2, 3> `icmp ult` test, but
; the last two elements of the constant lane-index vector are undef. The
; expected assembly below still contains a vctp.32 on r0 and a vpst-predicated
; vmovt, with no vector compare instruction.
104define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32_undef(i32 %n, <4 x i32> %a, <4 x i32> %b) {
105; CHECK-LABEL: vcmp_ult_v4i32_undef:
106; CHECK:       @ %bb.0: @ %entry
107; CHECK-NEXT:    vctp.32 r0
108; CHECK-NEXT:    vpst
109; CHECK-NEXT:    vmovt q1, q0
110; CHECK-NEXT:    vmov q0, q1
111; CHECK-NEXT:    bx lr
112entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
113  %i = insertelement <4 x i32> undef, i32 %n, i32 0
114  %ns = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
; Only lanes 0 and 1 of the constant operand are defined; lanes 2 and 3 are undef.
115  %c = icmp ult <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>, %ns
116  %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
117  ret <4 x i32> %s
118}
119
120
; vcmp_ult_v8i16: compares the constant lane-index vector <0 .. 7> with a
; splat of the i16 %n using `icmp ult`, and uses the result to select between
; %a and %b. The expected assembly below applies uxth to r0 before a vctp.16,
; followed by a vpst-predicated vmovt, with no vector compare instruction.
121define arm_aapcs_vfpcc <8 x i16> @vcmp_ult_v8i16(i16 %n, <8 x i16> %a, <8 x i16> %b) {
122; CHECK-LABEL: vcmp_ult_v8i16:
123; CHECK:       @ %bb.0: @ %entry
124; CHECK-NEXT:    uxth r0, r0
125; CHECK-NEXT:    vctp.16 r0
126; CHECK-NEXT:    vpst
127; CHECK-NEXT:    vmovt q1, q0
128; CHECK-NEXT:    vmov q0, q1
129; CHECK-NEXT:    bx lr
130entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
131  %i = insertelement <8 x i16> undef, i16 %n, i32 0
132  %ns = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
; Lane k is true when k < %n (unsigned).
133  %c = icmp ult <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %ns
134  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
135  ret <8 x i16> %s
136}
137
; vcmp_uge_v8i16: operand-swapped variant of the lane-index compare. A splat
; of the i16 %n is compared `icmp uge` against the constant vector <0 .. 7>,
; and the result selects between %a and %b. The expected assembly below
; applies uxth to r0 before a vctp.16, followed by a vpst-predicated vmovt,
; with no vector compare instruction.
;
; NOTE(review): lane 0 of `icmp uge %ns, <0, ...>` is true even when %n == 0,
; whereas lane 0 of the `ult` form in vcmp_ult_v8i16 is false for %n == 0.
; The expected assembly for both tests is identical, so confirm that the vctp
; lowering is intended for this exact predicate and constant vector.
138define arm_aapcs_vfpcc <8 x i16> @vcmp_uge_v8i16(i16 %n, <8 x i16> %a, <8 x i16> %b) {
139; CHECK-LABEL: vcmp_uge_v8i16:
140; CHECK:       @ %bb.0: @ %entry
141; CHECK-NEXT:    uxth r0, r0
142; CHECK-NEXT:    vctp.16 r0
143; CHECK-NEXT:    vpst
144; CHECK-NEXT:    vmovt q1, q0
145; CHECK-NEXT:    vmov q0, q1
146; CHECK-NEXT:    bx lr
147entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
148  %i = insertelement <8 x i16> undef, i16 %n, i32 0
149  %ns = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
; Lane k is true when %n >= k (unsigned).
150  %c = icmp uge <8 x i16> %ns, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
151  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
152  ret <8 x i16> %s
153}
154
155
; vcmp_ult_v16i8: compares the constant lane-index vector <0 .. 15> with a
; splat of the i8 %n using `icmp ult`, and uses the result to select between
; %a and %b. The expected assembly below applies uxtb to r0 before a vctp.8,
; followed by a vpst-predicated vmovt, with no vector compare instruction.
156define arm_aapcs_vfpcc <16 x i8> @vcmp_ult_v16i8(i8 %n, <16 x i8> %a, <16 x i8> %b) {
157; CHECK-LABEL: vcmp_ult_v16i8:
158; CHECK:       @ %bb.0: @ %entry
159; CHECK-NEXT:    uxtb r0, r0
160; CHECK-NEXT:    vctp.8 r0
161; CHECK-NEXT:    vpst
162; CHECK-NEXT:    vmovt q1, q0
163; CHECK-NEXT:    vmov q0, q1
164; CHECK-NEXT:    bx lr
165entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
166  %i = insertelement <16 x i8> undef, i8 %n, i32 0
167  %ns = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
; Lane k is true when k < %n (unsigned).
168  %c = icmp ult <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %ns
169  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
170  ret <16 x i8> %s
171}
172
; vcmp_uge_v16i8: operand-swapped variant of the lane-index compare. A splat
; of the i8 %n is compared `icmp uge` against the constant vector <0 .. 15>,
; and the result selects between %a and %b. The expected assembly below
; applies uxtb to r0 before a vctp.8, followed by a vpst-predicated vmovt,
; with no vector compare instruction.
;
; NOTE(review): lane 0 of `icmp uge %ns, <0, ...>` is true even when %n == 0,
; whereas lane 0 of the `ult` form in vcmp_ult_v16i8 is false for %n == 0.
; The expected assembly for both tests is identical, so confirm that the vctp
; lowering is intended for this exact predicate and constant vector.
173define arm_aapcs_vfpcc <16 x i8> @vcmp_uge_v16i8(i8 %n, <16 x i8> %a, <16 x i8> %b) {
174; CHECK-LABEL: vcmp_uge_v16i8:
175; CHECK:       @ %bb.0: @ %entry
176; CHECK-NEXT:    uxtb r0, r0
177; CHECK-NEXT:    vctp.8 r0
178; CHECK-NEXT:    vpst
179; CHECK-NEXT:    vmovt q1, q0
180; CHECK-NEXT:    vmov q0, q1
181; CHECK-NEXT:    bx lr
182entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
183  %i = insertelement <16 x i8> undef, i8 %n, i32 0
184  %ns = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
; Lane k is true when %n >= k (unsigned).
185  %c = icmp uge <16 x i8> %ns, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
186  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
187  ret <16 x i8> %s
188}
189
190
; vcmp_ult_v2i64: compares the constant lane-index vector <0, 1> with a splat
; of the i64 %n using `icmp ult`, and uses the result to select between %a
; and %b. The expected assembly below contains a vctp.64 on r0 and a
; vpst-predicated vmovt, with no vector compare instruction.
;
; NOTE(review): %n is i64 but the expected assembly only reads r0. Presumably
; the upper half of %n arrives in a second register; confirm how values of %n
; with a non-zero upper half are accounted for by this lowering.
191define arm_aapcs_vfpcc <2 x i64> @vcmp_ult_v2i64(i64 %n, <2 x i64> %a, <2 x i64> %b) {
192; CHECK-LABEL: vcmp_ult_v2i64:
193; CHECK:       @ %bb.0: @ %entry
194; CHECK-NEXT:    vctp.64 r0
195; CHECK-NEXT:    vpst
196; CHECK-NEXT:    vmovt q1, q0
197; CHECK-NEXT:    vmov q0, q1
198; CHECK-NEXT:    bx lr
199entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
200  %i = insertelement <2 x i64> undef, i64 %n, i32 0
201  %ns = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
; Lane k is true when k < %n (unsigned).
202  %c = icmp ult <2 x i64> <i64 0, i64 1>, %ns
203  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
204  ret <2 x i64> %s
205}
206
; vcmp_uge_v2i64: operand-swapped variant of the lane-index compare. A splat
; of the i64 %n is compared `icmp uge` against the constant vector <0, 1>,
; and the result selects between %a and %b. The expected assembly below
; contains a vctp.64 on r0 and a vpst-predicated vmovt, with no vector compare
; instruction.
;
; NOTE(review): lane 0 of `icmp uge %ns, <0, 1>` is true even when %n == 0,
; whereas lane 0 of the `ult` form in vcmp_ult_v2i64 is false for %n == 0.
; The expected assembly for both tests is identical, so confirm that the vctp
; lowering is intended for this exact predicate and constant vector.
; NOTE(review): %n is i64 but the expected assembly only reads r0; confirm
; how a non-zero upper half of %n is accounted for.
207define arm_aapcs_vfpcc <2 x i64> @vcmp_uge_v2i64(i64 %n, <2 x i64> %a, <2 x i64> %b) {
208; CHECK-LABEL: vcmp_uge_v2i64:
209; CHECK:       @ %bb.0: @ %entry
210; CHECK-NEXT:    vctp.64 r0
211; CHECK-NEXT:    vpst
212; CHECK-NEXT:    vmovt q1, q0
213; CHECK-NEXT:    vmov q0, q1
214; CHECK-NEXT:    bx lr
215entry:
; Splat %n: insert into lane 0, then broadcast lane 0 with an all-zero mask.
216  %i = insertelement <2 x i64> undef, i64 %n, i32 0
217  %ns = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
; Lane k is true when %n >= k (unsigned).
218  %c = icmp uge <2 x i64> %ns, <i64 0, i64 1>
219  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
220  ret <2 x i64> %s
221}
222
223
; Declarations of the MVE VCTP intrinsics called by the vctp8, vctp16, vctp32
; and vctp64 tests in this file. Each takes an i32 and returns an i1 predicate
; vector with 16, 8, 4 or 2 lanes respectively.
224declare <16 x i1> @llvm.arm.mve.vctp8(i32)
225declare <8 x i1> @llvm.arm.mve.vctp16(i32)
226declare <4 x i1> @llvm.arm.mve.vctp32(i32)
227declare <2 x i1> @llvm.arm.mve.vctp64(i32)
228