xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll (revision 62baf21daa377c4ec1a641b26931063c1117d262)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
4
5;
6; CLS
7;
8
; CLS on 8-bit elements: the intrinsic must select one predicated `cls` on .b lanes.
define <vscale x 16 x i8> @cls_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cls_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}
19
; CLS on 16-bit elements: the intrinsic must select one predicated `cls` on .h lanes.
define <vscale x 8 x i16> @cls_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cls_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}
30
; CLS on 32-bit elements: the intrinsic must select one predicated `cls` on .s lanes.
define <vscale x 4 x i32> @cls_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cls_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}
41
; CLS on 64-bit elements: the intrinsic must select one predicated `cls` on .d lanes.
define <vscale x 2 x i64> @cls_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cls_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cls z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}
52
53;
54; CLZ
55;
56
; CLZ on 8-bit elements: the intrinsic must select one predicated `clz` on .b lanes.
define <vscale x 16 x i8> @clz_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clz_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}
67
; CLZ on 16-bit elements: the intrinsic must select one predicated `clz` on .h lanes.
define <vscale x 8 x i16> @clz_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clz_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}
78
; CLZ on 32-bit elements: the intrinsic must select one predicated `clz` on .s lanes.
define <vscale x 4 x i32> @clz_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clz_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}
89
; CLZ on 64-bit elements: the intrinsic must select one predicated `clz` on .d lanes.
define <vscale x 2 x i64> @clz_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clz_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clz z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}
100
101;
102; CNT
103;
104
; CNT on 8-bit elements: the intrinsic must select one predicated `cnt` on .b lanes.
define <vscale x 16 x i8> @cnt_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cnt_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %res
}
115
; CNT on 16-bit elements: the intrinsic must select one predicated `cnt` on .h lanes.
define <vscale x 8 x i16> @cnt_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cnt_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %res
}
126
; CNT on 32-bit elements: the intrinsic must select one predicated `cnt` on .s lanes.
define <vscale x 4 x i32> @cnt_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cnt_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %res
}
137
; CNT on 64-bit elements: the intrinsic must select one predicated `cnt` on .d lanes.
define <vscale x 2 x i64> @cnt_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cnt_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %res
}
148
; CNT with a half-typed source and an i16 result: still expected to select `cnt` on .h lanes.
define <vscale x 8 x i16> @cnt_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
; CHECK-LABEL: cnt_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
  ret <vscale x 8 x i16> %res
}
159
; CNT with a bfloat-typed source and an i16 result: expected to select `cnt` on .h lanes.
; Uses attribute group #0 (defined at the end of the file) for the bfloat type.
define <vscale x 8 x i16> @cnt_bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: cnt_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x i16> %res
}
170
; CNT with a float-typed source and an i32 result: still expected to select `cnt` on .s lanes.
define <vscale x 4 x i32> @cnt_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
; CHECK-LABEL: cnt_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
  ret <vscale x 4 x i32> %res
}
181
; CNT with a double-typed source and an i64 result: still expected to select `cnt` on .d lanes.
define <vscale x 2 x i64> @cnt_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
; CHECK-LABEL: cnt_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
  ret <vscale x 2 x i64> %res
}
192
; Intrinsic declarations used by the tests above. Each takes three operands:
; a vector of the result type, a predicate, and the source vector.

; CLS intrinsics, one per integer element width.
declare <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

; CLZ intrinsics, one per integer element width.
declare <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

; CNT intrinsics: integer sources first, then floating-point sources
; (the result stays an integer vector of matching element width).
declare <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
211
; Attribute group applied to cnt_bf16: the bfloat variant needs +bf16 in
; addition to the features given on the RUN lines.
attributes #0 = { "target-features"="+bf16" }
214