xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-bit-counting.ll (revision 672f673004663aeb15ece1af4b5b219994924167)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

; Code-generation test for the ctlz, ctpop and cttz intrinsics on scalable
; vector types. The llc output for this file is piped into FileCheck and
; matched against the assertions embedded in each function below.
target triple = "aarch64-unknown-linux-gnu"
5
;
; CLZ: count leading zeros (llvm.ctlz)
;
9
; Leading-zero count on <vscale x 16 x i8>. The expected output is a
; byte-granule ptrue into p0 followed by a single merging-predicated clz that
; reads and writes z0.
define <vscale x 16 x i8> @ctlz_b(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: ctlz_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    clz z0.b, p0/m, z0.b
; CHECK-NEXT:    ret

  %res = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}
20
; Leading-zero count on <vscale x 8 x i16>. The expected output is a
; halfword-granule ptrue into p0 followed by a single merging-predicated clz
; that reads and writes z0.
define <vscale x 8 x i16> @ctlz_h(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: ctlz_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    clz z0.h, p0/m, z0.h
; CHECK-NEXT:    ret

  %res = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}
31
; Leading-zero count on <vscale x 4 x i32>. The expected output is a
; word-granule ptrue into p0 followed by a single merging-predicated clz that
; reads and writes z0.
define <vscale x 4 x i32> @ctlz_s(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: ctlz_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    clz z0.s, p0/m, z0.s
; CHECK-NEXT:    ret

  %res = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}
42
; Leading-zero count on <vscale x 2 x i64>. The expected output is a
; doubleword-granule ptrue into p0 followed by a single merging-predicated clz
; that reads and writes z0.
define <vscale x 2 x i64> @ctlz_d(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: ctlz_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    clz z0.d, p0/m, z0.d
; CHECK-NEXT:    ret

  %res = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}
53
;
; CNT: population count (llvm.ctpop)
;
57
; Population count on <vscale x 16 x i8>. The expected output is a
; byte-granule ptrue into p0 followed by a single merging-predicated cnt that
; reads and writes z0.
define <vscale x 16 x i8> @ctpop_b(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: ctpop_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
; CHECK-NEXT:    ret

  %res = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}
68
; Population count on <vscale x 8 x i16>. The expected output is a
; halfword-granule ptrue into p0 followed by a single merging-predicated cnt
; that reads and writes z0.
define <vscale x 8 x i16> @ctpop_h(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: ctpop_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret

  %res = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}
79
; Population count on <vscale x 4 x i32>. The expected output is a
; word-granule ptrue into p0 followed by a single merging-predicated cnt that
; reads and writes z0.
define <vscale x 4 x i32> @ctpop_s(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: ctpop_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
; CHECK-NEXT:    ret

  %res = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}
90
; Population count on <vscale x 2 x i64>. The expected output is a
; doubleword-granule ptrue into p0 followed by a single merging-predicated cnt
; that reads and writes z0.
define <vscale x 2 x i64> @ctpop_d(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: ctpop_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
; CHECK-NEXT:    ret

  %res = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}
101
;
; CTTZ: count trailing zeros (llvm.cttz), expected as RBIT followed by CLZ
;
105
; Trailing-zero count on <vscale x 16 x i8>. The expected output is a
; byte-granule ptrue into p0, then a merging-predicated rbit on z0, then a
; merging-predicated clz on the bit-reversed value.
define <vscale x 16 x i8> @cttz_b(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: cttz_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
; CHECK-NEXT:    clz z0.b, p0/m, z0.b
; CHECK-NEXT:    ret

  %res = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}
117
; Trailing-zero count on <vscale x 8 x i16>. The expected output is a
; halfword-granule ptrue into p0, then a merging-predicated rbit on z0, then a
; merging-predicated clz on the bit-reversed value.
define <vscale x 8 x i16> @cttz_h(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: cttz_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
; CHECK-NEXT:    clz z0.h, p0/m, z0.h
; CHECK-NEXT:    ret

  %res = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}
129
; Trailing-zero count on <vscale x 4 x i32>. The expected output is a
; word-granule ptrue into p0, then a merging-predicated rbit on z0, then a
; merging-predicated clz on the bit-reversed value.
define <vscale x 4 x i32> @cttz_s(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: cttz_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
; CHECK-NEXT:    clz z0.s, p0/m, z0.s
; CHECK-NEXT:    ret

  %res = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}
141
; Trailing-zero count on <vscale x 2 x i64>. The expected output is a
; doubleword-granule ptrue into p0, then a merging-predicated rbit on z0, then
; a merging-predicated clz on the bit-reversed value.
define <vscale x 2 x i64> @cttz_d(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: cttz_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
; CHECK-NEXT:    clz z0.d, p0/m, z0.d
; CHECK-NEXT:    ret

  %res = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}
153
; Attribute group #0: turns on the "+sve" target feature for every function
; in this file that is tagged with #0.
attributes #0 = { "target-features"="+sve" }
155
; Declarations of the intrinsics called by the test functions in this file,
; one per element width (i8, i16, i32, i64) for each of ctlz, ctpop and cttz.
; NOTE(review): llvm.ctlz and llvm.cttz are declared here with a single vector
; operand, whereas the LangRef form takes an additional i1 operand. This
; presumably relies on the IR auto-upgrader -- confirm before modernising, and
; update each declaration together with its call site.
declare <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>)
170