xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-vclz.ll (revision 28064bfad12cfce959d74fa6d099312e19703f26)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
; Per-lane count-leading-zeros on a 64-bit vector of eight i8 elements.
; The body is a single call to llvm.ctlz.v8i8 with the i1 flag set to false.
; The assertions use the prefix shared by both llc invocations, so the default
; run and the -global-isel run are both expected to emit one clz.8b and a ret.
5define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
6; CHECK-LABEL: test_vclz_u8:
7; CHECK:       // %bb.0:
8; CHECK-NEXT:    clz.8b v0, v0
9; CHECK-NEXT:    ret
10  %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
11  ret <8 x i8> %vclz.i
12}
13
; Same IR as test_vclz_u8 under a different function name: one call to
; llvm.ctlz.v8i8 on an <8 x i8> argument with the i1 flag set to false.
; NOTE(review): the _s8/_u8 naming presumably mirrors signed/unsigned source
; intrinsics; nothing in the IR itself distinguishes signedness.
; Both llc invocations are expected to emit a single clz.8b.
14define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
15; CHECK-LABEL: test_vclz_s8:
16; CHECK:       // %bb.0:
17; CHECK-NEXT:    clz.8b v0, v0
18; CHECK-NEXT:    ret
19  %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
20  ret <8 x i8> %vclz.i
21}
22
; Per-lane count-leading-zeros on a 64-bit vector of four i16 elements.
; The body is a single call to llvm.ctlz.v4i16 with the i1 flag set to false.
; Both llc invocations share the assertions and are expected to emit one
; clz.4h followed by ret.
23define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
24; CHECK-LABEL: test_vclz_u16:
25; CHECK:       // %bb.0:
26; CHECK-NEXT:    clz.4h v0, v0
27; CHECK-NEXT:    ret
28  %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
29  ret <4 x i16> %vclz1.i
30}
31
; Same IR as test_vclz_u16 under a different function name: one call to
; llvm.ctlz.v4i16 on a <4 x i16> argument with the i1 flag set to false.
; NOTE(review): the _s16/_u16 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Both llc invocations are expected to emit a single clz.4h.
32define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
33; CHECK-LABEL: test_vclz_s16:
34; CHECK:       // %bb.0:
35; CHECK-NEXT:    clz.4h v0, v0
36; CHECK-NEXT:    ret
37  %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
38  ret <4 x i16> %vclz1.i
39}
40
; Per-lane count-leading-zeros on a 64-bit vector of two i32 elements.
; The body is a single call to llvm.ctlz.v2i32 with the i1 flag set to false.
; Both llc invocations share the assertions and are expected to emit one
; clz.2s followed by ret.
41define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
42; CHECK-LABEL: test_vclz_u32:
43; CHECK:       // %bb.0:
44; CHECK-NEXT:    clz.2s v0, v0
45; CHECK-NEXT:    ret
46  %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
47  ret <2 x i32> %vclz1.i
48}
49
; Same IR as test_vclz_u32 under a different function name: one call to
; llvm.ctlz.v2i32 on a <2 x i32> argument with the i1 flag set to false.
; NOTE(review): the _s32/_u32 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Both llc invocations are expected to emit a single clz.2s.
50define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
51; CHECK-LABEL: test_vclz_s32:
52; CHECK:       // %bb.0:
53; CHECK-NEXT:    clz.2s v0, v0
54; CHECK-NEXT:    ret
55  %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
56  ret <2 x i32> %vclz1.i
57}
58
; Count-leading-zeros on a single-element i64 vector via llvm.ctlz.v1i64 with
; the i1 flag set to false. The two llc invocations have separate expectations,
; and neither contains a vector clz instruction:
;  - default run: the value is ORed with itself shifted right by 1, 2, 4, 8,
;    16 and 32, then inverted (mvn), bit-counted per byte (cnt) and summed
;    with three widening pairwise adds (uaddlp .4h, .2s, .1d).
;  - -global-isel run: the lane is moved to x8 with fmov, counted with the
;    scalar clz, and moved back into d0.
59define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp {
60; CHECK-SD-LABEL: test_vclz_u64:
61; CHECK-SD:       // %bb.0:
62; CHECK-SD-NEXT:    ushr d1, d0, #1
63; CHECK-SD-NEXT:    orr.8b v0, v0, v1
64; CHECK-SD-NEXT:    ushr d1, d0, #2
65; CHECK-SD-NEXT:    orr.8b v0, v0, v1
66; CHECK-SD-NEXT:    ushr d1, d0, #4
67; CHECK-SD-NEXT:    orr.8b v0, v0, v1
68; CHECK-SD-NEXT:    ushr d1, d0, #8
69; CHECK-SD-NEXT:    orr.8b v0, v0, v1
70; CHECK-SD-NEXT:    ushr d1, d0, #16
71; CHECK-SD-NEXT:    orr.8b v0, v0, v1
72; CHECK-SD-NEXT:    ushr d1, d0, #32
73; CHECK-SD-NEXT:    orr.8b v0, v0, v1
74; CHECK-SD-NEXT:    mvn.8b v0, v0
75; CHECK-SD-NEXT:    cnt.8b v0, v0
76; CHECK-SD-NEXT:    uaddlp.4h v0, v0
77; CHECK-SD-NEXT:    uaddlp.2s v0, v0
78; CHECK-SD-NEXT:    uaddlp.1d v0, v0
79; CHECK-SD-NEXT:    ret
80;
81; CHECK-GI-LABEL: test_vclz_u64:
82; CHECK-GI:       // %bb.0:
83; CHECK-GI-NEXT:    fmov x8, d0
84; CHECK-GI-NEXT:    clz x8, x8
85; CHECK-GI-NEXT:    fmov d0, x8
86; CHECK-GI-NEXT:    ret
87  %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
88  ret <1 x i64> %vclz1.i
89}
90
; Same IR as test_vclz_u64 under a different function name: one call to
; llvm.ctlz.v1i64 on a <1 x i64> argument with the i1 flag set to false.
; NOTE(review): the _s64/_u64 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Expected output matches the u64 variant instruction for instruction:
;  - default run: shift-right/OR by 1, 2, 4, 8, 16, 32, then mvn, cnt and
;    three uaddlp steps.
;  - -global-isel run: fmov to x8, scalar clz, fmov back to d0.
91define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp {
92; CHECK-SD-LABEL: test_vclz_s64:
93; CHECK-SD:       // %bb.0:
94; CHECK-SD-NEXT:    ushr d1, d0, #1
95; CHECK-SD-NEXT:    orr.8b v0, v0, v1
96; CHECK-SD-NEXT:    ushr d1, d0, #2
97; CHECK-SD-NEXT:    orr.8b v0, v0, v1
98; CHECK-SD-NEXT:    ushr d1, d0, #4
99; CHECK-SD-NEXT:    orr.8b v0, v0, v1
100; CHECK-SD-NEXT:    ushr d1, d0, #8
101; CHECK-SD-NEXT:    orr.8b v0, v0, v1
102; CHECK-SD-NEXT:    ushr d1, d0, #16
103; CHECK-SD-NEXT:    orr.8b v0, v0, v1
104; CHECK-SD-NEXT:    ushr d1, d0, #32
105; CHECK-SD-NEXT:    orr.8b v0, v0, v1
106; CHECK-SD-NEXT:    mvn.8b v0, v0
107; CHECK-SD-NEXT:    cnt.8b v0, v0
108; CHECK-SD-NEXT:    uaddlp.4h v0, v0
109; CHECK-SD-NEXT:    uaddlp.2s v0, v0
110; CHECK-SD-NEXT:    uaddlp.1d v0, v0
111; CHECK-SD-NEXT:    ret
112;
113; CHECK-GI-LABEL: test_vclz_s64:
114; CHECK-GI:       // %bb.0:
115; CHECK-GI-NEXT:    fmov x8, d0
116; CHECK-GI-NEXT:    clz x8, x8
117; CHECK-GI-NEXT:    fmov d0, x8
118; CHECK-GI-NEXT:    ret
119  %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
120  ret <1 x i64> %vclz1.i
121}
122
; Per-lane count-leading-zeros on a 128-bit vector of sixteen i8 elements.
; The body is a single call to llvm.ctlz.v16i8 with the i1 flag set to false.
; Both llc invocations share the assertions and are expected to emit one
; clz.16b followed by ret.
123define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
124; CHECK-LABEL: test_vclzq_u8:
125; CHECK:       // %bb.0:
126; CHECK-NEXT:    clz.16b v0, v0
127; CHECK-NEXT:    ret
128  %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
129  ret <16 x i8> %vclz.i
130}
131
; Same IR as test_vclzq_u8 under a different function name: one call to
; llvm.ctlz.v16i8 on a <16 x i8> argument with the i1 flag set to false.
; NOTE(review): the _s8/_u8 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Both llc invocations are expected to emit a single clz.16b.
132define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
133; CHECK-LABEL: test_vclzq_s8:
134; CHECK:       // %bb.0:
135; CHECK-NEXT:    clz.16b v0, v0
136; CHECK-NEXT:    ret
137  %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
138  ret <16 x i8> %vclz.i
139}
140
; Per-lane count-leading-zeros on a 128-bit vector of eight i16 elements.
; The body is a single call to llvm.ctlz.v8i16 with the i1 flag set to false.
; Both llc invocations share the assertions and are expected to emit one
; clz.8h followed by ret.
141define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
142; CHECK-LABEL: test_vclzq_u16:
143; CHECK:       // %bb.0:
144; CHECK-NEXT:    clz.8h v0, v0
145; CHECK-NEXT:    ret
146  %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
147  ret <8 x i16> %vclz1.i
148}
149
; Same IR as test_vclzq_u16 under a different function name: one call to
; llvm.ctlz.v8i16 on an <8 x i16> argument with the i1 flag set to false.
; NOTE(review): the _s16/_u16 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Both llc invocations are expected to emit a single clz.8h.
150define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
151; CHECK-LABEL: test_vclzq_s16:
152; CHECK:       // %bb.0:
153; CHECK-NEXT:    clz.8h v0, v0
154; CHECK-NEXT:    ret
155  %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
156  ret <8 x i16> %vclz1.i
157}
158
; Per-lane count-leading-zeros on a 128-bit vector of four i32 elements.
; The body is a single call to llvm.ctlz.v4i32 with the i1 flag set to false.
; Both llc invocations share the assertions and are expected to emit one
; clz.4s followed by ret.
159define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
160; CHECK-LABEL: test_vclzq_u32:
161; CHECK:       // %bb.0:
162; CHECK-NEXT:    clz.4s v0, v0
163; CHECK-NEXT:    ret
164  %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
165  ret <4 x i32> %vclz1.i
166}
167
; Same IR as test_vclzq_u32 under a different function name: one call to
; llvm.ctlz.v4i32 on a <4 x i32> argument with the i1 flag set to false.
; NOTE(review): the _s32/_u32 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Both llc invocations are expected to emit a single clz.4s.
168define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
169; CHECK-LABEL: test_vclzq_s32:
170; CHECK:       // %bb.0:
171; CHECK-NEXT:    clz.4s v0, v0
172; CHECK-NEXT:    ret
173  %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
174  ret <4 x i32> %vclz1.i
175}
176
; Per-lane count-leading-zeros on a 128-bit vector of two i64 elements via
; llvm.ctlz.v2i64 with the i1 flag set to false. The two llc invocations have
; separate expectations, and neither contains a vector clz instruction:
;  - default run: the vector is ORed with itself shifted right (ushr.2d) by
;    1, 2, 4, 8, 16 and 32, then inverted (mvn.16b), bit-counted per byte
;    (cnt.16b) and summed with three widening pairwise adds
;    (uaddlp .8h, .4s, .2d).
;  - -global-isel run: lane 0 is read with fmov and lane 1 with mov.d, each
;    is counted with the scalar clz, and the results are inserted back into
;    v0[0] and v0[1].
177define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp {
178; CHECK-SD-LABEL: test_vclzq_u64:
179; CHECK-SD:       // %bb.0:
180; CHECK-SD-NEXT:    ushr.2d v1, v0, #1
181; CHECK-SD-NEXT:    orr.16b v0, v0, v1
182; CHECK-SD-NEXT:    ushr.2d v1, v0, #2
183; CHECK-SD-NEXT:    orr.16b v0, v0, v1
184; CHECK-SD-NEXT:    ushr.2d v1, v0, #4
185; CHECK-SD-NEXT:    orr.16b v0, v0, v1
186; CHECK-SD-NEXT:    ushr.2d v1, v0, #8
187; CHECK-SD-NEXT:    orr.16b v0, v0, v1
188; CHECK-SD-NEXT:    ushr.2d v1, v0, #16
189; CHECK-SD-NEXT:    orr.16b v0, v0, v1
190; CHECK-SD-NEXT:    ushr.2d v1, v0, #32
191; CHECK-SD-NEXT:    orr.16b v0, v0, v1
192; CHECK-SD-NEXT:    mvn.16b v0, v0
193; CHECK-SD-NEXT:    cnt.16b v0, v0
194; CHECK-SD-NEXT:    uaddlp.8h v0, v0
195; CHECK-SD-NEXT:    uaddlp.4s v0, v0
196; CHECK-SD-NEXT:    uaddlp.2d v0, v0
197; CHECK-SD-NEXT:    ret
198;
199; CHECK-GI-LABEL: test_vclzq_u64:
200; CHECK-GI:       // %bb.0:
201; CHECK-GI-NEXT:    fmov x8, d0
202; CHECK-GI-NEXT:    mov.d x9, v0[1]
203; CHECK-GI-NEXT:    clz x8, x8
204; CHECK-GI-NEXT:    mov.d v0[0], x8
205; CHECK-GI-NEXT:    clz x8, x9
206; CHECK-GI-NEXT:    mov.d v0[1], x8
207; CHECK-GI-NEXT:    ret
208  %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
209  ret <2 x i64> %vclz1.i
210}
211
; Same IR as test_vclzq_u64 under a different function name: one call to
; llvm.ctlz.v2i64 on a <2 x i64> argument with the i1 flag set to false.
; NOTE(review): the _s64/_u64 naming presumably mirrors signed/unsigned source
; intrinsics; the IR does not encode signedness.
; Expected output matches the u64 variant instruction for instruction:
;  - default run: ushr.2d/orr.16b by 1, 2, 4, 8, 16, 32, then mvn.16b,
;    cnt.16b and three uaddlp steps.
;  - -global-isel run: both lanes are extracted to x8/x9, counted with the
;    scalar clz, and inserted back into v0[0] and v0[1].
212define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp {
213; CHECK-SD-LABEL: test_vclzq_s64:
214; CHECK-SD:       // %bb.0:
215; CHECK-SD-NEXT:    ushr.2d v1, v0, #1
216; CHECK-SD-NEXT:    orr.16b v0, v0, v1
217; CHECK-SD-NEXT:    ushr.2d v1, v0, #2
218; CHECK-SD-NEXT:    orr.16b v0, v0, v1
219; CHECK-SD-NEXT:    ushr.2d v1, v0, #4
220; CHECK-SD-NEXT:    orr.16b v0, v0, v1
221; CHECK-SD-NEXT:    ushr.2d v1, v0, #8
222; CHECK-SD-NEXT:    orr.16b v0, v0, v1
223; CHECK-SD-NEXT:    ushr.2d v1, v0, #16
224; CHECK-SD-NEXT:    orr.16b v0, v0, v1
225; CHECK-SD-NEXT:    ushr.2d v1, v0, #32
226; CHECK-SD-NEXT:    orr.16b v0, v0, v1
227; CHECK-SD-NEXT:    mvn.16b v0, v0
228; CHECK-SD-NEXT:    cnt.16b v0, v0
229; CHECK-SD-NEXT:    uaddlp.8h v0, v0
230; CHECK-SD-NEXT:    uaddlp.4s v0, v0
231; CHECK-SD-NEXT:    uaddlp.2d v0, v0
232; CHECK-SD-NEXT:    ret
233;
234; CHECK-GI-LABEL: test_vclzq_s64:
235; CHECK-GI:       // %bb.0:
236; CHECK-GI-NEXT:    fmov x8, d0
237; CHECK-GI-NEXT:    mov.d x9, v0[1]
238; CHECK-GI-NEXT:    clz x8, x8
239; CHECK-GI-NEXT:    mov.d v0[0], x8
240; CHECK-GI-NEXT:    clz x8, x9
241; CHECK-GI-NEXT:    mov.d v0[1], x8
242; CHECK-GI-NEXT:    ret
243  %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
244  ret <2 x i64> %vclz1.i
245}
246
; Declarations of the llvm.ctlz overloads called by the tests in this file:
; four 128-bit vector types (v2i64, v4i32, v8i16, v16i8) followed by four
; 64-bit vector types (v1i64, v2i32, v4i16, v8i8). Each takes the vector
; operand plus an i1 flag; every call site in this file passes false.
; NOTE(review): per the LLVM LangRef the i1 operand is is_zero_poison, so
; false should mean a zero input yields a defined result -- confirm against
; the LangRef revision in use.
247declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
248declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
249declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
250declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
251declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>, i1) nounwind readnone
252declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
253declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
254declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
255