xref: /llvm-project/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts.ll (revision 3b786f2c7608964b4481b9fcd24ab29c7c42243d)
1; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
2
3; FIXED WIDTH
4
; Codegen test: llvm.experimental.cttz.elts on an <8 x i1> mask with an i8
; result and the flag operand set to 0.
; The expected assembly is branch-free:
;   - shl #7 followed by cmlt #0 expands bit 0 of every byte lane into an
;     all-ones or all-zeros lane;
;   - that lane mask is ANDed with the constant-pool vector {8,7,...,1}, so a
;     set lane i contributes the value 8 - i;
;   - umaxv selects the largest contribution (the lowest set lane), and the
;     final subtraction from 8 converts it back into that lane's index.
; When no lane is set the maximum is 0, so the expected code returns 8.
5define i8 @ctz_v8i1(<8 x i1> %a) {
6; CHECK-LABEL: .LCPI0_0:
7; CHECK-NEXT:   .byte 8
8; CHECK-NEXT:   .byte 7
9; CHECK-NEXT:   .byte 6
10; CHECK-NEXT:   .byte 5
11; CHECK-NEXT:   .byte 4
12; CHECK-NEXT:   .byte 3
13; CHECK-NEXT:   .byte 2
14; CHECK-NEXT:   .byte 1
15; CHECK-LABEL: ctz_v8i1:
16; CHECK:       // %bb.0:
17; CHECK-NEXT:    shl v0.8b, v0.8b, #7
18; CHECK-NEXT:    adrp x8, .LCPI0_0
19; CHECK-NEXT:    mov w9, #8 // =0x8
20; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
21; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
22; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
23; CHECK-NEXT:    umaxv b0, v0.8b
24; CHECK-NEXT:    fmov w8, s0
25; CHECK-NEXT:    sub w0, w9, w8
26; CHECK-NEXT:    ret
; The trailing i1 0 is the flag operand; ctz_v8i1_poison passes 1 instead.
27  %res = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 0)
28  ret i8 %res
29}
30
; Codegen test: llvm.experimental.cttz.elts on a <16 x i1> mask with an i32
; result and the flag operand set to 0.
; The expected assembly uses the full 128-bit vector register: the lane mask
; built by shl #7 / cmlt #0 is ANDed with the constant-pool vector
; {16,15,...,1} (loaded as a q register), reduced with umaxv, and subtracted
; from 16. The difference is then masked with #0xff before being returned in
; w0, i.e. only its low 8 bits are kept for the i32 result.
31define i32 @ctz_v16i1(<16 x i1> %a) {
32; CHECK-LABEL: .LCPI1_0:
33; CHECK-NEXT:   .byte 16
34; CHECK-NEXT:   .byte 15
35; CHECK-NEXT:   .byte 14
36; CHECK-NEXT:   .byte 13
37; CHECK-NEXT:   .byte 12
38; CHECK-NEXT:   .byte 11
39; CHECK-NEXT:   .byte 10
40; CHECK-NEXT:   .byte 9
41; CHECK-NEXT:   .byte 8
42; CHECK-NEXT:   .byte 7
43; CHECK-NEXT:   .byte 6
44; CHECK-NEXT:   .byte 5
45; CHECK-NEXT:   .byte 4
46; CHECK-NEXT:   .byte 3
47; CHECK-NEXT:   .byte 2
48; CHECK-NEXT:   .byte 1
49; CHECK-LABEL: ctz_v16i1:
50; CHECK:       // %bb.0:
51; CHECK-NEXT:    shl v0.16b, v0.16b, #7
52; CHECK-NEXT:    adrp x8, .LCPI1_0
53; CHECK-NEXT:    mov w9, #16 // =0x10
54; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
55; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
56; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
57; CHECK-NEXT:    umaxv b0, v0.16b
58; CHECK-NEXT:    fmov w8, s0
59; CHECK-NEXT:    sub w8, w9, w8
60; CHECK-NEXT:    and w0, w8, #0xff
61; CHECK-NEXT:    ret
62  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
63  ret i32 %res
64}
65
; Codegen test: llvm.experimental.cttz.elts on a <4 x i32> input (elements
; wider than i1) with an i16 result and the flag operand set to 0.
; The expected assembly first turns each element into a lane mask with
; cmtst v0, v0, v0 (all-ones where the element is non-zero), narrows the
; lanes to 16 bits with xtn, and then follows the same scheme as the i1
; tests: AND with the constant-pool vector {4,3,2,1} (halfwords), umaxv
; across the four halfword lanes, and subtraction from 4. The difference is
; masked with #0xff before being returned in w0.
66define i16 @ctz_v4i32(<4 x i32> %a) {
67; CHECK-LABEL: .LCPI2_0:
68; CHECK-NEXT:   .hword 4
69; CHECK-NEXT:   .hword 3
70; CHECK-NEXT:   .hword 2
71; CHECK-NEXT:   .hword 1
72; CHECK-LABEL: ctz_v4i32:
73; CHECK:       // %bb.0:
74; CHECK-NEXT:    cmtst v0.4s, v0.4s, v0.4s
75; CHECK-NEXT:    adrp x8, .LCPI2_0
76; CHECK-NEXT:    mov w9, #4 // =0x4
77; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI2_0]
78; CHECK-NEXT:    xtn v0.4h, v0.4s
79; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
80; CHECK-NEXT:    umaxv h0, v0.4h
81; CHECK-NEXT:    fmov w8, s0
82; CHECK-NEXT:    sub w8, w9, w8
83; CHECK-NEXT:    and w0, w8, #0xff
84; CHECK-NEXT:    ret
85  %res = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 0)
86  ret i16 %res
87}
88
; Codegen test: llvm.experimental.cttz.elts on an <8 x i1> mask with a
; non-power-of-two result type (i7) and the flag operand set to 0.
; The expected assembly is the same instruction sequence as for the i8
; variant (ctz_v8i1); only the constant-pool label differs. The lane mask is
; ANDed with {8,7,...,1}, reduced with umaxv, and subtracted from 8, with no
; extra masking of the result before the return.
89define i7 @ctz_i7_v8i1(<8 x i1> %a) {
90; CHECK-LABEL: .LCPI3_0:
91; CHECK-NEXT:   .byte 8
92; CHECK-NEXT:   .byte 7
93; CHECK-NEXT:   .byte 6
94; CHECK-NEXT:   .byte 5
95; CHECK-NEXT:   .byte 4
96; CHECK-NEXT:   .byte 3
97; CHECK-NEXT:   .byte 2
98; CHECK-NEXT:   .byte 1
99; CHECK-LABEL: ctz_i7_v8i1:
100; CHECK:       // %bb.0:
101; CHECK-NEXT:    shl v0.8b, v0.8b, #7
102; CHECK-NEXT:    adrp x8, .LCPI3_0
103; CHECK-NEXT:    mov w9, #8 // =0x8
104; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI3_0]
105; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
106; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
107; CHECK-NEXT:    umaxv b0, v0.8b
108; CHECK-NEXT:    fmov w8, s0
109; CHECK-NEXT:    sub w0, w9, w8
110; CHECK-NEXT:    ret
111  %res = call i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1> %a, i1 0)
112  ret i7 %res
113}
114
115; ZERO IS POISON
116
; Codegen test: same input and result types as ctz_v8i1, but the flag operand
; of llvm.experimental.cttz.elts is 1 (the "zero is poison" form).
; The expected assembly is instruction-for-instruction the same as for the
; flag = 0 variant (only the constant-pool label differs), so these
; expectations show no specialised lowering for the poison case.
117define i8 @ctz_v8i1_poison(<8 x i1> %a) {
118; CHECK-LABEL: .LCPI4_0:
119; CHECK-NEXT:   .byte 8
120; CHECK-NEXT:   .byte 7
121; CHECK-NEXT:   .byte 6
122; CHECK-NEXT:   .byte 5
123; CHECK-NEXT:   .byte 4
124; CHECK-NEXT:   .byte 3
125; CHECK-NEXT:   .byte 2
126; CHECK-NEXT:   .byte 1
127; CHECK-LABEL: ctz_v8i1_poison:
128; CHECK:       // %bb.0:
129; CHECK-NEXT:    shl v0.8b, v0.8b, #7
130; CHECK-NEXT:    adrp x8, .LCPI4_0
131; CHECK-NEXT:    mov w9, #8 // =0x8
132; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI4_0]
133; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
134; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
135; CHECK-NEXT:    umaxv b0, v0.8b
136; CHECK-NEXT:    fmov w8, s0
137; CHECK-NEXT:    sub w0, w9, w8
138; CHECK-NEXT:    ret
; The trailing i1 1 is what distinguishes this test from ctz_v8i1.
139  %res = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 1)
140  ret i8 %res
141}
142
; Declarations of the llvm.experimental.cttz.elts overloads called by the
; tests above. The name suffix encodes <result type>.<input vector type>;
; the trailing i1 parameter is the flag operand (the tests pass 0 in the
; "FIXED WIDTH" section and 1 in the "ZERO IS POISON" section).
143declare i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1>, i1)
144declare i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1>, i1)
145declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
146declare i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32>, i1)
147