; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -mattr=+v < %s | FileCheck %s -check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s -check-prefix=RV64

; WITH VSCALE RANGE

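; With vscale_range known, the element count fits in i16, so the whole
; sequence runs at e16: vid.v builds a step vector, vmadd.vx by -1 folds it
; into a splat of VL to give VL - i per lane, inactive lanes are zeroed, and
; vredmaxu.vs yields VL - (first nonzero index). Subtracting from VL recovers
; the index, and an all-zero input naturally produces VL, the element count.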
define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-LABEL: ctz_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT:    vid.v v10
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v8, 0
; RV32-NEXT:    srli a0, a0, 1
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vmadd.vx v10, a1, v8
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    slli a0, a0, 16
; RV32-NEXT:    srli a0, a0, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT:    vid.v v10
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vmadd.vx v10, a1, v8
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    vmerge.vvm v8, v8, v10, v0
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    slli a0, a0, 48
; RV64-NEXT:    srli a0, a0, 48
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0)
  ret i32 %res
}

; NO VSCALE RANGE

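; Without vscale_range the i64 result cannot be narrowed, so the same
; step-vector/reduce sequence runs at e64. On RV32 the element count
; (vscale * 8) is computed with a __muldi3 libcall, forcing v8 to be
; spilled and reloaded around the call.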
define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-LABEL: ctz_nxv8i1_no_range:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; RV32-NEXT:    addi a0, sp, 32
; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    li a2, 8
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __muldi3
; RV32-NEXT:    sw a0, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a2, sp, 16
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a2), zero
; RV32-NEXT:    vid.v v8
; RV32-NEXT:    li a2, -1
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vl2r.v v24, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v24, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vmadd.vx v8, a2, v16
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vmerge.vim v16, v16, -1, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a2
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    vmv.x.s a4, v8
; RV32-NEXT:    sub a1, a1, a4
; RV32-NEXT:    sub a1, a1, a2
; RV32-NEXT:    sub a0, a0, a3
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 1
; RV32-NEXT:    add sp, sp, a2
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv8i1_no_range:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v16
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vmadd.vx v16, a1, v8
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    sub a0, a0, a1
; RV64-NEXT:    ret
  %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16> %a, i1 0)
  ret i64 %res
}

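; i1 sources lower directly to vfirst.m; vfirst returns -1 when no bit is
; set, so the bgez fallthrough substitutes the element count (2 * vlenb for
; nxv16i1).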
define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    bgez a0, .LBB2_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:  .LBB2_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    bgez a0, .LBB2_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 1
; RV64-NEXT:  .LBB2_2:
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
  ret i32 %res
}

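; With the zero-is-poison flag set there is no defined all-zeroes result,
; so the fallback branch disappears and vfirst.m alone suffices.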
define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1_poison:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv16i1_poison:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
  ret i32 %res
}

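; Fixed-length masks take the same vfirst.m path, with the constant element
; count (16) as the not-found fallback.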
define i32 @ctz_v16i1(<16 x i1> %pg, <16 x i1> %a) {
; RV32-LABEL: ctz_v16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    bgez a0, .LBB4_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 16
; RV32-NEXT:  .LBB4_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    bgez a0, .LBB4_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 16
; RV64-NEXT:  .LBB4_2:
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
  ret i32 %res
}

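; Zero-is-poison again drops the fallback branch.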
define i32 @ctz_v16i1_poison(<16 x i1> %pg, <16 x i1> %a) {
; RV32-LABEL: ctz_v16i1_poison:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v16i1_poison:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
  ret i32 %res
}

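; An i16 result works the same way; the lone mask argument arrives in v0,
; and the fallback is the fixed element count 8.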
define i16 @ctz_v8i1_i16_ret(<8 x i1> %a) {
; RV32-LABEL: ctz_v8i1_i16_ret:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vfirst.m a0, v0
; RV32-NEXT:    bgez a0, .LBB6_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 8
; RV32-NEXT:  .LBB6_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v8i1_i16_ret:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vfirst.m a0, v0
; RV64-NEXT:    bgez a0, .LBB6_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 8
; RV64-NEXT:  .LBB6_2:
; RV64-NEXT:    ret
  %res = call i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1> %a, i1 0)
  ret i16 %res
}

declare i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1>, i1)

attributes #0 = { vscale_range(2,1024) }