; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK

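; Lane i of the result is (%index + i) u< %tc, so the basic scalable lowering
; is vid.v (lane indices), vsaddu.vx (saturating add of the start index, which
; keeps the sum from wrapping), and vmsltu.vx (unsigned compare with the trip
; count).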
define <vscale x 1 x i1> @get_lane_mask(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: get_lane_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 %index, i64 %tc)
  ret <vscale x 1 x i1> %mask
}

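; With a start index of 0, the vsaddu.vx folds away.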
define <vscale x 1 x i1> @constant_zero_index(ptr %p, i64 %tc) {
; CHECK-LABEL: constant_zero_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmsltu.vx v0, v8, a1
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 %tc)
  ret <vscale x 1 x i1> %mask
}

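; A non-zero constant start index is materialized with li and still added in.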
define <vscale x 1 x i1> @constant_nonzero_index(ptr %p, i64 %tc) {
; CHECK-LABEL: constant_nonzero_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 24
; CHECK-NEXT:    vsaddu.vx v8, v8, a0
; CHECK-NEXT:    vmsltu.vx v0, v8, a1
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 24, i64 %tc)
  ret <vscale x 1 x i1> %mask
}

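; A constant trip count is likewise materialized into a scalar register for
; the compare.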
define <vscale x 1 x i1> @constant_tripcount(ptr %p, i64 %index) {
; CHECK-LABEL: constant_tripcount:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    li a0, 1024
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 %index, i64 1024)
  ret <vscale x 1 x i1> %mask
}

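; With both operands constant, only the compare against the trip count remains.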
define <vscale x 1 x i1> @constant_both(ptr %p) {
; CHECK-LABEL: constant_both:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 1024
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 1024)
  ret <vscale x 1 x i1> %mask
}

; Architectural max VLEN=64k, so <vscale x 1 x i1> has at most 1024 lanes and
; TC=2048 produces the same mask "as-if" TC=1024 (every lane is active).
define <vscale x 1 x i1> @above_maxvl(ptr %p) {
; CHECK-LABEL: above_maxvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 11
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 2048)
  ret <vscale x 1 x i1> %mask
}

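; Fixed-length results use an immediate AVL via vsetivli, with LMUL scaled to
; the vector length.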
define <2 x i1> @fv2(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %tc)
  ret <2 x i1> %mask
}

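; 8 x e64 needs an m4 register group at the minimum VLEN of 128.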
define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %tc)
  ret <8 x i1> %mask
}

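; Beyond 16 x e64 (the largest m8 group at VLEN=128), the mask is computed in
; 16-element pieces: the upper piece's lane offsets are loaded from a constant
; pool as i8 and sign-extended (vsext.vf8) to i64, and the partial masks are
; concatenated with vslideup.vi.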
define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vsaddu.vx v16, v16, a1
; CHECK-NEXT:    vmsltu.vx v0, v16, a2
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    ret
  %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
  ret <32 x i1> %mask
}

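; 64 x i64 splits into four 16-element pieces, using three constant-pool
; offset vectors; the partial masks are spliced together with vslideup.vi at
; offsets 2, 4, and 6.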
define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT:    vle8.v v17, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vle8.v v18, (a0)
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v16
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v17
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v17, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v17, 4
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vsext.vf8 v8, v18
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 6
; CHECK-NEXT:    ret
  %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
  ret <64 x i1> %mask
}

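; 128 x i64 splits into eight 16-element pieces, using seven constant-pool
; offset vectors; the partial masks are assembled into two halves and joined
; with a final vslideup.vi at offset 8.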
define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_1)
; CHECK-NEXT:    vle8.v v17, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT:    vle8.v v18, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vle8.v v19, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT:    vle8.v v20, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT:    vle8.v v21, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vle8.v v22, (a0)
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v16
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v17
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v17, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v18
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v18, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v19
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v19, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v20
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v20, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v21
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v21, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v22
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v22, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v17, v16, 2
; CHECK-NEXT:    vslideup.vi v0, v20, 2
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v17, v18, 4
; CHECK-NEXT:    vslideup.vi v0, v21, 4
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v17, v19, 6
; CHECK-NEXT:    vslideup.vi v0, v22, 6
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v17, 8
; CHECK-NEXT:    ret
  %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
  ret <128 x i1> %mask
}

declare <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64, i64)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)
declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64, i64)
declare <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64, i64)