; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
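; This file checks that @llvm.stepvector lowers to a single vid.v under the
; appropriate vsetvli for each element type and LMUL, and that simple
; arithmetic on a step vector folds into vector ALU ops on the vid.v result.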

declare <vscale x 1 x i8> @llvm.stepvector.nxv1i8()

define <vscale x 1 x i8> @stepvector_nxv1i8() {
; CHECK-LABEL: stepvector_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.stepvector.nxv1i8()
  ret <vscale x 1 x i8> %v
}

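; Note on the expected pattern: `vsetvli a0, zero, ...` with a non-x0
; destination sets VL to VLMAX for the requested SEW/LMUL, and vid.v writes
; each element's index (0, 1, 2, ...), which is exactly a step vector.
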
declare <vscale x 2 x i8> @llvm.stepvector.nxv2i8()

define <vscale x 2 x i8> @stepvector_nxv2i8() {
; CHECK-LABEL: stepvector_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
  ret <vscale x 2 x i8> %v
}

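; <vscale x 3 x i8> has a non-power-of-two element count, so it is presumably
; widened to the next legal container type; it gets the same e8/mf2
; configuration as nxv4i8 below (likewise for nxv3i16, nxv3i32 and nxv3i64
; further down).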
declare <vscale x 3 x i8> @llvm.stepvector.nxv3i8()

define <vscale x 3 x i8> @stepvector_nxv3i8() {
; CHECK-LABEL: stepvector_nxv3i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 3 x i8> @llvm.stepvector.nxv3i8()
  ret <vscale x 3 x i8> %v
}

declare <vscale x 4 x i8> @llvm.stepvector.nxv4i8()

define <vscale x 4 x i8> @stepvector_nxv4i8() {
; CHECK-LABEL: stepvector_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.stepvector.nxv4i8()
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.stepvector.nxv8i8()

define <vscale x 8 x i8> @stepvector_nxv8i8() {
; CHECK-LABEL: stepvector_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.stepvector.nxv8i8()
  ret <vscale x 8 x i8> %v
}

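; The add/mul/shl tests check that arithmetic on step vectors stays folded
; onto the vid.v result: step + step becomes a single vadd.vv of the same
; register, a multiply by 3 a vmul.vx, and a shift by 2 a vsll.vi.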
define <vscale x 8 x i8> @add_stepvector_nxv8i8() {
; CHECK-LABEL: add_stepvector_nxv8i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i8> @llvm.stepvector.nxv8i8()
  %1 = call <vscale x 8 x i8> @llvm.stepvector.nxv8i8()
  %2 = add <vscale x 8 x i8> %0, %1
  ret <vscale x 8 x i8> %2
}

define <vscale x 8 x i8> @mul_stepvector_nxv8i8() {
; CHECK-LABEL: mul_stepvector_nxv8i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 3
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 8 x i8> @llvm.stepvector.nxv8i8()
  %3 = mul <vscale x 8 x i8> %2, splat (i8 3)
  ret <vscale x 8 x i8> %3
}

define <vscale x 8 x i8> @shl_stepvector_nxv8i8() {
; CHECK-LABEL: shl_stepvector_nxv8i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 8 x i8> @llvm.stepvector.nxv8i8()
  %3 = shl <vscale x 8 x i8> %2, splat (i8 2)
  ret <vscale x 8 x i8> %3
}

declare <vscale x 16 x i8> @llvm.stepvector.nxv16i8()

define <vscale x 16 x i8> @stepvector_nxv16i8() {
; CHECK-LABEL: stepvector_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
  ret <vscale x 16 x i8> %v
}

declare <vscale x 32 x i8> @llvm.stepvector.nxv32i8()

define <vscale x 32 x i8> @stepvector_nxv32i8() {
; CHECK-LABEL: stepvector_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x i8> @llvm.stepvector.nxv32i8()
  ret <vscale x 32 x i8> %v
}

declare <vscale x 64 x i8> @llvm.stepvector.nxv64i8()

define <vscale x 64 x i8> @stepvector_nxv64i8() {
; CHECK-LABEL: stepvector_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 64 x i8> @llvm.stepvector.nxv64i8()
  ret <vscale x 64 x i8> %v
}

declare <vscale x 1 x i16> @llvm.stepvector.nxv1i16()

define <vscale x 1 x i16> @stepvector_nxv1i16() {
; CHECK-LABEL: stepvector_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.stepvector.nxv1i16()
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.stepvector.nxv2i16()

define <vscale x 2 x i16> @stepvector_nxv2i16() {
; CHECK-LABEL: stepvector_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.stepvector.nxv2i16()
  ret <vscale x 2 x i16> %v
}

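; i15 is not a legal element type, so it is presumably promoted to i16 and
; nxv2i15 lowers exactly like nxv2i16 above.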
declare <vscale x 2 x i15> @llvm.stepvector.nxv2i15()

define <vscale x 2 x i15> @stepvector_nxv2i15() {
; CHECK-LABEL: stepvector_nxv2i15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x i15> @llvm.stepvector.nxv2i15()
  ret <vscale x 2 x i15> %v
}

declare <vscale x 3 x i16> @llvm.stepvector.nxv3i16()

define <vscale x 3 x i16> @stepvector_nxv3i16() {
; CHECK-LABEL: stepvector_nxv3i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 3 x i16> @llvm.stepvector.nxv3i16()
  ret <vscale x 3 x i16> %v
}

declare <vscale x 4 x i16> @llvm.stepvector.nxv4i16()

define <vscale x 4 x i16> @stepvector_nxv4i16() {
; CHECK-LABEL: stepvector_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.stepvector.nxv4i16()
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.stepvector.nxv8i16()

define <vscale x 8 x i16> @stepvector_nxv8i16() {
; CHECK-LABEL: stepvector_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.stepvector.nxv8i16()
  ret <vscale x 8 x i16> %v
}

declare <vscale x 16 x i16> @llvm.stepvector.nxv16i16()

define <vscale x 16 x i16> @stepvector_nxv16i16() {
; CHECK-LABEL: stepvector_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x i16> @llvm.stepvector.nxv16i16()
  ret <vscale x 16 x i16> %v
}

define <vscale x 16 x i16> @add_stepvector_nxv16i16() {
; CHECK-LABEL: add_stepvector_nxv16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i16> @llvm.stepvector.nxv16i16()
  %1 = call <vscale x 16 x i16> @llvm.stepvector.nxv16i16()
  %2 = add <vscale x 16 x i16> %0, %1
  ret <vscale x 16 x i16> %2
}

define <vscale x 16 x i16> @mul_stepvector_nxv16i16() {
; CHECK-LABEL: mul_stepvector_nxv16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 3
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i16> @llvm.stepvector.nxv16i16()
  %3 = mul <vscale x 16 x i16> %2, splat (i16 3)
  ret <vscale x 16 x i16> %3
}

define <vscale x 16 x i16> @shl_stepvector_nxv16i16() {
; CHECK-LABEL: shl_stepvector_nxv16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i16> @llvm.stepvector.nxv16i16()
  %3 = shl <vscale x 16 x i16> %2, splat (i16 2)
  ret <vscale x 16 x i16> %3
}

declare <vscale x 32 x i16> @llvm.stepvector.nxv32i16()

define <vscale x 32 x i16> @stepvector_nxv32i16() {
; CHECK-LABEL: stepvector_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x i16> @llvm.stepvector.nxv32i16()
  ret <vscale x 32 x i16> %v
}

declare <vscale x 1 x i32> @llvm.stepvector.nxv1i32()

define <vscale x 1 x i32> @stepvector_nxv1i32() {
; CHECK-LABEL: stepvector_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.stepvector.nxv1i32()
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.stepvector.nxv2i32()

define <vscale x 2 x i32> @stepvector_nxv2i32() {
; CHECK-LABEL: stepvector_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
  ret <vscale x 2 x i32> %v
}

declare <vscale x 3 x i32> @llvm.stepvector.nxv3i32()

define <vscale x 3 x i32> @stepvector_nxv3i32() {
; CHECK-LABEL: stepvector_nxv3i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 3 x i32> @llvm.stepvector.nxv3i32()
  ret <vscale x 3 x i32> %v
}

declare <vscale x 4 x i32> @llvm.stepvector.nxv4i32()

define <vscale x 4 x i32> @stepvector_nxv4i32() {
; CHECK-LABEL: stepvector_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.stepvector.nxv8i32()

define <vscale x 8 x i32> @stepvector_nxv8i32() {
; CHECK-LABEL: stepvector_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.stepvector.nxv8i32()
  ret <vscale x 8 x i32> %v
}

declare <vscale x 16 x i32> @llvm.stepvector.nxv16i32()

define <vscale x 16 x i32> @stepvector_nxv16i32() {
; CHECK-LABEL: stepvector_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
  ret <vscale x 16 x i32> %v
}

define <vscale x 16 x i32> @add_stepvector_nxv16i32() {
; CHECK-LABEL: add_stepvector_nxv16i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
  %1 = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
  %2 = add <vscale x 16 x i32> %0, %1
  ret <vscale x 16 x i32> %2
}

define <vscale x 16 x i32> @mul_stepvector_nxv16i32() {
; CHECK-LABEL: mul_stepvector_nxv16i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 3
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
  %3 = mul <vscale x 16 x i32> %2, splat (i32 3)
  ret <vscale x 16 x i32> %3
}

define <vscale x 16 x i32> @shl_stepvector_nxv16i32() {
; CHECK-LABEL: shl_stepvector_nxv16i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
  %3 = shl <vscale x 16 x i32> %2, splat (i32 2)
  ret <vscale x 16 x i32> %3
}

declare <vscale x 1 x i64> @llvm.stepvector.nxv1i64()

define <vscale x 1 x i64> @stepvector_nxv1i64() {
; CHECK-LABEL: stepvector_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.stepvector.nxv2i64()

define <vscale x 2 x i64> @stepvector_nxv2i64() {
; CHECK-LABEL: stepvector_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
  ret <vscale x 2 x i64> %v
}

declare <vscale x 3 x i64> @llvm.stepvector.nxv3i64()

define <vscale x 3 x i64> @stepvector_nxv3i64() {
; CHECK-LABEL: stepvector_nxv3i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 3 x i64> @llvm.stepvector.nxv3i64()
  ret <vscale x 3 x i64> %v
}

declare <vscale x 4 x i64> @llvm.stepvector.nxv4i64()

define <vscale x 4 x i64> @stepvector_nxv4i64() {
; CHECK-LABEL: stepvector_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
  ret <vscale x 4 x i64> %v
}

declare <vscale x 8 x i64> @llvm.stepvector.nxv8i64()

define <vscale x 8 x i64> @stepvector_nxv8i64() {
; CHECK-LABEL: stepvector_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @add_stepvector_nxv8i64() {
; CHECK-LABEL: add_stepvector_nxv8i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    ret
entry:
  %0 = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
  %1 = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
  %2 = add <vscale x 8 x i64> %0, %1
  ret <vscale x 8 x i64> %2
}

define <vscale x 8 x i64> @mul_stepvector_nxv8i64() {
; CHECK-LABEL: mul_stepvector_nxv8i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 3
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
  %3 = mul <vscale x 8 x i64> %2, splat (i64 3)
  ret <vscale x 8 x i64> %3
}

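; 33333333333 does not fit in 32 bits. RV32 builds the 64-bit constant in
; memory (high word 7, low word via lui/addi) and splats it with a
; zero-strided vlse64.v; RV64 materializes it in a scalar register with a
; lui/addiw/slli/addi sequence and multiplies with vmul.vx.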
define <vscale x 8 x i64> @mul_bigimm_stepvector_nxv8i64() {
; RV32-LABEL: mul_bigimm_stepvector_nxv8i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    li a0, 7
; RV32-NEXT:    lui a1, 797989
; RV32-NEXT:    addi a1, a1, -683
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    vid.v v16
; RV32-NEXT:    vmul.vv v8, v16, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: mul_bigimm_stepvector_nxv8i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v8
; RV64-NEXT:    lui a0, 1987
; RV64-NEXT:    addiw a0, a0, -731
; RV64-NEXT:    slli a0, a0, 12
; RV64-NEXT:    addi a0, a0, -683
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    ret
entry:
  %2 = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
  %3 = mul <vscale x 8 x i64> %2, splat (i64 33333333333)
  ret <vscale x 8 x i64> %3
}

define <vscale x 8 x i64> @shl_stepvector_nxv8i64() {
; CHECK-LABEL: shl_stepvector_nxv8i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    ret
entry:
  %2 = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
  %3 = shl <vscale x 8 x i64> %2, splat (i64 2)
  ret <vscale x 8 x i64> %3
}

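; nxv16i64 needs two LMUL=8 register groups, so the result is split: the high
; half is the low half plus the low half's element count, 8 * vscale, which
; equals vlenb. RV64 adds it with vadd.vx; RV32 lacks 64-bit scalar ops, so
; the addend is splatted from the stack with a zero-strided vlse64.v.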
declare <vscale x 16 x i64> @llvm.stepvector.nxv16i64()

define <vscale x 16 x i64> @stepvector_nxv16i64() {
; RV32-LABEL: stepvector_nxv16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vid.v v8
; RV32-NEXT:    vadd.vv v16, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: stepvector_nxv16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v8
; RV64-NEXT:    vadd.vx v16, v8, a0
; RV64-NEXT:    ret
  %v = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
  ret <vscale x 16 x i64> %v
}

define <vscale x 16 x i64> @add_stepvector_nxv16i64() {
; RV32-LABEL: add_stepvector_nxv16i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vid.v v8
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vadd.vv v16, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: add_stepvector_nxv16i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v8
; RV64-NEXT:    slli a0, a0, 1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vadd.vx v16, v8, a0
; RV64-NEXT:    ret
entry:
  %0 = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
  %1 = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
  %2 = add <vscale x 16 x i64> %0, %1
  ret <vscale x 16 x i64> %2
}

define <vscale x 16 x i64> @mul_stepvector_nxv16i64() {
; RV32-LABEL: mul_stepvector_nxv16i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a1, a0, 1
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vid.v v8
; RV32-NEXT:    li a0, 3
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v16, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: mul_stepvector_nxv16i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v8
; RV64-NEXT:    li a0, 3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    slli a0, a1, 1
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    vadd.vx v16, v8, a0
; RV64-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
  %3 = mul <vscale x 16 x i64> %2, splat (i64 3)
  ret <vscale x 16 x i64> %3
}

define <vscale x 16 x i64> @mul_bigimm_stepvector_nxv16i64() {
; RV32-LABEL: mul_bigimm_stepvector_nxv16i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    li a0, 7
; RV32-NEXT:    lui a1, 797989
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    lui a3, 11557
; RV32-NEXT:    lui a4, 92455
; RV32-NEXT:    addi a1, a1, -683
; RV32-NEXT:    addi a3, a3, -683
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    srli a0, a2, 3
; RV32-NEXT:    addi a1, a4, -1368
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    mulhu a1, a0, a1
; RV32-NEXT:    slli a3, a0, 1
; RV32-NEXT:    slli a0, a0, 6
; RV32-NEXT:    sub a0, a0, a3
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    sw a2, 0(sp)
; RV32-NEXT:    sw a0, 4(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vid.v v24
; RV32-NEXT:    vmul.vv v8, v24, v8
; RV32-NEXT:    vadd.vv v16, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: mul_bigimm_stepvector_nxv16i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    lui a1, 1987
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v8
; RV64-NEXT:    addiw a1, a1, -731
; RV64-NEXT:    slli a1, a1, 12
; RV64-NEXT:    addi a1, a1, -683
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    vadd.vx v16, v8, a0
; RV64-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
  %3 = mul <vscale x 16 x i64> %2, splat (i64 33333333333)
  ret <vscale x 16 x i64> %3
}

define <vscale x 16 x i64> @shl_stepvector_nxv16i64() {
; RV32-LABEL: shl_stepvector_nxv16i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 2
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vid.v v8
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vadd.vv v16, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: shl_stepvector_nxv16i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v8
; RV64-NEXT:    slli a0, a0, 2
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vadd.vx v16, v8, a0
; RV64-NEXT:    ret
entry:
  %2 = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
  %3 = shl <vscale x 16 x i64> %2, splat (i64 2)
  ret <vscale x 16 x i64> %3
}

; The maximum element count is 4 * 2 = 8, so the maximum step value is 7 and
; the high 61 bits of every element are known zero.
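; Put differently: only bits [2:0] of each step value can be nonzero, so the
; and with 0xfffffffffffffff8 is known to produce zero and the whole function
; folds to vmv.v.i v8, 0.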
define <vscale x 2 x i64> @hi_bits_known_zero() vscale_range(2, 4) {
; CHECK-LABEL: hi_bits_known_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    ret
  %step = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
  %and = and <vscale x 2 x i64> %step, splat (i64 u0xfffffffffffffff8)
  ret <vscale x 2 x i64> %and
}

; The multiply by -1 may overflow, so the high bits are no longer known zero
; and the and cannot be folded away.
define <vscale x 2 x i64> @hi_bits_known_zero_overflow() vscale_range(2, 4) {
; CHECK-LABEL: hi_bits_known_zero_overflow:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, -1
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vand.vi v8, v8, -8
; CHECK-NEXT:    ret
  %step = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
  %step.mul = mul <vscale x 2 x i64> %step, splat (i64 u0xffffffffffffffff)
  %and = and <vscale x 2 x i64> %step.mul, splat (i64 u0xfffffffffffffff8)
  ret <vscale x 2 x i64> %and
}

; The step values are all multiples of 8, so the low 3 bits are known zero
; and the and with 7 folds to zero.
define <vscale x 2 x i64> @lo_bits_known_zero() {
; CHECK-LABEL: lo_bits_known_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    ret
  %step = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
  %step.mul = mul <vscale x 2 x i64> %step, splat (i64 8)
  %and = and <vscale x 2 x i64> %step.mul, splat (i64 7)
  ret <vscale x 2 x i64> %and
}