; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -sve-use-scalar-inc-vl=true -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -sve-use-scalar-inc-vl=false -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
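; Check that adding or subtracting multiples of the SVE vector length is
; lowered to INC*/DEC*/ADDVL instructions when scalar increment-by-VL forms
; are in use (+sve2, or -sve-use-scalar-inc-vl=true), and to RDVL/CNT*
; followed by a plain ADD/SUB otherwise (NO_SCALAR_INC).
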
define <vscale x 8 x i16> @inch_vec(<vscale x 8 x i16> %a) {
; NO_SCALAR_INC-LABEL: inch_vec:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    inch z0.h
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: inch_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    inch z0.h
; CHECK-NEXT:    ret
  %vscale = call i16 @llvm.vscale.i16()
  %mul = mul i16 %vscale, 8
  %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
  %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %res = add <vscale x 8 x i16> %a, %vl.splat
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @incw_vec(<vscale x 4 x i32> %a) {
; NO_SCALAR_INC-LABEL: incw_vec:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    incw z0.s
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incw_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    incw z0.s
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, 4
  %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
  %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %res = add <vscale x 4 x i32> %a, %vl.splat
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @incd_vec(<vscale x 2 x i64> %a) {
; NO_SCALAR_INC-LABEL: incd_vec:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    incd z0.d
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incd_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    incd z0.d
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 2
  %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
  %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %res = add <vscale x 2 x i64> %a, %vl.splat
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x i16> @dech_vec(<vscale x 8 x i16> %a) {
; NO_SCALAR_INC-LABEL: dech_vec:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    dech z0.h, all, mul #2
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: dech_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dech z0.h, all, mul #2
; CHECK-NEXT:    ret
  %vscale = call i16 @llvm.vscale.i16()
  %mul = mul i16 %vscale, 16
  %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
  %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %res = sub <vscale x 8 x i16> %a, %vl.splat
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @decw_vec(<vscale x 4 x i32> %a) {
; NO_SCALAR_INC-LABEL: decw_vec:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    decw z0.s, all, mul #4
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decw_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    decw z0.s, all, mul #4
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, 16
  %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
  %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %res = sub <vscale x 4 x i32> %a, %vl.splat
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @decd_vec(<vscale x 2 x i64> %a) {
; NO_SCALAR_INC-LABEL: decd_vec:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    decd z0.d, all, mul #8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decd_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    decd z0.d, all, mul #8
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 16
  %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
  %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %res = sub <vscale x 2 x i64> %a, %vl.splat
  ret <vscale x 2 x i64> %res
}

; NOTE: As there is no need for the predicate pattern, we fall back to using
; ADDVL with its larger immediate range.
define i64 @incb_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: incb_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    rdvl x8, #1
; NO_SCALAR_INC-NEXT:    add x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incb_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x0, x0, #1
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 16
  %add = add i64 %a, %mul
  ret i64 %add
}

define i64 @inch_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: inch_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cnth x8
; NO_SCALAR_INC-NEXT:    add x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: inch_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    inch x0
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 8
  %add = add i64 %a, %mul
  ret i64 %add
}

define i64 @incw_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: incw_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntw x8
; NO_SCALAR_INC-NEXT:    add x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incw_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    incw x0
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 4
  %add = add i64 %a, %mul
  ret i64 %add
}

define i64 @incd_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: incd_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntd x8
; NO_SCALAR_INC-NEXT:    add x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incd_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    incd x0
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 2
  %add = add i64 %a, %mul
  ret i64 %add
}

; NOTE: As there is no need for the predicate pattern, we fall back to using
; ADDVL with its larger immediate range.
define i64 @decb_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decb_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cnth x8, all, mul #4
; NO_SCALAR_INC-NEXT:    sub x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decb_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x0, x0, #-2
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 32
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

define i64 @dech_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cnth x8, all, mul #3
; NO_SCALAR_INC-NEXT:    sub x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: dech_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dech x0, all, mul #3
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 24
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

define i64 @decw_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntw x8, all, mul #3
; NO_SCALAR_INC-NEXT:    sub x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decw_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    decw x0, all, mul #3
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 12
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

define i64 @decd_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i64:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntd x8, all, mul #3
; NO_SCALAR_INC-NEXT:    sub x0, x0, x8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decd_scalar_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    decd x0, all, mul #3
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 6
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

; NOTE: As there is no need for the predicate pattern, we fall back to using
; ADDVL with its larger immediate range.
define i32 @incb_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: incb_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    rdvl x8, #3
; NO_SCALAR_INC-NEXT:    add w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incb_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    addvl x0, x0, #3
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 48
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

define i32 @inch_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: inch_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cnth x8, all, mul #7
; NO_SCALAR_INC-NEXT:    add w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: inch_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    inch x0, all, mul #7
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 56
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

define i32 @incw_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: incw_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntw x8, all, mul #7
; NO_SCALAR_INC-NEXT:    add w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incw_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incw x0, all, mul #7
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 28
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

define i32 @incd_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: incd_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntd x8, all, mul #7
; NO_SCALAR_INC-NEXT:    add w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: incd_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incd x0, all, mul #7
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 14
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

; NOTE: As there is no need for the predicate pattern, we fall back to using
; ADDVL with its larger immediate range.
define i32 @decb_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decb_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cnth x8, all, mul #8
; NO_SCALAR_INC-NEXT:    sub w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decb_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    addvl x0, x0, #-4
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 64
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

define i32 @dech_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cnth x8
; NO_SCALAR_INC-NEXT:    sub w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: dech_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    dech x0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 8
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

define i32 @decw_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntw x8
; NO_SCALAR_INC-NEXT:    sub w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decw_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decw x0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 4
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

define i32 @decd_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i32:
; NO_SCALAR_INC:       // %bb.0:
; NO_SCALAR_INC-NEXT:    cntd x8
; NO_SCALAR_INC-NEXT:    sub w0, w0, w8
; NO_SCALAR_INC-NEXT:    ret
;
; CHECK-LABEL: decd_scalar_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decd x0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 2
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

declare i16 @llvm.vscale.i16()
declare i32 @llvm.vscale.i32()
declare i64 @llvm.vscale.i64()