; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Check that the vscale call is recognised by the load/store reg/reg
; pattern and partially folded, with the rest hoisted out of the loop.

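; An illustrative sketch of the folding the CHECK lines below verify
; (register numbers are just what the autogenerated checks happen to use):
; the loop increment '%1 = shl i64 %0, 2' (vscale * 4) is materialised once
; outside the loop, because CNTW returns the number of 32-bit elements in
; one vector, i.e. vscale * 4; the GEP's implicit '* 4' byte scaling for
; i32 elements folds into the scaled reg/reg addressing mode:
;
;   cntw x9                                  // x9 = vscale * 4, hoisted
; .LBB0_1:
;   ld1w { z0.s }, p0/z, [x0, x8, lsl #2]    // index scaled by 'lsl #2'
;   adds x8, x8, x9                          // index += vscale * 4
;   b.ne .LBB0_1
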
define void @ld1w_reg_loop(ptr %addr) {
; CHECK-LABEL: ld1w_reg_loop:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    cntw x9
; CHECK-NEXT:  .LBB0_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    adds x8, x8, x9
; CHECK-NEXT:    b.ne .LBB0_1
; CHECK-NEXT:  // %bb.2: // %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2 ; vscale * 4: elements per vector, used as the loop step
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %2 = getelementptr inbounds [32000 x i32], ptr %addr, i64 0, i64 %index
  %load = load volatile <vscale x 4 x i32>, ptr %2, align 16
  %index.next = add i64 %index, %1
  %3 = icmp eq i64 %index.next, 0 ; loop until the index wraps back to zero
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}

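; The store variant exercises the same pattern: the element index in x8 is
; scaled by 'lsl #2' inside the st1w addressing mode, and the hoisted cntw
; again supplies the per-iteration increment.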
define void @st1w_reg_loop(ptr %addr, <vscale x 4 x i32> %val) {
; CHECK-LABEL: st1w_reg_loop:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    cntw x9
; CHECK-NEXT:  .LBB1_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT:    adds x8, x8, x9
; CHECK-NEXT:    b.ne .LBB1_1
; CHECK-NEXT:  // %bb.2: // %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %2 = getelementptr inbounds [32000 x i32], ptr %addr, i64 0, i64 %index
  store volatile <vscale x 4 x i32> %val, ptr %2, align 16
  %index.next = add i64 %index, %1
  %3 = icmp eq i64 %index.next, 0
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}

declare i64 @llvm.vscale.i64()