xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-useafterloop.ll (revision 54e5de08d4beca8a079703f087a7a80eedb2dff0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
3
; useafterloop: a 64-iteration loop that adds <4 x float> vectors loaded from
; %pSrcA and %pSrcB and stores each sum through a pointer that starts at %pDst.
; All three pointers advance by 4 floats per iteration. The original %pDst is
; returned after the loop, so its value is still needed once the loop is done.
; %blockSize is not referenced by the body.
; The expected assembly below post-increments a copy of the destination
; pointer (r3) inside the loop and returns the untouched r2.
4define nonnull ptr @useafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) {
5; CHECK-LABEL: useafterloop:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    .save {r7, lr}
8; CHECK-NEXT:    push {r7, lr}
9; CHECK-NEXT:    mov.w lr, #64
10; CHECK-NEXT:    mov r3, r2
11; CHECK-NEXT:  .LBB0_1: @ %while.body
12; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
13; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
14; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
15; CHECK-NEXT:    vadd.f32 q0, q1, q0
16; CHECK-NEXT:    vstrb.8 q0, [r3], #16
17; CHECK-NEXT:    le lr, .LBB0_1
18; CHECK-NEXT:  @ %bb.2: @ %while.end
19; CHECK-NEXT:    mov r0, r2
20; CHECK-NEXT:    pop {r7, pc}
21entry:
22  br label %while.body
23
; Loop body: the phis carry the three running pointers and a down-counter that
; starts at 64 and exits the loop when it reaches 0.
24while.body:
25  %pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
26  %pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
27  %pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
28  %blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
29  %0 = load <4 x float>, ptr %pSrcA.addr.012, align 4
30  %1 = load <4 x float>, ptr %pSrcB.addr.011, align 4
31  %2 = fadd fast <4 x float> %1, %0
32  store <4 x float> %2, ptr %pDst.addr.010, align 4
33  %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4
34  %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4
35  %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4
36  %dec = add nsw i32 %blkCnt.09, -1
37  %cmp.not = icmp eq i32 %dec, 0
38  br i1 %cmp.not, label %while.end, label %while.body
39
; The value returned is the incoming %pDst argument, not the incremented
; loop pointer.
40while.end:
41  ret ptr %pDst
42}
43
44
; nouse: same 64-iteration vector-add loop as above (load <4 x float> from
; %pSrcA and %pSrcB, fadd, store through a pointer starting at %pDst, advance
; every pointer by 4 floats). After the loop the function returns the address
; one float past the original %pDst rather than %pDst itself.
; %blockSize is not referenced by the body.
; The expected assembly below forms the result from the untouched r2 with a
; single add of 4 bytes, while the loop post-increments a copy in r3.
45define nonnull ptr @nouse(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) {
46; CHECK-LABEL: nouse:
47; CHECK:       @ %bb.0: @ %entry
48; CHECK-NEXT:    .save {r7, lr}
49; CHECK-NEXT:    push {r7, lr}
50; CHECK-NEXT:    mov.w lr, #64
51; CHECK-NEXT:    mov r3, r2
52; CHECK-NEXT:  .LBB1_1: @ %while.body
53; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
54; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
55; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
56; CHECK-NEXT:    vadd.f32 q0, q1, q0
57; CHECK-NEXT:    vstrb.8 q0, [r3], #16
58; CHECK-NEXT:    le lr, .LBB1_1
59; CHECK-NEXT:  @ %bb.2: @ %while.end
60; CHECK-NEXT:    adds r0, r2, #4
61; CHECK-NEXT:    pop {r7, pc}
62entry:
63  br label %while.body
64
; Loop body: the phis carry the three running pointers and a down-counter that
; starts at 64 and exits the loop when it reaches 0.
65while.body:
66  %pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
67  %pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
68  %pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
69  %blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
70  %0 = load <4 x float>, ptr %pSrcA.addr.012, align 4
71  %1 = load <4 x float>, ptr %pSrcB.addr.011, align 4
72  %2 = fadd fast <4 x float> %1, %0
73  store <4 x float> %2, ptr %pDst.addr.010, align 4
74  %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4
75  %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4
76  %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4
77  %dec = add nsw i32 %blkCnt.09, -1
78  %cmp.not = icmp eq i32 %dec, 0
79  br i1 %cmp.not, label %while.end, label %while.body
80
; The result is computed from the incoming %pDst argument (offset by one
; float), not from the incremented loop pointer.
81while.end:
82  %add.ptr3 = getelementptr inbounds float, ptr %pDst, i32 1
83  ret ptr %add.ptr3
84}
85
; manyusesafterloop: same 64-iteration vector-add loop (load <4 x float> from
; %pSrcA and %pSrcB, fadd, store through a pointer starting at %pDst, advance
; every pointer by 4 floats), but here all three original pointer arguments
; are read again after the loop: one float is loaded from each of %pDst,
; %pSrcA and %pSrcB and their sum is returned.
; %blockSize is not referenced by the body.
; The expected assembly below runs the loop on copies of all three pointers
; (r12, r3, r4) and performs the post-loop scalar loads from r2, r0 and r1.
86define nofpclass(nan inf) float @manyusesafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr nocapture noundef %pDst, i32 noundef %blockSize) {
87; CHECK-LABEL: manyusesafterloop:
88; CHECK:       @ %bb.0: @ %entry
89; CHECK-NEXT:    .save {r4, lr}
90; CHECK-NEXT:    push {r4, lr}
91; CHECK-NEXT:    mov.w lr, #64
92; CHECK-NEXT:    mov r12, r0
93; CHECK-NEXT:    mov r3, r1
94; CHECK-NEXT:    mov r4, r2
95; CHECK-NEXT:  .LBB2_1: @ %while.body
96; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
97; CHECK-NEXT:    vldrw.u32 q0, [r12], #16
98; CHECK-NEXT:    vldrw.u32 q1, [r3], #16
99; CHECK-NEXT:    vadd.f32 q0, q1, q0
100; CHECK-NEXT:    vstrb.8 q0, [r4], #16
101; CHECK-NEXT:    le lr, .LBB2_1
102; CHECK-NEXT:  @ %bb.2: @ %while.end
103; CHECK-NEXT:    vldr s0, [r2]
104; CHECK-NEXT:    vldr s2, [r0]
105; CHECK-NEXT:    vadd.f32 s0, s2, s0
106; CHECK-NEXT:    vldr s2, [r1]
107; CHECK-NEXT:    vadd.f32 s0, s0, s2
108; CHECK-NEXT:    vmov r0, s0
109; CHECK-NEXT:    pop {r4, pc}
110entry:
111  br label %while.body
112
; Loop body: the phis carry the three running pointers and a down-counter that
; starts at 64 and exits the loop when it reaches 0.
113while.body:
114  %pSrcA.addr.016 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
115  %pSrcB.addr.015 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
116  %pDst.addr.014 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
117  %blkCnt.013 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
118  %0 = load <4 x float>, ptr %pSrcA.addr.016, align 4
119  %1 = load <4 x float>, ptr %pSrcB.addr.015, align 4
120  %2 = fadd fast <4 x float> %1, %0
121  store <4 x float> %2, ptr %pDst.addr.014, align 4
122  %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.016, i32 4
123  %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.015, i32 4
124  %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.014, i32 4
125  %dec = add nsw i32 %blkCnt.013, -1
126  %cmp.not = icmp eq i32 %dec, 0
127  br i1 %cmp.not, label %while.end, label %while.body
128
; Post-loop uses: each load goes through an original argument pointer, not
; through the incremented loop pointers.
129while.end:
130  %3 = load float, ptr %pDst, align 4
131  %4 = load float, ptr %pSrcA, align 4
132  %add = fadd fast float %4, %3
133  %5 = load float, ptr %pSrcB, align 4
134  %add5 = fadd fast float %add, %5
135  ret float %add5
136}
137
138