xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/unroll-n-jam-smlad.ll (revision eecb99c5f66c8491766628a2925587e20f3b1dbd)
1; RUN: llc -O3 -mtriple=thumbv7em -mcpu=cortex-m4 %s -o - | FileCheck %s --check-prefix=CHECK-REG-PRESSURE
2; RUN: llc -O3 -mtriple=thumbv7eb %s -o - | FileCheck %s --check-prefix=CHECK-UNSUPPORTED
3; RUN: llc -O3 -mtriple=thumbv8m.main -mattr=+dsp -arm-parallel-dsp-load-limit=20 %s -o - | FileCheck %s --check-prefix=CHECK
4
5; CHECK-UNSUPPORTED-LABEL: unroll_n_jam_smlad
6; CHECK-UNSUPPORTED-NOT: smlad r{{.}}
7
8; Test that the duplicate loads are removed, which allows parallel dsp to find
9; the parallel operations.
10
11; CHECK-LABEL: unroll_n_jam_smlad
; @unroll_n_jam_smlad(res, A, B, N, idx)
;   entry:    stores 0 to res[idx], res[idx|1], res[idx|2], res[idx|3] and
;             computes the row offsets (idx|k) * N for k = 0..3.
;   for.body: per iteration, each of the four accumulators (%A3..%A6) adds
;             four products sext(A[row_k + (j|s)]) * sext(B[j|s]), s = 0..3;
;             j advances by 4 and the counter, which starts at N - (N & 3),
;             drops by 4 until it reaches 0.
;   exit:     stores the four final sums to res[0..3].
12define void @unroll_n_jam_smlad(ptr %res, ptr %A, ptr %B, i32 %N, i32 %idx) {
13entry:
  ; Loop counter start value: N rounded down to a multiple of 4.
14  %xtraiter306.i = and i32 %N, 3
15  %unroll_iter310.i = sub i32 %N, %xtraiter306.i
  ; Rows k = 0..3: clear res[idx|k] and compute the row offset (idx|k) * N.
16  %arrayidx.us.i117.i = getelementptr inbounds i32, ptr %res, i32 %idx
17  store i32 0, ptr %arrayidx.us.i117.i, align 4
18  %mul.us.i118.i = mul i32 %idx, %N
19  %inc11.us.i.i = or disjoint i32 %idx, 1
20  %arrayidx.us.i117.1.i = getelementptr inbounds i32, ptr %res, i32 %inc11.us.i.i
21  store i32 0, ptr %arrayidx.us.i117.1.i, align 4
22  %mul.us.i118.1.i = mul i32 %inc11.us.i.i, %N
23  %inc11.us.i.1.i = or disjoint i32 %idx, 2
24  %arrayidx.us.i117.2.i = getelementptr inbounds i32, ptr %res, i32 %inc11.us.i.1.i
25  store i32 0, ptr %arrayidx.us.i117.2.i, align 4
26  %mul.us.i118.2.i = mul i32 %inc11.us.i.1.i, %N
27  %inc11.us.i.2.i = or disjoint i32 %idx, 3
28  %arrayidx.us.i117.3.i = getelementptr inbounds i32, ptr %res, i32 %inc11.us.i.2.i
29  store i32 0, ptr %arrayidx.us.i117.3.i, align 4
30  %mul.us.i118.3.i = mul i32 %inc11.us.i.2.i, %N
  ; idx + 4; this value has no use anywhere in the function.
31  %inc11.us.i.3.i = add i32 %idx, 4
32  br label %for.body
33
34; TODO: CSE, or something similar, is required to remove the duplicate loads.
; load-limit=20 run: exactly two smlad are expected after the %for.body
; label; the final negative check rejects any further one.
35; CHECK: %for.body
36; CHECK: smlad
37; CHECK: smlad
38; CHECK-NOT: smlad r{{.*}}
39
; Register-pressure run (cortex-m4, no load-limit option on its command line):
; the checked text is llc assembly, so the negative pattern below is the smlad
; mnemonic. An IR-level call to the intrinsic can never appear in assembly and
; would make that check vacuous. After it, exactly five reloads from sp are
; accepted before the loop back-branch.
40; CHECK-REG-PRESSURE: .LBB0_1:
41; CHECK-REG-PRESSURE-NOT: smlad
42; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
43; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
44; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
45; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
46; CHECK-REG-PRESSURE: ldr{{.*}}, [sp
47; CHECK-REG-PRESSURE-NOT: ldr{{.*}}, [sp
48; CHECK-REG-PRESSURE: bne .LBB0_1
49
50for.body:
  ; Four accumulators (%A3..%A6, one per row), the element index j
  ; (%j.026.us.i.i) and the down-counter (%niter335.i).
51  %A3 = phi i32 [ %add9.us.i.3361.i, %for.body ], [ 0, %entry ]
52  %j.026.us.i.i = phi i32 [ %inc.us.i.3362.i, %for.body ], [ 0, %entry ]
53  %A4 = phi i32 [ %add9.us.i.1.3.i, %for.body ], [ 0, %entry ]
54  %A5 = phi i32 [ %add9.us.i.2.3.i, %for.body ], [ 0, %entry ]
55  %A6 = phi i32 [ %add9.us.i.3.3.i, %for.body ], [ 0, %entry ]
56  %niter335.i = phi i32 [ %niter335.nsub.3.i, %for.body ], [ %unroll_iter310.i, %entry ]
  ; Step 0 (element j), rows 0..3. Every row loads B[j] again through its own
  ; getelementptr, and every row recomputes the same j|1 for the next step.
57  %add.us.i.i = add i32 %j.026.us.i.i, %mul.us.i118.i
58  %arrayidx4.us.i.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.i
59  %A7 = load i16, ptr %arrayidx4.us.i.i, align 2
60  %conv.us.i.i = sext i16 %A7 to i32
61  %arrayidx5.us.i.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
62  %A8 = load i16, ptr %arrayidx5.us.i.i, align 2
63  %conv6.us.i.i = sext i16 %A8 to i32
64  %mul7.us.i.i = mul nsw i32 %conv6.us.i.i, %conv.us.i.i
65  %add9.us.i.i = add nsw i32 %mul7.us.i.i, %A3
66  %inc.us.i.i = or disjoint i32 %j.026.us.i.i, 1
67  %add.us.i.1.i = add i32 %j.026.us.i.i, %mul.us.i118.1.i
68  %arrayidx4.us.i.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.i
69  %A9 = load i16, ptr %arrayidx4.us.i.1.i, align 2
70  %conv.us.i.1.i = sext i16 %A9 to i32
71  %arrayidx5.us.i.1.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
72  %B0 = load i16, ptr %arrayidx5.us.i.1.i, align 2
73  %conv6.us.i.1.i = sext i16 %B0 to i32
74  %mul7.us.i.1.i = mul nsw i32 %conv6.us.i.1.i, %conv.us.i.1.i
75  %add9.us.i.1.i = add nsw i32 %mul7.us.i.1.i, %A4
76  %inc.us.i.1.i = or disjoint i32 %j.026.us.i.i, 1
77  %add.us.i.2.i = add i32 %j.026.us.i.i, %mul.us.i118.2.i
78  %arrayidx4.us.i.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.i
79  %B1 = load i16, ptr %arrayidx4.us.i.2.i, align 2
80  %conv.us.i.2.i = sext i16 %B1 to i32
81  %arrayidx5.us.i.2.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
82  %B2 = load i16, ptr %arrayidx5.us.i.2.i, align 2
83  %conv6.us.i.2.i = sext i16 %B2 to i32
84  %mul7.us.i.2.i = mul nsw i32 %conv6.us.i.2.i, %conv.us.i.2.i
85  %add9.us.i.2.i = add nsw i32 %mul7.us.i.2.i, %A5
86  %inc.us.i.2.i = or disjoint i32 %j.026.us.i.i, 1
87  %add.us.i.3.i = add i32 %j.026.us.i.i, %mul.us.i118.3.i
88  %arrayidx4.us.i.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.i
89  %B3 = load i16, ptr %arrayidx4.us.i.3.i, align 2
90  %conv.us.i.3.i = sext i16 %B3 to i32
91  %arrayidx5.us.i.3.i = getelementptr inbounds i16, ptr %B, i32 %j.026.us.i.i
92  %B4 = load i16, ptr %arrayidx5.us.i.3.i, align 2
93  %conv6.us.i.3.i = sext i16 %B4 to i32
94  %mul7.us.i.3.i = mul nsw i32 %conv6.us.i.3.i, %conv.us.i.3.i
95  %add9.us.i.3.i = add nsw i32 %mul7.us.i.3.i, %A6
96  %inc.us.i.3.i = or disjoint i32 %j.026.us.i.i, 1
  ; Step 1 (element j|1), rows 0..3, each continuing its own sum from step 0.
  ; Rows 1..3 index B through their own copy of j|1 (the *.dup loads).
97  %add.us.i.1337.i = add i32 %inc.us.i.i, %mul.us.i118.i
98  %arrayidx4.us.i.1338.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1337.i
99  %B5 = load i16, ptr %arrayidx4.us.i.1338.i, align 2
100  %conv.us.i.1339.i = sext i16 %B5 to i32
101  %arrayidx5.us.i.1340.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.i
102  %B6 = load i16, ptr %arrayidx5.us.i.1340.i, align 2
103  %conv6.us.i.1341.i = sext i16 %B6 to i32
104  %mul7.us.i.1342.i = mul nsw i32 %conv6.us.i.1341.i, %conv.us.i.1339.i
105  %add9.us.i.1343.i = add nsw i32 %mul7.us.i.1342.i, %add9.us.i.i
106  %inc.us.i.1344.i = or disjoint i32 %j.026.us.i.i, 2
107  %add.us.i.1.1.i = add i32 %inc.us.i.1.i, %mul.us.i118.1.i
108  %arrayidx4.us.i.1.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.1.i
109  %B7 = load i16, ptr %arrayidx4.us.i.1.1.i, align 2
110  %conv.us.i.1.1.i = sext i16 %B7 to i32
111  %arrayidx5.us.i.1.1.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1.i
112  %B6.dup = load i16, ptr %arrayidx5.us.i.1.1.i, align 2
113  %conv6.us.i.1.1.i = sext i16 %B6.dup to i32
114  %mul7.us.i.1.1.i = mul nsw i32 %conv6.us.i.1.1.i, %conv.us.i.1.1.i
115  %add9.us.i.1.1.i = add nsw i32 %mul7.us.i.1.1.i, %add9.us.i.1.i
116  %inc.us.i.1.1.i = or disjoint i32 %j.026.us.i.i, 2
117  %add.us.i.2.1.i = add i32 %inc.us.i.2.i, %mul.us.i118.2.i
118  %arrayidx4.us.i.2.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.1.i
119  %B9 = load i16, ptr %arrayidx4.us.i.2.1.i, align 2
120  %conv.us.i.2.1.i = sext i16 %B9 to i32
121  %arrayidx5.us.i.2.1.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2.i
122  %B6.dup.i = load i16, ptr %arrayidx5.us.i.2.1.i, align 2
123  %conv6.us.i.2.1.i = sext i16 %B6.dup.i to i32
124  %mul7.us.i.2.1.i = mul nsw i32 %conv6.us.i.2.1.i, %conv.us.i.2.1.i
125  %add9.us.i.2.1.i = add nsw i32 %mul7.us.i.2.1.i, %add9.us.i.2.i
126  %inc.us.i.2.1.i = or disjoint i32 %j.026.us.i.i, 2
127  %add.us.i.3.1.i = add i32 %inc.us.i.3.i, %mul.us.i118.3.i
128  %arrayidx4.us.i.3.1.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.1.i
129  %B11 = load i16, ptr %arrayidx4.us.i.3.1.i, align 2
130  %conv.us.i.3.1.i = sext i16 %B11 to i32
131  %arrayidx5.us.i.3.1.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.3.i
132  %B6.dup.i.i = load i16, ptr %arrayidx5.us.i.3.1.i, align 2
133  %conv6.us.i.3.1.i = sext i16 %B6.dup.i.i to i32
134  %mul7.us.i.3.1.i = mul nsw i32 %conv6.us.i.3.1.i, %conv.us.i.3.1.i
135  %add9.us.i.3.1.i = add nsw i32 %mul7.us.i.3.1.i, %add9.us.i.3.i
136  %inc.us.i.3.1.i = or disjoint i32 %j.026.us.i.i, 2
  ; Step 2 (element j|2), rows 0..3, same pattern as step 1.
137  %add.us.i.2346.i = add i32 %inc.us.i.1344.i, %mul.us.i118.i
138  %arrayidx4.us.i.2347.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2346.i
139  %B13 = load i16, ptr %arrayidx4.us.i.2347.i, align 2
140  %conv.us.i.2348.i = sext i16 %B13 to i32
141  %arrayidx5.us.i.2349.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1344.i
142  %B14 = load i16, ptr %arrayidx5.us.i.2349.i, align 2
143  %conv6.us.i.2350.i = sext i16 %B14 to i32
144  %mul7.us.i.2351.i = mul nsw i32 %conv6.us.i.2350.i, %conv.us.i.2348.i
145  %add9.us.i.2352.i = add nsw i32 %mul7.us.i.2351.i, %add9.us.i.1343.i
146  %inc.us.i.2353.i = or disjoint i32 %j.026.us.i.i, 3
147  %add.us.i.1.2.i = add i32 %inc.us.i.1.1.i, %mul.us.i118.1.i
148  %arrayidx4.us.i.1.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.2.i
149  %B15 = load i16, ptr %arrayidx4.us.i.1.2.i, align 2
150  %conv.us.i.1.2.i = sext i16 %B15 to i32
151  %arrayidx5.us.i.1.2.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1.1.i
152  %B14.dup = load i16, ptr %arrayidx5.us.i.1.2.i, align 2
153  %conv6.us.i.1.2.i = sext i16 %B14.dup to i32
154  %mul7.us.i.1.2.i = mul nsw i32 %conv6.us.i.1.2.i, %conv.us.i.1.2.i
155  %add9.us.i.1.2.i = add nsw i32 %mul7.us.i.1.2.i, %add9.us.i.1.1.i
156  %inc.us.i.1.2.i = or disjoint i32 %j.026.us.i.i, 3
157  %add.us.i.2.2.i = add i32 %inc.us.i.2.1.i, %mul.us.i118.2.i
158  %arrayidx4.us.i.2.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.2.i
159  %B17 = load i16, ptr %arrayidx4.us.i.2.2.i, align 2
160  %conv.us.i.2.2.i = sext i16 %B17 to i32
161  %arrayidx5.us.i.2.2.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2.1.i
162  %B14.dup.i = load i16, ptr %arrayidx5.us.i.2.2.i, align 2
163  %conv6.us.i.2.2.i = sext i16 %B14.dup.i to i32
164  %mul7.us.i.2.2.i = mul nsw i32 %conv6.us.i.2.2.i, %conv.us.i.2.2.i
165  %add9.us.i.2.2.i = add nsw i32 %mul7.us.i.2.2.i, %add9.us.i.2.1.i
166  %inc.us.i.2.2.i = or disjoint i32 %j.026.us.i.i, 3
167  %add.us.i.3.2.i = add i32 %inc.us.i.3.1.i, %mul.us.i118.3.i
168  %arrayidx4.us.i.3.2.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.2.i
169  %B19 = load i16, ptr %arrayidx4.us.i.3.2.i, align 2
170  %conv.us.i.3.2.i = sext i16 %B19 to i32
171  %arrayidx5.us.i.3.2.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.3.1.i
172  %B14.dup.i.i = load i16, ptr %arrayidx5.us.i.3.2.i, align 2
173  %conv6.us.i.3.2.i = sext i16 %B14.dup.i.i to i32
174  %mul7.us.i.3.2.i = mul nsw i32 %conv6.us.i.3.2.i, %conv.us.i.3.2.i
175  %add9.us.i.3.2.i = add nsw i32 %mul7.us.i.3.2.i, %add9.us.i.3.1.i
176  %inc.us.i.3.2.i = or disjoint i32 %j.026.us.i.i, 3
  ; Step 3 (element j|3), rows 0..3. These four sums are the values fed back
  ; into the accumulator phis; %inc.us.i.3362.i (j + 4) is the next j.
177  %add.us.i.3355.i = add i32 %inc.us.i.2353.i, %mul.us.i118.i
178  %arrayidx4.us.i.3356.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3355.i
179  %B21 = load i16, ptr %arrayidx4.us.i.3356.i, align 2
180  %conv.us.i.3357.i = sext i16 %B21 to i32
181  %arrayidx5.us.i.3358.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2353.i
182  %B22 = load i16, ptr %arrayidx5.us.i.3358.i, align 2
183  %conv6.us.i.3359.i = sext i16 %B22 to i32
184  %mul7.us.i.3360.i = mul nsw i32 %conv6.us.i.3359.i, %conv.us.i.3357.i
185  %add9.us.i.3361.i = add nsw i32 %mul7.us.i.3360.i, %add9.us.i.2352.i
186  %inc.us.i.3362.i = add i32 %j.026.us.i.i, 4
187  %add.us.i.1.3.i = add i32 %inc.us.i.1.2.i, %mul.us.i118.1.i
188  %arrayidx4.us.i.1.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.1.3.i
189  %B23 = load i16, ptr %arrayidx4.us.i.1.3.i, align 2
190  %conv.us.i.1.3.i = sext i16 %B23 to i32
191  %arrayidx5.us.i.1.3.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.1.2.i
192  %B22.dup = load i16, ptr %arrayidx5.us.i.1.3.i, align 2
193  %conv6.us.i.1.3.i = sext i16 %B22.dup to i32
194  %mul7.us.i.1.3.i = mul nsw i32 %conv6.us.i.1.3.i, %conv.us.i.1.3.i
195  %add9.us.i.1.3.i = add nsw i32 %mul7.us.i.1.3.i, %add9.us.i.1.2.i
196  %add.us.i.2.3.i = add i32 %inc.us.i.2.2.i, %mul.us.i118.2.i
197  %arrayidx4.us.i.2.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.2.3.i
198  %B25 = load i16, ptr %arrayidx4.us.i.2.3.i, align 2
199  %conv.us.i.2.3.i = sext i16 %B25 to i32
200  %arrayidx5.us.i.2.3.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.2.2.i
201  %B22.dup.i = load i16, ptr %arrayidx5.us.i.2.3.i, align 2
202  %conv6.us.i.2.3.i = sext i16 %B22.dup.i to i32
203  %mul7.us.i.2.3.i = mul nsw i32 %conv6.us.i.2.3.i, %conv.us.i.2.3.i
204  %add9.us.i.2.3.i = add nsw i32 %mul7.us.i.2.3.i, %add9.us.i.2.2.i
205  %add.us.i.3.3.i = add i32 %inc.us.i.3.2.i, %mul.us.i118.3.i
206  %arrayidx4.us.i.3.3.i = getelementptr inbounds i16, ptr %A, i32 %add.us.i.3.3.i
207  %B27 = load i16, ptr %arrayidx4.us.i.3.3.i, align 2
208  %conv.us.i.3.3.i = sext i16 %B27 to i32
209  %arrayidx5.us.i.3.3.i = getelementptr inbounds i16, ptr %B, i32 %inc.us.i.3.2.i
210  %B22.dup.i.i = load i16, ptr %arrayidx5.us.i.3.3.i, align 2
211  %conv6.us.i.3.3.i = sext i16 %B22.dup.i.i to i32
212  %mul7.us.i.3.3.i = mul nsw i32 %conv6.us.i.3.3.i, %conv.us.i.3.3.i
213  %add9.us.i.3.3.i = add nsw i32 %mul7.us.i.3.3.i, %add9.us.i.3.2.i
  ; Count down by 4 and leave the loop once the counter reaches 0.
214  %niter335.nsub.3.i = add i32 %niter335.i, -4
215  %niter335.ncmp.3.i = icmp eq i32 %niter335.nsub.3.i, 0
216  br i1 %niter335.ncmp.3.i, label %exit, label %for.body
217
218exit:
  ; Write the four final sums (rows 0..3) to res[0..3].
219  store i32 %add9.us.i.3361.i, ptr %res, align 4
220  %arrayidx.out.1.i = getelementptr inbounds i32, ptr %res, i32 1
221  store i32 %add9.us.i.1.3.i, ptr %arrayidx.out.1.i, align 4
222  %arrayidx.out.2.i = getelementptr inbounds i32, ptr %res, i32 2
223  store i32 %add9.us.i.2.3.i, ptr %arrayidx.out.2.i, align 4
224  %arrayidx.out.3.i = getelementptr inbounds i32, ptr %res, i32 3
225  store i32 %add9.us.i.3.3.i, ptr %arrayidx.out.3.i, align 4
226  ret void
227}
228