xref: /llvm-project/mlir/test/Dialect/SCF/loop-unroll.mlir (revision d056c756aea3fc709cf7d6bc8acabe9a8c7218db)
1// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2
2// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3
3// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
4// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
5// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
6// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
7// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
8// RUN: mlir-opt %s --affine-loop-unroll --split-input-file | FileCheck %s
9
// Input for the dynamic-bounds case: the loop's lower bound (%arg0), upper
// bound (%arg1) and step (%arg2) are all function arguments, so none of them
// is a compile-time constant inside this function.
10func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
11                          %arg3: memref<?xf32>) {
12  %0 = arith.constant 7.0 : f32
  // Single-statement body: store the constant at the induction variable.
13  scf.for %i0 = %arg0 to %arg1 step %arg2 {
14    memref.store %0, %arg3[%i0] : memref<?xf32>
15  }
16  return
17}
18// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll
19//  UNROLL-BY-2-SAME:  %[[LB:.*0]]: index,
20//  UNROLL-BY-2-SAME:  %[[UB:.*1]]: index,
21//  UNROLL-BY-2-SAME:  %[[STEP:.*2]]: index,
22//  UNROLL-BY-2-SAME:  %[[MEM:.*3]]: memref<?xf32>
23//
24//   UNROLL-BY-2-DAG:  %[[V0:.*]] = arith.subi %[[UB]], %[[LB]] : index
25//   UNROLL-BY-2-DAG:  %[[C1:.*]] = arith.constant 1 : index
26//   UNROLL-BY-2-DAG:  %[[V1:.*]] = arith.subi %[[STEP]], %[[C1]] : index
27//   UNROLL-BY-2-DAG:  %[[V2:.*]] = arith.addi %[[V0]], %[[V1]] : index
28//       Compute trip count in V3.
29//   UNROLL-BY-2-DAG:  %[[V3:.*]] = arith.divui %[[V2]], %[[STEP]] : index
30//       Store unroll factor in C2.
31//   UNROLL-BY-2-DAG:  %[[C2:.*]] = arith.constant 2 : index
32//   UNROLL-BY-2-DAG:  %[[V4:.*]] = arith.remsi %[[V3]], %[[C2]] : index
33//   UNROLL-BY-2-DAG:  %[[V5:.*]] = arith.subi %[[V3]], %[[V4]] : index
34//   UNROLL-BY-2-DAG:  %[[V6:.*]] = arith.muli %[[V5]], %[[STEP]] : index
35//       Compute upper bound of unrolled loop in V7.
36//   UNROLL-BY-2-DAG:  %[[V7:.*]] = arith.addi %[[LB]], %[[V6]] : index
37//       Compute step of unrolled loop in V8.
38//   UNROLL-BY-2-DAG:  %[[V8:.*]] = arith.muli %[[STEP]], %[[C2]] : index
39//       UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
40//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
41//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = arith.constant 1 : index
42//  UNROLL-BY-2-NEXT:    %[[V9:.*]] = arith.muli %[[STEP]], %[[C1_IV]] : index
43//  UNROLL-BY-2-NEXT:    %[[V10:.*]] = arith.addi %[[IV]], %[[V9]] : index
44//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
45//  UNROLL-BY-2-NEXT:  }
46//  UNROLL-BY-2-NEXT:  scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
47//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
48//  UNROLL-BY-2-NEXT:  }
49//  UNROLL-BY-2-NEXT:  return
50
51// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll
52//  UNROLL-BY-3-SAME:  %[[LB:.*0]]: index,
53//  UNROLL-BY-3-SAME:  %[[UB:.*1]]: index,
54//  UNROLL-BY-3-SAME:  %[[STEP:.*2]]: index,
55//  UNROLL-BY-3-SAME:  %[[MEM:.*3]]: memref<?xf32>
56//
57//   UNROLL-BY-3-DAG:  %[[V0:.*]] = arith.subi %[[UB]], %[[LB]] : index
58//   UNROLL-BY-3-DAG:  %[[C1:.*]] = arith.constant 1 : index
59//   UNROLL-BY-3-DAG:  %[[V1:.*]] = arith.subi %[[STEP]], %[[C1]] : index
60//   UNROLL-BY-3-DAG:  %[[V2:.*]] = arith.addi %[[V0]], %[[V1]] : index
61//       Compute trip count in V3.
62//   UNROLL-BY-3-DAG:  %[[V3:.*]] = arith.divui %[[V2]], %[[STEP]] : index
63//       Store unroll factor in C3.
64//   UNROLL-BY-3-DAG:  %[[C3:.*]] = arith.constant 3 : index
65//   UNROLL-BY-3-DAG:  %[[V4:.*]] = arith.remsi %[[V3]], %[[C3]] : index
66//   UNROLL-BY-3-DAG:  %[[V5:.*]] = arith.subi %[[V3]], %[[V4]] : index
67//   UNROLL-BY-3-DAG:  %[[V6:.*]] = arith.muli %[[V5]], %[[STEP]] : index
68//       Compute upper bound of unrolled loop in V7.
69//   UNROLL-BY-3-DAG:  %[[V7:.*]] = arith.addi %[[LB]], %[[V6]] : index
70//       Compute step of unrolled loop in V8.
71//   UNROLL-BY-3-DAG:  %[[V8:.*]] = arith.muli %[[STEP]], %[[C3]] : index
72//       UNROLL-BY-3:  scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
73//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
74//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = arith.constant 1 : index
75//  UNROLL-BY-3-NEXT:    %[[V9:.*]] = arith.muli %[[STEP]], %[[C1_IV]] : index
76//  UNROLL-BY-3-NEXT:    %[[V10:.*]] = arith.addi %[[IV]], %[[V9]] : index
77//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
78//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = arith.constant 2 : index
79//  UNROLL-BY-3-NEXT:    %[[V11:.*]] = arith.muli %[[STEP]], %[[C2_IV]] : index
80//  UNROLL-BY-3-NEXT:    %[[V12:.*]] = arith.addi %[[IV]], %[[V11]] : index
81//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V12]]] : memref<?xf32>
82//  UNROLL-BY-3-NEXT:  }
83//  UNROLL-BY-3-NEXT:  scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
84//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
85//  UNROLL-BY-3-NEXT:  }
86//  UNROLL-BY-3-NEXT:  return
87
// Input with a two-deep scf.for nest whose six bounds/steps are all function
// arguments: %arg0..%arg2 drive the outer loop, %arg3..%arg5 the inner loop.
88func.func @dynamic_loop_unroll_outer_by_2(
89  %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
90  %arg5 : index, %arg6: memref<?xf32>) {
91  %0 = arith.constant 7.0 : f32
92  scf.for %i0 = %arg0 to %arg1 step %arg2 {
93    scf.for %i1 = %arg3 to %arg4 step %arg5 {
     // The store is indexed by the inner induction variable only; the outer
     // induction variable %i0 is not used in the body.
94     memref.store %0, %arg6[%i1] : memref<?xf32>
95    }
96  }
97  return
98}
99// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2
100//  UNROLL-OUTER-BY-2-SAME:  %[[LB0:.*0]]: index,
101//  UNROLL-OUTER-BY-2-SAME:  %[[UB0:.*1]]: index,
102//  UNROLL-OUTER-BY-2-SAME:  %[[STEP0:.*2]]: index,
103//  UNROLL-OUTER-BY-2-SAME:  %[[LB1:.*3]]: index,
104//  UNROLL-OUTER-BY-2-SAME:  %[[UB1:.*4]]: index,
105//  UNROLL-OUTER-BY-2-SAME:  %[[STEP1:.*5]]: index,
106//  UNROLL-OUTER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
107//
108//       UNROLL-OUTER-BY-2:  scf.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
109//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
110//  UNROLL-OUTER-BY-2-NEXT:      memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
111//  UNROLL-OUTER-BY-2-NEXT:    }
112//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
113//  UNROLL-OUTER-BY-2-NEXT:      memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
114//  UNROLL-OUTER-BY-2-NEXT:    }
115//  UNROLL-OUTER-BY-2-NEXT:  }
116//  UNROLL-OUTER-BY-2-NEXT:  scf.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
117//  UNROLL-OUTER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
118//  UNROLL-OUTER-BY-2-NEXT:      memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
119//  UNROLL-OUTER-BY-2-NEXT:    }
120//  UNROLL-OUTER-BY-2-NEXT:  }
121//  UNROLL-OUTER-BY-2-NEXT:  return
122
// Input with a two-deep scf.for nest whose six bounds/steps are all function
// arguments: %arg0..%arg2 drive the outer loop, %arg3..%arg5 the inner loop.
// The body is the same as in @dynamic_loop_unroll_outer_by_2; only the
// function name differs.
123func.func @dynamic_loop_unroll_inner_by_2(
124  %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
125  %arg5 : index, %arg6: memref<?xf32>) {
126  %0 = arith.constant 7.0 : f32
127  scf.for %i0 = %arg0 to %arg1 step %arg2 {
128    scf.for %i1 = %arg3 to %arg4 step %arg5 {
     // The store is indexed by the inner induction variable only; the outer
     // induction variable %i0 is not used in the body.
129     memref.store %0, %arg6[%i1] : memref<?xf32>
130    }
131  }
132  return
133}
134// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2
135//  UNROLL-INNER-BY-2-SAME:  %[[LB0:.*0]]: index,
136//  UNROLL-INNER-BY-2-SAME:  %[[UB0:.*1]]: index,
137//  UNROLL-INNER-BY-2-SAME:  %[[STEP0:.*2]]: index,
138//  UNROLL-INNER-BY-2-SAME:  %[[LB1:.*3]]: index,
139//  UNROLL-INNER-BY-2-SAME:  %[[UB1:.*4]]: index,
140//  UNROLL-INNER-BY-2-SAME:  %[[STEP1:.*5]]: index,
141//  UNROLL-INNER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
142//
143//       UNROLL-INNER-BY-2:  scf.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
144//       UNROLL-INNER-BY-2:    scf.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
145//  UNROLL-INNER-BY-2-NEXT:      memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
146//  UNROLL-INNER-BY-2-NEXT:      %[[C1_IV:.*]] = arith.constant 1 : index
147//  UNROLL-INNER-BY-2-NEXT:      %[[V0:.*]] = arith.muli %[[STEP1]], %[[C1_IV]] : index
148//  UNROLL-INNER-BY-2-NEXT:      %[[V1:.*]] = arith.addi %[[IV1]], %[[V0]] : index
149//  UNROLL-INNER-BY-2-NEXT:      memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
150//  UNROLL-INNER-BY-2-NEXT:    }
151//  UNROLL-INNER-BY-2-NEXT:    scf.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
152//  UNROLL-INNER-BY-2-NEXT:      memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
153//  UNROLL-INNER-BY-2-NEXT:    }
154//  UNROLL-INNER-BY-2-NEXT:  }
155//  UNROLL-INNER-BY-2-NEXT:  return
156
157// Test that no epilogue clean-up loop is generated because the trip count is
158// a multiple of the unroll factor.
// Input loop with constant bounds: iterates from 0 to 20 in steps of 1,
// i.e. 20 iterations, which is evenly divisible by 2.
159func.func @static_loop_unroll_by_2(%arg0 : memref<?xf32>) {
160  %0 = arith.constant 7.0 : f32
161  %lb = arith.constant 0 : index
162  %ub = arith.constant 20 : index
163  %step = arith.constant 1 : index
164  scf.for %i0 = %lb to %ub step %step {
165    memref.store %0, %arg0[%i0] : memref<?xf32>
166  }
167  return
168}
169// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2
170//  UNROLL-BY-2-SAME:  %[[MEM:.*0]]: memref<?xf32>
171//
172//   UNROLL-BY-2-DAG:  %[[C0:.*]] = arith.constant 0 : index
173//   UNROLL-BY-2-DAG:  %[[C1:.*]] = arith.constant 1 : index
174//   UNROLL-BY-2-DAG:  %[[C20:.*]] = arith.constant 20 : index
175//   UNROLL-BY-2-DAG:  %[[C2:.*]] = arith.constant 2 : index
176//   UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
177//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
178//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = arith.constant 1 : index
179//  UNROLL-BY-2-NEXT:    %[[V0:.*]] = arith.muli %[[C1]], %[[C1_IV]] : index
180//  UNROLL-BY-2-NEXT:    %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
181//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
182//  UNROLL-BY-2-NEXT:  }
183//  UNROLL-BY-2-NEXT:  return
184
185// UNROLL-BY-2-ANNOTATE-LABEL: func @static_loop_unroll_by_2
186// UNROLL-BY-2-ANNOTATE:    memref.store %{{.*}}, %[[MEM:.*0]][%{{.*}}] {unrolled_iteration = 0 : ui32} : memref<?xf32>
187// UNROLL-BY-2-ANNOTATE:    memref.store %{{.*}}, %[[MEM]][%{{.*}}] {unrolled_iteration = 1 : ui32} : memref<?xf32>
188
189// Test that no epilogue clean-up loop is generated because the trip count
190// (taking into account the non-unit step size) is a multiple of the unroll
191// factor.
// Input loop with constant bounds and a non-unit step: the induction variable
// takes the values 0, 2, ..., 18 (upper bound 19 is exclusive), i.e. 10
// iterations, which is evenly divisible by 2.
192func.func @static_loop_step_2_unroll_by_2(%arg0 : memref<?xf32>) {
193  %0 = arith.constant 7.0 : f32
194  %lb = arith.constant 0 : index
195  %ub = arith.constant 19 : index
196  %step = arith.constant 2 : index
197  scf.for %i0 = %lb to %ub step %step {
198    memref.store %0, %arg0[%i0] : memref<?xf32>
199  }
200  return
201}
202
203// UNROLL-BY-2-LABEL: func @static_loop_step_2_unroll_by_2
204//  UNROLL-BY-2-SAME:  %[[MEM:.*0]]: memref<?xf32>
205//
206//   UNROLL-BY-2-DAG:  %[[C0:.*]] = arith.constant 0 : index
207//   UNROLL-BY-2-DAG:  %[[C2:.*]] = arith.constant 2 : index
208//   UNROLL-BY-2-DAG:  %[[C19:.*]] = arith.constant 19 : index
209//   UNROLL-BY-2-DAG:  %[[C4:.*]] = arith.constant 4 : index
210//   UNROLL-BY-2:  scf.for %[[IV:.*]] = %[[C0]] to %[[C19]] step %[[C4]] {
211//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
212//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = arith.constant 1 : index
213//  UNROLL-BY-2-NEXT:    %[[V0:.*]] = arith.muli %[[C2]], %[[C1_IV]] : index
214//  UNROLL-BY-2-NEXT:    %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
215//  UNROLL-BY-2-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
216//  UNROLL-BY-2-NEXT:  }
217//  UNROLL-BY-2-NEXT:  return
218
219// Test that an epilogue clean-up loop is generated (the trip count is not
220// a multiple of the unroll factor).
// Input loop with constant bounds: iterates from 0 to 20 in steps of 1,
// i.e. 20 iterations. 20 = 6 * 3 + 2, so unrolling by 3 leaves two
// iterations over.
221func.func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
222  %0 = arith.constant 7.0 : f32
223  %lb = arith.constant 0 : index
224  %ub = arith.constant 20 : index
225  %step = arith.constant 1 : index
226  scf.for %i0 = %lb to %ub step %step {
227    memref.store %0, %arg0[%i0] : memref<?xf32>
228  }
229  return
230}
231
232// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3
233//  UNROLL-BY-3-SAME:  %[[MEM:.*0]]: memref<?xf32>
234//
235//   UNROLL-BY-3-DAG:  %[[C0:.*]] = arith.constant 0 : index
236//   UNROLL-BY-3-DAG:  %[[C1:.*]] = arith.constant 1 : index
237//   UNROLL-BY-3-DAG:  %[[C20:.*]] = arith.constant 20 : index
238//   UNROLL-BY-3-DAG:  %[[C18:.*]] = arith.constant 18 : index
239//   UNROLL-BY-3-DAG:  %[[C3:.*]] = arith.constant 3 : index
240//       UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
241//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
242//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = arith.constant 1 : index
243//  UNROLL-BY-3-NEXT:    %[[V0:.*]] = arith.muli %[[C1]], %[[C1_IV]] : index
244//  UNROLL-BY-3-NEXT:    %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
245//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
246//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = arith.constant 2 : index
247//  UNROLL-BY-3-NEXT:    %[[V2:.*]] = arith.muli %[[C1]], %[[C2_IV]] : index
248//  UNROLL-BY-3-NEXT:    %[[V3:.*]] = arith.addi %[[IV]], %[[V2]] : index
249//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
250//  UNROLL-BY-3-NEXT:  }
251//  UNROLL-BY-3-NEXT:  scf.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
252//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
253//  UNROLL-BY-3-NEXT:  }
254//  UNROLL-BY-3-NEXT:  return
255
256// Test that the single-iteration epilogue loop body is promoted to the loop's
257// containing block.
// Input loop with constant bounds: iterates from 0 to 10 in steps of 1,
// i.e. 10 iterations. 10 = 3 * 3 + 1, so unrolling by 3 leaves exactly one
// iteration over.
258func.func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
259  %0 = arith.constant 7.0 : f32
260  %lb = arith.constant 0 : index
261  %ub = arith.constant 10 : index
262  %step = arith.constant 1 : index
263  scf.for %i0 = %lb to %ub step %step {
264    memref.store %0, %arg0[%i0] : memref<?xf32>
265  }
266  return
267}
268// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue
269//  UNROLL-BY-3-SAME:  %[[MEM:.*0]]: memref<?xf32>
270//
271//   UNROLL-BY-3-DAG:  %[[C0:.*]] = arith.constant 0 : index
272//   UNROLL-BY-3-DAG:  %[[C1:.*]] = arith.constant 1 : index
273//   UNROLL-BY-3-DAG:  %[[C10:.*]] = arith.constant 10 : index
274//   UNROLL-BY-3-DAG:  %[[C9:.*]] = arith.constant 9 : index
275//   UNROLL-BY-3-DAG:  %[[C3:.*]] = arith.constant 3 : index
276//       UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
277//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
278//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = arith.constant 1 : index
279//  UNROLL-BY-3-NEXT:    %[[V0:.*]] = arith.muli %[[C1]], %[[C1_IV]] : index
280//  UNROLL-BY-3-NEXT:    %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
281//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
282//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = arith.constant 2 : index
283//  UNROLL-BY-3-NEXT:    %[[V2:.*]] = arith.muli %[[C1]], %[[C2_IV]] : index
284//  UNROLL-BY-3-NEXT:    %[[V3:.*]] = arith.addi %[[IV]], %[[V2]] : index
285//  UNROLL-BY-3-NEXT:    memref.store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
286//  UNROLL-BY-3-NEXT:  }
287//  UNROLL-BY-3-NEXT:  memref.store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
288//  UNROLL-BY-3-NEXT:  return
289
290// Test unroll-up-to functionality.
// Input uses the affine dialect rather than scf: an affine.for from 0 to 2
// with no explicit step, whose bounds are given as constant SSA values.
291func.func @static_loop_unroll_up_to_factor(%arg0 : memref<?xf32>) {
292  %0 = arith.constant 7.0 : f32
293  %lb = arith.constant 0 : index
294  %ub = arith.constant 2 : index
295  affine.for %i0 = %lb to %ub {
296    affine.store %0, %arg0[%i0] : memref<?xf32>
297  }
298  return
299}
300// UNROLL-UP-TO-LABEL: func @static_loop_unroll_up_to_factor
301//  UNROLL-UP-TO-SAME:  %[[MEM:.*0]]: memref<?xf32>
302//
303//   UNROLL-UP-TO-DAG:  %[[C0:.*]] = arith.constant 0 : index
304//   UNROLL-UP-TO-DAG:  %[[C2:.*]] = arith.constant 2 : index
305//   UNROLL-UP-TO-NEXT: %[[V0:.*]] = affine.apply {{.*}}
306//   UNROLL-UP-TO-NEXT: affine.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
307//   UNROLL-UP-TO-NEXT: %[[V1:.*]] = affine.apply {{.*}}
308//   UNROLL-UP-TO-NEXT: affine.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
309//   UNROLL-UP-TO-NEXT: return
310
311// Test that epilogue's arguments are correctly renamed.
// Input loop carrying two f32 values through iter_args, both initialised to
// the same constant. It iterates from 0 to 20 in steps of 1 and both loop
// results are returned from the function.
312func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
313  %0 = arith.constant 7.0 : f32
314  %lb = arith.constant 0 : index
315  %ub = arith.constant 20 : index
316  %step = arith.constant 1 : index
317  %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) {
    // Each iteration consumes both carried values and yields their sum and
    // product as the next pair of carried values.
318    %add = arith.addf %arg0, %arg1 : f32
319    %mul = arith.mulf %arg0, %arg1 : f32
320    scf.yield %add, %mul : f32, f32
321  }
322  return %result#0, %result#1 : f32, f32
323}
324// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_rename_epilogue_arguments
325//
326//   UNROLL-BY-3-DAG:   %[[CST:.*]] = arith.constant {{.*}} : f32
327//   UNROLL-BY-3-DAG:   %[[C0:.*]] = arith.constant 0 : index
328//   UNROLL-BY-3-DAG:   %[[C1:.*]] = arith.constant 1 : index
329//   UNROLL-BY-3-DAG:   %[[C20:.*]] = arith.constant 20 : index
330//   UNROLL-BY-3-DAG:   %[[C18:.*]] = arith.constant 18 : index
331//   UNROLL-BY-3-DAG:   %[[C3:.*]] = arith.constant 3 : index
332//       UNROLL-BY-3:   %[[FOR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]]
333//  UNROLL-BY-3-SAME:     iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) {
334//  UNROLL-BY-3-NEXT:     %[[ADD0:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
335//  UNROLL-BY-3-NEXT:     %[[MUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
336//  UNROLL-BY-3-NEXT:     %[[ADD1:.*]] = arith.addf %[[ADD0]], %[[MUL0]] : f32
337//  UNROLL-BY-3-NEXT:     %[[MUL1:.*]] = arith.mulf %[[ADD0]], %[[MUL0]] : f32
338//  UNROLL-BY-3-NEXT:     %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[MUL1]] : f32
339//  UNROLL-BY-3-NEXT:     %[[MUL2:.*]] = arith.mulf %[[ADD1]], %[[MUL1]] : f32
340//  UNROLL-BY-3-NEXT:     scf.yield %[[ADD2]], %[[MUL2]] : f32, f32
341//  UNROLL-BY-3-NEXT:   }
342//       UNROLL-BY-3:   %[[EFOR:.*]]:2 = scf.for %[[EIV:.*]] = %[[C18]] to %[[C20]] step %[[C1]]
343//  UNROLL-BY-3-SAME:     iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) {
344//  UNROLL-BY-3-NEXT:     %[[EADD:.*]] = arith.addf %[[EARG0]], %[[EARG1]] : f32
345//  UNROLL-BY-3-NEXT:     %[[EMUL:.*]] = arith.mulf %[[EARG0]], %[[EARG1]] : f32
346//  UNROLL-BY-3-NEXT:     scf.yield %[[EADD]], %[[EMUL]] : f32, f32
347//  UNROLL-BY-3-NEXT:   }
348//  UNROLL-BY-3-NEXT:   return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
349
350// Test that the epilogue clean-up loop is fully unrolled when the trip count
351// is less than the unroll factor.
// Input is an affine.for from 0 to 3 with no explicit step (three
// iterations), whose bounds are given as constant SSA values.
352func.func @static_loop_unroll_by_5_with_cleanup(%arg0 : memref<?xf32>) {
353  %0 = arith.constant 7.0 : f32
354  %lb = arith.constant 0 : index
355  %ub = arith.constant 3 : index
356  affine.for %i0 = %lb to %ub {
    // Note: the body uses memref.store, not affine.store.
357    memref.store %0, %arg0[%i0] : memref<?xf32>
358  }
359  return
360}
361
362// CLEANUP-UNROLL-BY-5-LABEL: func @static_loop_unroll_by_5_with_cleanup
363//  CLEANUP-UNROLL-BY-5-SAME:  %[[MEM:.*0]]: memref<?xf32>
364//
365//   CLEANUP-UNROLL-BY-5-DAG:  %[[C0:.*]] = arith.constant 0 : index
366//   CLEANUP-UNROLL-BY-5-DAG:  %[[C3:.*]] = arith.constant 3 : index
367//   CLEANUP-UNROLL-BY-5-NEXT: %[[V0:.*]] = affine.apply {{.*}}
368//   CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
369//   CLEANUP-UNROLL-BY-5-NEXT: %[[V1:.*]] = affine.apply {{.*}}
370//   CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
371//   CLEANUP-UNROLL-BY-5-NEXT: %[[V2:.*]] = affine.apply {{.*}}
372//   CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V2]]] : memref<?xf32>
373//   CLEANUP-UNROLL-BY-5-NEXT: return
374
375// -----
376
377// Test loop unrolling when the yielded value remains unchanged.
378// CHECK: [[$MAP:#map]] = affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>
379// CHECK-LABEL: func @loop_unroll_static_yield_value
// Input is an affine.for from 0 to 40 that carries one i1 value. The value
// yielded on every iteration is %true_4, which is defined outside the loop.
380func.func @loop_unroll_static_yield_value_test1() {
381  %true_4 = arith.constant true
382  %c1 = arith.constant 1 : index
383  %103 = affine.for %arg2 = 0 to 40 iter_args(%arg3 = %true_4) -> (i1) {
    // The affine.max operand is %c1 (defined outside the loop); its result
    // %324 is not used by any other operation.
384    %324 = affine.max affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>(%c1)
385    affine.yield %true_4 : i1
386  }
387  return
388}
389// CHECK:         %[[TRUE:.*]] = arith.constant true
390// CHECK-NEXT:    %[[C1:.*]] = arith.constant 1 : index
391// CHECK-NEXT:    affine.for %{{.*}} = 0 to 40 step 4 iter_args(%{{.*}} = %[[TRUE]]) -> (i1) {
392// CHECK-NEXT:      affine.max [[$MAP]](%[[C1]])
393// CHECK-NEXT:      affine.max [[$MAP]](%[[C1]])
394// CHECK-NEXT:      affine.max [[$MAP]](%[[C1]])
395// CHECK-NEXT:      affine.max [[$MAP]](%[[C1]])
396// CHECK-NEXT:      affine.yield %[[TRUE]] : i1
397// CHECK-NEXT:    }
398// CHECK-NEXT:    return
399
400// -----
401
402// Test loop unrolling when the yielded value is the loop IV.
403// CHECK: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>
404// CHECK: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
405// CHECK: [[$MAP2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 4)>
406// CHECK: [[$MAP3:#map[0-9]*]] = affine_map<(d0) -> (d0 + 6)>
407// CHECK-LABEL: func @loop_unroll_yield_loop_iv
// Input is an affine.for from 0 to 40 with step 2 that carries one index
// value, initialised to %c1.
408func.func @loop_unroll_yield_loop_iv() {
409  %c1 = arith.constant 1 : index
410  %103 = affine.for %arg2 = 0 to 40 step 2 iter_args(%arg3 = %c1) -> (index) {
    // Both the affine.max operand and the yielded value are the induction
    // variable %arg2; the carried value %arg3 is not read in the body.
411    %324 = affine.max affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>(%arg2)
412    affine.yield %arg2 : index
413  }
414  return
415}
416// CHECK:         %[[C1:.*]] = arith.constant 1 : index
417// CHECK-NEXT:    affine.for %[[LOOP_IV:.*]] = 0 to 40 step 8 iter_args(%{{.*}} = %[[C1]]) -> (index) {
418// CHECK-NEXT:    affine.max [[$MAP0]](%[[LOOP_IV]])
419// CHECK-NEXT:    %[[LOOP_IV_PLUS_2:.*]] = affine.apply [[$MAP1]](%[[LOOP_IV]])
420// CHECK-NEXT:    affine.max [[$MAP0]](%[[LOOP_IV_PLUS_2]])
421// CHECK-NEXT:    %[[LOOP_IV_PLUS_4:.*]] = affine.apply [[$MAP2]](%[[LOOP_IV]])
422// CHECK-NEXT:    affine.max [[$MAP0]](%[[LOOP_IV_PLUS_4]])
423// CHECK-NEXT:    %[[LOOP_IV_PLUS_6:.*]] = affine.apply [[$MAP3]](%[[LOOP_IV]])
424// CHECK-NEXT:    affine.max [[$MAP0]](%[[LOOP_IV_PLUS_6]])
425// CHECK-NEXT:    affine.yield %[[LOOP_IV]] : index
426// CHECK-NEXT:  }
427// CHECK-NEXT:  return
428
429// -----
430
431// Test loop unrolling when the yielded value is an iter_arg.
432// CHECK: [[$MAP:#map]] = affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>
433// CHECK-LABEL: func @loop_unroll_yield_iter_arg
// Input is an affine.for from 0 to 40 with step 2 that carries one index
// value, initialised to %c1.
434func.func @loop_unroll_yield_iter_arg() {
435  %c1 = arith.constant 1 : index
436  %103 = affine.for %arg2 = 0 to 40 step 2 iter_args(%arg3 = %c1) -> (index) {
    // Both the affine.max operand and the yielded value are the carried value
    // %arg3, so it is passed through unchanged; the induction variable %arg2
    // is not read in the body.
437    %324 = affine.max affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>(%arg3)
438    affine.yield %arg3 : index
439  }
440  return
441}
442// CHECK:         %[[C1:.*]] = arith.constant 1 : index
443// CHECK-NEXT:    affine.for %{{.*}} = 0 to 40 step 8 iter_args(%[[ITER_ARG:.*]] = %[[C1]]) -> (index) {
444// CHECK-NEXT:      affine.max [[$MAP]](%[[ITER_ARG]])
445// CHECK-NEXT:      affine.max [[$MAP]](%[[ITER_ARG]])
446// CHECK-NEXT:      affine.max [[$MAP]](%[[ITER_ARG]])
447// CHECK-NEXT:      affine.max [[$MAP]](%[[ITER_ARG]])
448// CHECK-NEXT:      affine.yield %[[ITER_ARG]] : index
449// CHECK-NEXT:    }
450// CHECK-NEXT:    return
451
452// -----
453
454// Test the loop unroller works with integer IV type.
// Input loop whose bounds and step are i32 constants rather than index
// constants. It iterates from 0 to 20 in steps of 1, carries two f32 values
// through iter_args, and returns both loop results.
455func.func @static_loop_unroll_with_integer_iv() -> (f32, f32) {
456  %0 = arith.constant 7.0 : f32
457  %lb = arith.constant 0 : i32
458  %ub = arith.constant 20 : i32
459  %step = arith.constant 1 : i32
  // The trailing ": i32" spells out the induction variable type.
460  %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) : i32{
    // Each iteration yields the sum and product of the two carried values.
461    %add = arith.addf %arg0, %arg1 : f32
462    %mul = arith.mulf %arg0, %arg1 : f32
463    scf.yield %add, %mul : f32, f32
464  }
465  return %result#0, %result#1 : f32, f32
466}
467// UNROLL-BY-3-LABEL: func @static_loop_unroll_with_integer_iv
468//
469//   UNROLL-BY-3-DAG:   %[[CST:.*]] = arith.constant {{.*}} : f32
470//   UNROLL-BY-3-DAG:   %[[C0:.*]] = arith.constant 0 : i32
471//   UNROLL-BY-3-DAG:   %[[C1:.*]] = arith.constant 1 : i32
472//   UNROLL-BY-3-DAG:   %[[C20:.*]] = arith.constant 20 : i32
473//   UNROLL-BY-3-DAG:   %[[C18:.*]] = arith.constant 18 : i32
474//   UNROLL-BY-3-DAG:   %[[C3:.*]] = arith.constant 3 : i32
475//       UNROLL-BY-3:   %[[FOR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]]
476//  UNROLL-BY-3-SAME:     iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32)  : i32 {
477//  UNROLL-BY-3-NEXT:     %[[ADD0:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
478//  UNROLL-BY-3-NEXT:     %[[MUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
479//  UNROLL-BY-3-NEXT:     %[[ADD1:.*]] = arith.addf %[[ADD0]], %[[MUL0]] : f32
480//  UNROLL-BY-3-NEXT:     %[[MUL1:.*]] = arith.mulf %[[ADD0]], %[[MUL0]] : f32
481//  UNROLL-BY-3-NEXT:     %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[MUL1]] : f32
482//  UNROLL-BY-3-NEXT:     %[[MUL2:.*]] = arith.mulf %[[ADD1]], %[[MUL1]] : f32
483//  UNROLL-BY-3-NEXT:     scf.yield %[[ADD2]], %[[MUL2]] : f32, f32
484//  UNROLL-BY-3-NEXT:   }
485//       UNROLL-BY-3:   %[[EFOR:.*]]:2 = scf.for %[[EIV:.*]] = %[[C18]] to %[[C20]] step %[[C1]]
486//  UNROLL-BY-3-SAME:     iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32)  : i32 {
487//  UNROLL-BY-3-NEXT:     %[[EADD:.*]] = arith.addf %[[EARG0]], %[[EARG1]] : f32
488//  UNROLL-BY-3-NEXT:     %[[EMUL:.*]] = arith.mulf %[[EARG0]], %[[EARG1]] : f32
489//  UNROLL-BY-3-NEXT:     scf.yield %[[EADD]], %[[EMUL]] : f32, f32
490//  UNROLL-BY-3-NEXT:   }
491//  UNROLL-BY-3-NEXT:   return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
492
493// -----
494
495// Test loop unrolling when the yielded value is defined above the loop.
// Input loop from 0 to 4 in steps of 1 carrying two i32 values: %iter1
// starts at the constant 42 and %iter2 starts at the function argument %init.
// The loop results (%103) are not used after the loop.
496func.func @loop_unroll_static_yield_value_defined_above(%init: i32) {
497  %c42 = arith.constant 42 : i32
498  %c0 = arith.constant 0 : index
499  %c1 = arith.constant 1 : index
500  %c4 = arith.constant 4 : index
501  %103:2 = scf.for %i = %c0 to %c4 step %c1
502      iter_args(%iter1 = %c42, %iter2 = %init) -> (i32, i32) {
503    %0 = arith.andi %iter2, %iter1 : i32
    // The second yielded value is %init, a function argument defined above
    // the loop, rather than a value computed in the body.
504    scf.yield %0, %init : i32, i32
505  }
506  return
507}
508// UNROLL-OUTER-BY-2-LABEL: @loop_unroll_static_yield_value_defined_above(
509// UNROLL-OUTER-BY-2-SAME:    %[[INIT:.*]]: i32) {
510// UNROLL-OUTER-BY-2-DAG:   %[[C42:.*]] = arith.constant 42 : i32
511// UNROLL-OUTER-BY-2-DAG:   %[[C0:.*]] = arith.constant 0 : index
512// UNROLL-OUTER-BY-2-DAG:   %[[C4:.*]] = arith.constant 4 : index
513// UNROLL-OUTER-BY-2-DAG:   %[[C2:.*]] = arith.constant 2 : index
514// UNROLL-OUTER-BY-2:       scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C2]]
515// UNROLL-OUTER-BY-2-SAME:      iter_args(%[[ITER1:.*]] = %[[C42]],
516// UNROLL-OUTER-BY-2-SAME:                %[[ITER2:.*]] = %[[INIT]])
517// UNROLL-OUTER-BY-2:         %[[SUM:.*]] = arith.andi %[[ITER2]], %[[ITER1]]
518// UNROLL-OUTER-BY-2:         %[[SUM1:.*]] = arith.andi %[[INIT]], %[[SUM]]
519// UNROLL-OUTER-BY-2:         scf.yield %[[SUM1]], %[[INIT]] : i32, i32
520
521