xref: /llvm-project/mlir/test/Dialect/Affine/unroll-jam.mlir (revision c8496d292e0ae4ba60b3905de884c83d66387e7e)
1// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s
2// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s
3
4// CHECK-DAG: [[$MAP_PLUS_1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
5// CHECK-DAG: [[$MAP_DIV_OFFSET:#map[0-9]*]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)>
6// CHECK-DAG: [[$MAP_SYM_UB:#map[0-9]*]] = affine_map<()[s0, s1] -> (s0, s1, 1024)>
7
8// UJAM-FOUR-DAG: [[$UBMAP:#map[0-9]*]] = affine_map<()[s0] -> (s0 + 8)>
9// UJAM-FOUR-DAG: [[$MAP_PLUS_1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
10// UJAM-FOUR-DAG: [[$MAP_PLUS_2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
11// UJAM-FOUR-DAG: [[$MAP_PLUS_3:#map[0-9]*]] = affine_map<(d0) -> (d0 + 3)>
12
13// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
// Test input: an imperfect loop nest. The outer loop body holds one op before
// the inner loop and one op after it, and the constant outer trip count (101)
// is not a multiple of the unroll-jam factor of 2, so one iteration is left
// over for a cleanup.
14func.func @unroll_jam_imperfect_nest() {
15  affine.for %i = 0 to 101 {
    // Op ahead of the inner loop; its result is consumed after the inner loop.
16    %x = "addi32"(%i, %i) : (index, index) -> i32
17    affine.for %j = 0 to 17 {
      // The inner body depends only on the outer induction variable %i.
18      %y = "addi32"(%i, %i) : (index, index) -> i32
19      %z = "addi32"(%y, %y) : (i32, i32) -> i32
20    }
    // Op following the inner loop.
21    %w = "foo"(%i, %x) : (index, i32) -> i32
22  }
23  return
24}
25// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
26// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
27// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
28// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
29// CHECK-NEXT:   affine.for %{{.*}} = 0 to 17 {
30// CHECK-NEXT:     [[RES3:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
31// CHECK-NEXT:     "addi32"([[RES3]], [[RES3]]) : (i32, i32) -> i32
32// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
33// CHECK-NEXT:     [[RES4:%[0-9]+]] = "addi32"([[INC1]], [[INC1]])
34// CHECK-NEXT:     "addi32"([[RES4]], [[RES4]]) : (i32, i32) -> i32
35// CHECK-NEXT:   }
36// CHECK:        "foo"([[IV0]], [[RES1]])
37// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
38// CHECK-NEXT:   "foo"({{.*}}, [[RES2]])
39// CHECK:      }
40// Cleanup loop (single iteration).
41// CHECK:      "addi32"(%c100, %c100)
42// CHECK-NEXT: affine.for [[IV0]] = 0 to 17 {
43// CHECK-NEXT:   [[RESC:%[0-9]+]] = "addi32"(%c100, %c100)
44// CHECK-NEXT:   "addi32"([[RESC]], [[RESC]]) : (i32, i32) -> i32
45// CHECK-NEXT: }
46// CHECK-NEXT: "foo"(%c100, %{{.*}})
47// CHECK-NEXT: return
48
49// CHECK-LABEL: func @loop_nest_unknown_count_1
50// CHECK-SAME: [[N:arg[0-9]+]]: index
// Test input: the outer loop runs from 1 to the function argument %N, so its
// trip count is not a compile-time constant. The inner loop has constant
// bounds. The expected output is written inline, ahead of the input loops.
51func.func @loop_nest_unknown_count_1(%N : index) {
52  // CHECK-NEXT: affine.for %{{.*}} = 1 to [[$MAP_DIV_OFFSET]]()[%[[N]]] step 2 {
53  // CHECK-NEXT:   affine.for %{{.*}} = 1 to 100 {
54  // CHECK-NEXT:     "foo"() : () -> i32
55  // CHECK-NEXT:     "foo"() : () -> i32
56  // CHECK-NEXT:   }
57  // CHECK-NEXT: }
58  // A cleanup loop should be generated here.
59  // CHECK-NEXT: affine.for %{{.*}} = [[$MAP_DIV_OFFSET]]()[%[[N]]] to %[[N]] {
60  // CHECK-NEXT:   affine.for %{{.*}} = 1 to 100 {
61  // CHECK-NEXT:     "foo"() : () -> i32
62  // CHECK-NEXT:   }
63  // CHECK-NEXT: }
  // Input nest: symbolic outer upper bound, constant inner bounds.
64  affine.for %i = 1 to %N {
65    affine.for %j = 1 to 100 {
66      %x = "foo"() : () -> i32
67    }
68  }
69  return
70}
71
72// UJAM-FOUR-LABEL: func @loop_nest_unknown_count_2
73// UJAM-FOUR-SAME: %[[N:arg[0-9]+]]: index
// Test input: both outer bounds are expressed in terms of %N (from %N to
// %N + 9), so the bounds are symbolic while the trip count is the constant 9.
// The expectations for this function are written only for the factor-4
// invocation: the main loop covers %N to %N + 8 in steps of 4 and a single
// iteration remains.
74func.func @loop_nest_unknown_count_2(%N : index) {
75  // UJAM-FOUR-NEXT: affine.for [[IV0:%arg[0-9]+]] = %[[N]] to  [[$UBMAP]]()[%[[N]]] step 4 {
76  // UJAM-FOUR-NEXT:   affine.for [[IV1:%arg[0-9]+]] = 1 to 100 {
77  // UJAM-FOUR-NEXT:     "foo"([[IV0]])
78  // UJAM-FOUR-NEXT:     [[IV_PLUS_1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
79  // UJAM-FOUR-NEXT:     "foo"([[IV_PLUS_1]])
80  // UJAM-FOUR-NEXT:     [[IV_PLUS_2:%[0-9]+]] = affine.apply [[$MAP_PLUS_2]]([[IV0]])
81  // UJAM-FOUR-NEXT:     "foo"([[IV_PLUS_2]])
82  // UJAM-FOUR-NEXT:     [[IV_PLUS_3:%[0-9]+]] = affine.apply [[$MAP_PLUS_3]]([[IV0]])
83  // UJAM-FOUR-NEXT:     "foo"([[IV_PLUS_3]])
84  // UJAM-FOUR-NEXT:   }
85  // UJAM-FOUR-NEXT: }
86  // The cleanup loop has a single iteration and is therefore promoted.
87  // UJAM-FOUR-NEXT: [[RES:%[0-9]+]] = affine.apply [[$UBMAP]]()[%[[N]]]
88  // UJAM-FOUR-NEXT: affine.for [[IV0]] = 1 to 100 {
89  // UJAM-FOUR-NEXT:   "foo"([[RES]])
90  // UJAM-FOUR-NEXT: }
  // Input nest: the inner body uses the outer induction variable %i.
91  affine.for %i = %N to affine_map<()[s0] -> (s0+9)> ()[%N] {
92    affine.for %j = 1 to 100 {
93      "foo"(%i) : (index) -> ()
94    }
95  }
96  return
97}
98
99// CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound
100// CHECK-SAME: [[M:arg[0-9]+]]: index
101// CHECK-SAME: [[N:arg[0-9]+]]: index
102// CHECK-SAME: [[K:arg[0-9]+]]: index
// Test input: the outer upper bound is the minimum of the symbols %M and %N
// and the constant 1024; the inner upper bound is the symbol %K.
103func.func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
104  affine.for %i = 0 to min affine_map<()[s0, s1] -> (s0, s1, 1024)>()[%M, %N] {
105    affine.for %j = 0 to %K {
      // The body uses both induction variables.
106      "test.foo"(%i, %j) : (index, index) -> ()
107    }
108  }
109  return
110}
111// No unroll-and-jam possible here as the lower bound for the cleanup loop won't
112// be representable.
113// CHECK-NEXT:  affine.for [[IV0:%arg[0-9]+]] = 0 to min #map{{.*}}()[%[[M]], %[[N]]] {
114// CHECK-NEXT:    affine.for [[IV1:%arg[0-9]+]] = 0 to %[[K]] {
115// CHECK-NEXT:      "test.foo"([[IV0]], [[IV1]])
116// CHECK-NEXT:    }
117// CHECK-NEXT:  }
118// CHECK-NEXT:  return
119
120// The inner loop trip count changes each iteration of outer loop.
121// Do not unroll-and-jam.
122// CHECK-LABEL: func @no_unroll_jam_dependent_ubound
// Test input: the inner loop's upper bound is (d0 + 1) applied to the outer
// induction variable %i, so the inner trip count differs for every outer
// iteration. The %in0 argument is not referenced by the body.
123func.func @no_unroll_jam_dependent_ubound(%in0: memref<?xf32, 1>) {
124  affine.for %i = 0 to 100 {
125    affine.for %k = 0 to affine_map<(d0) -> (d0 + 1)>(%i) {
126      %y = "addi32"(%k, %k) : (index, index) -> i32
127    }
128  }
129  return
130}
131// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 {
132// CHECK-NEXT:   affine.for [[IV1:%arg[0-9]+]] = 0 to [[$MAP_PLUS_1]]([[IV0]]) {
133// CHECK-NEXT:     "addi32"([[IV1]], [[IV1]])
134// CHECK-NEXT:   }
135// CHECK-NEXT: }
136// CHECK-NEXT: return
137
138// Inner loop with one iter_arg.
139// CHECK-LABEL: func @unroll_jam_one_iter_arg
// Test input: the inner loop carries a single iter_arg (%acc) whose initial
// value is a constant defined in the outer loop body. The inner loop result
// is consumed by an op that follows the loop. The outer trip count is 101.
140func.func @unroll_jam_one_iter_arg() {
141  affine.for %i = 0 to 101 {
142    %cst = arith.constant 1 : i32
143    %x = "addi32"(%i, %i) : (index, index) -> i32
144    %red = affine.for %j = 0 to 17 iter_args(%acc = %cst) -> (i32) {
      // The carried value is recomputed from both induction variables and %acc.
145      %y = "bar"(%i, %j, %acc) : (index, index, i32) -> i32
146      affine.yield %y : i32
147    }
    // Use of the inner loop result after the loop.
148    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
149  }
150  return
151}
152// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
153// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
154// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
155// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
156// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
157// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
158// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32) {
159// CHECK-NEXT:     [[RES4:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[ACC1]])
160// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
161// CHECK-NEXT:     [[RES5:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[ACC2]])
162// CHECK-NEXT:     affine.yield [[RES4]], [[RES5]]
163// CHECK-NEXT:   }
164// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
165// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
166// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
167// CHECK:      }
168// Cleanup loop (single iteration).
169// CHECK:      constant 1 : i32
170// CHECK-NEXT: "addi32"(%c100, %c100)
171// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
172// CHECK-NEXT:   [[RES7:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}})
173// CHECK-NEXT:   affine.yield [[RES7]] : i32
174// CHECK-NEXT: }
175// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
176// CHECK-NEXT: return
177
178// Inner loop with multiple iter_args.
179// CHECK-LABEL: func @unroll_jam_iter_args
// Test input: the inner loop carries two iter_args (%acc, %acc1), each
// initialized from its own constant defined in the outer loop body. Both
// results are consumed after the loop. The outer trip count is 101.
180func.func @unroll_jam_iter_args() {
181  affine.for %i = 0 to 101 {
182    %cst = arith.constant 0 : i32
183    %cst1 = arith.constant 1 : i32
184    %x = "addi32"(%i, %i) : (index, index) -> i32
185    %red:2 = affine.for %j = 0 to 17 iter_args(%acc = %cst, %acc1 = %cst1) -> (i32, i32) {
      // Each carried value is updated by a different op.
186      %y = "bar"(%i, %j, %acc) : (index, index, i32) -> i32
187      %z = "bar1"(%i, %j, %acc1) : (index, index, i32) -> i32
188      affine.yield %y, %z : i32, i32
189    }
    // Both inner loop results are used after the loop.
190    %w = "foo"(%i, %x, %red#0, %red#1) : (index, i32, i32, i32) -> i32
191  }
192  return
193}
194// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
195// CHECK-NEXT:   [[CONST0:%[a-zA-Z0-9_]*]] = arith.constant 0 : i32
196// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
197// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
198// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
199// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 0 : i32
200// CHECK-NEXT:   [[CONST3:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
201// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
202// CHECK-NEXT:   [[RES3:%[0-9]+]]:4 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC0:%arg[0-9]+]] = [[CONST0]], [[ACC1:%arg[0-9]+]] = [[CONST1]],
203// CHECK-SAME:   [[ACC2:%arg[0-9]+]] = [[CONST2]], [[ACC3:%arg[0-9]+]] = [[CONST3]]) -> (i32, i32, i32, i32) {
204// CHECK-NEXT:     [[RES4:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[ACC0]])
205// CHECK-NEXT:     [[RES5:%[0-9]+]] = "bar1"([[IV0]], [[IV1]], [[ACC1]])
206// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
207// CHECK-NEXT:     [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[ACC2]])
208// CHECK-NEXT:     [[RES7:%[0-9]+]] = "bar1"([[INC1]], [[IV1]], [[ACC3]])
209// CHECK-NEXT:     affine.yield [[RES4]], [[RES5]], [[RES6]], [[RES7]]
210// CHECK-NEXT:   }
211// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0, [[RES3]]#1)
212// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
213// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#2, [[RES3]]#3)
214// CHECK:      }
215// Cleanup loop (single iteration).
216// CHECK:      constant 0 : i32
217// CHECK-NEXT: constant 1 : i32
218// CHECK-NEXT: "addi32"(%c100, %c100)
219// CHECK-NEXT: [[RES8:%[0-9]+]]:2 = affine.for
220// CHECK-NEXT:   [[RES9:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}})
221// CHECK-NEXT:   [[RES10:%[0-9]+]] = "bar1"(%c100, {{.*}}, {{.*}})
222// CHECK-NEXT:   affine.yield [[RES9]], [[RES10]] : i32, i32
223// CHECK-NEXT: }
224// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES8]]#0, [[RES8]]#1)
225// CHECK-NEXT: return
226
227// When an iter operand is a function argument, do not replace any use of the
228// operand.
229// CHECK-LABEL: func @unroll_jam_iter_args_func_arg
230// CHECK-SAME:  [[INIT:%arg[0-9]+]]: i32
// Test input: the inner loop's single iter_arg is initialized directly from
// the function argument %in rather than from a value defined inside the outer
// loop. The outer trip count is 101.
231func.func @unroll_jam_iter_args_func_arg(%in: i32) {
232  affine.for %i = 0 to 101 {
233    %x = "addi32"(%i, %i) : (index, index) -> i32
    // The init operand %in is defined outside of both loops.
234    %red = affine.for %j = 0 to 17 iter_args(%acc = %in) -> (i32) {
235      %y = "bar"(%i, %j, %acc) : (index, index, i32) -> i32
236      affine.yield %y : i32
237    }
238    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
239  }
240  return
241}
242// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
243// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
244// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
245// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
246// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[INIT]], [[ACC2:%arg[0-9]+]] = [[INIT]]) -> (i32, i32) {
247// CHECK-NEXT:     [[RES4:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[ACC1]])
248// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
249// CHECK-NEXT:     [[RES5:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[ACC2]])
250// CHECK-NEXT:     affine.yield [[RES4]], [[RES5]]
251// CHECK-NEXT:   }
252// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
253// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
254// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
255// CHECK:      }
256// Cleanup loop (single iteration).
257// CHECK:      "addi32"(%c100, %c100)
258// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
259// CHECK-NEXT:   [[RES7:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}})
260// CHECK-NEXT:   affine.yield [[RES7]] : i32
261// CHECK-NEXT: }
262// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
263// CHECK-NEXT: return
264
265// Nested inner loops, each with one iter_arg. The innermost loop uses its
266// outer loop's iter_arg as its iter operand.
267// CHECK-LABEL: func @unroll_jam_iter_args_nested
// Test input: a three-deep nest. The %j loop carries %acc, and the %k loop
// nested inside it takes %acc as the initial value of its own iter_arg; the
// %k loop result is what the %j loop yields. The outer trip count is 101.
268func.func @unroll_jam_iter_args_nested() {
269  affine.for %i = 0 to 101 {
270    %cst = arith.constant 1 : i32
271    %x = "addi32"(%i, %i) : (index, index) -> i32
272    %red = affine.for %j = 0 to 17 iter_args(%acc = %cst) -> (i32) {
      // The init operand of this loop is the enclosing loop's iter_arg.
273      %red1 = affine.for %k = 0 to 35 iter_args(%acc1 = %acc) -> (i32) {
274        %y = "bar"(%i, %j, %k, %acc1) : (index, index, index, i32) -> i32
275        affine.yield %y : i32
276      }
277      affine.yield %red1 : i32
278    }
279    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
280  }
281  return
282}
283// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
284// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
285// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
286// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
287// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
288// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
289// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32) {
290// CHECK-NEXT:     [[RES4:%[0-9]+]]:2 = affine.for [[IV2:%arg[0-9]+]] = 0 to 35 iter_args([[ACC3:%arg[0-9]+]] = [[ACC1]], [[ACC4:%arg[0-9]+]] = [[ACC2]]) -> (i32, i32) {
291// CHECK-NEXT:       [[RES5:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV2]], [[ACC3]])
292// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
293// CHECK-NEXT:       [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV2]], [[ACC4]])
294// CHECK-NEXT:       affine.yield [[RES5]], [[RES6]]
295// CHECK-NEXT:     }
296// CHECK-NEXT:     affine.yield [[RES4]]#0, [[RES4]]#1
297// CHECK-NEXT:   }
298// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
299// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
300// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
301// CHECK:      }
302// Cleanup loop (single iteration).
303// CHECK:      constant 1 : i32
304// CHECK-NEXT: "addi32"(%c100, %c100)
305// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
306// CHECK-NEXT:   [[RES7:%[0-9]+]] = affine.for
307// CHECK-NEXT:     [[RES8:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
308// CHECK-NEXT:     affine.yield [[RES8]] : i32
309// CHECK-NEXT:   }
310// CHECK-NEXT:   affine.yield [[RES7]] : i32
311// CHECK-NEXT: }
312// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
313// CHECK-NEXT: return
314
315// Nested inner loops, each with one iter_arg. One loop uses its sibling loop's
316// result as its iter operand.
317// CHECK-LABEL: func @unroll_jam_iter_args_nested_affine_for_result
// Test input: the %j loop contains two sibling loops. The first (%k) starts
// from the enclosing iter_arg %acc; the second (%l) starts from the first
// loop's result %red1. The outer trip count is 101.
318func.func @unroll_jam_iter_args_nested_affine_for_result() {
319  affine.for %i = 0 to 101 {
320    %cst = arith.constant 1 : i32
321    %x = "addi32"(%i, %i) : (index, index) -> i32
322    %red = affine.for %j = 0 to 17 iter_args(%acc = %cst) -> (i32) {
323      %red1 = affine.for %k = 0 to 35 iter_args(%acc1 = %acc) -> (i32) {
324        %y = "bar"(%i, %j, %k, %acc1) : (index, index, index, i32) -> i32
        // Note: this yields the enclosing loop's %acc, not the %y computed above.
325        affine.yield %acc : i32
326      }
      // The init operand here is the result of the sibling loop above.
327      %red2 = affine.for %l = 0 to 36 iter_args(%acc2 = %red1) -> (i32) {
328        %y = "bar"(%i, %j, %l, %acc2) : (index, index, index, i32) -> i32
329        affine.yield %y : i32
330      }
331      affine.yield %red2 : i32
332    }
333    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
334  }
335  return
336}
337// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
338// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
339// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
340// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
341// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
342// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
343// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32) {
344// CHECK-NEXT:     [[RES4:%[0-9]+]]:2 = affine.for [[IV2:%arg[0-9]+]] = 0 to 35 iter_args([[ACC3:%arg[0-9]+]] = [[ACC1]], [[ACC4:%arg[0-9]+]] = [[ACC2]]) -> (i32, i32) {
345// CHECK-NEXT:       [[RES5:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV2]], [[ACC3]])
346// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
347// CHECK-NEXT:       [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV2]], [[ACC4]])
348// CHECK-NEXT:       affine.yield [[ACC1]], [[ACC2]]
349// CHECK-NEXT:     }
350// CHECK-NEXT:     [[RES14:%[0-9]+]]:2 = affine.for [[IV3:%arg[0-9]+]] = 0 to 36 iter_args([[ACC13:%arg[0-9]+]] = [[RES4]]#0, [[ACC14:%arg[0-9]+]] = [[RES4]]#1) -> (i32, i32) {
351// CHECK-NEXT:       [[RES15:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV3]], [[ACC13]])
352// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
353// CHECK-NEXT:       [[RES16:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV3]], [[ACC14]])
354// CHECK-NEXT:       affine.yield [[RES15]], [[RES16]]
355// CHECK-NEXT:     }
356// CHECK-NEXT:     affine.yield [[RES14]]#0, [[RES14]]#1
357// CHECK-NEXT:   }
358// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
359// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
360// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
361// CHECK:      }
362// Cleanup loop (single iteration).
363// CHECK:      constant 1 : i32
364// CHECK-NEXT: "addi32"(%c100, %c100)
365// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
366// CHECK-NEXT:   [[RES7:%[0-9]+]] = affine.for
367// CHECK-NEXT:     [[RES8:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
368// CHECK-NEXT:     affine.yield
369// CHECK-NEXT:   }
370// CHECK-NEXT:   [[RES17:%[0-9]+]] = affine.for
371// CHECK-NEXT:     [[RES18:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
372// CHECK-NEXT:     affine.yield [[RES18]] : i32
373// CHECK-NEXT:   }
374// CHECK-NEXT:   affine.yield [[RES17]] : i32
375// CHECK-NEXT: }
376// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
377// CHECK-NEXT: return
378
379// Nested inner loops, each with one or more iter_args. Yield the same value
380// multiple times.
381// CHECK-LABEL: func @unroll_jam_iter_args_nested_yield
// Test input: the %j loop carries three iter_args that all start from the same
// constant. It contains two sibling loops, and several yields repeat a value:
// the %l loop yields %y twice and the %j loop yields %red1 twice. The outer
// trip count is 101.
382func.func @unroll_jam_iter_args_nested_yield() {
383  affine.for %i = 0 to 101 {
384    %cst = arith.constant 1 : i32
385    %x = "addi32"(%i, %i) : (index, index) -> i32
386    %red:3 = affine.for %j = 0 to 17 iter_args(%acc = %cst, %acc1 = %cst, %acc2 = %cst) -> (i32, i32, i32) {
387      %red1 = affine.for %k = 0 to 35 iter_args(%acc3 = %acc) -> (i32) {
388        %y = "bar"(%i, %j, %k, %acc3) : (index, index, index, i32) -> i32
389        affine.yield %y : i32
390      }
391      %red2:2 = affine.for %l = 0 to 36 iter_args(%acc4 = %acc1, %acc5 = %acc2) -> (i32, i32) {
392        %y = "bar1"(%i, %j, %l, %acc4, %acc5) : (index, index, index, i32, i32) -> i32
        // The same value is yielded for both carried results.
393        affine.yield %y, %y : i32, i32
394      }
      // %red1 is yielded twice; only the second result of %red2 is yielded.
395      affine.yield %red1, %red1, %red2#1 : i32, i32, i32
396    }
    // Only the first and third results of the %j loop are used.
397    %w = "foo"(%i, %x, %red#0, %red#2) : (index, i32, i32, i32) -> i32
398  }
399  return
400}
401// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
402// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
403// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
404// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
405// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
406// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
407// CHECK-NEXT:   [[RES3:%[0-9]+]]:6 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST1]],
408// CHECK-SAME:   [[ACC3:%arg[0-9]+]] = [[CONST1]], [[ACC4:%arg[0-9]+]] = [[CONST2]], [[ACC5:%arg[0-9]+]] = [[CONST2]], [[ACC6:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32, i32, i32, i32, i32) {
409// CHECK-NEXT:     [[RES4:%[0-9]+]]:2 = affine.for [[IV2:%arg[0-9]+]] = 0 to 35 iter_args([[ACC7:%arg[0-9]+]] = [[ACC1]], [[ACC8:%arg[0-9]+]] = [[ACC4]]) -> (i32, i32) {
410// CHECK-NEXT:       [[RES5:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV2]], [[ACC7]])
411// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
412// CHECK-NEXT:       [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV2]], [[ACC8]])
413// CHECK-NEXT:       affine.yield [[RES5]], [[RES6]]
414// CHECK-NEXT:     }
415// CHECK-NEXT:     [[RES14:%[0-9]+]]:4 = affine.for [[IV3:%arg[0-9]+]] = 0 to 36 iter_args([[ACC13:%arg[0-9]+]] = [[ACC2]], [[ACC14:%arg[0-9]+]] = [[ACC3]],
416// CHECK-SAME:     [[ACC15:%arg[0-9]+]] = [[ACC5]], [[ACC16:%arg[0-9]+]] = [[ACC6]]) -> (i32, i32, i32, i32) {
417// CHECK-NEXT:       [[RES15:%[0-9]+]] = "bar1"([[IV0]], [[IV1]], [[IV3]], [[ACC13]], [[ACC14]])
418// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
419// CHECK-NEXT:       [[RES16:%[0-9]+]] = "bar1"([[INC1]], [[IV1]], [[IV3]], [[ACC15]], [[ACC16]])
420// CHECK-NEXT:       affine.yield [[RES15]], [[RES15]], [[RES16]], [[RES16]]
421// CHECK-NEXT:     }
422// CHECK-NEXT:     affine.yield [[RES4]]#0, [[RES4]]#0, [[RES14]]#1, [[RES4]]#1, [[RES4]]#1, [[RES14]]#3
423// CHECK-NEXT:   }
424// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0, [[RES3]]#2)
425// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
426// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#3, [[RES3]]#5)
427// CHECK:      }
428// Cleanup loop (single iteration).
429// CHECK:      constant 1 : i32
430// CHECK-NEXT: "addi32"(%c100, %c100)
431// CHECK-NEXT: [[RES6:%[0-9]+]]:3 = affine.for
432// CHECK-NEXT:   [[RES7:%[0-9]+]] = affine.for
433// CHECK-NEXT:     [[RES8:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
434// CHECK-NEXT:     affine.yield [[RES8]] : i32
435// CHECK-NEXT:   }
436// CHECK-NEXT:   [[RES17:%[0-9]+]]:2 = affine.for
437// CHECK-NEXT:     [[RES18:%[0-9]+]] = "bar1"(%c100, {{.*}}, {{.*}}, {{.*}}, {{.*}})
438// CHECK-NEXT:     affine.yield [[RES18]], [[RES18]] : i32, i32
439// CHECK-NEXT:   }
440// CHECK-NEXT:   affine.yield [[RES7]], [[RES7]], [[RES17]]#1 : i32, i32, i32
441// CHECK-NEXT: }
442// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]]#0, [[RES6]]#2)
443// CHECK-NEXT: return
444
445// CHECK-LABEL: func @unroll_jam_nested_iter_args_mulf
446// CHECK-SAME:  [[INIT0:%arg[0-9]+]]: f32, [[INIT1:%arg[0-9]+]]: f32
// Test input: here the outer loop itself carries an iter_arg. The inner loop
// accumulates loaded values with arith.addf starting from %init1, and the
// outer loop combines each inner result into %arg4 with arith.mulf starting
// from %init. The outer trip count is 21; the outer result %0 has no users.
447func.func @unroll_jam_nested_iter_args_mulf(%arg0: memref<21x30xf32, 1>, %init : f32, %init1 : f32) {
448  %0 = affine.for %arg3 = 0 to 21 iter_args(%arg4 = %init) -> (f32) {
449    %1 = affine.for %arg5 = 0 to 30 iter_args(%arg6 = %init1) -> (f32) {
450      %3 = affine.load %arg0[%arg3, %arg5] : memref<21x30xf32, 1>
451      %4 = arith.addf %arg6, %3 : f32
452      affine.yield %4 : f32
453    }
    // Outer-loop update: multiply the carried value by the inner loop result.
454    %2 = arith.mulf %arg4, %1 : f32
455    affine.yield %2 : f32
456  }
457  return
458}
459
460// CHECK:      %[[CONST0:[a-zA-Z0-9_]*]] = arith.constant 20 : index
461// CHECK-NEXT: [[RES:%[0-9]+]]:2 = affine.for %[[IV0:arg[0-9]+]] = 0 to 20 step 2 iter_args([[ACC0:%arg[0-9]+]] = [[INIT0]], [[ACC1:%arg[0-9]+]] = [[INIT0]]) -> (f32, f32) {
462// CHECK-NEXT:   [[RES1:%[0-9]+]]:2 = affine.for %[[IV1:arg[0-9]+]] = 0 to 30 iter_args([[ACC2:%arg[0-9]+]] = [[INIT1]], [[ACC3:%arg[0-9]+]] = [[INIT1]]) -> (f32, f32) {
463// CHECK-NEXT:     [[LOAD1:%[0-9]+]] = affine.load {{.*}}[%[[IV0]], %[[IV1]]]
464// CHECK-NEXT:     [[ADD1:%[0-9]+]] = arith.addf [[ACC2]], [[LOAD1]] : f32
465// CHECK-NEXT:     %[[INC1:[0-9]+]] = affine.apply [[$MAP_PLUS_1]](%[[IV0]])
466// CHECK-NEXT:     [[LOAD2:%[0-9]+]] = affine.load {{.*}}[%[[INC1]], %[[IV1]]]
467// CHECK-NEXT:     [[ADD2:%[0-9]+]] = arith.addf [[ACC3]], [[LOAD2]] : f32
468// CHECK-NEXT:     affine.yield [[ADD1]], [[ADD2]]
469// CHECK-NEXT:   }
470// CHECK-NEXT:   [[MUL1:%[0-9]+]] = arith.mulf [[ACC0]], [[RES1]]#0 : f32
471// CHECK-NEXT:   affine.apply
472// CHECK-NEXT:   [[MUL2:%[0-9]+]] = arith.mulf [[ACC1]], [[RES1]]#1 : f32
473// CHECK-NEXT:   affine.yield [[MUL1]], [[MUL2]]
474// CHECK-NEXT: }
475// Reduction op.
476// CHECK-NEXT: [[MUL3:%[0-9]+]] = arith.mulf [[RES]]#0, [[RES]]#1 : f32
477// Cleanup loop (single iteration).
478// CHECK-NEXT: [[RES2:%[0-9]+]] = affine.for %[[IV2:arg[0-9]+]] = 0 to 30 iter_args([[ACC4:%arg[0-9]+]] = [[INIT1]]) -> (f32) {
479// CHECK-NEXT:   [[LOAD3:%[0-9]+]] = affine.load {{.*}}[%[[CONST0]], %[[IV2]]]
480// CHECK-NEXT:   [[ADD3:%[0-9]+]] = arith.addf [[ACC4]], [[LOAD3]] : f32
481// CHECK-NEXT:   affine.yield [[ADD3]] : f32
482// CHECK-NEXT: }
483// CHECK-NEXT: [[MUL4:%[0-9]+]] = arith.mulf [[MUL3]], [[RES2]] : f32
484// CHECK-NEXT: return
485
486// CHECK-LABEL: func @unroll_jam_iter_args_addi
487// CHECK-SAME:  [[INIT0:%arg[0-9]+]]: i32
// Test input: a single (non-nested) loop with one iter_arg that accumulates
// loaded values with arith.addi, starting from the function argument %init.
// The trip count is 21; the loop result %0 has no users.
488func.func @unroll_jam_iter_args_addi(%arg0: memref<21xi32, 1>, %init : i32) {
489  %0 = affine.for %arg3 = 0 to 21 iter_args(%arg4 = %init) -> (i32) {
490    %1 = affine.load %arg0[%arg3] : memref<21xi32, 1>
491    %2 = arith.addi %arg4, %1 : i32
492    affine.yield %2 : i32
493  }
494  return
495}
496
497// CHECK:      %[[CONST0:[a-zA-Z0-9_]*]] = arith.constant 20 : index
498// CHECK-NEXT: [[RES:%[0-9]+]]:2 = affine.for %[[IV0:arg[0-9]+]] = 0 to 20 step 2 iter_args([[ACC0:%arg[0-9]+]] = [[INIT0]], [[ACC1:%arg[0-9]+]] = [[INIT0]]) -> (i32, i32) {
499// CHECK-NEXT:   [[LOAD1:%[0-9]+]] = affine.load {{.*}}[%[[IV0]]]
500// CHECK-NEXT:   [[ADD1:%[0-9]+]] = arith.addi [[ACC0]], [[LOAD1]] : i32
501// CHECK-NEXT:   %[[INC1:[0-9]+]] = affine.apply [[$MAP_PLUS_1]](%[[IV0]])
502// CHECK-NEXT:   [[LOAD2:%[0-9]+]] = affine.load {{.*}}[%[[INC1]]]
503// CHECK-NEXT:   [[ADD2:%[0-9]+]] = arith.addi [[ACC1]], [[LOAD2]] : i32
504// CHECK-NEXT:   affine.yield [[ADD1]], [[ADD2]]
505// CHECK-NEXT: }
506// Reduction op.
507// CHECK-NEXT: [[ADD3:%[0-9]+]] = arith.addi [[RES]]#0, [[RES]]#1 : i32
508// Cleanup loop (single iteration).
509// CHECK-NEXT: [[LOAD3:%[0-9]+]] = affine.load {{.*}}[%[[CONST0]]]
510// CHECK-NEXT: [[ADD4:%[0-9]+]] = arith.addi [[ADD3]], [[LOAD3]] : i32
511// CHECK-NEXT: return
512