xref: /llvm-project/mlir/test/Conversion/AffineToStandard/lower-affine.mlir (revision 8cd94e0b6d18b6b454431ba9481c2489b480baf4)
1// RUN: mlir-opt -lower-affine %s | FileCheck %s
2
// Smoke test: a function whose body is only `return` must still print its
// `return` and closing brace after the lowering pass.
// CHECK-LABEL: func @empty() {
func.func @empty() {
  return     // CHECK:  return
}            // CHECK: }
7
// External callee taking one index; the tests in this file call it from loop
// and branch bodies so that those bodies are not empty.
func.func private @body(index) -> ()

// Simple loops are properly converted: the lower bound, the upper bound and
// the implicit unit step each appear as an `arith.constant` that feeds the
// resulting `for` loop.
// CHECK-LABEL: func @simple_loop
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c1]] to %[[c42]] step %[[c1_0]] {
// CHECK-NEXT:     call @body(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @simple_loop() {
  affine.for %i = 1 to 42 {
    func.call @body(%i) : (index) -> ()
  }
  return
}
26
27/////////////////////////////////////////////////////////////////////
28
// Loop-carried values: an `affine.for` with `iter_args` and `affine.yield` is
// expected to become an `scf.for` with `iter_args` and `scf.yield`, and the
// `affine.load` in its body a `memref.load`.
func.func @for_with_yield(%buffer: memref<1024xf32>) -> (f32) {
  %sum_0 = arith.constant 0.0 : f32
  %sum = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_0) -> (f32) {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = arith.addf %sum_iter, %t : f32
    affine.yield %sum_next : f32
  }
  return %sum : f32
}

// CHECK-LABEL: func @for_with_yield
// CHECK:         %[[INIT_SUM:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT:    %[[LOWER:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER:.*]] = arith.constant 10 : index
// CHECK-NEXT:    %[[STEP:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[SUM:.*]] = scf.for %[[IV:.*]] = %[[LOWER]] to %[[UPPER]] step %[[STEP]] iter_args(%[[SUM_ITER:.*]] = %[[INIT_SUM]]) -> (f32) {
// CHECK-NEXT:      memref.load
// CHECK-NEXT:      %[[SUM_NEXT:.*]] = arith.addf
// CHECK-NEXT:      scf.yield %[[SUM_NEXT]] : f32
// CHECK-NEXT:    }
// CHECK-NEXT:    return %[[SUM]] : f32
50
51/////////////////////////////////////////////////////////////////////
52
// External callees that mark the positions before, inside and after the
// nested loops in the tests below.
func.func private @pre(index) -> ()
func.func private @body2(index, index) -> ()
func.func private @post(index) -> ()

// A loop nest with calls before and after the inner loop: the inner loop's
// bound and step constants are expected inside the outer loop body, between
// the two surrounding calls.
// CHECK-LABEL: func @imperfectly_nested_loops
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:     call @pre(%{{.*}}) : (index) -> ()
// CHECK-NEXT:     %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:     %[[c56:.*]] = arith.constant 56 : index
// CHECK-NEXT:     %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT:     for %{{.*}} = %[[c7]] to %[[c56]] step %[[c2]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:     call @post(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @imperfectly_nested_loops() {
  affine.for %i = 0 to 42 {
    func.call @pre(%i) : (index) -> ()
    affine.for %j = 7 to 56 step 2 {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
    func.call @post(%i) : (index) -> ()
  }
  return
}
83
84/////////////////////////////////////////////////////////////////////
85
// Additional external callees: @mid separates the two sibling inner loops,
// @body3 is the body of the second one.
func.func private @mid(index) -> ()
func.func private @body3(index, index) -> ()

// Two sibling inner loops inside one outer loop, with calls before, between
// and after them; each inner loop gets its own bound and step constants
// immediately ahead of it.
// CHECK-LABEL: func @more_imperfectly_nested_loops
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:     call @pre(%{{.*}}) : (index) -> ()
// CHECK-NEXT:     %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:     %[[c56:.*]] = arith.constant 56 : index
// CHECK-NEXT:     %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT:     for %{{.*}} = %[[c7]] to %[[c56]] step %[[c2]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:     call @mid(%{{.*}}) : (index) -> ()
// CHECK-NEXT:     %[[c18:.*]] = arith.constant 18 : index
// CHECK-NEXT:     %[[c37:.*]] = arith.constant 37 : index
// CHECK-NEXT:     %[[c3:.*]] = arith.constant 3 : index
// CHECK-NEXT:     for %{{.*}} = %[[c18]] to %[[c37]] step %[[c3]] {
// CHECK-NEXT:       call @body3(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:     call @post(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @more_imperfectly_nested_loops() {
  affine.for %i = 0 to 42 {
    func.call @pre(%i) : (index) -> ()
    affine.for %j = 7 to 56 step 2 {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
    func.call @mid(%i) : (index) -> ()
    affine.for %k = 18 to 37 step 3 {
      func.call @body3(%i, %k) : (index, index) -> ()
    }
    func.call @post(%i) : (index) -> ()
  }
  return
}
126
// Bounds that are a plain SSA value (%N) or an identity map applied to the
// outer induction variable need no constant of their own: only the remaining
// constant bound and the step are expected ahead of each loop.
// CHECK-LABEL: func @affine_apply_loops_shorthand
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %{{.*}} step %[[c1]] {
// CHECK-NEXT:     %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:     %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:     for %{{.*}} = %{{.*}} to %[[c42]] step %[[c1_0]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @affine_apply_loops_shorthand(%N : index) {
  affine.for %i = 0 to %N {
    affine.for %j = affine_map<(d0)[]->(d0)>(%i)[] to 42 {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
  }
  return
}
147
148/////////////////////////////////////////////////////////////////////
149
// Opaque producer of an index value, so the conditions below are not
// compile-time constants.
func.func private @get_idx() -> (index)

// #set1 holds when d0 <= 20, #set2 when d0 >= 10.
#set1 = affine_set<(d0) : (20 - d0 >= 0)>
#set2 = affine_set<(d0) : (d0 - 10 >= 0)>

// `affine.if` on #set1 with no else branch: the constraint is expanded to
// (-1 * d0 + 20) compared `sge` against zero, and the result guards an `if`.
// The call inside the branch must receive the value produced by @get_idx.
// CHECK-LABEL: func @if_only
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @if_only() {
  %i = call @get_idx() : () -> (index)
  affine.if #set1(%i) {
    func.call @body(%i) : (index) -> ()
  }
  return
}
175
// `affine.if` with an else branch: same condition expansion as the if-only
// case, and both branch calls must receive the value produced by @get_idx.
// CHECK-LABEL: func @if_else
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   } else {
// CHECK-NEXT:     call @mid(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @if_else() {
  %i = call @get_idx() : () -> (index)
  affine.if #set1(%i) {
    func.call @body(%i) : (index) -> ()
  } else {
    func.call @mid(%i) : (index) -> ()
  }
  return
}
200
// Nested conditionals: the outer #set1 test is computed once, and the inner
// #set2 test (d0 + -10 compared `sge` against zero) is expected separately
// inside each outer branch. Both innermost calls must receive the value
// produced by @get_idx.
// CHECK-LABEL: func @nested_ifs
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     %[[c0_0:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:     %[[v4:.*]] = arith.addi %[[v0]], %[[cm10]] : index
// CHECK-NEXT:     %[[v5:.*]] = arith.cmpi sge, %[[v4]], %[[c0_0]] : index
// CHECK-NEXT:     if %[[v5]] {
// CHECK-NEXT:       call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   } else {
// CHECK-NEXT:     %[[c0_0:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:     %{{.*}} = arith.addi %[[v0]], %[[cm10]] : index
// CHECK-NEXT:     %{{.*}} = arith.cmpi sge, %{{.*}}, %[[c0_0]] : index
// CHECK-NEXT:     if %{{.*}} {
// CHECK-NEXT:       call @mid(%[[v0]]) : (index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @nested_ifs() {
  %i = call @get_idx() : () -> (index)
  affine.if #set1(%i) {
    affine.if #set2(%i) {
      func.call @body(%i) : (index) -> ()
    }
  } else {
    affine.if #set2(%i) {
      func.call @mid(%i) : (index) -> ()
    }
  }
  return
}
241
// `affine.if` that produces a value: expected to become an `scf.if` with a
// result type, where each branch's `affine.yield` turns into an `scf.yield`
// of the same operand.
// CHECK-LABEL: func @if_with_yield
// CHECK-NEXT:   %[[c0_i64:.*]] = arith.constant 0 : i64
// CHECK-NEXT:   %[[c1_i64:.*]] = arith.constant 1 : i64
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.addi %[[v0]], %[[cm10]] : index
// CHECK-NEXT:   %[[v2:.*]] = arith.cmpi sge, %[[v1]], %[[c0]] : index
// CHECK-NEXT:   %[[v3:.*]] = scf.if %[[v2]] -> (i64) {
// CHECK-NEXT:     scf.yield %[[c0_i64]] : i64
// CHECK-NEXT:   } else {
// CHECK-NEXT:     scf.yield %[[c1_i64]] : i64
// CHECK-NEXT:   }
// CHECK-NEXT:   return %[[v3]] : i64
// CHECK-NEXT: }
func.func @if_with_yield() -> (i64) {
  %cst0 = arith.constant 0 : i64
  %cst1 = arith.constant 1 : i64
  %i = call @get_idx() : () -> (index)
  %1 = affine.if #set2(%i) -> (i64) {
      affine.yield %cst0 : i64
  } else {
      affine.yield %cst1 : i64
  }
  return %1 : i64
}
268
// Integer set with one dimension and four symbols: four inequalities and one
// equality.
#setN = affine_set<(d0)[N,M,K,L] : (N - d0 + 1 >= 0, N - 1 >= 0, M - 1 >= 0, K - 1 >= 0, L - 42 == 0)>

// Multi-constraint condition: every constraint is compared against zero
// (`sge` for the inequalities, `eq` for the equality) and the results are
// chained pairwise with `arith.andi`. Both branch calls must receive the
// value produced by @get_idx.
// CHECK-LABEL: func @multi_cond
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %{{.*}} : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   %[[v3:.*]] = arith.addi %[[v2]], %[[c1]] : index
// CHECK-NEXT:   %[[v4:.*]] = arith.cmpi sge, %[[v3]], %[[c0]] : index
// CHECK-NEXT:   %[[cm1_0:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v5:.*]] = arith.addi %{{.*}}, %[[cm1_0]] : index
// CHECK-NEXT:   %[[v6:.*]] = arith.cmpi sge, %[[v5]], %[[c0]] : index
// CHECK-NEXT:   %[[v7:.*]] = arith.andi %[[v4]], %[[v6]] : i1
// CHECK-NEXT:   %[[cm1_1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v8:.*]] = arith.addi %{{.*}}, %[[cm1_1]] : index
// CHECK-NEXT:   %[[v9:.*]] = arith.cmpi sge, %[[v8]], %[[c0]] : index
// CHECK-NEXT:   %[[v10:.*]] = arith.andi %[[v7]], %[[v9]] : i1
// CHECK-NEXT:   %[[cm1_2:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v11:.*]] = arith.addi %{{.*}}, %[[cm1_2]] : index
// CHECK-NEXT:   %[[v12:.*]] = arith.cmpi sge, %[[v11]], %[[c0]] : index
// CHECK-NEXT:   %[[v13:.*]] = arith.andi %[[v10]], %[[v12]] : i1
// CHECK-NEXT:   %[[cm42:.*]] = arith.constant -42 : index
// CHECK-NEXT:   %[[v14:.*]] = arith.addi %{{.*}}, %[[cm42]] : index
// CHECK-NEXT:   %[[v15:.*]] = arith.cmpi eq, %[[v14]], %[[c0]] : index
// CHECK-NEXT:   %[[v16:.*]] = arith.andi %[[v13]], %[[v15]] : i1
// CHECK-NEXT:   if %[[v16]] {
// CHECK-NEXT:     call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   } else {
// CHECK-NEXT:     call @mid(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @multi_cond(%N : index, %M : index, %K : index, %L : index) {
  %i = call @get_idx() : () -> (index)
  affine.if #setN(%i)[%N,%M,%K,%L] {
    func.call @body(%i) : (index) -> ()
  } else {
    func.call @mid(%i) : (index) -> ()
  }
  return
}
312
// Mixed nesting of conditionals and loops (if > for > if, then for > if > for).
// The expectations are interleaved with the input they describe. Each constant
// is captured under its own name where it is defined and then *used* (not
// re-captured) as a loop bound, step or compare operand, so the operand
// wiring is actually verified.
// CHECK-LABEL: func @if_for
func.func @if_for() {
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
  %i = call @get_idx() : () -> (index)
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     %[[lb0:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[ub0:.*]] = arith.constant 42 : index
// CHECK-NEXT:     %[[step0:.*]] = arith.constant 1 : index
// CHECK-NEXT:     for %{{.*}} = %[[lb0]] to %[[ub0]] step %[[step0]] {
// CHECK-NEXT:       %[[zero0:.*]] = arith.constant 0 : index
// CHECK-NEXT:       %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:       %[[v4:.*]] = arith.addi %{{.*}}, %[[cm10]] : index
// CHECK-NEXT:       %[[v5:.*]] = arith.cmpi sge, %[[v4]], %[[zero0]] : index
// CHECK-NEXT:       if %[[v5]] {
// CHECK-NEXT:         call @body2(%[[v0]], %{{.*}}) : (index, index) -> ()
  affine.if #set1(%i) {
    affine.for %j = 0 to 42 {
      affine.if #set2(%j) {
        func.call @body2(%i, %j) : (index, index) -> ()
      }
    }
  }
//      CHECK:   %[[lb1:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[ub1:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[step1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[lb1]] to %[[ub1]] step %[[step1]] {
// CHECK-NEXT:     %[[zero1:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[cm10_1:.*]] = arith.constant -10 : index
// CHECK-NEXT:     %{{.*}} = arith.addi %{{.*}}, %[[cm10_1]] : index
// CHECK-NEXT:     %{{.*}} = arith.cmpi sge, %{{.*}}, %[[zero1]] : index
// CHECK-NEXT:     if %{{.*}} {
// CHECK-NEXT:       %[[lb2:.*]] = arith.constant 0 : index
// CHECK-NEXT:       %[[ub2:.*]] = arith.constant 42 : index
// CHECK-NEXT:       %[[step2:.*]] = arith.constant 1 : index
// CHECK-NEXT:       for %{{.*}} = %[[lb2]] to %[[ub2]] step %[[step2]] {
  affine.for %k = 0 to 42 {
    affine.if #set2(%k) {
      affine.for %l = 0 to 42 {
        func.call @body3(%k, %l) : (index, index) -> ()
      }
    }
  }
//      CHECK:   return
  return
}
364
// Two-result bound maps: the lower bound is the max of (d0, s0 - d0), the
// upper bound the min of (s0, d0 + 10).
#lbMultiMap = affine_map<(d0)[s0] -> (d0, s0 - d0)>
#ubMultiMap = affine_map<(d0)[s0] -> (s0, d0 + 10)>

// Multi-result loop bounds: the lower bound is expected to be reduced with
// `arith.maxsi` and the upper bound with `arith.minsi` before the inner loop.
// CHECK-LABEL: func @loop_min_max
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:     %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:     %[[mul0:.*]] = arith.muli %{{.*}}, %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:     %[[add0:.*]] = arith.addi %[[mul0]], %{{.*}} : index
// CHECK-NEXT:     %[[max:.*]] = arith.maxsi %{{.*}}, %[[add0]] : index
// CHECK-NEXT:     %[[c10:.*]] = arith.constant 10 : index
// CHECK-NEXT:     %[[add1:.*]] = arith.addi %{{.*}}, %[[c10]] : index
// CHECK-NEXT:     %[[min:.*]] = arith.minsi %{{.*}}, %[[add1]] : index
// CHECK-NEXT:     %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:     for %{{.*}} = %[[max]] to %[[min]] step %[[c1_0]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @loop_min_max(%N : index) {
  affine.for %i = 0 to 42 {
    affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
  }
  return
}
395
// Identity map over seven dimensions, used as a seven-way `min` upper bound.
#map_7_values = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>

// Verify that the "min" reduction sequence is emitted
// correctly for an affine map with 7 results: six `arith.minsi` ops are
// expected, each consuming the result of the previous one.

// CHECK-LABEL: func @min_reduction_tree
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[min:.+]] = arith.minsi %{{.*}}, %{{.*}} : index
// CHECK-NEXT:   %[[min_0:.+]] = arith.minsi %[[min]], %{{.*}} : index
// CHECK-NEXT:   %[[min_1:.+]] = arith.minsi %[[min_0]], %{{.*}} : index
// CHECK-NEXT:   %[[min_2:.+]] = arith.minsi %[[min_1]], %{{.*}} : index
// CHECK-NEXT:   %[[min_3:.+]] = arith.minsi %[[min_2]], %{{.*}} : index
// CHECK-NEXT:   %[[min_4:.+]] = arith.minsi %[[min_3]], %{{.*}} : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[min_4]] step %[[c1]] {
// CHECK-NEXT:     call @body(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @min_reduction_tree(%v1 : index, %v2 : index, %v3 : index, %v4 : index, %v5 : index, %v6 : index, %v7 : index) {
  affine.for %i = 0 to min #map_7_values(%v1, %v2, %v3, %v4, %v5, %v6, %v7)[] {
    func.call @body(%i) : (index) -> ()
  }
  return
}
421
422/////////////////////////////////////////////////////////////////////
423
// Maps exercised by the affine.apply tests: a constant, symbol and dimension
// identities, a sum with a constant, and a weighted sum over four dimensions
// and three symbols.
#map0 = affine_map<() -> (0)>
#map1 = affine_map<()[s0] -> (s0)>
#map2 = affine_map<(d0) -> (d0)>
#map3 = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
#map4 = affine_map<(d0,d1,d2,d3)[s0,s1,s2] -> (d0 + 2*d1 + 3*d2 + 4*d3 + 5*s0 + 6*s1 + 7*s2)>
#map5 = affine_map<(d0,d1,d2) -> (d0,d1,d2)>
#map6 = affine_map<(d0,d1,d2) -> (d0 + d1 + d2)>

// `affine.apply` expansion; the expectations are interleaved with the applies
// they describe. The constants 101 and 102 act as markers showing that no
// ops are emitted for the identity applies between them.
// CHECK-LABEL: func @affine_applies(
func.func @affine_applies(%arg0 : index) {
// CHECK: %[[c0:.*]] = arith.constant 0 : index
  %zero = affine.apply #map0()

// Identity maps are just discarded.
// CHECK-NEXT: %[[c101:.*]] = arith.constant 101 : index
  %101 = arith.constant 101 : index
  %symbZero = affine.apply #map1()[%zero]
// CHECK-NEXT: %[[c102:.*]] = arith.constant 102 : index
  %102 = arith.constant 102 : index
  %copy = affine.apply #map2(%zero)

// d0 + s0 + 1 with both operands being the zero constant.
// CHECK-NEXT: %[[v0:.*]] = arith.addi %[[c0]], %[[c0]] : index
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[v1:.*]] = arith.addi %[[v0]], %[[c1]] : index
  %one = affine.apply #map3(%symbZero)[%zero]

// Weighted sum: one constant, one `muli` and one `addi` per scaled term.
// CHECK-NEXT: %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[v2:.*]] = arith.muli %arg0, %[[c2]] overflow<nsw> : index
// CHECK-NEXT: %[[v3:.*]] = arith.addi %arg0, %[[v2]] : index
// CHECK-NEXT: %[[c3:.*]] = arith.constant 3 : index
// CHECK-NEXT: %[[v4:.*]] = arith.muli %arg0, %[[c3]] overflow<nsw> : index
// CHECK-NEXT: %[[v5:.*]] = arith.addi %[[v3]], %[[v4]] : index
// CHECK-NEXT: %[[c4:.*]] = arith.constant 4 : index
// CHECK-NEXT: %[[v6:.*]] = arith.muli %arg0, %[[c4]] overflow<nsw> : index
// CHECK-NEXT: %[[v7:.*]] = arith.addi %[[v5]], %[[v6]] : index
// CHECK-NEXT: %[[c5:.*]] = arith.constant 5 : index
// CHECK-NEXT: %[[v8:.*]] = arith.muli %arg0, %[[c5]] overflow<nsw> : index
// CHECK-NEXT: %[[v9:.*]] = arith.addi %[[v7]], %[[v8]] : index
// CHECK-NEXT: %[[c6:.*]] = arith.constant 6 : index
// CHECK-NEXT: %[[v10:.*]] = arith.muli %arg0, %[[c6]] overflow<nsw> : index
// CHECK-NEXT: %[[v11:.*]] = arith.addi %[[v9]], %[[v10]] : index
// CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT: %[[v12:.*]] = arith.muli %arg0, %[[c7]] overflow<nsw> : index
// CHECK-NEXT: %[[v13:.*]] = arith.addi %[[v11]], %[[v12]] : index
  %four = affine.apply #map4(%arg0, %arg0, %arg0, %arg0)[%arg0, %arg0, %arg0]
  return
}
471
// Identity applies on function arguments: the `return` is expected directly
// after the function header, with no ops in between.
// CHECK-LABEL: func @args_ret_affine_apply(
func.func @args_ret_affine_apply(index, index) -> (index, index) {
^bb0(%0 : index, %1 : index):
// CHECK-NEXT: return %{{.*}}, %{{.*}} : index, index
  %00 = affine.apply #map2 (%0)
  %11 = affine.apply #map1 ()[%1]
  return %00, %11 : index, index
}
480
481//===---------------------------------------------------------------------===//
482// Test lowering of Euclidean (floor) division, ceil division and modulo
483// operation used in affine expressions.  In addition to testing the
484// operation-level output, check that the obtained results are correct by
485// applying constant folding transformation after affine lowering.
486//===---------------------------------------------------------------------===//
487
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_mod
// test in the "canonicalize.mlir" test to reflect the expected output of
// affine.apply lowering.
// --------------------------------------------------------------------------//

// `i mod 42`: a signed remainder, followed by a select that adds the divisor
// back when the remainder is negative.
#map_mod = affine_map<(i) -> (i mod 42)>
// CHECK-LABEL: func @affine_apply_mod
func.func @affine_apply_mod(%arg0 : index) -> (index) {
// CHECK-NEXT: %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[v0:.*]] = arith.remsi %{{.*}}, %[[c42]] : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[v1:.*]] = arith.cmpi slt, %[[v0]], %[[c0]] : index
// CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v0]], %[[c42]] : index
// CHECK-NEXT: %[[v3:.*]] = arith.select %[[v1]], %[[v2]], %[[v0]] : index
  %0 = affine.apply #map_mod (%arg0)
  return %0 : index
}
// Same `mod` expansion with the divisor supplied as a symbol operand: the
// second function argument takes the place of the divisor constant.
#map_mod_dynamic_divisor = affine_map<(i)[s] -> (i mod s)>
// CHECK-LABEL: func @affine_apply_mod_dynamic_divisor
func.func @affine_apply_mod_dynamic_divisor(%arg0 : index, %arg1 : index) -> (index) {
// CHECK-NEXT: %[[v0:.*]] = arith.remsi %{{.*}}, %arg1 : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[v1:.*]] = arith.cmpi slt, %[[v0]], %[[c0]] : index
// CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v0]], %arg1 : index
// CHECK-NEXT: %[[v3:.*]] = arith.select %[[v1]], %[[v2]], %[[v0]] : index
  %0 = affine.apply #map_mod_dynamic_divisor (%arg0)[%arg1]
  return %0 : index
}
517
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_floordiv
// test in the "canonicalize.mlir" test to reflect the expected output of
// affine.apply lowering.
// --------------------------------------------------------------------------//
// `i floordiv 42`: for a negative dividend x the expected sequence divides
// (-1 - x) and maps the quotient q back to (-1 - q); otherwise it is a plain
// signed division. Both choices are made with `arith.select` on `x < 0`.
#map_floordiv = affine_map<(i) -> (i floordiv 42)>
// CHECK-LABEL: func @affine_apply_floordiv
func.func @affine_apply_floordiv(%arg0 : index) -> (index) {
// CHECK-NEXT: %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT: %[[v0:.*]] = arith.cmpi slt, %{{.*}}, %[[c0]] : index
// CHECK-NEXT: %[[v1:.*]] = arith.subi %[[cm1]], %{{.*}} : index
// CHECK-NEXT: %[[v2:.*]] = arith.select %[[v0]], %[[v1]], %{{.*}} : index
// CHECK-NEXT: %[[v3:.*]] = arith.divsi %[[v2]], %[[c42]] : index
// CHECK-NEXT: %[[v4:.*]] = arith.subi %[[cm1]], %[[v3]] : index
// CHECK-NEXT: %[[v5:.*]] = arith.select %[[v0]], %[[v4]], %[[v3]] : index
  %0 = affine.apply #map_floordiv (%arg0)
  return %0 : index
}
// Same `floordiv` expansion with the divisor supplied as a symbol operand:
// the second function argument takes the place of the divisor constant.
#map_floordiv_dynamic_divisor = affine_map<(i)[s] -> (i floordiv s)>
// CHECK-LABEL: func @affine_apply_floordiv_dynamic_divisor
func.func @affine_apply_floordiv_dynamic_divisor(%arg0 : index, %arg1 : index) -> (index) {
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT: %[[v0:.*]] = arith.cmpi slt, %{{.*}}, %[[c0]] : index
// CHECK-NEXT: %[[v1:.*]] = arith.subi %[[cm1]], %{{.*}} : index
// CHECK-NEXT: %[[v2:.*]] = arith.select %[[v0]], %[[v1]], %{{.*}} : index
// CHECK-NEXT: %[[v3:.*]] = arith.divsi %[[v2]], %arg1 : index
// CHECK-NEXT: %[[v4:.*]] = arith.subi %[[cm1]], %[[v3]] : index
// CHECK-NEXT: %[[v5:.*]] = arith.select %[[v0]], %[[v4]], %[[v3]] : index
  %0 = affine.apply #map_floordiv_dynamic_divisor (%arg0)[%arg1]
  return %0 : index
}
552
// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_ceildiv
// test in the "canonicalize.mlir" test to reflect the expected output of
// affine.apply lowering.
// --------------------------------------------------------------------------//
// `i ceildiv 42`: for x <= 0 the expected sequence divides (0 - x) and negates
// the quotient; otherwise it divides (x - 1) and adds one. Both choices are
// made with `arith.select` on `x <= 0`.
#map_ceildiv = affine_map<(i) -> (i ceildiv 42)>
// CHECK-LABEL: func @affine_apply_ceildiv
func.func @affine_apply_ceildiv(%arg0 : index) -> (index) {
// CHECK-NEXT:  %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:  %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:  %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:  %[[v0:.*]] = arith.cmpi sle, %{{.*}}, %[[c0]] : index
// CHECK-NEXT:  %[[v1:.*]] = arith.subi %[[c0]], %{{.*}} : index
// CHECK-NEXT:  %[[v2:.*]] = arith.subi %{{.*}}, %[[c1]] : index
// CHECK-NEXT:  %[[v3:.*]] = arith.select %[[v0]], %[[v1]], %[[v2]] : index
// CHECK-NEXT:  %[[v4:.*]] = arith.divsi %[[v3]], %[[c42]] : index
// CHECK-NEXT:  %[[v5:.*]] = arith.subi %[[c0]], %[[v4]] : index
// CHECK-NEXT:  %[[v6:.*]] = arith.addi %[[v4]], %[[c1]] : index
// CHECK-NEXT:  %[[v7:.*]] = arith.select %[[v0]], %[[v5]], %[[v6]] : index
  %0 = affine.apply #map_ceildiv (%arg0)
  return %0 : index
}
// Same `ceildiv` expansion with the divisor supplied as a symbol operand:
// the second function argument takes the place of the divisor constant.
#map_ceildiv_dynamic_divisor = affine_map<(i)[s] -> (i ceildiv s)>
// CHECK-LABEL: func @affine_apply_ceildiv_dynamic_divisor
func.func @affine_apply_ceildiv_dynamic_divisor(%arg0 : index, %arg1 : index) -> (index) {
// CHECK-NEXT:  %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:  %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:  %[[v0:.*]] = arith.cmpi sle, %{{.*}}, %[[c0]] : index
// CHECK-NEXT:  %[[v1:.*]] = arith.subi %[[c0]], %{{.*}} : index
// CHECK-NEXT:  %[[v2:.*]] = arith.subi %{{.*}}, %[[c1]] : index
// CHECK-NEXT:  %[[v3:.*]] = arith.select %[[v0]], %[[v1]], %[[v2]] : index
// CHECK-NEXT:  %[[v4:.*]] = arith.divsi %[[v3]], %arg1 : index
// CHECK-NEXT:  %[[v5:.*]] = arith.subi %[[c0]], %[[v4]] : index
// CHECK-NEXT:  %[[v6:.*]] = arith.addi %[[v4]], %[[c1]] : index
// CHECK-NEXT:  %[[v7:.*]] = arith.select %[[v0]], %[[v5]], %[[v6]] : index
  %0 = affine.apply #map_ceildiv_dynamic_divisor (%arg0)[%arg1]
  return %0 : index
}
591
// `affine.load` with an affine subscript: the index expression is expected as
// explicit `arith` ops whose result is the subscript of a `memref.load`.
// CHECK-LABEL: func @affine_load
func.func @affine_load(%arg0 : index) {
  %0 = memref.alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    %1 = affine.load %0[%i0 + symbol(%arg0) + 7] : memref<10xf32>
  }
// CHECK:       %[[a:.*]] = arith.addi %{{.*}}, %{{.*}} : index
// CHECK-NEXT:  %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %[[a]], %[[c7]] : index
// CHECK-NEXT:  %{{.*}} = memref.load %{{.*}}[%[[b]]] : memref<10xf32>
  return
}
604
// `affine.store` with a subtraction in the subscript: the subtracted symbol
// is expected as a multiplication by -1 followed by additions, and the final
// sum is the subscript of the resulting `store`.
// CHECK-LABEL: func @affine_store
func.func @affine_store(%arg0 : index) {
  %0 = memref.alloc() : memref<10xf32>
  %1 = arith.constant 11.0 : f32
  affine.for %i0 = 0 to 10 {
    affine.store %1, %0[%i0 - symbol(%arg0) + 7] : memref<10xf32>
  }
// CHECK:       %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:  %[[a:.*]] = arith.muli %{{.*}}, %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %{{.*}}, %[[a]] : index
// CHECK-NEXT:  %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[c:.*]] = arith.addi %[[b]], %[[c7]] : index
// CHECK-NEXT:  store %{{.*}}, %{{.*}}[%[[c]]] : memref<10xf32>
  return
}
620
// Zero-dimensional memrefs: load and store with an empty subscript list are
// expected to keep the empty subscript and pass the loaded value to the store.
// CHECK-LABEL: func @affine_load_store_zero_dim
func.func @affine_load_store_zero_dim(%arg0 : memref<i32>, %arg1 : memref<i32>) {
  %0 = affine.load %arg0[] : memref<i32>
  affine.store %0, %arg1[] : memref<i32>
// CHECK: %[[x:.*]] = memref.load %arg0[] : memref<i32>
// CHECK: store %[[x]], %arg1[] : memref<i32>
  return
}
629
// `affine.prefetch`: the subscript is expanded like the one of a load, and the
// read/locality/data attributes are expected unchanged on `memref.prefetch`.
// CHECK-LABEL: func @affine_prefetch
func.func @affine_prefetch(%arg0 : index) {
  %0 = memref.alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.prefetch %0[%i0 + symbol(%arg0) + 7], read, locality<3>, data : memref<10xf32>
  }
// CHECK:       %[[a:.*]] = arith.addi %{{.*}}, %{{.*}} : index
// CHECK-NEXT:  %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %[[a]], %[[c7]] : index
// CHECK-NEXT:  memref.prefetch %{{.*}}[%[[b]]], read, locality<3>, data : memref<10xf32>
  return
}
642
// `affine.dma_start`: the source and destination subscripts are each expected
// as an `arith.addi` with a constant, feeding the resulting `dma_start`.
// CHECK-LABEL: func @affine_dma_start
func.func @affine_dma_start(%arg0 : index) {
  %0 = memref.alloc() : memref<100xf32>
  %1 = memref.alloc() : memref<100xf32, 2>
  %2 = memref.alloc() : memref<1xi32>
  %c0 = arith.constant 0 : index
  %c64 = arith.constant 64 : index
  affine.for %i0 = 0 to 10 {
    affine.dma_start %0[%i0 + 7], %1[%arg0 + 11], %2[%c0], %c64
        : memref<100xf32>, memref<100xf32, 2>, memref<1xi32>
  }
// CHECK:       %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[a:.*]] = arith.addi %{{.*}}, %[[c7]] : index
// CHECK-NEXT:  %[[c11:.*]] = arith.constant 11 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %{{.*}}, %[[c11]] : index
// CHECK-NEXT:  dma_start %{{.*}}[%[[a]]], %{{.*}}[%[[b]]], %{{.*}}, %{{.*}}[%{{.*}}] : memref<100xf32>, memref<100xf32, 2>, memref<1xi32>
  return
}
661
// `affine.dma_wait`: the tag subscript (induction variable + argument + 17)
// is expected as two `arith.addi` ops feeding the resulting `dma_wait`.
// CHECK-LABEL: func @affine_dma_wait
func.func @affine_dma_wait(%arg0 : index) {
  %2 = memref.alloc() : memref<1xi32>
  %c64 = arith.constant 64 : index
  affine.for %i0 = 0 to 10 {
    affine.dma_wait %2[%i0 + %arg0 + 17], %c64 : memref<1xi32>
  }
// CHECK:       %[[a:.*]] = arith.addi %{{.*}}, %arg0 : index
// CHECK-NEXT:  %[[c17:.*]] = arith.constant 17 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %[[a]], %[[c17]] : index
// CHECK-NEXT:  dma_wait %{{.*}}[%[[b]]], %{{.*}} : memref<1xi32>
  return
}
675
// `affine.min` over (d0 - d1, d1 - d0): each difference is expected as a
// multiplication by -1 plus an addition, and the two results feed
// `arith.minsi`. The -1 constants are used (not re-captured) as `muli`
// operands so that the multiplier is actually verified.
// CHECK-LABEL: func @affine_min
// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
func.func @affine_min(%arg0: index, %arg1: index) -> index{
  // CHECK: %[[Cm1:.*]] = arith.constant -1
  // CHECK: %[[neg1:.*]] = arith.muli %[[ARG1]], %[[Cm1]]
  // CHECK: %[[first:.*]] = arith.addi %[[ARG0]], %[[neg1]]
  // CHECK: %[[Cm2:.*]] = arith.constant -1
  // CHECK: %[[neg2:.*]] = arith.muli %[[ARG0]], %[[Cm2]]
  // CHECK: %[[second:.*]] = arith.addi %[[ARG1]], %[[neg2]]
  // CHECK: arith.minsi %[[first]], %[[second]]
  %0 = affine.min affine_map<(d0,d1) -> (d0 - d1, d1 - d0)>(%arg0, %arg1)
  return %0 : index
}
689
// affine.max over (d0 - d1, d1 - d0): each subtraction is expected to lower to
// a multiply by -1 followed by an add, and the two results feed arith.maxsi.
// The muli lines *use* the captured -1 constants (no `:.*` redefinition), so
// the multiplier operand is actually verified.
// CHECK-LABEL: func @affine_max
// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
func.func @affine_max(%arg0: index, %arg1: index) -> index{
  // CHECK: %[[Cm1:.*]] = arith.constant -1
  // CHECK: %[[neg1:.*]] = arith.muli %[[ARG1]], %[[Cm1]]
  // CHECK: %[[first:.*]] = arith.addi %[[ARG0]], %[[neg1]]
  // CHECK: %[[Cm2:.*]] = arith.constant -1
  // CHECK: %[[neg2:.*]] = arith.muli %[[ARG0]], %[[Cm2]]
  // CHECK: %[[second:.*]] = arith.addi %[[ARG1]], %[[neg2]]
  // CHECK: arith.maxsi %[[first]], %[[second]]
  %0 = affine.max affine_map<(d0,d1) -> (d0 - d1, d1 - d0)>(%arg0, %arg1)
  return %0 : index
}
703
// An empty 2-D affine.parallel over constant bounds (0..100 in each dimension,
// default step) is expected to lower to an scf.parallel whose lower bounds,
// upper bounds and steps are materialized as arith.constant ops.
// CHECK-LABEL: func @affine_parallel(
// CHECK-SAME: %[[ARG0:.*]]: memref<100x100xf32>, %[[ARG1:.*]]: memref<100x100xf32>) {
func.func @affine_parallel(%o: memref<100x100xf32>, %a: memref<100x100xf32>) {
  affine.parallel (%i, %j) = (0, 0) to (100, 100) {
  }
  return
}

// The constants may be matched in any order; the loop itself is then matched
// after them. Induction variables use a wildcard because their printed names
// are chosen by the printer and are not part of what this test verifies.
// CHECK-DAG:    %[[C100:.*]] = arith.constant 100
// CHECK-DAG:    %[[C100_1:.*]] = arith.constant 100
// CHECK-DAG:    %[[C0:.*]] = arith.constant 0
// CHECK-DAG:    %[[C0_1:.*]] = arith.constant 0
// CHECK-DAG:    %[[C1:.*]] = arith.constant 1
// CHECK-DAG:    %[[C1_1:.*]] = arith.constant 1
// CHECK:        scf.parallel (%{{.*}}, %{{.*}}) = (%[[C0]], %[[C0_1]]) to (%[[C100]], %[[C100_1]]) step (%[[C1]], %[[C1_1]]) {
719
// Two nested affine.parallel ops: the outer one walks 100x100x100 in steps of
// 10, the inner one covers [outer_iv, outer_iv + 10) in each dimension with
// the default step. Both are expected to become scf.parallel, with the inner
// upper bounds computed by arith.addi on the outer induction variables.
// CHECK-LABEL: func @affine_parallel_tiled(
// CHECK-SAME: %[[ARG0:.*]]: memref<100x100xf32>, %[[ARG1:.*]]: memref<100x100xf32>, %[[ARG2:.*]]: memref<100x100xf32>) {
func.func @affine_parallel_tiled(%o: memref<100x100xf32>, %a: memref<100x100xf32>, %b: memref<100x100xf32>) {
  affine.parallel (%ti, %tj, %tk) = (0, 0, 0) to (100, 100, 100) step (10, 10, 10) {
    affine.parallel (%pi, %pj, %pk) = (%ti, %tj, %tk) to (%ti + 10, %tj + 10, %tk + 10) {
      %lhs = affine.load %a[%pi, %pk] : memref<100x100xf32>
      %rhs = affine.load %b[%pk, %pj] : memref<100x100xf32>
      %prod = arith.mulf %lhs, %rhs : f32
    }
  }
  return
}

// Outer loop: bounds and steps are constants that may appear in any order.
// CHECK-DAG:     %[[UB_I:.*]] = arith.constant 100
// CHECK-DAG:     %[[UB_J:.*]] = arith.constant 100
// CHECK-DAG:     %[[UB_K:.*]] = arith.constant 100
// CHECK-DAG:     %[[LB_I:.*]] = arith.constant 0
// CHECK-DAG:     %[[LB_J:.*]] = arith.constant 0
// CHECK-DAG:     %[[LB_K:.*]] = arith.constant 0
// CHECK-DAG:     %[[STEP_I:.*]] = arith.constant 10
// CHECK-DAG:     %[[STEP_J:.*]] = arith.constant 10
// CHECK-DAG:     %[[STEP_K:.*]] = arith.constant 10
// CHECK:         scf.parallel (%[[TILE_I:.*]], %[[TILE_J:.*]], %[[TILE_K:.*]]) = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) {
// Inner loop: upper bounds are the outer induction variables plus 10.
// CHECK-DAG:       %[[OFF_I:.*]] = arith.constant 10
// CHECK-DAG:       %[[END_I:.*]] = arith.addi %[[TILE_I]], %[[OFF_I]]
// CHECK-DAG:       %[[OFF_J:.*]] = arith.constant 10
// CHECK-DAG:       %[[END_J:.*]] = arith.addi %[[TILE_J]], %[[OFF_J]]
// CHECK-DAG:       %[[OFF_K:.*]] = arith.constant 10
// CHECK-DAG:       %[[END_K:.*]] = arith.addi %[[TILE_K]], %[[OFF_K]]
// CHECK-DAG:       %[[UNIT_I:.*]] = arith.constant 1
// CHECK-DAG:       %[[UNIT_J:.*]] = arith.constant 1
// CHECK-DAG:       %[[UNIT_K:.*]] = arith.constant 1
// CHECK:           scf.parallel (%[[PT_I:.*]], %[[PT_J:.*]], %[[PT_K:.*]]) = (%[[TILE_I]], %[[TILE_J]], %[[TILE_K]]) to (%[[END_I]], %[[END_J]], %[[END_K]]) step (%[[UNIT_I]], %[[UNIT_J]], %[[UNIT_K]]) {
// CHECK:             %[[LHS_VAL:.*]] = memref.load %[[ARG1]][%[[PT_I]], %[[PT_K]]] : memref<100x100xf32>
// CHECK:             %[[RHS_VAL:.*]] = memref.load %[[ARG2]][%[[PT_K]], %[[PT_J]]] : memref<100x100xf32>
// CHECK:             arith.mulf %[[LHS_VAL]], %[[RHS_VAL]] : f32
// CHECK:             scf.reduce
757
758/////////////////////////////////////////////////////////////////////
759
// A 2-D affine.parallel without reductions: the body (two loads, a multiply
// and a store) is expected to be converted in place inside an scf.parallel
// that ends with scf.reduce. The store is matched by its fully qualified name
// so that a surviving `affine.store` cannot satisfy the directive.
func.func @affine_parallel_simple(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (memref<3x3xf32>) {
  %O = memref.alloc() : memref<3x3xf32>
  affine.parallel (%kx, %ky) = (0, 0) to (2, 2) {
      %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32>
      %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32>
      %3 = arith.mulf %1, %2 : f32
      affine.store %3, %O[%kx, %ky] : memref<3x3xf32>
  }
  return %O : memref<3x3xf32>
}
// CHECK-LABEL: func @affine_parallel_simple
// CHECK:         %[[LOWER_1:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER_1:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[LOWER_2:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER_2:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[STEP_1:.*]] = arith.constant 1 : index
// CHECK-NEXT:    %[[STEP_2:.*]] = arith.constant 1 : index
// CHECK-NEXT:    scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) {
// CHECK-NEXT:      %[[VAL_1:.*]] = memref.load
// CHECK-NEXT:      %[[VAL_2:.*]] = memref.load
// CHECK-NEXT:      %[[PRODUCT:.*]] = arith.mulf
// CHECK-NEXT:      memref.store
// CHECK-NEXT:      scf.reduce
// CHECK-NEXT:    }
// CHECK-NEXT:    return
// CHECK-NEXT:  }
786
787/////////////////////////////////////////////////////////////////////
788
// Same loop body as the static-bounds case, but the bounds are SSA values:
// both upper bounds come from a memref.dim of %arg0, which is expected to be
// used directly as the scf.parallel upper bounds. The store is matched by its
// fully qualified name so that a surviving `affine.store` cannot satisfy the
// directive.
func.func @affine_parallel_simple_dynamic_bounds(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
  %c_0 = arith.constant 0 : index
  %output_dim = memref.dim %arg0, %c_0 : memref<?x?xf32>
  affine.parallel (%kx, %ky) = (%c_0, %c_0) to (%output_dim, %output_dim) {
      %1 = affine.load %arg0[%kx, %ky] : memref<?x?xf32>
      %2 = affine.load %arg1[%kx, %ky] : memref<?x?xf32>
      %3 = arith.mulf %1, %2 : f32
      affine.store %3, %arg2[%kx, %ky] : memref<?x?xf32>
  }
  return
}
// CHECK-LABEL: func @affine_parallel_simple_dynamic_bounds
// CHECK-SAME:  %[[ARG_0:.*]]: memref<?x?xf32>, %[[ARG_1:.*]]: memref<?x?xf32>, %[[ARG_2:.*]]: memref<?x?xf32>
// CHECK:         %[[DIM_INDEX:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER:.*]] = memref.dim %[[ARG_0]], %[[DIM_INDEX]] : memref<?x?xf32>
// CHECK-NEXT:    %[[LOWER_1:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[LOWER_2:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[STEP_1:.*]] = arith.constant 1 : index
// CHECK-NEXT:    %[[STEP_2:.*]] = arith.constant 1 : index
// CHECK-NEXT:    scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER]], %[[UPPER]]) step (%[[STEP_1]], %[[STEP_2]]) {
// CHECK-NEXT:      %[[VAL_1:.*]] = memref.load
// CHECK-NEXT:      %[[VAL_2:.*]] = memref.load
// CHECK-NEXT:      %[[PRODUCT:.*]] = arith.mulf
// CHECK-NEXT:      memref.store
// CHECK-NEXT:      scf.reduce
// CHECK-NEXT:    }
// CHECK-NEXT:    return
// CHECK-NEXT:  }
817
818/////////////////////////////////////////////////////////////////////
819
// affine.parallel with two reductions ("addf", "mulf") on f32: expected to
// lower to an scf.parallel with init values 0.0 and 1.0 and an scf.reduce
// holding one combiner region per reduction. Each captured value has its own
// variable name so that the two loop results stay referable, which lets the
// final directive verify that exactly those results are returned.
func.func @affine_parallel_with_reductions(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (f32, f32) {
  %0:2 = affine.parallel (%kx, %ky) = (0, 0) to (2, 2) reduce ("addf", "mulf") -> (f32, f32) {
            %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32>
            %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32>
            %3 = arith.mulf %1, %2 : f32
            %4 = arith.addf %1, %2 : f32
            affine.yield %3, %4 : f32, f32
          }
  return %0#0, %0#1 : f32, f32
}
// CHECK-LABEL: func @affine_parallel_with_reductions
// CHECK:         %[[LOWER_1:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER_1:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[LOWER_2:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER_2:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[STEP_1:.*]] = arith.constant 1 : index
// CHECK-NEXT:    %[[STEP_2:.*]] = arith.constant 1 : index
// CHECK-NEXT:    %[[INIT_1:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT:    %[[INIT_2:.*]] = arith.constant 1.000000e+00 : f32
// CHECK-NEXT:    %[[RES:.*]]:2 = scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) init (%[[INIT_1]], %[[INIT_2]]) -> (f32, f32) {
// CHECK-NEXT:      %[[VAL_1:.*]] = memref.load
// CHECK-NEXT:      %[[VAL_2:.*]] = memref.load
// CHECK-NEXT:      %[[PRODUCT:.*]] = arith.mulf
// CHECK-NEXT:      %[[SUM:.*]] = arith.addf
// CHECK-NEXT:      scf.reduce(%[[PRODUCT]], %[[SUM]] : f32, f32) {
// CHECK-NEXT:      ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32):
// CHECK-NEXT:        %[[RED_ADD:.*]] = arith.addf
// CHECK-NEXT:        scf.reduce.return %[[RED_ADD]] : f32
// CHECK-NEXT:      }, {
// CHECK-NEXT:      ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32):
// CHECK-NEXT:        %[[RED_MUL:.*]] = arith.mulf
// CHECK-NEXT:        scf.reduce.return %[[RED_MUL]] : f32
// CHECK-NEXT:      }
// CHECK-NEXT:    }
// CHECK-NEXT:    return %[[RES]]#0, %[[RES]]#1 : f32, f32
// CHECK-NEXT:  }
856
857/////////////////////////////////////////////////////////////////////
858
// affine.parallel with "addf" and "mulf" reductions on f64 values: expected to
// lower to an scf.parallel seeded with 0.0 and 1.0 and an scf.reduce with one
// combiner region per reduction.
func.func @affine_parallel_with_reductions_f64(%arg0: memref<3x3xf64>, %arg1: memref<3x3xf64>) -> (f64, f64) {
  %red:2 = affine.parallel (%row, %col) = (0, 0) to (2, 2) reduce ("addf", "mulf") -> (f64, f64) {
    %lhs = affine.load %arg0[%row, %col] : memref<3x3xf64>
    %rhs = affine.load %arg1[%row, %col] : memref<3x3xf64>
    %prod = arith.mulf %lhs, %rhs : f64
    %sum = arith.addf %lhs, %rhs : f64
    affine.yield %prod, %sum : f64, f64
  }
  return %red#0, %red#1 : f64, f64
}
// CHECK-LABEL: @affine_parallel_with_reductions_f64
// CHECK:  %[[LB_0:.*]] = arith.constant 0 : index
// CHECK:  %[[UB_0:.*]] = arith.constant 2 : index
// CHECK:  %[[LB_1:.*]] = arith.constant 0 : index
// CHECK:  %[[UB_1:.*]] = arith.constant 2 : index
// CHECK:  %[[STEP_0:.*]] = arith.constant 1 : index
// CHECK:  %[[STEP_1:.*]] = arith.constant 1 : index
// CHECK:  %[[ZERO:.*]] = arith.constant 0.000000e+00 : f64
// CHECK:  %[[ONE:.*]] = arith.constant 1.000000e+00 : f64
// CHECK:  %[[LOOP:.*]] = scf.parallel (%[[IV_0:.*]], %[[IV_1:.*]]) = (%[[LB_0]], %[[LB_1]]) to (%[[UB_0]], %[[UB_1]]) step (%[[STEP_0]], %[[STEP_1]]) init (%[[ZERO]], %[[ONE]]) -> (f64, f64) {
// CHECK:    %[[LOAD_0:.*]] = memref.load
// CHECK:    %[[LOAD_1:.*]] = memref.load
// CHECK:    %[[MUL:.*]] = arith.mulf
// CHECK:    %[[ADD:.*]] = arith.addf
// CHECK:    scf.reduce(%[[MUL]], %[[ADD]] : f64, f64) {
// CHECK:    ^bb0(%[[ACC_L:.*]]: f64, %[[ACC_R:.*]]: f64):
// CHECK:      %[[RED_ADD:.*]] = arith.addf
// CHECK:      scf.reduce.return %[[RED_ADD]] : f64
// CHECK:    }, {
// CHECK:    ^bb0(%[[ACC_L:.*]]: f64, %[[ACC_R:.*]]: f64):
// CHECK:      %[[RED_MUL:.*]] = arith.mulf
// CHECK:      scf.reduce.return %[[RED_MUL]] : f64
// CHECK:    }
// CHECK:  }
893
894/////////////////////////////////////////////////////////////////////
895
// affine.parallel with "addi" and "muli" reductions on i64 values: expected to
// lower to an scf.parallel seeded with 0 and 1 and an scf.reduce with one
// combiner region per reduction.
func.func @affine_parallel_with_reductions_i64(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>) -> (i64, i64) {
  %red:2 = affine.parallel (%row, %col) = (0, 0) to (2, 2) reduce ("addi", "muli") -> (i64, i64) {
    %lhs = affine.load %arg0[%row, %col] : memref<3x3xi64>
    %rhs = affine.load %arg1[%row, %col] : memref<3x3xi64>
    %prod = arith.muli %lhs, %rhs : i64
    %sum = arith.addi %lhs, %rhs : i64
    affine.yield %prod, %sum : i64, i64
  }
  return %red#0, %red#1 : i64, i64
}
// CHECK-LABEL: @affine_parallel_with_reductions_i64
// CHECK:  %[[LB_0:.*]] = arith.constant 0 : index
// CHECK:  %[[UB_0:.*]] = arith.constant 2 : index
// CHECK:  %[[LB_1:.*]] = arith.constant 0 : index
// CHECK:  %[[UB_1:.*]] = arith.constant 2 : index
// CHECK:  %[[STEP_0:.*]] = arith.constant 1 : index
// CHECK:  %[[STEP_1:.*]] = arith.constant 1 : index
// CHECK:  %[[ZERO:.*]] = arith.constant 0 : i64
// CHECK:  %[[ONE:.*]] = arith.constant 1 : i64
// CHECK:  %[[LOOP:.*]] = scf.parallel (%[[IV_0:.*]], %[[IV_1:.*]]) = (%[[LB_0]], %[[LB_1]]) to (%[[UB_0]], %[[UB_1]]) step (%[[STEP_0]], %[[STEP_1]]) init (%[[ZERO]], %[[ONE]]) -> (i64, i64) {
// CHECK:    %[[LOAD_0:.*]] = memref.load
// CHECK:    %[[LOAD_1:.*]] = memref.load
// CHECK:    %[[MUL:.*]] = arith.muli
// CHECK:    %[[ADD:.*]] = arith.addi
// CHECK:    scf.reduce(%[[MUL]], %[[ADD]] : i64, i64) {
// CHECK:    ^bb0(%[[ACC_L:.*]]: i64, %[[ACC_R:.*]]: i64):
// CHECK:      %[[RED_ADD:.*]] = arith.addi
// CHECK:      scf.reduce.return %[[RED_ADD]] : i64
// CHECK:    }, {
// CHECK:    ^bb0(%[[ACC_L:.*]]: i64, %[[ACC_R:.*]]: i64):
// CHECK:      %[[RED_MUL:.*]] = arith.muli
// CHECK:      scf.reduce.return %[[RED_MUL]] : i64
// CHECK:    }
// CHECK:  }
930