// RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s

func.func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store

  return
}

// -----

// Store-load forwarding can see through affine.apply ops since it relies on
// dependence information.
// CHECK-LABEL: func @store_affine_apply
func.func @store_affine_apply() -> memref<10xf32> {
  %cf7 = arith.constant 7.0 : f32
  %m = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
      %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
      affine.store %cf7, %m[%t0] : memref<10xf32>
  }
  return %m : memref<10xf32>
// CHECK:       %[[cst:.*]] = arith.constant 7.000000e+00 : f32
// CHECK-NEXT:  %[[VAR_0:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT:  affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT:      affine.apply
// CHECK-NEXT:      affine.store %[[cst]]
// CHECK-NEXT:  }
// CHECK-NEXT:  return %[[VAR_0]]  : memref<10xf32>
}

// -----

func.func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32

  return
}

// -----

// CHECK-LABEL: func @nested_loops_inner_loops_invariant_to_outermost_loop
func.func @nested_loops_inner_loops_invariant_to_outermost_loop(%m : memref<10xindex>) {
  affine.for %arg0 = 0 to 20 {
    affine.for %arg1 = 0 to 30 {
      %v0 = affine.for %arg2 = 0 to 10 iter_args (%prevAccum = %arg1) -> index {
        %v1 = affine.load %m[%arg2] : memref<10xindex>
        %newAccum = arith.addi %prevAccum, %v1 : index
        affine.yield %newAccum : index
      }
    }
  }

  // CHECK:      affine.for %{{.*}} = 0 to 30 {
  // CHECK-NEXT:   %{{.*}}  = affine.for %{{.*}}  = 0 to 10 iter_args(%{{.*}} = %{{.*}}) -> (index) {
  // CHECK-NEXT:     %{{.*}}  = affine.load %{{.*}}[%{{.*}}  : memref<10xindex>
  // CHECK-NEXT:     %{{.*}}  = arith.addi %{{.*}}, %{{.*}} : index
  // CHECK-NEXT:     affine.yield %{{.*}} : index
  // CHECK-NEXT:   }
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 20 {
  // CHECK-NEXT: }

  return
}

// -----

func.func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<11xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<11xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<11xf32>
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load %{{.*}} : memref<10xf32>
  // CHECK-NEXT: affine.load %{{.*}} : memref<11xf32>
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store %{{.*}} : memref<10xf32>

  return
}

// -----

func.func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.apply #map{{[0-9]*}}(%arg0)
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }


  return
}

// -----

func.func @dependent_stores() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.mulf %cf7, %cf7 : f32
      affine.store %v1, %m[%arg1] : memref<10xf32>
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: %[[mul:.*]] = arith.mulf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {

  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:   affine.store %[[mul]]
  // CHECK-NEXT:   affine.store

  return
}

// -----

func.func @independent_stores() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.mulf %cf7, %cf7 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
      affine.store %v1, %m[%arg1] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[add:.*]] = arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: %[[mul:.*]] = arith.mulf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:     affine.store %[[add]]
  // CHECK-NEXT:     affine.store %[[mul]]
  // CHECK-NEXT:    }

  return
}

// -----

func.func @load_dependent_store() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %cf7, %cf7 : f32
      affine.store %v0, %m[%arg1] : memref<10xf32>
      %v2 = affine.load %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for
  // CHECK-NEXT:   affine.store
  // CHECK-NEXT:   affine.load

  return
}

// -----

func.func @load_after_load() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %cf7, %cf7 : f32
      %v3 = affine.load %m[%arg1] : memref<10xf32>
      %v2 = affine.load %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>

  return
}

// -----

func.func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>

      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg1] : memref<10xf32>

      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT:   arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT:   affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            affine.store %cf9, %m[%arg1] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.store {{.*}}[%[[arg1]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            affine.store %cf9, %m[%arg0] : memref<10xf32>
          } else {
            affine.store %cf9, %m[%arg1] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store {{.*}}[%[[arg1]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if_else2() {
  %m = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          %tload1 = affine.load %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            affine.store %cf9, %m2[%arg0] : memref<10xf32>
          } else {
            %tload2 = affine.load %m[%arg0] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          %v1 = affine.load %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            %v2 = affine.load %m[%arg0] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_for_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>
          affine.for %arg2 = 0 to 10 {
            affine.store %cf9, %m[%arg2] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.for %[[arg2:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.store {{.*}}[%[[arg2]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_constant_and_load() {
  %m = memref.alloc() : memref<100xf32>
  %m2 = memref.alloc() : memref<100xf32>
  affine.for %arg0 = 0 to 5 {
    %c0 = arith.constant 0 : index
    %v = affine.load %m2[%c0] : memref<100xf32>
    affine.store %v, %m[%arg0] : memref<100xf32>
  }

  // CHECK: memref.alloc() : memref<100xf32>
  // CHECK-NEXT: memref.alloc() : memref<100xf32>
  // CHECK-NEXT: arith.constant 0 : index
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 5 {
  // CHECK-NEXT:  affine.store


  return
}

// -----

func.func @nested_load_store_same_memref() {
  %m = memref.alloc() : memref<10xf32>
  %cst = arith.constant 8.0 : f32
  %c0 = arith.constant 0 : index
   affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m[%c0] : memref<10xf32>
    affine.for %arg1 = 0 to 10 {
      affine.store %cst, %m[%arg1] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.constant 0 : index
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:   affine.load
  // CHECK-NEXT:   affine.for
  // CHECK-NEXT:    affine.store %[[cst]]


  return
}

// -----

func.func @nested_load_store_same_memref2() {
  %m = memref.alloc() : memref<10xf32>
  %cst = arith.constant 8.0 : f32
  %c0 = arith.constant 0 : index
   affine.for %arg0 = 0 to 10 {
     affine.store %cst, %m[%c0] : memref<10xf32>
      affine.for %arg1 = 0 to 10 {
        %v0 = affine.load %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.constant 0 : index
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:   affine.store %[[cst]]
  // CHECK-NEXT:   affine.load


  return
}

// -----

// CHECK-LABEL:   func @do_not_hoist_dependent_side_effect_free_op
func.func @do_not_hoist_dependent_side_effect_free_op(%arg0: memref<10x512xf32>) {
  %0 = memref.alloca() : memref<1xf32>
  %cst = arith.constant 8.0 : f32
  affine.for %i = 0 to 512 {
    affine.for %j = 0 to 10 {
      %5 = affine.load %arg0[%i, %j] : memref<10x512xf32>
      %6 = affine.load %0[0] : memref<1xf32>
      %add = arith.addf %5, %6 : f32
      affine.store %add, %0[0] : memref<1xf32>
    }
    %3 = affine.load %0[0] : memref<1xf32>
    %4 = arith.mulf %3, %cst : f32 // It shouldn't be hoisted.
  }
  return
}

// CHECK:       affine.for
// CHECK-NEXT:    affine.for
// CHECK-NEXT:      affine.load
// CHECK-NEXT:      affine.load
// CHECK-NEXT:      arith.addf
// CHECK-NEXT:      affine.store
// CHECK-NEXT:    }
// CHECK-NEXT:    affine.load
// CHECK-NEXT:    arith.mulf
// CHECK-NEXT:  }

// -----

// CHECK-LABEL: func @vector_loop_nothing_invariant
func.func @vector_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<40xf32>
  %m2 = memref.alloc() : memref<40xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.vector_load %m1[%arg0*4] : memref<40xf32>, vector<4xf32>
    %v1 = affine.vector_load %m2[%arg0*4] : memref<40xf32>, vector<4xf32>
    %v2 = arith.addf %v0, %v1 : vector<4xf32>
    affine.vector_store %v2, %m1[%arg0*4] : memref<40xf32>, vector<4xf32>
  }
  return
}

// CHECK:       affine.for
// CHECK-NEXT:    affine.vector_load
// CHECK-NEXT:    affine.vector_load
// CHECK-NEXT:    arith.addf
// CHECK-NEXT:    affine.vector_store
// CHECK-NEXT:  }

// -----

// CHECK-LABEL: func @vector_loop_all_invariant
func.func @vector_loop_all_invariant() {
  %m1 = memref.alloc() : memref<4xf32>
  %m2 = memref.alloc() : memref<4xf32>
  %m3 = memref.alloc() : memref<4xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.vector_load %m1[0] : memref<4xf32>, vector<4xf32>
    %v1 = affine.vector_load %m2[0] : memref<4xf32>, vector<4xf32>
    %v2 = arith.addf %v0, %v1 : vector<4xf32>
    affine.vector_store %v2, %m3[0] : memref<4xf32>, vector<4xf32>
  }
  return
}

// CHECK:       memref.alloc()
// CHECK-NEXT:  memref.alloc()
// CHECK-NEXT:  memref.alloc()
// CHECK-NEXT:  affine.vector_load
// CHECK-NEXT:  affine.vector_load
// CHECK-NEXT:  arith.addf
// CHECK-NEXT:  affine.vector_store
// CHECK-NEXT:  affine.for

// -----

#set = affine_set<(d0): (d0 - 10 >= 0)>
// CHECK-LABEL:   func @affine_if_not_invariant(
func.func @affine_if_not_invariant(%buffer: memref<1024xf32>) -> f32 {
  %sum_init_0 = arith.constant 0.0 : f32
  %sum_init_1 = arith.constant 1.0 : f32
  %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = affine.if #set(%i) -> (f32) {
      %new_sum = arith.addf %sum_iter, %t : f32
      affine.yield %new_sum : f32
    } else {
      affine.yield %sum_iter : f32
    }
    %modified_sum = arith.addf %sum_next, %sum_init_1 : f32
    affine.yield %modified_sum : f32
  }
  return %res : f32
}

// CHECK:       arith.constant 0.000000e+00 : f32
// CHECK-NEXT:  arith.constant 1.000000e+00 : f32
// CHECK-NEXT:  affine.for
// CHECK-NEXT:  affine.load
// CHECK-NEXT:  affine.if
// CHECK-NEXT:  arith.addf
// CHECK-NEXT:  affine.yield
// CHECK-NEXT:  } else {
// CHECK-NEXT:  affine.yield
// CHECK-NEXT:  }
// CHECK-NEXT:  arith.addf
// CHECK-NEXT:  affine.yield
// CHECK-NEXT:  }

// -----

// CHECK-LABEL:   func @affine_for_not_invariant(
func.func @affine_for_not_invariant(%in : memref<30x512xf32, 1>,
                               %out : memref<30x1xf32, 1>) {
  %sum_0 = arith.constant 0.0 : f32
  %cst_0 = arith.constant 1.1 : f32
  affine.for %j = 0 to 30 {
    %sum = affine.for %i = 0 to 512 iter_args(%sum_iter = %sum_0) -> (f32) {
      %t = affine.load %in[%j,%i] : memref<30x512xf32,1>
      %sum_next = arith.addf %sum_iter, %t : f32
      affine.yield %sum_next : f32
    }
    %mod_sum = arith.mulf %sum, %cst_0 : f32
    affine.store %mod_sum, %out[%j, 0] : memref<30x1xf32, 1>
  }
  return
}

// CHECK:       arith.constant 0.000000e+00 : f32
// CHECK-NEXT:  arith.constant 1.100000e+00 : f32
// CHECK-NEXT:  affine.for
// CHECK-NEXT:  affine.for
// CHECK-NEXT:  affine.load
// CHECK-NEXT:  arith.addf
// CHECK-NEXT:  affine.yield
// CHECK-NEXT:  }
// CHECK-NEXT:  arith.mulf
// CHECK-NEXT:  affine.store

// -----

// CHECK-LABEL: func @use_of_iter_operands_invariant
func.func @use_of_iter_operands_invariant(%m : memref<10xindex>) {
  %sum_1 = arith.constant 0 : index
  %v0 = affine.for %arg1 = 0 to 11 iter_args (%prevAccum = %sum_1) -> index {
    %prod = arith.muli %sum_1, %sum_1 : index
    %newAccum = arith.addi %prevAccum, %prod : index
    affine.yield %newAccum : index
  }
  return
}

// CHECK:       constant
// CHECK-NEXT:  muli
// CHECK-NEXT:  affine.for
// CHECK-NEXT:    addi
// CHECK-NEXT:    affine.yield

// -----

// CHECK-LABEL: func @use_of_iter_args_not_invariant
func.func @use_of_iter_args_not_invariant(%m : memref<10xindex>) {
  %sum_1 = arith.constant 0 : index
  %v0 = affine.for %arg1 = 0 to 11 iter_args (%prevAccum = %sum_1) -> index {
    %newAccum = arith.addi %prevAccum, %sum_1 : index
    affine.yield %newAccum : index
  }
  return
}

// CHECK:       arith.constant
// CHECK-NEXT:  affine.for
// CHECK-NEXT:  arith.addi
// CHECK-NEXT:  affine.yield

#map = affine_map<(d0) -> (64, d0 * -64 + 1020)>
// CHECK-LABEL: func.func @affine_parallel
func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
  %cst = arith.constant 0.000000e+00 : f32
  affine.parallel (%arg3) = (0) to (32) {
    affine.for %arg4 = 0 to 16 {
      affine.parallel (%arg5, %arg6) = (0, 0) to (min(128, 122), min(64, %arg3 * -64 + 2040)) {
        affine.for %arg7 = 0 to min #map(%arg4) {
          affine.store %cst, %memref_8[%arg5 + 3968, %arg6 + %arg3 * 64] : memref<4090x2040xf32>
        }
      }
    }
  }
  // CHECK:       affine.parallel
  // CHECK-NEXT:    affine.for
  // CHECK-NEXT:      affine.parallel
  // CHECK-NEXT:        affine.store
  // CHECK-NEXT:        affine.for

  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c32 = arith.constant 32 : index
  scf.parallel (%arg3) = (%c0) to (%c32) step (%c1) {
    affine.for %arg4 = 0 to 16 {
      affine.parallel (%arg5, %arg6) = (0, 0) to (min(128, 122), min(64, %x * -64 + 2040)) {
        affine.for %arg7 = 0 to min #map(%arg4) {
          affine.store %cst, %memref_8[%arg5 + 3968, %arg6] : memref<4090x2040xf32>
        }
      }
    }
  }
  // CHECK:       scf.parallel
  // CHECK-NEXT:    affine.for
  // CHECK-NEXT:      affine.parallel
  // CHECK-NEXT:        affine.store
  // CHECK-NEXT:        affine.for

  affine.for %arg3 = 0 to 32 {
    affine.for %arg4 = 0 to 16 {
      affine.parallel (%arg5, %arg6) = (0, 0) to (min(128, 122), min(64, %arg3 * -64 + 2040)) {
        // Unknown region-holding op for this pass.
        scf.for %arg7 = %c0 to %x step %c1 {
          affine.store %cst, %memref_8[%arg5 + 3968, %arg6 + %arg3 * 64] : memref<4090x2040xf32>
        }
      }
    }
  }
  // CHECK:       affine.for
  // CHECK-NEXT:    affine.for
  // CHECK-NEXT:      affine.parallel
  // CHECK-NEXT:        scf.for
  // CHECK-NEXT:          affine.store

  return
}

// -----

// CHECK-LABEL: func.func @affine_invariant_use_after_dma
#map = affine_map<(d0) -> (d0 * 163840)>
func.func @affine_invariant_use_after_dma(%arg0: memref<10485760xi32>, %arg1: memref<1xi32>, %arg2: memref<10485760xi32>) {
  %c320 = arith.constant 320 : index
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %alloc = memref.alloc() {alignment = 16 : i64} : memref<0xi32, 2>
  %alloc_0 = memref.alloc() : memref<1xi32, 2>
  affine.for %arg3 = 0 to 64 {
    %0 = affine.apply #map(%arg3)
    %alloc_1 = memref.alloc() {alignment = 16 : i64} : memref<0xi32, 2>
    %alloc_2 = memref.alloc() : memref<320xi32, 2>
    affine.dma_start %arg0[%0], %alloc_2[%c0], %alloc_1[%c0], %c320 : memref<10485760xi32>, memref<320xi32, 2>, memref<0xi32, 2>
    affine.dma_start %arg1[%c0], %alloc_0[%c0], %alloc[%c0], %c1 : memref<1xi32>, memref<1xi32, 2>, memref<0xi32, 2>
    affine.dma_wait %alloc_1[%c0], %c320 : memref<0xi32, 2>
    affine.dma_wait %alloc[%c0], %c1 : memref<0xi32, 2>
    %1 = affine.apply #map(%arg3)
    %alloc_3 = memref.alloc() {alignment = 16 : i64} : memref<0xi32, 2>
    %alloc_4 = memref.alloc() : memref<320xi32, 2>
    affine.for %arg4 = 0 to 320 {
      %2 = affine.load %alloc_2[%arg4] : memref<320xi32, 2>
      %3 = affine.load %alloc_0[0] : memref<1xi32, 2>
      %4 = arith.addi %2, %3 : i32
      %5 = arith.addi %4, %2 : i32
      affine.store %5, %alloc_4[%arg4] : memref<320xi32, 2>
    }
    affine.dma_start %alloc_4[%c0], %arg2[%1], %alloc_3[%c0], %c320 : memref<320xi32, 2>, memref<10485760xi32>, memref<0xi32, 2>
    affine.dma_wait %alloc_3[%c0], %c320 : memref<0xi32, 2>
  }
  return
}
// CHECK: %[[zero:.*]] = arith.constant 0 : index
// CHECK: %[[scalar_mem:.*]] = memref.alloc() : memref<1xi32, 2>
// CHECK: affine.dma_start %arg1[%[[zero]]], %alloc_0[%[[zero]]], %alloc[%[[zero]]], %c1
// CHECK: affine.load %[[scalar_mem]][0]

// -----

// CHECK-LABEL: func @affine_prefetch_invariant
func.func @affine_prefetch_invariant() {
  %0 = memref.alloc() : memref<10x10xf32>
  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      %1 = affine.load %0[%i0, %i1] : memref<10x10xf32>
      // A prefetch shouldn't be hoisted.
      affine.prefetch %0[%i0, %i0], write, locality<0>, data : memref<10x10xf32>
    }
  }

  // CHECK:      memref.alloc() : memref<10x10xf32>
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT:     affine.load %{{.*}}[%{{.*}}  : memref<10x10xf32>
  // CHECK-NEXT:     affine.prefetch
  // CHECK-NEXT:   }
  // CHECK-NEXT: }
  return
}

// Side-effecting ops shouldn't be hoisted.

// CHECK-LABEL: func @side_effecting_ops
func.func @side_effecting_ops() {
  %cst = arith.constant 0.0 : f32
  %m0 = memref.alloc() : memref<1x512x16x16xf32>
  %0 = gpu.wait async
  affine.for %arg783 = 0 to 14 {
    affine.for %arg784 = 0 to 14 {
      affine.parallel (%arg785) = (0) to (512) {
        affine.for %arg786 = 0 to 1 {
          affine.for %arg787 = 0 to 1 {
            affine.for %arg788 = 0 to 1 {
              %m1 = memref.alloc() : memref<1xf32, 3>
              %m2 = memref.alloc() : memref<1xf32, 3>
              affine.store %cst, %m1[0] : memref<1xf32, 3>
              affine.store %cst, %m2[0] : memref<1xf32, 3>
              %memref_2897, %asyncToken_2898 = gpu.alloc async [%0] () : memref<1x512x16x16xf32>
              %2432 = gpu.memcpy async [%0] %memref_2897, %m0 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
              affine.for %arg789 = 0 to 16 {
                affine.for %arg790 = 0 to 16 {
                  affine.store %cst, %memref_2897[0, %arg785 + %arg788, %arg789, %arg790] : memref<1x512x16x16xf32>
                }
              }
              memref.dealloc %m2 : memref<1xf32, 3>
              memref.dealloc %m1 : memref<1xf32, 3>
              %2433 = gpu.memcpy async [%0] %m0, %memref_2897 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
              %2434 = gpu.dealloc async [%asyncToken_2898] %memref_2897 : memref<1x512x16x16xf32>
            }
          }
        }
      }
    }
  }
  // CHECK:      affine.for %{{.*}} = 0 to 1
  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 1
  // CHECK:          memref.alloc
  // CHECK:          memref.alloc
  // CHECK:          gpu.memcpy
  // CHECK:          affine.for %{{.*}} = 0 to 16
  // CHECK:            affine.for %{{.*}} = 0 to 16
  // CHECK:          memref.dealloc
  return
}