// xref: /llvm-project/mlir/test/Transforms/loop-invariant-code-motion.mlir (revision b3ce6dc7232c566c21b84ac5d5795341a355ff79)
// RUN: mlir-opt %s  -split-input-file -loop-invariant-code-motion | FileCheck %s

// Both addf ops are loop-invariant; LICM hoists them above the whole nest
// (see the FileCheck expectations below).
func.func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %v0, %cf8 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32
  // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store

  return
}

// -----

// The addf is invariant to both surrounding loops and is hoisted above them.
func.func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf

  return
}

// -----

// Every op depends on the induction variable or on memory written inside the
// loop, so nothing is hoisted.
func.func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<10xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store

  return
}

// -----

// The affine.if condition depends on the IV (via the affine.apply), so the if
// and the store in its body stay inside the loop.
func.func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.apply
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }


  return
}

// -----

// The affine.if depends only on the outer IV, so it is hoisted out of the
// inner loop; the emptied inner loop is moved ahead of the outer one.
func.func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 20 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 20 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if #set(%[[ARG]], %[[ARG]]) {
  // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: }

  return
}

// -----

// An affine.if over a trivially-true (empty) set is invariant; it is hoisted
// above the affine.for and its result is consumed inside the loop.
func.func @hoist_invariant_affine_if_success(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %sum_result = affine.for %i = %lb to %ub iter_args(%acc = %cst_0) -> i32 {
    %conditional_add = affine.if affine_set<() : ()> () -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      affine.yield %add : i32
    } else {
      %poison = ub.poison : i32
      affine.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    affine.yield %sum : i32
  }

  // CHECK-LABEL: hoist_invariant_affine_if_success
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: %[[IF:.*]] = affine.if
  // CHECK-NEXT: arith.addi %[[CST]], %[[CST]] : i32
  // CHECK: affine.for
  // CHECK-NOT: affine.if
  // CHECK-NEXT: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

// The affine.if condition uses the loop IV, so the if cannot be hoisted.
func.func @hoist_variant_affine_if_failure(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %ind_7 = arith.constant 7 : index
  %sum_result = affine.for %i = %lb to %ub iter_args(%acc = %cst_0) -> i32 {
    %conditional_add = affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%i, %ind_7) -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      affine.yield %add : i32
    } else {
      %poison = ub.poison : i32
      affine.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    affine.yield %sum : i32
  }

  // CHECK-LABEL: hoist_variant_affine_if_failure
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: arith.constant 7 : index
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: %[[IF:.*]] = affine.if
  // CHECK: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

// An empty affine.for with default (unit) step and unknown trip count is
// hoisted out of the enclosing loop.
func.func @hoist_affine_for_with_unknown_trip_count(%lb: index, %ub: index) {
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = %lb to %ub {
    }
  }

  // CHECK: @hoist_affine_for_with_unknown_trip_count(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK-NEXT: affine.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]] {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG3:.*]] = 0 to 10 {
  // CHECK-NEXT: }

  return
}

// -----

// With a non-unit step the inner affine.for stays inside the outer loop.
func.func @hoist_affine_for_with_unknown_trip_count_non_unit_step(%lb: index, %ub: index) {
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = %lb to %ub step 2 {
    }
  }

  // CHECK: @hoist_affine_for_with_unknown_trip_count_non_unit_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK-NEXT: affine.for %[[ARG2:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]] step 2 {
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// An empty scf.for with a unit constant step is hoisted out of the outer loop.
func.func @hoist_scf_for_with_unknown_trip_count_unit_step(%lb: index, %ub: index) {
  %c1 = arith.constant 1 : index
  scf.for %arg0 = %lb to %ub step %c1 {
    scf.for %arg1 = %lb to %ub step %c1 {
    }
  }

  // CHECK: @hoist_scf_for_with_unknown_trip_count_unit_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK: scf.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: }
  // CHECK-NEXT: scf.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: }

  return
}

// -----

// A non-unit constant step keeps the inner scf.for inside the outer loop.
func.func @hoist_scf_for_with_unknown_trip_count_non_unit_constant_step(%lb: index, %ub: index) {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  scf.for %arg0 = %lb to %ub step %c1 {
    scf.for %arg1 = %lb to %ub step %c2 {
    }
  }

  // CHECK: @hoist_scf_for_with_unknown_trip_count_non_unit_constant_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK: scf.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: scf.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// An unknown (non-constant) step likewise prevents hoisting the inner loop.
func.func @hoist_scf_for_with_unknown_trip_count_unknown_step(%lb: index, %ub: index, %step: index) {
  %c1 = arith.constant 1 : index
  scf.for %arg0 = %lb to %ub step %c1 {
    scf.for %arg1 = %lb to %ub step %step {
    }
  }

  // CHECK: @hoist_scf_for_with_unknown_trip_count_unknown_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index, %[[STEP:.*]]: index) {
  // CHECK: scf.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: scf.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]] step %[[STEP]]
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// The store indexes on the inner IV, so the if and its body are not hoisted.
func.func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg1] : memref<10xf32>
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// The nested ifs depend only on the outer IV; they are hoisted out of the
// inner loop, which becomes empty and is moved ahead of the outer loop.
func.func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// The else branch stores via the inner IV, so the whole if/else nest stays
// inside both loops.
func.func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            %cf10 = arith.addf %cf9, %cf9 : f32
          } else {
            affine.store %cf9, %m[%arg1] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// The addf is invariant to both scf.for loops and is hoisted above them.
func.func @invariant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf

  return
}

// -----

// The addi uses both IVs, so it must remain inside the loops.
func.func @variant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addi %arg0, %arg1 : index
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: arith.addi

  return
}

// -----

// Only the IV-independent addi is hoisted out of the scf.parallel; the
// IV-dependent addi stays in the body.
func.func @parallel_loop_with_invariant() {
  %c0 = arith.constant 0 : index
  %c10 = arith.constant 10 : index
  %c1 = arith.constant 1 : index
  %c7 = arith.constant 7 : i32
  %c8 = arith.constant 8 : i32
  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
      %v0 = arith.addi %c7, %c8 : i32
      %v3 = arith.addi %arg0, %arg1 : index
  }

  // CHECK-LABEL: func @parallel_loop_with_invariant
  // CHECK: arith.constant 0 : index
  // CHECK-NEXT: arith.constant 10 : index
  // CHECK-NEXT: arith.constant 1 : index
  // CHECK-NEXT: arith.constant 7 : i32
  // CHECK-NEXT: arith.constant 8 : i32
  // CHECK-NEXT: arith.addi
  // CHECK-NEXT: scf.parallel (%[[A:.*]],{{.*}}) =
  // CHECK-NEXT:   arith.addi %[[A]]
  // CHECK-NEXT:   reduce
  // CHECK-NEXT: }
  // CHECK-NEXT: return

  return
}

// -----

// An scf.if on a constant-true condition is invariant; it is hoisted above
// the scf.for and its result is consumed inside the loop.
func.func @hoist_invariant_scf_if_success(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %true = arith.constant true
  %sum_result = scf.for %i = %lb to %ub step %step iter_args(%acc = %cst_0) -> i32 {
    %conditional_add = scf.if %true -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      scf.yield %add : i32
    } else {
      %poison = ub.poison : i32
      scf.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    scf.yield %sum : i32
  }

  // CHECK-LABEL: hoist_invariant_scf_if_success
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: %[[TRUE:.*]] = arith.constant true
  // CHECK-NEXT: %[[IF:.*]] = scf.if %[[TRUE]]
  // CHECK-NEXT: arith.addi %[[CST]], %[[CST]] : i32
  // CHECK: scf.for
  // CHECK-NOT: scf.if
  // CHECK-NEXT: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

// The scf.if condition is computed from the IV, so the if stays in the loop.
func.func @hoist_variant_scf_if_failure(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %ind_7 = arith.constant 7 : index
  %sum_result = scf.for %i = %lb to %ub step %step iter_args(%acc = %cst_0) -> i32 {
    %cond = arith.cmpi ult, %i, %ind_7 : index
    %conditional_add = scf.if %cond -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      scf.yield %add : i32
    } else {
      %poison = ub.poison : i32
      scf.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    scf.yield %sum : i32
  }

  // CHECK-LABEL: hoist_variant_scf_if_failure
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST_42:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: %[[CST_7:.*]] = arith.constant 7 : index
  // CHECK-NEXT: scf.for %[[IV:.*]] = %{{.*}} to %{{.*}}
  // CHECK-NEXT: %[[CMP:.*]] = arith.cmpi ult, %[[IV]], %[[CST_7]]
  // CHECK-NEXT: %[[IF:.*]] = scf.if %[[CMP]]
  // CHECK-NEXT: arith.addi %[[CST_42]], %[[CST_42]] : i32
  // CHECK: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

func.func private @make_val() -> (index)

// CHECK-LABEL: func @nested_uses_inside
func.func @nested_uses_inside(%lb: index, %ub: index, %step: index) {
  %true = arith.constant true

  // Check that ops that contain nested uses to values not defined outside
  // remain in the loop.
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: scf.for
  // CHECK-NEXT:   call @
  // CHECK-NEXT:   call @
  // CHECK-NEXT:   scf.if
  // CHECK-NEXT:     scf.yield
  // CHECK-NEXT:   else
  // CHECK-NEXT:     scf.yield
  scf.for %i = %lb to %ub step %step {
    %val = func.call @make_val() : () -> (index)
    %val2 = func.call @make_val() : () -> (index)
    %r = scf.if %true -> (index) {
      scf.yield %val: index
    } else {
      scf.yield %val2: index
    }
  }
  return
}

// -----

// Test that two ops that feed into each other are moved without violating
// dominance in non-graph regions.
// CHECK-LABEL: func @invariant_subgraph
// CHECK-SAME: %{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %[[ARG:.*]]: i32
func.func @invariant_subgraph(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK:      %[[V0:.*]] = arith.addi %[[ARG]], %[[ARG]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[ARG]], %[[V0]]
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    // CHECK-NEXT: "test.sink"(%[[V1]])
    %v0 = arith.addi %arg, %arg : i32
    %v1 = arith.addi %arg, %v0 : i32
    "test.sink"(%v1) : (i32) -> ()
  }
  return
}

// -----

// Test invariant nested loop is hoisted.
// CHECK-LABEL: func @test_invariant_nested_loop
func.func @test_invariant_nested_loop() {
  // CHECK: %[[C:.*]] = arith.constant
  %0 = arith.constant 5 : i32
  // CHECK: %[[V0:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[V0]], %[[C]]
  // CHECK-NEXT: test.graph_loop
  // CHECK-NEXT: ^bb0(%[[ARG0:.*]]: i32)
  // CHECK-NEXT: %[[V2:.*]] = arith.subi %[[ARG0]], %[[ARG0]]
  // CHECK-NEXT: test.region_yield %[[V2]]
  // CHECK: test.graph_loop
  // CHECK-NEXT: test.region_yield %[[V1]]
  test.graph_loop {
    %1 = arith.addi %0, %0 : i32
    %2 = arith.addi %1, %0 : i32
    test.graph_loop {
    ^bb0(%arg0: i32):
      %3 = arith.subi %arg0, %arg0 : i32
      test.region_yield %3 : i32
    } : () -> ()
    test.region_yield %2 : i32
  } : () -> ()
  return
}


// -----

// Test ops in a graph region are hoisted.
// CHECK-LABEL: func @test_invariants_in_graph_region
func.func @test_invariants_in_graph_region() {
  // CHECK: test.single_no_terminator_op
  test.single_no_terminator_op : {
    // CHECK-NEXT: %[[C:.*]] = arith.constant
    // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
    // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
    test.graph_loop {
      %v0 = arith.addi %c0, %v1 : i32
      %v1 = arith.addi %c0, %c0 : i32
      %c0 = arith.constant 5 : i32
      test.region_yield %v0 : i32
    } : () -> ()
  }
  return
}

// -----

// Test ops in a graph region are hoisted in topological order into non-graph
// regions and that dominance is preserved.
// CHECK-LABEL: func @test_invariant_backedge
func.func @test_invariant_backedge() {
  // CHECK-NEXT: %[[C:.*]] = arith.constant
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
  // CHECK-NEXT: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: test.region_yield %[[V0]]
    %v0 = arith.addi %c0, %v1 : i32
    %v1 = arith.addi %c0, %c0 : i32
    %c0 = arith.constant 5 : i32
    test.region_yield %v0 : i32
  } : () -> ()
  return
}

// -----

// Test that cycles aren't hoisted from graph regions to non-graph regions.
// CHECK-LABEL: func @test_invariant_cycle_not_hoisted
func.func @test_invariant_cycle_not_hoisted() {
  // CHECK: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: %[[A:.*]] = "test.a"(%[[B:.*]]) :
    // CHECK-NEXT: %[[B]] = "test.b"(%[[A]]) :
    // CHECK-NEXT: test.region_yield %[[A]]
    %a = "test.a"(%b) : (i32) -> i32
    %b = "test.b"(%a) : (i32) -> i32
    test.region_yield %a : i32
  } : () -> ()
  return
}

// -----

// The tests below exercise hoisting decisions driven by the (conditional)
// speculatability of test-dialect ops.

// CHECK-LABEL: test_always_speculatable_op
func.func @test_always_speculatable_op(%lb: index, %ub: index, %step: index) {
  // CHECK: test.always_speculatable_op
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = "test.always_speculatable_op"() : () -> i32
  }

  return
}

// CHECK-LABEL: test_never_speculatable_op
func.func @test_never_speculatable_op(%lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: test.never_speculatable_op
  scf.for %i = %lb to %ub step %step {
    %val = "test.never_speculatable_op"() : () -> i32
  }

  return
}

// CHECK-LABEL: test_conditionally_speculatable_op_success
func.func @test_conditionally_speculatable_op_success(%lb: index, %ub: index, %step: index) {
  // CHECK: test.conditionally_speculatable_op
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %const_val = arith.constant 5 : i32
    %val = "test.conditionally_speculatable_op"(%const_val) : (i32) -> i32
  }

  return
}

// CHECK-LABEL: test_conditionally_speculatable_op_failure
func.func @test_conditionally_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: scf.for
  // CHECK-NEXT: test.conditionally_speculatable_op
  %const_5 = arith.constant 5 : i32
  %non_const = arith.addi %arg, %const_5 : i32
  scf.for %i = %lb to %ub step %step {
    %val = "test.conditionally_speculatable_op"(%non_const) : (i32) -> i32
  }

  return
}

// CHECK-LABEL: test_recursively_speculatable_op_success
func.func @test_recursively_speculatable_op_success(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: test.recursively_speculatable_op
  // CHECK: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = "test.recursively_speculatable_op"()({
      %result = arith.addi %arg, %arg : i32
      test.region_yield %result : i32
    }) : () -> i32
  }

  return
}

// CHECK-LABEL: test_recursively_speculatable_op_failure
func.func @test_recursively_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: scf.for
  // CHECK-NEXT: test.recursively_speculatable_op
  scf.for %i = %lb to %ub step %step {
    %val = "test.recursively_speculatable_op"()({
      %result = "test.never_speculatable_op"() : () -> i32
      test.region_yield %result : i32
    }) : () -> i32
  }

  return
}

// -----

// tensor.dim hoisting: only a statically in-bounds dim (known rank, constant
// index) is hoisted; the other combinations stay in the loop.

func.func @speculate_tensor_dim_unknown_rank_unknown_dim(
// CHECK-LABEL: @speculate_tensor_dim_unknown_rank_unknown_dim
    %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %dim_idx : tensor<*xf32>
  }

  return
}

func.func @speculate_tensor_dim_known_rank_unknown_dim(
// CHECK-LABEL: @speculate_tensor_dim_known_rank_unknown_dim
    %t: tensor<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %dim_idx : tensor<?x?x?x?xf32>
  }

  return
}

func.func @speculate_tensor_dim_unknown_rank_known_dim(
// CHECK-LABEL: @speculate_tensor_dim_unknown_rank_known_dim
    %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c0 = arith.constant 0 : index
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %c0 : tensor<*xf32>
  }

  return
}

func.func @speculate_tensor_dim_known_rank_known_dim_inbounds(
// CHECK-LABEL: @speculate_tensor_dim_known_rank_known_dim_inbounds
    %t: tensor<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c1 = arith.constant 1 : index
  // CHECK: tensor.dim
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %c1 : tensor<?x?x?x?xf32>
  }

  return
}

// -----

// memref.dim hoisting: mirrors the tensor.dim cases above — only the known
// rank / constant in-bounds index case is hoisted.

func.func @speculate_memref_dim_unknown_rank_unknown_dim(
// CHECK-LABEL: @speculate_memref_dim_unknown_rank_unknown_dim
    %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %dim_idx : memref<*xf32>
  }

  return
}

func.func @speculate_memref_dim_known_rank_unknown_dim(
// CHECK-LABEL: @speculate_memref_dim_known_rank_unknown_dim
    %t: memref<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %dim_idx : memref<?x?x?x?xf32>
  }

  return
}

func.func @speculate_memref_dim_unknown_rank_known_dim(
// CHECK-LABEL: @speculate_memref_dim_unknown_rank_known_dim
    %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c0 = arith.constant 0 : index
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %c0 : memref<*xf32>
  }

  return
}

func.func @speculate_memref_dim_known_rank_known_dim_inbounds(
// CHECK-LABEL: @speculate_memref_dim_known_rank_known_dim_inbounds
    %t: memref<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c1 = arith.constant 1 : index
  // CHECK: memref.dim
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %c1 : memref<?x?x?x?xf32>
  }

  return
}

// -----

// Same in-bounds/out-of-bounds dim cases, but on locally allocated values.

// CHECK-LABEL: @speculate_memref_dim_known_rank_known_dim_inbounds
func.func @speculate_memref_dim_known_rank_known_dim_inbounds() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c22 = arith.constant 22 : index
  %alloc = memref.alloc(%c22) : memref<?xi1>
  scf.for %arg4 = %c0 to %c22 step %c1 {
    %dim = memref.dim %alloc, %c0 : memref<?xi1>
  }
  return
}
// CHECK: memref.dim
// CHECK-NEXT: scf.for

// -----

// CHECK-LABEL: @speculate_tensor_dim_known_rank_known_dim_inbounds
func.func @speculate_tensor_dim_known_rank_known_dim_inbounds() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c22 = arith.constant 22 : index
  %t = tensor.empty(%c22, %c22) : tensor<?x?xi1>
  scf.for %arg4 = %c0 to %c22 step %c1 {
    %dim = tensor.dim %t, %c1 : tensor<?x?xi1>
  }
  return
}
// CHECK: tensor.dim
// CHECK-NEXT: scf.for

// -----

// CHECK-LABEL: @no_speculate_memref_dim_known_rank_known_dim_out_of_bounds
func.func @no_speculate_memref_dim_known_rank_known_dim_out_of_bounds() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c22 = arith.constant 22 : index
  %alloc = memref.alloc(%c22) : memref<?xi1>
  scf.for %arg4 = %c0 to %c22 step %c1 {
    %dim = memref.dim %alloc, %c1 : memref<?xi1>
  }
  return
}
// CHECK: scf.for
// CHECK-NEXT: memref.dim

// -----

// Integer division ops with non-constant divisors are not speculatable and
// must stay inside the loop.

func.func @no_speculate_divui(
// CHECK-LABEL: @no_speculate_divui(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divui
    %val = arith.divui %num, %denom : i32
  }

  return
}

func.func @no_speculate_divsi(
// CHECK-LABEL: @no_speculate_divsi(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divsi
    %val = arith.divsi %num, %denom : i32
  }

  return
}

func.func @no_speculate_ceildivui(
// CHECK-LABEL: @no_speculate_ceildivui(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivui
    %val = arith.ceildivui %num, %denom : i32
  }

  return
}

func.func @no_speculate_ceildivsi(
// CHECK-LABEL: @no_speculate_ceildivsi(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivsi
    %val = arith.ceildivsi %num, %denom : i32
  }

  return
}

func.func @no_speculate_divui_const(%num: i32, %lb: index, %ub: index, %step: index) {
// CHECK-LABEL: @no_speculate_divui_const(
  %c0 = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divui
    %val = arith.divui %num, %c0 : i32
  }

  return
}

// Division by a non-zero constant is speculatable and hoisted; division by a
// zero constant is not.

func.func @speculate_divui_const(
// CHECK-LABEL: @speculate_divui_const(
    %num: i32, %lb: index, %ub: index, %step: index) {
  %c5 = arith.constant 5 : i32
// CHECK: arith.divui
// CHECK: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = arith.divui %num, %c5 : i32
  }

  return
}

func.func @no_speculate_ceildivui_const(%num: i32, %lb: index, %ub: index, %step: index) {
// CHECK-LABEL: @no_speculate_ceildivui_const(
  %c0 = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivui
    %val = arith.ceildivui %num, %c0 : i32
  }

  return
}

func.func @speculate_ceildivui_const(
// CHECK-LABEL: @speculate_ceildivui_const(
    %num: i32, %lb: index, %ub: index, %step: index) {
  %c5 = arith.constant 5 : i32
// CHECK: arith.ceildivui
// CHECK: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = arith.ceildivui %num, %c5 : i32
  }

  return
}

func.func @no_speculate_divsi_const0(
// CHECK-LABEL: @no_speculate_divsi_const0(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  %c0 = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divsi
    %val = arith.divsi %num, %c0 : i32
  }

  return
}

func.func @no_speculate_divsi_const_minus1(
// CHECK-LABEL: @no_speculate_divsi_const_minus1(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  %cm1 = arith.constant -1 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divsi
    %val = arith.divsi %num, %cm1 : i32
  }

  return
}

// Signed division: a constant divisor that is neither 0 nor -1 is
// speculatable and hoisted; 0 and -1 divisors keep the op in the loop.

func.func @speculate_divsi_const(
// CHECK-LABEL: @speculate_divsi_const(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  %c5 = arith.constant 5 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: arith.divsi
// CHECK: scf.for
    %val = arith.divsi %num, %c5 : i32
  }

  return
}

func.func @no_speculate_ceildivsi_const0(
// CHECK-LABEL: @no_speculate_ceildivsi_const0(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  %c0 = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivsi
    %val = arith.ceildivsi %num, %c0 : i32
  }

  return
}

func.func @no_speculate_ceildivsi_const_minus1(
// CHECK-LABEL: @no_speculate_ceildivsi_const_minus1(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  %cm1 = arith.constant -1 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivsi
    %val = arith.ceildivsi %num, %cm1 : i32
  }

  return
}

func.func @speculate_ceildivsi_const(
// CHECK-LABEL: @speculate_ceildivsi_const(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  %c5 = arith.constant 5 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: arith.ceildivsi
// CHECK: scf.for
    %val = arith.ceildivsi %num, %c5 : i32
  }

  return
}

// The divisor's inferred unsigned range [0, 255] includes zero, so the divui
// might be a division by zero and is not hoisted.
func.func @no_speculate_divui_range(
// CHECK-LABEL: @no_speculate_divui_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divui
    %val = arith.divui %num, %denom : i8
  }

  return
}
1059
// Neither divsi can be hoisted: %denom0's signed range [-128, -1] includes
// -1 (possible INT_MIN / -1 overflow) and %denom1's range [0, 127] includes
// zero (possible division by zero).
func.func @no_speculate_divsi_range(
// CHECK-LABEL: @no_speculate_divsi_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom0 = test.with_bounds {smax = -1: i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  %denom1 = test.with_bounds {smax = 127 : i8, smin = 0 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK-COUNT-2: arith.divsi
    %val0 = arith.divsi %num, %denom0 : i8
    %val1 = arith.divsi %num, %denom1 : i8
  }

  return
}
1074
// The divisor's inferred unsigned range [0, 255] includes zero, so the
// ceildivui is not speculatable and stays inside the loop.
func.func @no_speculate_ceildivui_range(
// CHECK-LABEL: @no_speculate_ceildivui_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivui
    %val = arith.ceildivui %num, %denom : i8
  }

  return
}
1087
// Neither ceildivsi can be hoisted: %denom0's signed range [-128, -1]
// includes -1 (possible overflow) and %denom1's range [0, 127] includes zero
// (possible division by zero).
func.func @no_speculate_ceildivsi_range(
// CHECK-LABEL: @no_speculate_ceildivsi_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom0 = test.with_bounds {smax = -1 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  %denom1 = test.with_bounds {smax = 127 : i8, smin = 0 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK-COUNT-2: arith.ceildivsi
    %val0 = arith.ceildivsi %num, %denom0 : i8
    %val1 = arith.ceildivsi %num, %denom1 : i8
  }

  return
}
1102
// umin = 1 excludes zero from the divisor's unsigned range, so the divui is
// speculatable and hoisted above the loop.
func.func @speculate_divui_range(
// CHECK-LABEL: @speculate_divui_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 1 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK: arith.divui
// CHECK: scf.for
    %val = arith.divui %num, %denom : i8
  }

  return
}
1115
// Both divisors exclude the unsafe values: %denom0's signed range [1, 127]
// and %denom1's range [-128, -2] contain neither 0 nor -1, so both divsi ops
// are hoisted above the loop.
func.func @speculate_divsi_range(
// CHECK-LABEL: @speculate_divsi_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom0 = test.with_bounds {smax = 127 : i8, smin = 1 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  %denom1 = test.with_bounds {smax = -2 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK-COUNT-2: arith.divsi
// CHECK: scf.for
    %val0 = arith.divsi %num, %denom0 : i8
    %val1 = arith.divsi %num, %denom1 : i8

  }

  return
}
1131
// umin = 1 excludes zero from the divisor's unsigned range, so the
// ceildivui is speculatable and hoisted above the loop.
func.func @speculate_ceildivui_range(
// CHECK-LABEL: @speculate_ceildivui_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 1 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK: arith.ceildivui
// CHECK: scf.for
    %val = arith.ceildivui %num, %denom : i8
  }

  return
}
1144
// Both divisors exclude the unsafe values: %denom0's signed range [1, 127]
// and %denom1's range [-128, -2] contain neither 0 nor -1, so both
// ceildivsi ops are hoisted above the loop.
func.func @speculate_ceildivsi_range(
// CHECK-LABEL: @speculate_ceildivsi_range(
    %num: i8, %lb: index, %ub: index, %step: index) {
  %denom0 = test.with_bounds {smax = 127 : i8, smin = 1 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  %denom1 = test.with_bounds {smax = -2 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8
  scf.for %i = %lb to %ub step %step {
// CHECK-COUNT-2: arith.ceildivsi
// CHECK: scf.for
    %val0 = arith.ceildivsi %num, %denom0 : i8
    %val1 = arith.ceildivsi %num, %denom1 : i8

  }

  return
}
1160
1161// -----
1162
// With fully static shapes and tile sizes, pack/unpack are pure and their
// operands are loop-invariant, so both ops are hoisted above their loops.
func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>,
  %dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {

  // CHECK: tensor.pack
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %packed = tensor.pack %source
      inner_dims_pos = [0, 1]
      inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
  }

  // CHECK: tensor.unpack
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %unpacked = tensor.unpack %dest
      inner_dims_pos = [0, 1]
      inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
  }
  return
}
1183
1184// -----
1185
// With dynamic shapes and tile sizes, a pack without a padding value (and an
// unpack) is not known to be valid for all runtime sizes, so neither is
// speculated out of its loop. Supplying a padding value makes the pack safe
// for any sizes, so that one is hoisted.
func.func @speculate_dynamic_pack_and_unpack(%source: tensor<?x?xf32>,
  %dest: tensor<?x?x?x?xf32>, %lb: index, %ub: index, %step: index,
  %tile_m: index, %tile_n: index, %pad: f32) {

  // Dynamic tiles, no padding value: stays in the loop.
  // CHECK: scf.for
  // CHECK-NEXT: tensor.pack
  scf.for %i = %lb to %ub step %step {
    %packed = tensor.pack %source
      inner_dims_pos = [0, 1]
      inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  }

  // Dynamic unpack: stays in the loop.
  // CHECK: scf.for
  // CHECK-NEXT: tensor.unpack
  scf.for %i = %lb to %ub step %step {
    %unpacked = tensor.unpack %dest
      inner_dims_pos = [0, 1]
      inner_tiles = [%tile_n, %tile_m] into %source : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
  }

  // Dynamic tiles with a padding value: speculatable, hoisted.
  // CHECK: tensor.pack
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %packed = tensor.pack %source padding_value(%pad : f32)
      inner_dims_pos = [0, 1]
      inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  }
  return
}
1215
1216// -----
1217
// LICM also applies to scf.while: the constants and the addi on the
// loop-invariant %arg0 are hoisted out of both regions, while the cmpi stays
// because it uses the iteration argument %arg2.
// CHECK-LABEL: func @hoist_from_scf_while(
//  CHECK-SAME:     %[[arg0:.*]]: i32, %{{.*}}: i32)
//   CHECK-DAG:   arith.constant 1 : i32
//   CHECK-DAG:   %[[c2:.*]] = arith.constant 2 : i32
//   CHECK-DAG:   %[[c10:.*]] = arith.constant 10 : i32
//   CHECK-DAG:   %[[added:.*]] = arith.addi %[[arg0]], %[[c2]]
//       CHECK:   scf.while
//       CHECK:     %[[cmpi:.*]] = arith.cmpi slt, %{{.*}}, %[[added]]
//       CHECK:     scf.condition(%[[cmpi]])
func.func @hoist_from_scf_while(%arg0: i32, %arg1: i32) -> i32 {
  %0 = scf.while (%arg2 = %arg1) : (i32) -> (i32) {
    %c2 = arith.constant 2 : i32
    %c10 = arith.constant 10 : i32
    %added = arith.addi %arg0, %c2 : i32
    %1 = arith.cmpi slt, %arg2, %added : i32
    scf.condition(%1) %arg2 : i32
  } do {
  ^bb0(%arg2: i32):
    %c1 = arith.constant 1 : i32
    %added2 = arith.addi %c1, %arg2 : i32
    scf.yield %added2 : i32
  }
  return %0 : i32
}
1242
1243// -----
1244
// Matmul-shaped indexing maps shared by the linalg.generic test below.
#trait = {
  indexing_maps = [
    affine_map<(m, n, k) -> (m, k)>,
    affine_map<(m, n, k) -> (k, n)>,
    affine_map<(m, n, k) -> (m, n)>
  ],
  iterator_types = ["parallel", "parallel", "reduction"]
}

// The linalg.generic operates only on loop-invariant tensors and its body is
// side-effect free, so it is hoisted above the scf.for. The insert_slice
// uses the induction variable %i and must stay inside.
// CHECK-LABEL: func @hoist_linalg_ops
// CHECK: linalg.generic
// CHECK: scf.for
// CHECK-NOT: linalg.generic
// CHECK: tensor.insert_slice
// CHECK: scf.yield
func.func @hoist_linalg_ops(%a : tensor<128x128xf32>,
                            %b : tensor<128x128xf32>,
                            %c: tensor<128x128xf32>,
                            %lb : index,
                            %ub : index,
                            %step : index,
                            %output : tensor<?x128xf32>) -> tensor<?x128xf32> {
  %final =
  scf.for %i = %lb to %ub step %step iter_args(%acc = %output)
                                            -> tensor<?x128xf32> {
    %compute = linalg.generic #trait
               ins(%a, %b : tensor<128x128xf32>, tensor<128x128xf32>)
               outs(%c : tensor<128x128xf32>) {
    ^bb0(%in : f32, %in2 : f32, %in3 : f32):
      %mul = arith.mulf %in, %in2 : f32
      %add = arith.addf %mul, %in3 : f32
      linalg.yield %in3 : f32
    } -> tensor<128x128xf32>

    %newacc = tensor.insert_slice %compute into
                                  %output[%i, 0][128, 128][1, 1]
                                  : tensor<128x128xf32> into tensor<?x128xf32>
    scf.yield %newacc : tensor<?x128xf32>
  }

  func.return %final : tensor<?x128xf32>
}
1287
1288// -----
1289
// Matmul-shaped indexing maps shared by the linalg.generic test below.
#trait = {
  indexing_maps = [
    affine_map<(m, n, k) -> (m, k)>,
    affine_map<(m, n, k) -> (k, n)>,
    affine_map<(m, n, k) -> (m, n)>
  ],
  iterator_types = ["parallel", "parallel", "reduction"]
}

// The generic's body contains an arith.divui with a non-constant divisor,
// which could divide by zero, so the op is not speculatable and must remain
// inside the loop.
// CHECK-LABEL: func @hoist_linalg_ops_div_by_zero
// CHECK-NOT: linalg.generic
// CHECK: scf.for
// CHECK: linalg.generic
// CHECK: tensor.insert_slice
// CHECK: scf.yield
func.func @hoist_linalg_ops_div_by_zero(%a : tensor<128x128xi32>,
                            %b : tensor<128x128xi32>,
                            %c: tensor<128x128xi32>,
                            %lb : index,
                            %ub : index,
                            %step : index,
                            %output : tensor<?x128xi32>) -> tensor<?x128xi32> {
  %cst0 = arith.constant 0 : i32
  %final =
  scf.for %i = %lb to %ub step %step iter_args(%acc = %output)
                                            -> tensor<?x128xi32> {
    %compute = linalg.generic #trait
               ins(%a, %b : tensor<128x128xi32>, tensor<128x128xi32>)
               outs(%c : tensor<128x128xi32>) {
    ^bb0(%in : i32, %in2 : i32, %in3 : i32):
      %div = arith.divui %in, %in2 : i32
      %add = arith.addi %div, %in3 : i32
      linalg.yield %in3 : i32
    } -> tensor<128x128xi32>

    %newacc = tensor.insert_slice %compute into
                                  %output[%i, 0][128, 128][1, 1]
                                  : tensor<128x128xi32> into tensor<?x128xi32>
    scf.yield %newacc : tensor<?x128xi32>
  }

  func.return %final : tensor<?x128xi32>
}
1333
1334// -----
1335
// A transfer_read from a tensor is pure and all its operands are defined
// outside the loop, so it is hoisted; the addf depends on the iter_arg and
// stays inside.
// CHECK-LABEL: func @hoist_vector_transfer_ops
// CHECK: vector.transfer_read
// CHECK: scf.for
// CHECK-NOT: vector.transfer_read
// CHECK: arith.addf
// CHECK: scf.yield
func.func @hoist_vector_transfer_ops(
                            %a : tensor<128x128xf32>,
                            %lb : index,
                            %ub : index,
                            %step : index,
                            %ida : index,
                            %idb : index) -> vector<4x4xf32> {
  %cst_0 = arith.constant 0.0 : f32
  %cst = arith.constant dense<0.0> : vector<4x4xf32>
  %final =
  scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> {
    %read = vector.transfer_read %a[%ida, %idb], %cst_0 : tensor<128x128xf32>, vector<4x4xf32>
    %out = arith.addf %read, %acc : vector<4x4xf32>
    scf.yield %out : vector<4x4xf32>
  }
  func.return %final : vector<4x4xf32>
}
1359
1360// -----
1361
// Transfer ops on tensor values are pure: the write produces a new
// loop-invariant tensor and the read consumes it, so LICM hoists both out of
// the loop (the write first, then the read that uses its result).
// CHECK-LABEL: func @hoist_vector_transfer_ops
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: scf.for
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: arith.addf
// CHECK: scf.yield
func.func @hoist_vector_transfer_ops(
                            %lb : index,
                            %ub : index,
                            %step : index,
                            %ida : index,
                            %idb : index) -> vector<4x4xf32> {
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.0 : f32
  %cst = arith.constant dense<0.0> : vector<4x4xf32>
  %empty = tensor.empty() : tensor<4x4xf32>
  %final =
  scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> {
    %a = vector.transfer_write %cst, %empty[%c0, %c0] : vector<4x4xf32>, tensor<4x4xf32>
    %read = vector.transfer_read %a[%c0, %c0], %cst_0 : tensor<4x4xf32>, vector<4x4xf32>
    %out = arith.addf %read, %acc : vector<4x4xf32>
    scf.yield %out : vector<4x4xf32>
  }
  func.return %final : vector<4x4xf32>
}
1389
1390// -----
1391
// The read indexes with the induction variable %i, so it is not
// loop-invariant and must stay inside the loop.
// CHECK-LABEL: func @do_not_hoist_vector_transfer_ops_loop_dep
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: vector.transfer_read
// CHECK: arith.addf
// CHECK: scf.yield
func.func @do_not_hoist_vector_transfer_ops_loop_dep(
                            %a : tensor<128x128xf32>,
                            %lb : index,
                            %ub : index,
                            %step : index,
                            %ida : index) -> vector<4x4xf32> {
  %cst_0 = arith.constant 0.0 : f32
  %cst = arith.constant dense<0.0> : vector<4x4xf32>
  %final =
  scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> {
    %read = vector.transfer_read %a[%ida, %i], %cst_0 : tensor<128x128xf32>, vector<4x4xf32>
    %out = arith.addf %read, %acc : vector<4x4xf32>
    scf.yield %out : vector<4x4xf32>
  }
  func.return %final : vector<4x4xf32>
}
1414
1415// -----
1416
// Unlike the tensor variant above, a transfer_read from a memref carries a
// memory read effect, so LICM conservatively keeps it inside the loop even
// though its operands are loop-invariant.
// CHECK-LABEL: func @do_not_hoist_vector_transfer_ops_memref
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: vector.transfer_read
// CHECK: arith.addf
// CHECK: scf.yield
func.func @do_not_hoist_vector_transfer_ops_memref(
                            %a : memref<128x128xf32>,
                            %lb : index,
                            %ub : index,
                            %step : index,
                            %ida : index,
                            %idb : index) -> vector<4x4xf32> {
  %cst_0 = arith.constant 0.0 : f32
  %cst = arith.constant dense<0.0> : vector<4x4xf32>
  %final =
  scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> {
    %read = vector.transfer_read %a[%ida, %idb], %cst_0 : memref<128x128xf32>, vector<4x4xf32>
    %out = arith.addf %read, %acc : vector<4x4xf32>
    scf.yield %out : vector<4x4xf32>
  }
  func.return %final : vector<4x4xf32>
}
1440