xref: /llvm-project/mlir/test/Dialect/SCF/transform-op-coalesce.mlir (revision cca32174fef004aadc177fcde44904e326c639fb)
1// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse --mlir-print-local-scope | FileCheck %s
2
// Four-deep scf.for nest in which only the %k loop carries the `coalesce`
// attribute that the transform script matches on. The %k and %l loops both
// iterate from %i to %j. The checks below expect three scf.for ops to remain
// in the output and no fourth one.
3func.func @coalesce_inner() {
4  %c0 = arith.constant 0 : index
5  %c1 = arith.constant 1 : index
6  %c10 = arith.constant 10 : index
7
8  // CHECK: scf.for %[[IV0:.+]]
9  // CHECK:   scf.for %[[IV1:.+]]
10  // CHECK:     scf.for %[[IV2:.+]]
11  // CHECK-NOT:   scf.for %[[IV3:.+]]
12  scf.for %i = %c0 to %c10 step %c1 {
13    scf.for %j = %c0 to %c10 step %c1 {
14      scf.for %k = %i to %j step %c1 {
15        // Inner loop must have been removed.
16        scf.for %l = %i to %j step %c1 {
17          "use"(%i, %j) : (index, index) -> ()
18        }
19      } {coalesce}
20    }
21  }
22  return
23}
24
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, narrow the handle to the scf.for op type, and coalesce it.
25module attributes {transform.with_named_sequence} {
26  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
27    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
28    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
29    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
30    transform.yield
31  }
32}
33
34// -----
35
// Two-deep affine.for nest over 64x64 memrefs; the outer loop carries the
// `coalesce` attribute. The checks expect a single affine.for whose upper
// bound is the value produced by the second affine.apply, and expect the
// original indices to be recovered from the new induction variable via
// `mod` and `floordiv`.
36func.func @coalesce_outer(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} {
37  // CHECK: %[[T0:.+]] = affine.apply affine_map<() -> (64)>()
38  // CHECK: %[[UB:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
  // The loop bound must be the UB value captured above (a use, not a new capture).
39  // CHECK: affine.for %[[IV1:.+]] = 0 to %[[UB]] {
40  // CHECK-NOT: affine.for %[[IV2:.+]]
41  affine.for %arg4 = 0 to 64 {
42    affine.for %arg5 = 0 to 64 {
43      // CHECK: %[[IDX0:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV1]])[%{{.+}}]
44      // CHECK: %[[IDX1:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV1]])[%{{.+}}]
45      // CHECK-NEXT: %{{.+}} = affine.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1>
46      %0 = affine.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1>
47      %1 = affine.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1>
48      %2 = arith.addf %0, %1 : f32
49      affine.store %2, %arg3[%arg4, %arg5] : memref<64x64xf32, 1>
50    }
51  } {coalesce}
52  return
53}
54
// Transform script for the test case above: match the affine.for tagged
// `coalesce`, narrow the handle to the affine.for op type, and coalesce it.
55module attributes {transform.with_named_sequence} {
56  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
57    %tagged = transform.structured.match ops{["affine.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
58    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"affine.for">
59    %coalesced = transform.loop.coalesce %loop : (!transform.op<"affine.for">) -> (!transform.op<"affine.for">)
60    transform.yield
61  }
62}
63
64// -----
65
// Two-deep scf.for nest (0 to 64, step 1, in both dimensions); the outer loop
// carries the `coalesce` attribute. The transform script for this case
// coalesces the nest and then unrolls the result by a factor of 3, so the
// checks expect one scf.for with three stores followed by one more store
// belonging to the residual loop.
66func.func @coalesce_and_unroll(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} {
67  // CHECK: scf.for %[[IV1:.+]] =
68  %c0 = arith.constant 0 : index
69  %c1 = arith.constant 1 : index
70  %c64 = arith.constant 64 : index
71
72  scf.for %arg4 = %c0 to %c64 step %c1 {
73    // CHECK-NOT: scf.for
74    scf.for %arg5 = %c0 to %c64 step %c1 {
75      // CHECK: %[[IDX:.+]]:2 = affine.delinearize_index
76      // CHECK-NEXT: %{{.+}} = memref.load %{{.+}}[%[[IDX]]#0, %[[IDX]]#1] : memref<64x64xf32, 1>
77      %0 = memref.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1>
78      %1 = memref.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1>
79      %2 = arith.addf %0, %1 : f32
80      // CHECK: memref.store
81      // CHECK: memref.store
82      // CHECK: memref.store
83      // Residual loop must have a single store.
84      // CHECK: memref.store
85      memref.store %2, %arg3[%arg4, %arg5] : memref<64x64xf32, 1>
86    }
87  } {coalesce}
88  return
89}
90
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, coalesce it, then unroll the coalesced loop by a factor of 3.
91module attributes {transform.with_named_sequence} {
92  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
93    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
94    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
95    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
96    transform.loop.unroll %coalesced {factor = 3} : !transform.op<"scf.for">
97    transform.yield
98  }
99}
100
101// -----
102
// Three nested scf.for loops with dynamic bounds and steps that thread one
// tensor through their iter_args; each loop yields the result of the loop it
// contains, and the outermost loop carries the `coalesce` attribute. The
// checks that follow the transform script expect a single scf.for running
// from 0 to the product of the three trip counts, with the original induction
// variables rebuilt from affine.delinearize_index.
103func.func @tensor_loops(%arg0 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
104    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> tensor<?x?xf32> {
105  %0 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg1 = %arg0) -> tensor<?x?xf32> {
106    %1 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg2 = %arg1) -> tensor<?x?xf32> {
107      %2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg3 = %arg2) -> tensor<?x?xf32> {
108        %3 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>)
109        scf.yield %3 : tensor<?x?xf32>
110      }
111      scf.yield %2 : tensor<?x?xf32>
112    }
113    scf.yield %1 : tensor<?x?xf32>
114  } {coalesce}
115  return %0 : tensor<?x?xf32>
116}
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, narrow the handle to the scf.for op type, and coalesce it.
117module attributes {transform.with_named_sequence} {
118  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
119    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
120    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
121    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
122    transform.yield
123  }
124}
125//      CHECK: func.func @tensor_loops(
126// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?xf32>
127// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
128// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
129// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
130// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
131// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
132// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
133// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
134// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
135// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
136//      CHECK:   %[[NITERS0:.+]] = affine.apply
137// CHECK-SAME:       affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB0]], %[[UB0]], %[[STEP0]]]
138//      CHECK:   %[[C0:.+]] = arith.constant 0 : index
139//      CHECK:   %[[C1:.+]] = arith.constant 1 : index
140//      CHECK:   %[[NITERS1:.+]] = affine.apply
141// CHECK-SAME:       affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB1]], %[[UB1]], %[[STEP1]]]
142//      CHECK:   %[[NITERS2:.+]] = affine.apply
143// CHECK-SAME:        affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB2]], %[[UB2]], %[[STEP2]]]
144//      CHECK:   %[[NEWUB:.+]] = affine.apply affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] ->
145// CHECK-SAME:       ((((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5)) * ((-s6 + s7) ceildiv s8))
146// CHECK-SAME:       [%[[LB0]], %[[UB0]], %[[STEP0]], %[[LB1]], %[[UB1]], %[[STEP1]], %[[LB2]], %[[UB2]], %[[STEP2]]]
147//      CHECK:   %[[RESULT:.+]] = scf.for %[[IV:[a-zA-Z0-9]+]] = %[[C0]] to %[[NEWUB]] step %[[C1]] iter_args(%[[ITER_ARG:.+]] = %[[ARG0]])
148//      CHECK:     %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[IV]] into (%[[NITERS0]], %[[NITERS1]], %[[NITERS2]])
149//  CHECK-DAG:     %[[K:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#2)[%[[LB2]], %[[STEP2]]]
150//  CHECK-DAG:     %[[J:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#1)[%[[LB1]], %[[STEP1]]]
151//  CHECK-DAG:     %[[I:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#0)[%[[LB0]], %[[STEP0]]]
152//      CHECK:     %[[USE:.+]] = "use"(%[[ITER_ARG]], %[[I]], %[[J]], %[[K]])
153//      CHECK:     scf.yield %[[USE]]
154//      CHECK:   return %[[RESULT]]
155
156// -----
157
158// Coalesce only the first two loops, but not the last, since the iter_args don't line up:
// the innermost loop binds its iter_args in swapped order (%arg6 = %arg5, %arg7 = %arg4).
// The checks after the transform script expect the innermost scf.for to keep
// its original bounds and step.
159func.func @tensor_loops_first_two(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
160    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
161  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
162    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
163      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg5, %arg7 = %arg4) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
164        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>, tensor<?x?xf32>)
165        scf.yield %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
166      }
167      scf.yield %2#0, %2#1 : tensor<?x?xf32>, tensor<?x?xf32>
168    }
169    scf.yield %1#0, %1#1 : tensor<?x?xf32>, tensor<?x?xf32>
170  } {coalesce}
171  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
172}
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, narrow the handle to the scf.for op type, and coalesce it.
173module attributes {transform.with_named_sequence} {
174  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
175    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
176    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
177    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
178    transform.yield
179  }
180}
181//      CHECK: func.func @tensor_loops_first_two(
182// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
183// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
184// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
185// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
186// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
187// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
188// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
189// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
190// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
191// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
192// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
193//      CHECK:   scf.for
194//      CHECK:     affine.delinearize_index
195//      CHECK:     scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
196//  CHECK-NOT:       scf.for
197//      CHECK:   transform.named_sequence
198
199// -----
200
201// Coalesce only the first two loops, but not the last, since the yields don't match up:
// the %j loop yields the results of the %k loop in swapped order (%2#1, %2#0).
// The checks after the transform script expect the innermost scf.for to keep
// its original bounds and step.
202func.func @tensor_loops_first_two_2(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
203    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
204  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
205    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
206      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
207        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>, tensor<?x?xf32>)
208        scf.yield %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
209      }
210      scf.yield %2#1, %2#0 : tensor<?x?xf32>, tensor<?x?xf32>
211    }
212    scf.yield %1#0, %1#1 : tensor<?x?xf32>, tensor<?x?xf32>
213  } {coalesce}
214  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
215}
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, narrow the handle to the scf.for op type, and coalesce it.
216module attributes {transform.with_named_sequence} {
217  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
218    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
219    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
220    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
221    transform.yield
222  }
223}
224//      CHECK: func.func @tensor_loops_first_two_2(
225// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
226// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
227// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
228// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
229// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
230// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
231// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
232// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
233// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
234// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
235// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
236//      CHECK:   scf.for
237//      CHECK:     affine.delinearize_index
238//      CHECK:     scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
239//  CHECK-NOT:       scf.for
240//      CHECK:   transform.named_sequence
241
242// -----
243
244// Coalesce only the last two loops, but not the first, since the yields don't match up:
// the %i loop yields the results of the %j loop in swapped order (%1#1, %1#0).
// The checks after the transform script expect the outermost scf.for to keep
// its original bounds and step, with affine.delinearize_index appearing only
// inside the second scf.for.
245func.func @tensor_loops_last_two(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
246    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
247  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
248    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
249      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
250        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>, tensor<?x?xf32>)
251        scf.yield %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
252      }
253      scf.yield %2#0, %2#1 : tensor<?x?xf32>, tensor<?x?xf32>
254    }
255    scf.yield %1#1, %1#0 : tensor<?x?xf32>, tensor<?x?xf32>
256  } {coalesce}
257  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
258}
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, narrow the handle to the scf.for op type, and coalesce it.
259module attributes {transform.with_named_sequence} {
260  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
261    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
262    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
263    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
264    transform.yield
265  }
266}
267//      CHECK: func.func @tensor_loops_last_two(
268// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
269// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
270// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
271// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
272// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
273// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
274// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
275// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
276// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
277// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
278// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
279//      CHECK:   scf.for %{{[a-zA-Z0-9]+}} = %[[LB0]] to %[[UB0]] step %[[STEP0]]
280//  CHECK-NOT:     affine.delinearize_index
281//      CHECK:     scf.for
282//      CHECK:       affine.delinearize_index
283//  CHECK-NOT:       scf.for
284//      CHECK:   transform.named_sequence
285
286// -----
287
288// Check that no unnecessary operations are generated while collapsing trip-1 loops.
// Five nested scf.for loops: the %iv0, %iv1 and %iv3 loops run from %c0 to %c1
// with step %c1, while the %iv2 and %iv4 loops run up to %arg1 and %arg2. The
// transform script applies canonicalization patterns after coalescing; the
// checks then expect a two-way affine.delinearize_index and the constant 0 in
// place of the three trip-1 induction variables.
289func.func @trip_one_loops(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index) -> tensor<?x?xf32> {
290  %c0 = arith.constant 0 : index
291  %c1 = arith.constant 1 : index
292  %0 = scf.for %iv0 = %c0 to %c1 step %c1 iter_args(%iter0 = %arg0) -> tensor<?x?xf32> {
293    %1 = scf.for %iv1 = %c0 to %c1 step %c1 iter_args(%iter1 = %iter0) -> tensor<?x?xf32> {
294      %2 = scf.for %iv2 = %c0 to %arg1 step %c1 iter_args(%iter2 = %iter1) -> tensor<?x?xf32> {
295        %3 = scf.for %iv3 = %c0 to %c1 step %c1 iter_args(%iter3 = %iter2) -> tensor<?x?xf32> {
296          %4 = scf.for %iv4 = %c0 to %arg2 step %c1 iter_args(%iter4 = %iter3) -> tensor<?x?xf32> {
297            %5 = "some_use"(%iter4, %iv0, %iv1, %iv2, %iv3, %iv4)
298              : (tensor<?x?xf32>, index, index, index, index, index) -> (tensor<?x?xf32>)
299            scf.yield %5 : tensor<?x?xf32>
300          }
301          scf.yield %4 : tensor<?x?xf32>
302        }
303        scf.yield %3 : tensor<?x?xf32>
304      }
305      scf.yield %2 : tensor<?x?xf32>
306    }
307    scf.yield %1 : tensor<?x?xf32>
308  } {coalesce}
309  return %0 : tensor<?x?xf32>
310}
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, coalesce it, then apply canonicalization patterns to the
// coalesced loop.
311module attributes {transform.with_named_sequence} {
312  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
313    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
314    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
315    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
316    transform.apply_patterns to %coalesced {
317      transform.apply_patterns.canonicalization
318    } : !transform.op<"scf.for">
319    transform.yield
320  }
321}
322// CHECK-LABEL: func @trip_one_loops
323//  CHECK-SAME:     , %[[ARG1:.+]]: index,
324//  CHECK-SAME:     %[[ARG2:.+]]: index)
325//   CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
326//   CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
327//       CHECK:   %[[UB:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%[[ARG1]], %[[ARG2]]]
328//       CHECK:   scf.for %[[IV:.+]] = %[[C0]] to %[[UB]] step %[[C1]]
329//       CHECK:     %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[IV]] into (%[[ARG1]], %[[ARG2]])
330//       CHECK:     "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[DELINEARIZE]]#0, %[[C0]], %[[DELINEARIZE]]#1)
331
332// -----
333
334// Check that no extra instructions are generated when all loops except one are unit-trip.
// Three nested scf.for loops: the %iv0 and %iv1 loops run from %c0 to %c1 with
// step %c1, and only the %iv2 loop runs up to %arg1. The checks after the
// transform script expect a single scf.for from 0 to %arg1 whose induction
// variable is passed to "some_use" directly, with the constant 0 in place of
// the two trip-1 induction variables.
335func.func @all_outer_trip_one(%arg0 : tensor<?x?xf32>, %arg1 : index) -> tensor<?x?xf32> {
336  %c0 = arith.constant 0 : index
337  %c1 = arith.constant 1 : index
338  %0 = scf.for %iv0 = %c0 to %c1 step %c1 iter_args(%iter0 = %arg0) -> tensor<?x?xf32> {
339    %1 = scf.for %iv1 = %c0 to %c1 step %c1 iter_args(%iter1 = %iter0) -> tensor<?x?xf32> {
340      %2 = scf.for %iv2 = %c0 to %arg1 step %c1 iter_args(%iter2 = %iter1) -> tensor<?x?xf32> {
341        %3 = "some_use"(%iter2, %iv0, %iv1, %iv2)
342          : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>)
343        scf.yield %3 : tensor<?x?xf32>
344      }
345      scf.yield %2 : tensor<?x?xf32>
346    }
347    scf.yield %1 : tensor<?x?xf32>
348  } {coalesce}
349  return %0 : tensor<?x?xf32>
350}
// Transform script for the test case above: match the scf.for tagged
// `coalesce`, coalesce it, then apply canonicalization patterns to the
// coalesced loop.
351module attributes {transform.with_named_sequence} {
352  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
353    %tagged = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %root : (!transform.any_op) -> !transform.any_op
354    %loop = transform.cast %tagged : !transform.any_op to !transform.op<"scf.for">
355    %coalesced = transform.loop.coalesce %loop : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
356    transform.apply_patterns to %coalesced {
357      transform.apply_patterns.canonicalization
358    } : !transform.op<"scf.for">
359    transform.yield
360  }
361}
362// CHECK-LABEL: func @all_outer_trip_one
363//  CHECK-SAME:     , %[[ARG1:.+]]: index)
364//   CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
365//   CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
366//       CHECK:   scf.for %[[IV:.+]] = %[[C0]] to %[[ARG1]] step %[[C1]]
367//       CHECK:     "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[IV]])
368