// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse --mlir-print-local-scope | FileCheck %s

// Every split-input chunk below pairs a function containing one loop tagged
// with the `coalesce` unit attribute with a transform module that matches that
// loop and applies `transform.loop.coalesce` to it.

// The tagged loop is %k. The directives below expect three nested scf.for ops
// in the output and no fourth one.
func.func @coalesce_inner() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index

  // CHECK: scf.for %[[IV0:.+]]
  // CHECK:   scf.for %[[IV1:.+]]
  // CHECK:     scf.for %[[IV2:.+]]
  // The trailing `%` keeps this pattern from matching the "scf.for" strings
  // printed as part of the transform module further down in the output.
  // CHECK-NOT:   scf.for %[[IV3:.+]]
  scf.for %i = %c0 to %c10 step %c1 {
    scf.for %j = %c0 to %c10 step %c1 {
      scf.for %k = %i to %j step %c1 {
        // Inner loop must have been removed.
        scf.for %l = %i to %j step %c1 {
          "use"(%i, %j) : (index, index) -> ()
        }
      } {coalesce}
    }
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.yield
  }
}

// -----

// Two nested affine.for loops over [0, 64) with the outer one tagged. The
// directives expect a single affine.for whose body recovers the two original
// indices with `mod` and `floordiv`.
func.func @coalesce_outer(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} {
  // CHECK: %[[T0:.+]] = affine.apply affine_map<() -> (64)>()
  // CHECK: %[[UB:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
  // The loop bound must be the product computed just above, so UB is reused
  // here instead of being captured a second time.
  // CHECK: affine.for %[[IV1:.+]] = 0 to %[[UB]] {
  // CHECK-NOT: affine.for %[[IV2:.+]]
  affine.for %arg4 = 0 to 64 {
    affine.for %arg5 = 0 to 64 {
      // CHECK: %[[IDX0:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV1]])[%{{.+}}]
      // CHECK: %[[IDX1:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV1]])[%{{.+}}]
      // CHECK-NEXT: %{{.+}} = affine.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1>
      %0 = affine.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1>
      %1 = affine.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1>
      %2 = arith.addf %0, %1 : f32
      affine.store %2, %arg3[%arg4, %arg5] : memref<64x64xf32, 1>
    }
  } {coalesce}
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["affine.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"affine.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"affine.for">) -> (!transform.op<"affine.for">)
    transform.yield
  }
}

// -----

// The coalesced loop is additionally unrolled by a factor of 3 by the
// transform sequence below. The directives expect three stores followed by one
// more store for the residual loop.
func.func @coalesce_and_unroll(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} {
  // CHECK: scf.for %[[IV1:.+]] =
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c64 = arith.constant 64 : index

  scf.for %arg4 = %c0 to %c64 step %c1 {
    // CHECK-NOT: scf.for
    scf.for %arg5 = %c0 to %c64 step %c1 {
      // CHECK: %[[IDX:.+]]:2 = affine.delinearize_index
      // CHECK-NEXT: %{{.+}} = memref.load %{{.+}}[%[[IDX]]#0, %[[IDX]]#1] : memref<64x64xf32, 1>
      %0 = memref.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1>
      %1 = memref.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1>
      %2 = arith.addf %0, %1 : f32
      // CHECK: memref.store
      // CHECK: memref.store
      // CHECK: memref.store
      // Residual loop must have a single store.
      // CHECK: memref.store
      memref.store %2, %arg3[%arg4, %arg5] : memref<64x64xf32, 1>
    }
  } {coalesce}
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.loop.unroll %2 {factor = 3} : !transform.op<"scf.for">
    transform.yield
  }
}

// -----

// Three nested scf.for loops with dynamic bounds that thread a single tensor
// through iter_args. The directives expect one scf.for whose induction variable
// is delinearized into the three original iteration indices.
func.func @tensor_loops(%arg0 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> tensor<?x?xf32> {
  %0 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg1 = %arg0) -> tensor<?x?xf32> {
    %1 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg2 = %arg1) -> tensor<?x?xf32> {
      %2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg3 = %arg2) -> tensor<?x?xf32> {
        %3 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>)
        scf.yield %3 : tensor<?x?xf32>
      }
      scf.yield %2 : tensor<?x?xf32>
    }
    scf.yield %1 : tensor<?x?xf32>
  } {coalesce}
  return %0 : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.yield
  }
}
//      CHECK: func.func @tensor_loops(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
//      CHECK:   %[[NITERS0:.+]] = affine.apply
// CHECK-SAME:       affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB0]], %[[UB0]], %[[STEP0]]]
//      CHECK:   %[[C0:.+]] = arith.constant 0 : index
//      CHECK:   %[[C1:.+]] = arith.constant 1 : index
//      CHECK:   %[[NITERS1:.+]] = affine.apply
// CHECK-SAME:       affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB1]], %[[UB1]], %[[STEP1]]]
//      CHECK:   %[[NITERS2:.+]] = affine.apply
// CHECK-SAME:       affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB2]], %[[UB2]], %[[STEP2]]]
//      CHECK:   %[[NEWUB:.+]] = affine.apply affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] ->
// CHECK-SAME:       ((((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5)) * ((-s6 + s7) ceildiv s8))
// CHECK-SAME:       [%[[LB0]], %[[UB0]], %[[STEP0]], %[[LB1]], %[[UB1]], %[[STEP1]], %[[LB2]], %[[UB2]], %[[STEP2]]]
//      CHECK:   %[[RESULT:.+]] = scf.for %[[IV:[a-zA-Z0-9]+]] = %[[C0]] to %[[NEWUB]] step %[[C1]] iter_args(%[[ITER_ARG:.+]] = %[[ARG0]])
//      CHECK:     %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[IV]] into (%[[NITERS0]], %[[NITERS1]], %[[NITERS2]])
//  CHECK-DAG:     %[[K:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#2)[%[[LB2]], %[[STEP2]]]
//  CHECK-DAG:     %[[J:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#1)[%[[LB1]], %[[STEP1]]]
//  CHECK-DAG:     %[[I:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#0)[%[[LB0]], %[[STEP0]]]
//      CHECK:     %[[USE:.+]] = "use"(%[[ITER_ARG]], %[[I]], %[[J]], %[[K]])
//      CHECK:     scf.yield %[[USE]]
//      CHECK:   return %[[RESULT]]

// -----

// Coalesce only first two loops, but not the last since the iter_args don't line up.
// (The innermost loop takes %arg5 and %arg4 in swapped order.)
func.func @tensor_loops_first_two(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg5, %arg7 = %arg4) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>, tensor<?x?xf32>)
        scf.yield %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
      }
      scf.yield %2#0, %2#1 : tensor<?x?xf32>, tensor<?x?xf32>
    }
    scf.yield %1#0, %1#1 : tensor<?x?xf32>, tensor<?x?xf32>
  } {coalesce}
  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.yield
  }
}
//      CHECK: func.func @tensor_loops_first_two(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
//      CHECK:   scf.for
//      CHECK:     affine.delinearize_index
//      CHECK:     scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
//  CHECK-NOT:     scf.for
//      CHECK:   transform.named_sequence

// -----

// Coalesce only first two loops, but not the last since the yields don't match up.
// (The middle loop yields the innermost results in swapped order.)
func.func @tensor_loops_first_two_2(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>, tensor<?x?xf32>)
        scf.yield %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
      }
      scf.yield %2#1, %2#0 : tensor<?x?xf32>, tensor<?x?xf32>
    }
    scf.yield %1#0, %1#1 : tensor<?x?xf32>, tensor<?x?xf32>
  } {coalesce}
  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.yield
  }
}
//      CHECK: func.func @tensor_loops_first_two_2(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
//      CHECK:   scf.for
//      CHECK:     affine.delinearize_index
//      CHECK:     scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
//  CHECK-NOT:     scf.for
//      CHECK:   transform.named_sequence

// -----

// Coalesce only last two loops, but not the first since the yields don't match up.
// (The outermost loop yields the middle loop's results in swapped order.)
func.func @tensor_loops_last_two(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %lb0 : index, %ub0 : index, %step0 : index,
    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>, tensor<?x?xf32>)
        scf.yield %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
      }
      scf.yield %2#0, %2#1 : tensor<?x?xf32>, tensor<?x?xf32>
    }
    scf.yield %1#1, %1#0 : tensor<?x?xf32>, tensor<?x?xf32>
  } {coalesce}
  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.yield
  }
}
//      CHECK: func.func @tensor_loops_last_two(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[LB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[LB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:     %[[STEP2:[a-zA-Z0-9_]+]]: index
//      CHECK:   scf.for %{{[a-zA-Z0-9]+}} = %[[LB0]] to %[[UB0]] step %[[STEP0]]
//  CHECK-NOT:     affine.delinearize_index
//      CHECK:     scf.for
//      CHECK:       affine.delinearize_index
//  CHECK-NOT:     scf.for
//      CHECK:   transform.named_sequence

// -----

// Check avoiding generating unnecessary operations while collapsing trip-1 loops.
// Loops %iv0, %iv1 and %iv3 run from 0 to 1 with step 1; only %iv2 and %iv4
// have dynamic upper bounds (%arg1 and %arg2). The transform sequence also
// applies canonicalization patterns to the coalesced loop.
func.func @trip_one_loops(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index) -> tensor<?x?xf32> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %0 = scf.for %iv0 = %c0 to %c1 step %c1 iter_args(%iter0 = %arg0) -> tensor<?x?xf32> {
    %1 = scf.for %iv1 = %c0 to %c1 step %c1 iter_args(%iter1 = %iter0) -> tensor<?x?xf32> {
      %2 = scf.for %iv2 = %c0 to %arg1 step %c1 iter_args(%iter2 = %iter1) -> tensor<?x?xf32> {
        %3 = scf.for %iv3 = %c0 to %c1 step %c1 iter_args(%iter3 = %iter2) -> tensor<?x?xf32> {
          %4 = scf.for %iv4 = %c0 to %arg2 step %c1 iter_args(%iter4 = %iter3) -> tensor<?x?xf32> {
            %5 = "some_use"(%iter4, %iv0, %iv1, %iv2, %iv3, %iv4)
              : (tensor<?x?xf32>, index, index, index, index, index) -> (tensor<?x?xf32>)
            scf.yield %5 : tensor<?x?xf32>
          }
          scf.yield %4 : tensor<?x?xf32>
        }
        scf.yield %3 : tensor<?x?xf32>
      }
      scf.yield %2 : tensor<?x?xf32>
    }
    scf.yield %1 : tensor<?x?xf32>
  } {coalesce}
  return %0 : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.apply_patterns to %2 {
      transform.apply_patterns.canonicalization
    } : !transform.op<"scf.for">
    transform.yield
  }
}
// The expected output has one loop bounded by %arg1 * %arg2, with the three
// unit-trip induction variables replaced by the constant 0.
// CHECK-LABEL: func @trip_one_loops
//  CHECK-SAME:     , %[[ARG1:.+]]: index,
//  CHECK-SAME:     %[[ARG2:.+]]: index)
//   CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//   CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
//       CHECK:   %[[UB:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%[[ARG1]], %[[ARG2]]]
//       CHECK:   scf.for %[[IV:.+]] = %[[C0]] to %[[UB]] step %[[C1]]
//       CHECK:     %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[IV]] into (%[[ARG1]], %[[ARG2]])
//       CHECK:     "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[DELINEARIZE]]#0, %[[C0]], %[[DELINEARIZE]]#1)

// -----

// Check that no extra index computations are generated when all loops except
// one are unit-trip: the expected output is a single loop over [0, %arg1) whose
// induction variable is passed to "some_use" directly.
func.func @all_outer_trip_one(%arg0 : tensor<?x?xf32>, %arg1 : index) -> tensor<?x?xf32> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %0 = scf.for %iv0 = %c0 to %c1 step %c1 iter_args(%iter0 = %arg0) -> tensor<?x?xf32> {
    %1 = scf.for %iv1 = %c0 to %c1 step %c1 iter_args(%iter1 = %iter0) -> tensor<?x?xf32> {
      %2 = scf.for %iv2 = %c0 to %arg1 step %c1 iter_args(%iter2 = %iter1) -> tensor<?x?xf32> {
        %3 = "some_use"(%iter2, %iv0, %iv1, %iv2)
          : (tensor<?x?xf32>, index, index, index) -> (tensor<?x?xf32>)
        scf.yield %3 : tensor<?x?xf32>
      }
      scf.yield %2 : tensor<?x?xf32>
    }
    scf.yield %1 : tensor<?x?xf32>
  } {coalesce}
  return %0 : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
    transform.apply_patterns to %2 {
      transform.apply_patterns.canonicalization
    } : !transform.op<"scf.for">
    transform.yield
  }
}
// CHECK-LABEL: func @all_outer_trip_one
//  CHECK-SAME:     , %[[ARG1:.+]]: index)
//   CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//   CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
//       CHECK:   scf.for %[[IV:.+]] = %[[C0]] to %[[ARG1]] step %[[C1]]
//       CHECK:     "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[IV]])