// RUN: mlir-opt %s -transform-interpreter -verify-diagnostics -allow-unregistered-dialect -split-input-file | FileCheck %s

// Tests for the transform dialect operations of the memref dialect.

// alloca_to_global: every matched memref.alloca is replaced by a "private"
// memref.global at module scope plus a memref.get_global at the alloca's
// original position.

// CHECK-DAG: memref.global "private" @[[ALLOC0:alloc.*]] : memref<2x32xf32>
// CHECK-DAG: memref.global "private" @[[ALLOC1:alloc.*]] : memref<2x32xf32>

// CHECK-DAG: func.func @func(%[[LB:.*]]: index, %[[UB:.*]]: index)
func.func @func(%lb: index, %ub: index) {
  // CHECK-DAG: scf.forall (%[[ARG0:.*]], %[[ARG1:.*]]) in (%[[LB]], %[[UB]])
  scf.forall (%arg0, %arg1) in (%lb, %ub) {
    // CHECK-DAG: %[[MR0:.*]] = memref.get_global @[[ALLOC0]] : memref<2x32xf32>
    // CHECK-DAG: %[[MR1:.*]] = memref.get_global @[[ALLOC1]] : memref<2x32xf32>
    // CHECK-DAG: memref.store %{{.*}}, %[[MR0]][%{{.*}}, %{{.*}}] : memref<2x32xf32>
    // CHECK-DAG: memref.store %{{.*}}, %[[MR1]][%{{.*}}, %{{.*}}] : memref<2x32xf32>
    %cst = arith.constant 0.0 : f32
    %mr0 = memref.alloca() : memref<2x32xf32>
    %mr1 = memref.alloca() : memref<2x32xf32>
    memref.store %cst, %mr0[%arg0, %arg1] : memref<2x32xf32>
    memref.store %cst, %mr1[%arg0, %arg1] : memref<2x32xf32>
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %alloca = transform.structured.match ops{["memref.alloca"]} in %arg0
      : (!transform.any_op) -> !transform.op<"memref.alloca">
    %get_global, %global = transform.memref.alloca_to_global %alloca
      : (!transform.op<"memref.alloca">)
      -> (!transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// multibuffer with factor = 2: the memref<4xf32> alloc is expanded to
// memref<2x4xf32>, and each scf.for iteration addresses slice
// (iv floordiv step) mod 2 of it through an affine.apply + subview.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK-LABEL: func @multi_buffer
func.func @multi_buffer(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>
    memref.copy %1, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Same transformation as above, but the loop enclosing the uses of the
// buffer is an affine.for instead of an scf.for.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK-LABEL: func @multi_buffer_on_affine_loop
func.func @multi_buffer_on_affine_loop(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  // CHECK: affine.for %[[IV:.*]] = 0
  affine.for %i0 = 0 to 16 step 4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>
    memref.copy %1, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Trying to use multibuffer on allocs that are used in different loops
// with none dominating the other is going to fail.
// Check that we emit a proper error for that.
func.func @multi_buffer_uses_with_no_loop_dominator(%in: memref<16xf32>, %cond: i1) {
  // expected-error @below {{op failed to multibuffer}}
  %tmp = memref.alloc() : memref<4xf32>

  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index
  // First loop using %tmp, guarded by a condition.
  scf.if %cond {
    scf.for %i0 = %c0 to %c16 step %c4 {
      %var = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
      memref.copy %var, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

      "some_use"(%tmp) : (memref<4xf32>) ->()
    }
  }

  // Second loop using %tmp; neither loop dominates the other.
  scf.for %i0 = %c0 to %c16 step %c4 {
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    memref.copy %1, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    transform.yield
  }
}

// -----

// Make sure the multibuffer operation is typed so that it only supports
// memref.alloc.
// Check that we emit an error if we try to match something else.
func.func @multi_buffer_reject_alloca(%in: memref<16xf32>, %cond: i1) {
  %tmp = memref.alloca() : memref<4xf32>

  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index
  scf.if %cond {
    scf.for %i0 = %c0 to %c16 step %c4 {
      %var = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
      memref.copy %var, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

      "some_use"(%tmp) : (memref<4xf32>) ->()
    }
  }

  scf.for %i0 = %c0 to %c16 step %c4 {
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    memref.copy %1, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloca"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloca">
    // expected-error @below {{'transform.memref.multibuffer' op operand #0 must be Transform IR handle to memref.alloc operations, but got '!transform.op<"memref.alloca">'}}
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloca">) -> !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK-LABEL: func @multi_buffer_one_alloc_with_use_outside_of_loop
// Make sure we manage to apply multi_buffer to the memref that is used in
// the loop (%tmp) and don't error out for the one that is not (%tmp2).
func.func @multi_buffer_one_alloc_with_use_outside_of_loop(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>
  %tmp2 = memref.alloc() : memref<4xf32>

  "some_use_outside_of_loop"(%tmp2) : (memref<4xf32>) -> ()

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>
    memref.copy %1, %tmp : memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// With skip_analysis set, multibuffering is applied even though the buffer's
// only use inside the loop is an unregistered op ("some_write_read") that the
// use analysis cannot reason about.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>

// CHECK-LABEL: func @multi_buffer
func.func @multi_buffer_no_analysis(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    "some_write_read"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64, skip_analysis} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// The dealloc of the original buffer must be rewritten to release the
// multibuffered allocation instead.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>

// CHECK-LABEL: func @multi_buffer_dealloc
func.func @multi_buffer_dealloc(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    "some_write_read"(%tmp) : (memref<4xf32>) ->()
  }

  // CHECK-NOT: memref.dealloc {{.*}} : memref<4xf32>
  // CHECK: memref.dealloc %[[A]] : memref<2x4xf32>
  memref.dealloc %tmp : memref<4xf32>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64, skip_analysis} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// erase_dead_alloc_and_stores: an alloc whose contents are only ever written
// (via the subview's transfer_write) and never read is removed entirely.

// CHECK-LABEL: func.func @dead_alloc
func.func @dead_alloc() {
  // CHECK-NOT: %{{.+}} = memref.alloc
  %0 = memref.alloc() : memref<8x64xf32, 3>
  %1 = memref.subview %0[0, 0] [8, 4] [1, 1] : memref<8x64xf32, 3> to
    memref<8x4xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>, 3>
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant dense<0.000000e+00> : vector<1x4xf32>
  vector.transfer_write %cst_0, %1[%c0, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>, 3>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.memref.erase_dead_alloc_and_stores %0 : (!transform.any_op) -> ()
    transform.yield
  }
}

// -----

// A transfer_read that reads back exactly what a dominating transfer_write
// stored is forwarded to the written value, making the alloc and both
// transfers dead.

// CHECK-LABEL: @store_to_load
//  CHECK-SAME:   (%[[ARG:.+]]: vector<4xf32>)
//   CHECK-NOT:   memref.alloc()
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   return %[[ARG]] : vector<4xf32>
func.func @store_to_load(%arg: vector<4xf32>) -> vector<4xf32> {
  %c0 = arith.constant 0 : index
  %cst_1 = arith.constant 0.000000e+00 : f32
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
  vector.transfer_write %arg, %alloc[%c0] {in_bounds = [true]} : vector<4xf32>, memref<64xf32>
  %r = vector.transfer_read %alloc[%c0], %cst_1 {in_bounds = [true]} : memref<64xf32>, vector<4xf32>
  return %r : vector<4xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.memref.erase_dead_alloc_and_stores %0 : (!transform.any_op) -> ()
    transform.yield
  }
}

// -----

// apply_conversion_patterns with the memref-to-LLVM type converter lowers
// memref.alloc to an llvm.call @malloc.

// CHECK-LABEL: func @lower_to_llvm
//   CHECK-NOT:   memref.alloc
//       CHECK:   llvm.call @malloc
func.func @lower_to_llvm() {
  %0 = memref.alloc() : memref<2048xi8>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_conversion_patterns to %0 {
      transform.apply_conversion_patterns.dialect_to_llvm "memref"
    } with type_converter {
      transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
    } {legal_dialects = ["func", "llvm"]} : !transform.any_op
    transform.yield
  }
}