// Note: Default is function-boundary-type-conversion=infer-layout-map
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

// Test bufferization using memref types that have fully dynamic layout maps.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-FULLY-DYNAMIC-LAYOUT-MAP


// Bufferization of bodiless function with no tensor return value.

// CHECK-LABEL: func private @private_func(memref<?xf32, strided<[?], offset: ?>>
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref<?xf32>)
func.func private @private_func(tensor<?xf32>) -> ()

// CHECK-LABEL: func private @private_func_2d(memref<?x?xf32, strided<[?, ?], offset: ?>>
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func_2d(memref<?x?xf32>)
func.func private @private_func_2d(tensor<?x?xf32>) -> ()

// CHECK-LABEL: func @empty_func() {
// CHECK-NO-LAYOUT-MAP-LABEL: func @empty_func() {
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @empty_func() {
func.func @empty_func() -> () {
  return
}

// -----

// A bodiless function that returns something that is not a tensor.

// CHECK: func private @external_func_with_return_val(memref<4xi32, strided{{.*}}>) -> f32
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func private @external_func_with_return_val(memref<4xi32,
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: strided<[?], offset: ?>>
// CHECK-NO-LAYOUT-MAP-LABEL: func private @external_func_with_return_val(memref<4xi32>)
func.func private @external_func_with_return_val(tensor<4xi32>) -> f32

// -----

// Bufferization of bodiless function that returns a tensor.

// CHECK: func.func private @foo(memref<?xf32, strided<[?], offset: ?>>) -> (f32, memref<?xf32, strided<[?], offset: ?>>, f32)
func.func private @foo(%t : tensor<?xf32>) -> (f32, tensor<?xf32>, f32)

// CHECK: func.func @call_to_unknown_tensor_returning_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32, strided<[?], offset: ?>>) {
func.func @call_to_unknown_tensor_returning_func(%t : tensor<?xf32>) {
  // CHECK: call @foo(%[[arg0]]) : (memref<?xf32, strided<[?], offset: ?>>) -> (f32, memref<?xf32, strided<[?], offset: ?>>, f32)
  call @foo(%t) : (tensor<?xf32>) -> (f32, tensor<?xf32>, f32)
  return
}

// -----

// A function that returns a non-equivalent tensor with layout map.
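// The returned slice aliases the interior of a larger buffer, so the inferred
// return type carries a strided layout. With identity-layout-map, the subview
// is instead copied into a fresh buffer with identity layout, which is then
// returned.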

// CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32, strided<[10, 1], offset: ?>>
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
// CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
// CHECK: return %[[subview]]

// CHECK-NO-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32>
// CHECK-NO-LAYOUT-MAP: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
// CHECK-NO-LAYOUT-MAP: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
// CHECK-NO-LAYOUT-MAP: %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref<2x?xf32>
// CHECK-NO-LAYOUT-MAP: memref.copy %[[subview]], %[[alloc_no_layout]]
// TODO: %alloc should be deallocated here, but we currently do not dealloc
// buffers that are inserted due to to_tensor/to_memref canonicalization (when
// the buffer types have different layout maps).
// CHECK-NO-LAYOUT-MAP: return %[[alloc_no_layout]]

// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: strided<[?, ?], offset: ?>> {
func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>)
{
  %t = bufferization.alloc_tensor() : tensor<20x10xf32>
  %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1]
      : tensor<20x10xf32> to tensor<2x?xf32>
  return %0 : tensor<2x?xf32>
}

// -----

// CHECK-NO-LAYOUT-MAP-LABEL: func.func @foo(
// CHECK-NO-LAYOUT-MAP-SAME: %[[VAL_0:.*]]: memref<3x8xf16>) -> memref<3x8xf16> {
// CHECK-NO-LAYOUT-MAP: return %[[VAL_0]] : memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP: }
func.func @foo(%arg0: tensor<3x8xf16>) -> tensor<3x8xf16> {
  return %arg0 : tensor<3x8xf16>
}

// CHECK-NO-LAYOUT-MAP-LABEL: func.func @call_extract_slice(
// CHECK-NO-LAYOUT-MAP-SAME: %[[VAL_0:.*]]: memref<4x8xf16>) -> memref<3x8xf16> {
// CHECK-NO-LAYOUT-MAP: %[[VAL_1:.*]] = memref.subview %[[VAL_0]][1, 0] [3, 8] [1, 1] : memref<4x8xf16> to memref<3x8xf16, strided<[8, 1], offset: 8>>
// CHECK-NO-LAYOUT-MAP: %[[VAL_2:.*]] = memref.alloc() {alignment = 64 : i64} : memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP: memref.copy %[[VAL_1]], %[[VAL_2]] : memref<3x8xf16, strided<[8, 1], offset: 8>> to memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP: %[[VAL_3:.*]] = call @foo(%[[VAL_2]]) : (memref<3x8xf16>) -> memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP: return %[[VAL_3]] : memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP: }
func.func @call_extract_slice(%arg0: tensor<4x8xf16>) -> (tensor<3x8xf16>) {
  %0 = tensor.extract_slice %arg0[1, 0] [3, 8] [1, 1] : tensor<4x8xf16> to tensor<3x8xf16>
  %1 = call @foo(%0) : (tensor<3x8xf16>) -> tensor<3x8xf16>
  return %1 : tensor<3x8xf16>
}

// -----

// CHECK-LABEL: func private @private_func
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref<?xf32>) -> f32
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but that's OK because %t is writable.
// No alloc/copy should be inserted.

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t:.*]]: memref<?xf32
// CHECK-NOT: alloc
// CHECK-NOT: copy
// CHECK: call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but %t is not writable. A copy is
// needed.

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t:.*]]: memref<?xf32
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: memref.copy %[[t]], %[[alloc]]
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @private_func(%[[casted]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// Test bufferization of a function without tensor args.

// CHECK-LABEL: func @func_without_tensor_args
func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
  // CHECK: %[[alloc:.*]] = memref.alloc()
  %0 = bufferization.alloc_tensor() : tensor<10xf32>

  %c0 = arith.constant 0 : index
  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
  %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32>

  %cst = arith.constant 0.0 : f32
  // CHECK: vector.transfer_read %[[alloc]]
  %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32>

  vector.print %r : vector<11xf32>
  return
}

// -----

// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.

// CHECK-LABEL: func @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
  // CHECK: return %[[call]] : f32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.

// CHECK-LABEL: func @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
  // CHECK: %[[call:.*]] = call @inner_func(%[[casted]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)

  // Note: The tensor return value cannot fold away because the CallOp
  // bufferized out-of-place.
  // CHECK: return %[[call]], %[[casted]] : f32, memref<?xf32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// A chain of function calls. The last function, @f0, may write to the buffer.
// This becomes a problem when bufferizing @main: a copy must be inserted
// there. (No copies are needed in the other functions.)

// CHECK-LABEL: func private @f0(
func.func private @f0(tensor<?xf32>) -> (f32)

// CHECK-LABEL: func @f1(
// CHECK-SAME: %[[t1:.*]]: memref<?xf32
// CHECK: %[[r1:.*]] = call @f0(%[[t1]])
// CHECK: return %[[r1]]
func.func @f1(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f0(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @f2(
// CHECK-SAME: %[[t2:.*]]: memref<?xf32
// CHECK: %[[r2:.*]] = call @f1(%[[t2]])
// CHECK: return %[[r2]]
func.func @f2(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f1(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t3:.*]]: memref<?xf32
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: memref.copy %[[t3]], %[[alloc]]
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: call @f2(%[[casted]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// This function does not read, just write. We need an alloc, but no copy.
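// Because @does_not_read overwrites its argument without reading it
// (linalg.fill only writes the outs buffer), the caller merely allocates a
// fresh buffer for the non-writable %t; copying the original data would be
// pointless.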

// CHECK-LABEL: func @does_not_read(
// CHECK-NOT: alloc
// CHECK-NOT: copy
func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %r : tensor<?xf32>
}

// CHECK-LABEL: func @main(
// CHECK-SAME: %[[t:.*]]: memref<?xf32
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK-NOT: copy
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK-NOT: copy
// CHECK: call @does_not_read(%[[casted]])
// CHECK: %[[r:.*]] = memref.load %[[casted]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
  %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
  %idx = arith.constant 4 : index
  %r = tensor.extract %0[%idx] : tensor<?xf32>
  return %r : f32
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only.

// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
// CHECK: func private @some_external_func(memref<4xi32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<4xi32>)

// CHECK: func @main()
func.func @main() {
// CHECK-DAG: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, strided<[?], offset: ?>>
// CHECK-DAG: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func(%[[B]]) : (memref<4xi32, strided<[?], offset: ?>>) -> ()
  call @some_external_func(%A) : (tensor<4xi32>) -> ()

  return
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only. The
// function call is inside of an scf.execute_region.

// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
// CHECK: func private @some_external_func_within_scf_execute(memref<4xi32, strided<[?], offset: ?>>)
func.func private @some_external_func_within_scf_execute(tensor<4xi32>)

// CHECK: func @main()
func.func @main() {
// CHECK-DAG: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// Note: The scf.execute_region canonicalizes away.

// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, strided<[?], offset: ?>>
// CHECK-DAG: memref.copy %[[A]], %[[alloc]]
// CHECK: call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, strided<[?], offset: ?>>) -> ()
  scf.execute_region {
    func.call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
    scf.yield
  }

  return
}

// -----

// A write inside an scf.execute_region. An equivalent tensor is yielded.

// CHECK-LABEL: func @execute_region_test(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
func.func @execute_region_test(%t1 : tensor<?xf32>)
    -> (f32, tensor<?xf32>, f32)
{
  %f1 = arith.constant 0.0 : f32
  %f2 = arith.constant 1.0 : f32
  %idx = arith.constant 7 : index

  // scf.execute_region is canonicalized away after bufferization. So just the
  // memref.store is left over.

  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: memref.store %{{.*}}, %[[m1]][%{{.*}}]
  %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
    %t2 = tensor.insert %f2 into %t1[%idx] : tensor<?xf32>
    scf.yield %f1, %t2, %f2 : f32, tensor<?xf32>, f32
  }

  // CHECK: return %{{.*}}, %{{.*}} : f32, f32
  return %0, %1, %2 : f32, tensor<?xf32>, f32
}

// -----

// CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<?xf32>)

// CHECK: func @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
func.func @scf_for_with_tensor_insert_slice(
    %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK-NEXT: scf.for
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>)
  {
    // CHECK-NEXT: %[[SVA:.*]] = memref.subview %[[A]]
    // CHECK-NEXT: memref.copy %[[C]], %[[SVA]] : memref<4xf32, strided<[?], offset: ?>> to memref<4xf32, strided<[?], offset: ?>>
    %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // CHECK-NEXT: %[[SVB:.*]] = memref.subview %[[B]]
    // CHECK-NEXT: memref.copy %[[C]], %[[SVB]] : memref<4xf32, strided<[?], offset: ?>> to memref<4xf32, strided<[?], offset: ?>>
    %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // scf.yield is empty and is elided
    // CHECK-NOT: scf.yield
    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
  }

  // The results are returned in swapped order; bufferizing the whole function
  // is required to determine which result aliases which buffer.
  return %r0#1, %r0#0: tensor<?xf32>, tensor<?xf32>
}

// CHECK: func @bar(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
func.func @bar(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %C : tensor<4xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK-DAG: call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]]
  %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) :
      (tensor<?xf32>, tensor<?xf32>, tensor<4xf32>, index, index, index)
        -> (tensor<?xf32>, tensor<?xf32>)

  // %r0#0 requires a copy because we do not know what @some_external_func does
  // with its argument.
// CHECK-DAG: %[[alloc:.*]] = memref.alloc
// CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK-DAG: memref.copy %[[B]], %[[alloc]]
// CHECK-NEXT: call @some_external_func(%[[casted]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()

// CHECK: return
  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK: func @init_and_dot(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>>
func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
  // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
  %v0 = arith.constant 0.0 : f32

  // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32, strided<[], offset: ?>>)
  %d = linalg.fill ins(%v0 : f32) outs(%c : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref<f32, strided<[], offset: ?>>)
  %e = linalg.dot ins(%a, %b : tensor<64xf32>, tensor<64xf32>)
                  outs(%d: tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: return
  return %e : tensor<f32>
}

// CHECK: func @main()
func.func @main() {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0{{.*}} : f32
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1{{.*}} : f32
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2{{.*}} : f32
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32

  // CHECK-NEXT: %[[A:.*]] = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
  // CHECK-NEXT: %[[B:.*]] = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
  // CHECK-NEXT: %[[C:.*]] = memref.alloc() {alignment = 64 : i64} : memref<f32>
  // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, strided<[?], offset: ?>>
  // CHECK-DAG: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, strided<[?], offset: ?>>
  // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, strided<[], offset: ?>>
  %A = bufferization.alloc_tensor() : tensor<64xf32>
  %B = bufferization.alloc_tensor() : tensor<64xf32>
  %C = bufferization.alloc_tensor() : tensor<f32>

  // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
  // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
  // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32>)
  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
  %res = call @init_and_dot(%AA, %BB, %CC) :
      (tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>

  // CHECK-NEXT: %[[dC:.*]] = memref.cast %[[cC]] : memref<f32, {{.*}}> to memref<*xf32>
  %res2 = tensor.cast %res: tensor<f32> to tensor<*xf32>

  // CHECK-NEXT: call @printMemrefF32(%[[dC]]) : (memref<*xf32>) -> ()
  call @printMemrefF32(%res2) : (tensor<*xf32>) -> ()

  // CHECK-NEXT: return
  return
}

// CHECK: func private @printMemrefF32(memref<*xf32>)
func.func private @printMemrefF32(tensor<*xf32>)

// -----

// CHECK: func private @external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @external_func(tensor<?xf32>)

// CHECK: func @callee(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, strided<[?], offset: ?>>
func.func @callee(
    %A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
    %B : tensor<?xf32>,
    %C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, strided<[?], offset: ?>>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @external_func(%A) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[B]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @external_func(%B) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[C]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @external_func(%C) : (tensor<?xf32>) -> ()

  return
}

// CHECK: func @entry(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME: %[[C:[0-9a-zA-Z]*]]: memref<?xf32, strided<[?], offset: ?>>
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
// %A, %B and %C are not inplaceable. This test case shows that this kind of
// conflict detection has a "transitive" nature.
// CHECK-DAG: %[[ALLOC_A:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
// CHECK-DAG: %[[ALLOC_B:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
// CHECK-DAG: %[[ALLOC_C:.*]] = memref.alloc
// CHECK-DAG: %[[CASTED_C:.*]] = memref.cast %[[ALLOC_C]]
// CHECK-DAG: memref.copy %[[A]], %[[ALLOC_A]]
// CHECK-DAG: memref.copy %[[B]], %[[ALLOC_B]]
// CHECK-DAG: memref.copy %[[C]], %[[ALLOC_C]]
// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
  call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
  return
}

// -----

// No alloc or copy inside of the loop.
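// The scf.for iter_arg in @equivalent_func_arg is equivalent to %t0 and to the
// yielded value, and @inner_func writes in place, so no temporary buffer is
// needed inside the loop.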

// CHECK-LABEL: func @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
                               %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: scf.for {{.*}} iter_args(%[[t1:.*]] = %[[arg0]])
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: call @inner_func(%[[t1]])
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    // CHECK: scf.yield %[[t1]]
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// inner_func_2 modifies the bbArg, but the loop yields the original value. A
// buffer copy must be inserted inside the loop.

// CHECK-LABEL: func @inner_func_2(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg_2(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
                                 %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK: scf.for {{.*}} {
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: %[[alloc:.*]] = memref.alloc
    // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
    // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
    // CHECK: call @inner_func_2(%[[casted]])
    // CHECK-NOT: scf.yield
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %t1 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// Bufferize without fully dynamic layout maps.

// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, strided{{.*}}>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
    %A : tensor<?xf32> {bufferization.writable = false})
  -> (vector<4xf32>)
{
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

// CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, strided{{.*}}>, vector<4xf32>
  %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>

// CHECK: return %[[RES]] : vector<4xf32>
  return %0 : vector<4xf32>
}

// -----

// CHECK-LABEL: func @main(
func.func @main() {
  // CHECK: %[[const:.*]] = memref.get_global
  %t = arith.constant dense<[1.0, 2.0, 3.0]> : tensor<3xf32>
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK: memref.copy %[[const]], %[[alloc]]
  // CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<3xf32> to memref<*xf32>
  %unranked = tensor.cast %t : tensor<3xf32> to tensor<*xf32>
  // CHECK: call @maybe_writing_func(%[[casted]])
  func.call @maybe_writing_func(%unranked) : (tensor<*xf32>) -> ()
  return
}

// This function may write to buffer(%ptr).
func.func private @maybe_writing_func(%ptr : tensor<*xf32>)

// -----

// Test that other callables are left intact and don't cause trouble.

llvm.func @llvm_func()

func.func @call_llvm_func() {
  llvm.call @llvm_func() : () -> ()
  return
}

// -----

// CHECK-LABEL: func @to_memref_op_unsupported(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32,
func.func @to_memref_op_unsupported(
    %t1: tensor<?xf32> {bufferization.writable = true}, %idx1: index,
    %idx2: index, %idx3: index, %v1: vector<5xf32>) -> (vector<5xf32>) {

  // Insert a copy because we cannot analyze what happens with the result of a
  // to_memref op.
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK: memref.copy %[[arg0]], %[[alloc]]
  %0 = bufferization.to_memref %t1 : tensor<?xf32> to memref<?xf32>
  // CHECK: "test.foo"(%[[alloc]])
  "test.foo"(%0) : (memref<?xf32>) -> ()

  // CHECK: vector.transfer_read %[[arg0]]
  %cst = arith.constant 0.0 : f32
  %r1 = vector.transfer_read %t1[%idx3], %cst : tensor<?xf32>, vector<5xf32>

  return %r1 : vector<5xf32>
}

// -----

// Note: The cf.br canonicalizes away, so there's nothing to check here. There
// is a detailed test in ControlFlow/bufferize.mlir.

// CHECK-LABEL: func @br_in_func(
func.func @br_in_func(%t: tensor<5xf32>) -> tensor<5xf32> {
  cf.br ^bb1(%t : tensor<5xf32>)
^bb1(%arg1 : tensor<5xf32>):
  func.return %arg1 : tensor<5xf32>
}

// -----

// Cyclic call graphs with tensors are not supported by One-Shot Bufferize.
// However, if a function signature does not have any tensor arguments or
// results, calls to that function are not seen as an "edge" in the function
// call graph.
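// In the test below, @foo and @bar call each other, but @foo takes and returns
// only memrefs, so the call from @bar to @foo does not count as an edge and
// the analyzed call graph remains acyclic.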

// CHECK-LABEL: func.func @foo(%{{.*}}: memref<5xf32>) -> memref<5xf32>
func.func @foo(%m: memref<5xf32>) -> memref<5xf32> {
  %0 = tensor.empty() : tensor<5xf32>
  %1 = func.call @bar(%0, %m)
      : (tensor<5xf32>, memref<5xf32>) -> (memref<5xf32>)
  return %1 : memref<5xf32>
}

// CHECK: func.func @bar(%{{.*}}: memref<5xf32, strided<[?], offset: ?>>, %arg1: memref<5xf32>) -> memref<5xf32>
func.func @bar(%t: tensor<5xf32>, %m: memref<5xf32>) -> memref<5xf32> {
  %0 = func.call @foo(%m) : (memref<5xf32>) -> (memref<5xf32>)
  return %0 : memref<5xf32>
}

// -----

// A recursive function.

// CHECK-LABEL: func.func @foo(
// CHECK-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>> {
func.func @foo(%t: tensor<5xf32>) -> tensor<5xf32> {
  // We are conservative around recursive functions. The analysis cannot handle
  // them, so we have to assume the op operand of the call op bufferizes to a
  // memory read and write. This causes a copy in this test case.
  // CHECK: %[[copy:.*]] = memref.alloc() {alignment = 64 : i64} : memref<5xf32>
  // CHECK: memref.copy %[[arg0]], %[[copy]]
  // CHECK: %[[cast:.*]] = memref.cast %[[copy]] : memref<5xf32> to memref<5xf32, strided<[?], offset: ?>>
  // CHECK: %[[call:.*]] = call @foo(%[[cast]])
  %0 = call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)

  // CHECK: memref.load %[[arg0]]
  %c0 = arith.constant 0 : index
  %extr = tensor.extract %t[%c0] : tensor<5xf32>
  vector.print %extr : f32

  // CHECK: return %[[call]]
  return %0 : tensor<5xf32>
}

// -----

// Two functions calling each other recursively.

// CHECK-LABEL: func.func @foo(
// CHECK-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>> {
// CHECK: %[[call:.*]] = call @bar(%[[arg0]]) : (memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>>
// CHECK: return %[[call]]
// CHECK: }
func.func @foo(%t: tensor<5xf32>) -> tensor<5xf32> {
  %0 = call @bar(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
  return %0 : tensor<5xf32>
}

// CHECK-LABEL: func.func @bar(
// CHECK-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>> {
// CHECK: %[[call:.*]] = call @foo(%[[arg0]]) : (memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>>
// CHECK: return %[[call]]
// CHECK: }
func.func @bar(%t: tensor<5xf32>) -> tensor<5xf32> {
  %0 = call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
  return %0 : tensor<5xf32>
}

// -----

// The two func.return operands have different types after bufferization. Make
// sure that memref.cast ops are inserted.

// CHECK-LABEL: func @result_type_mismatch({{.*}}) -> memref<5xf32, strided<[?], offset: ?>>
func.func @result_type_mismatch(%c: i1) -> tensor<5xf32> {
  // CHECK: %[[alloc:.*]] = memref.alloc() {alignment = 64 : i64} : memref<10xf32>
  %t = tensor.empty() : tensor<10xf32>
  cf.cond_br %c, ^bb1, ^bb2
^bb1:
  // CHECK: %[[m0:.*]] = memref.subview %[[alloc]][0] [5] [2] : memref<10xf32> to memref<5xf32, strided<[2]>>
  // CHECK: %[[cast0:.*]] = memref.cast %[[m0]] : memref<5xf32, strided<[2]>> to memref<5xf32, strided<[?], offset: ?>>
  %0 = tensor.extract_slice %t[0][5][2] : tensor<10xf32> to tensor<5xf32>
  // CHECK: return %[[cast0]] : memref<5xf32, strided<[?], offset: ?>
  return %0 : tensor<5xf32>
^bb2:
  // CHECK: %[[m1:.*]] = memref.subview %[[alloc]][2] [5] [1] : memref<10xf32> to memref<5xf32, strided<[1], offset: 2>>
  // CHECK: %[[cast1:.*]] = memref.cast %[[m1]] : memref<5xf32, strided<[1], offset: 2>> to memref<5xf32, strided<[?], offset: ?>>
  %1 = tensor.extract_slice %t[2][5][1] : tensor<10xf32> to tensor<5xf32>
  // CHECK: return %[[cast1]] : memref<5xf32, strided<[?], offset: ?>>
  return %1 : tensor<5xf32>
}