// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE

// Note: function-boundary-type-conversion=infer-layout-map together with
// buffer-results-to-out-params is an unsupported combination, so the
// CHECK-BASELINE RUN line omits the out param promotion pass.

// Note: This bufferization is not very efficient yet, but it works.

// CHECK-LABEL: func @callee(
// CHECK-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[arg1:.*]]: memref<5xf32, strided<[?], offset: ?>>) {
// This alloc is not needed, but it is inserted due to the out-of-place
// bufferization of the tensor.insert. With a better layering of the out param
// promotion pass, this alloc could be avoided.
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK: memref.copy %[[arg0]], %[[alloc]]
// CHECK: memref.store %{{.*}}, %[[alloc]]
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
// CHECK: memref.copy %[[casted]], %[[arg1]]
// CHECK: return
// CHECK: }

// CHECK-NO-LAYOUT-LABEL: func @callee(
// CHECK-NO-LAYOUT-SAME: %[[arg0:.*]]: memref<5xf32>,
// CHECK-NO-LAYOUT-SAME: %[[arg1:.*]]: memref<5xf32>) {
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK-NO-LAYOUT: memref.copy %[[arg0]], %[[alloc]]
// CHECK-NO-LAYOUT: memref.store {{.*}}, %[[alloc]]
// CHECK-NO-LAYOUT: memref.copy %[[alloc]], %[[arg1]]

// CHECK-BASELINE-LABEL: func @callee(
// CHECK-BASELINE-SAME: %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32> {
// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK-BASELINE: memref.copy %[[arg0]], %[[alloc]]
// CHECK-BASELINE: memref.store {{.*}}, %[[alloc]]
// CHECK-BASELINE: return %[[alloc]]
func.func @callee(%t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 8.0 : f32
  // This must bufferize out-of-place.
  %1 = tensor.insert %cst into %t[%c0] : tensor<5xf32>
  // Instead of returning %1, copy it into a new out param. %t disappears
  // entirely because its buffer is equivalent to a bbArg.
  return %t, %1 : tensor<5xf32>, tensor<5xf32>
}

// CHECK: func @main(%[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> (f32, f32) {
// CHECK: %[[alloc:.*]] = memref.alloc() : memref<5xf32>
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<5xf32> to memref<5xf32, strided<[?], offset: ?>>
// CHECK: call @callee(%[[arg0]], %[[casted]])
// CHECK: %[[l1:.*]] = memref.load %[[arg0]]
// CHECK: %[[l2:.*]] = memref.load %[[casted]]
// CHECK: return %[[l1]], %[[l2]]
// CHECK: }

// CHECK-NO-LAYOUT-LABEL: func @main(%{{.*}}: memref<5xf32>) -> (f32, f32) {
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<5xf32>
// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]])
func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
  %c0 = arith.constant 0 : index
  %0, %1 = func.call @callee(%t)
      : (tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>)
  %2 = tensor.extract %0[%c0] : tensor<5xf32>
  %3 = tensor.extract %1[%c0] : tensor<5xf32>
  return %2, %3 : f32, f32
}

// -----

// CHECK-LABEL: func @callee(
// CHECK-SAME: %{{.*}}: index,
// CHECK-SAME: %[[r:.*]]: memref<2x5xf32, strided<[?, ?], offset: ?>>) {
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
// CHECK: %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, strided<[20, 1], offset: ?>>
// CHECK: %[[casted:.*]] = memref.cast %[[subview]]
// CHECK: memref.copy %[[casted]], %[[r]]

// CHECK-NO-LAYOUT-LABEL: func @callee(
// CHECK-NO-LAYOUT-SAME: %{{.*}}: index,
// CHECK-NO-LAYOUT-SAME: %[[r:.*]]: memref<2x5xf32>) {
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
// CHECK-NO-LAYOUT: %[[subview:.*]] = memref.subview %[[alloc]]
// Note: This alloc is not needed, but it is inserted before the returned
// buffer is promoted to an out param, to reconcile the mismatching layout
// maps of the return value and the function signature.
// CHECK-NO-LAYOUT: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<2x5xf32>
// CHECK-NO-LAYOUT: memref.copy %[[subview]], %[[alloc2]]
// CHECK-NO-LAYOUT: memref.copy %[[alloc2]], %[[r]]

// CHECK-BASELINE-LABEL: func @callee(
// CHECK-BASELINE-SAME: %{{.*}}: index) -> memref<2x5xf32, strided<[20, 1], offset: ?>> {
// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
// CHECK-BASELINE: %[[subview:.*]] = memref.subview %[[alloc]]
// CHECK-BASELINE: return %[[subview]]
func.func @callee(%idx: index) -> tensor<2x5xf32> {
  %0 = bufferization.alloc_tensor() : tensor<10x20xf32>
  %1 = tensor.extract_slice %0[%idx, %idx][2, 5][1, 1] : tensor<10x20xf32> to tensor<2x5xf32>
  return %1 : tensor<2x5xf32>
}

// CHECK: func @main(
// CHECK: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<2x5xf32> to memref<2x5xf32, strided<[?, ?], offset: ?>>
// CHECK: call @callee(%{{.*}}, %[[casted]])
// CHECK: memref.load %[[casted]]

// CHECK-NO-LAYOUT: func @main(
// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]])
// CHECK-NO-LAYOUT: memref.load %[[alloc]]

// CHECK-BASELINE: func @main(
// CHECK-BASELINE: %[[call:.*]] = call @callee
// CHECK-BASELINE: memref.load %[[call]]
func.func @main(%idx: index) -> f32 {
  %c0 = arith.constant 0 : index
  %0 = func.call @callee(%idx) : (index) -> (tensor<2x5xf32>)
  %1 = tensor.extract %0[%c0, %c0] : tensor<2x5xf32>
  return %1 : f32
}