// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE

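// Three configurations are exercised:
//   * The default prefix checks fully-dynamic-layout-map with out-param
//     promotion.
//   * CHECK-NO-LAYOUT checks identity-layout-map with out-param promotion.
//   * CHECK-BASELINE checks infer-layout-map *without* out-param promotion,
//     as a reference for the output before results are promoted.
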
// Note: function-boundary-type-conversion=infer-layout-map with
// buffer-results-to-out-params is an unsupported combination.
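// (Presumably because the out-params pass must allocate the result buffer at
// each call site, and memref.alloc can only produce an identity layout, so an
// inferred result layout such as strided<[20, 1], offset: ?> could not be
// materialized by the caller.)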

// Note: This bufferization is not very efficient yet, but it works.

// CHECK-LABEL: func @callee(
//  CHECK-SAME:              %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>,
//  CHECK-SAME:              %[[arg1:.*]]: memref<5xf32, strided<[?], offset: ?>>) {
// This alloc is not needed, but it is inserted due to the out-of-place
// bufferization of the tensor.insert. With a better layering of the out-param
// promotion pass, this alloc could be avoided (see the sketch after this
// check block).
//       CHECK:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
//       CHECK:   memref.copy %[[arg0]], %[[alloc]]
//       CHECK:   memref.store %{{.*}}, %[[alloc]]
//       CHECK:   %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK:   memref.copy %[[casted]], %[[arg1]]
//       CHECK:   return
//       CHECK: }
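
// Sketch of the output one would ideally like here (not FileCheck'd): the
// callee could write through the out param directly, without the temporary:
//
//   memref.copy %arg0, %arg1
//   memref.store %cst, %arg1[%c0]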

// CHECK-NO-LAYOUT-LABEL: func @callee(
//  CHECK-NO-LAYOUT-SAME:     %[[arg0:.*]]: memref<5xf32>,
//  CHECK-NO-LAYOUT-SAME:     %[[arg1:.*]]: memref<5xf32>) {
//       CHECK-NO-LAYOUT:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
//       CHECK-NO-LAYOUT:   memref.copy %[[arg0]], %[[alloc]]
//       CHECK-NO-LAYOUT:   memref.store {{.*}}, %[[alloc]]
//       CHECK-NO-LAYOUT:   memref.copy %[[alloc]], %[[arg1]]

// CHECK-BASELINE-LABEL: func @callee(
//  CHECK-BASELINE-SAME:     %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32> {
//       CHECK-BASELINE:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
//       CHECK-BASELINE:   memref.copy %[[arg0]], %[[alloc]]
//       CHECK-BASELINE:   memref.store {{.*}}, %[[alloc]]
//       CHECK-BASELINE:   return %[[alloc]]
func.func @callee(%t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 8.0 : f32
  // This must bufferize out-of-place: %t is also returned (and thus still
  // read) alongside %1, so an in-place insert would overwrite it.
  %1 = tensor.insert %cst into %t[%c0] : tensor<5xf32>
  // Instead of returning %1, copy it into the new out param. %t disappears
  // entirely because its buffer is equivalent to a bbArg.
  return %t, %1 : tensor<5xf32>, tensor<5xf32>
}
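
// Taken together, the passes rewrite the callee's bufferized signature in
// stages, roughly as follows (sketch; L1/L2 stand for whatever layout maps
// the boundary convention picks):
//
//   (memref<5xf32, L1>) -> (memref<5xf32, L1>, memref<5xf32, L2>)  // bufferized
//   (memref<5xf32, L1>) -> memref<5xf32, L2>                       // -drop-equivalent-buffer-results
//   (memref<5xf32, L1>, memref<5xf32, L2>) -> ()                   // -buffer-results-to-out-params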

// CHECK: func @main(%[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> (f32, f32) {
// CHECK:   %[[alloc:.*]] = memref.alloc() : memref<5xf32>
// CHECK:   %[[casted:.*]] = memref.cast %[[alloc]] : memref<5xf32> to memref<5xf32, strided<[?], offset: ?>>
// CHECK:   call @callee(%[[arg0]], %[[casted]])
// CHECK:   %[[l1:.*]] = memref.load %[[arg0]]
// CHECK:   %[[l2:.*]] = memref.load %[[casted]]
// CHECK:   return %[[l1]], %[[l2]]
// CHECK: }

// CHECK-NO-LAYOUT-LABEL: func @main(%{{.*}}: memref<5xf32>) -> (f32, f32) {
//       CHECK-NO-LAYOUT:   %[[alloc:.*]] = memref.alloc() : memref<5xf32>
//       CHECK-NO-LAYOUT:   call @callee(%{{.*}}, %[[alloc]])
func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
  %c0 = arith.constant 0 : index
  %0, %1 = func.call @callee(%t)
      : (tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>)
  %2 = tensor.extract %0[%c0] : tensor<5xf32>
  %3 = tensor.extract %1[%c0] : tensor<5xf32>
  return %2, %3 : f32, f32
}

// -----

// CHECK-LABEL: func @callee(
//  CHECK-SAME:     %{{.*}}: index,
//  CHECK-SAME:     %[[r:.*]]: memref<2x5xf32, strided<[?, ?], offset: ?>>) {
//       CHECK:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
//       CHECK:   %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, strided<[20, 1], offset: ?>>
//       CHECK:   %[[casted:.*]] = memref.cast %[[subview]]
//       CHECK:   memref.copy %[[casted]], %[[r]]
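
// Note: The cast is a leftover of the fully-dynamic-layout-map boundary
// convention: the bufferized callee returned the subview as
// memref<2x5xf32, strided<[?, ?], offset: ?>>, and the promotion pass then
// replaces that return with a copy into %r.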

// CHECK-NO-LAYOUT-LABEL: func @callee(
//  CHECK-NO-LAYOUT-SAME:              %{{.*}}: index,
//  CHECK-NO-LAYOUT-SAME:              %[[r:.*]]: memref<2x5xf32>) {
//       CHECK-NO-LAYOUT:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
//       CHECK-NO-LAYOUT:   %[[subview:.*]] = memref.subview %[[alloc]]
// Note: This alloc is not needed, but it is inserted before the returned
// buffer is promoted to an out param, to reconcile the mismatching layout
// maps of the return value and the function signature (see the sketch after
// this check block).
//       CHECK-NO-LAYOUT:   %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<2x5xf32>
//       CHECK-NO-LAYOUT:   memref.copy %[[subview]], %[[alloc2]]
//       CHECK-NO-LAYOUT:   memref.copy %[[alloc2]], %[[r]]
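
// In principle the intermediate %alloc2 is avoidable: memref.copy accepts
// operands with differing layouts (as the copy into %alloc2 itself shows),
// so a single copy would do (sketch, not FileCheck'd):
//
//   memref.copy %subview, %r : memref<2x5xf32, strided<[20, 1], offset: ?>> to memref<2x5xf32>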

// CHECK-BASELINE-LABEL: func @callee(
//  CHECK-BASELINE-SAME:     %{{.*}}: index) -> memref<2x5xf32, strided<[20, 1], offset: ?>> {
//       CHECK-BASELINE:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
//       CHECK-BASELINE:   %[[subview:.*]] = memref.subview %[[alloc]]
//       CHECK-BASELINE:   return %[[subview]]
func.func @callee(%idx: index) -> tensor<2x5xf32> {
  %0 = bufferization.alloc_tensor() : tensor<10x20xf32>
  %1 = tensor.extract_slice %0[%idx, %idx][2, 5][1, 1] : tensor<10x20xf32> to tensor<2x5xf32>
  return %1 : tensor<2x5xf32>
}

// CHECK: func @main(
// CHECK:   %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK:   %[[casted:.*]] = memref.cast %[[alloc]] : memref<2x5xf32> to memref<2x5xf32, strided<[?, ?], offset: ?>>
// CHECK:   call @callee(%{{.*}}, %[[casted]])
// CHECK:   memref.load %[[casted]]

// CHECK-NO-LAYOUT: func @main(
// CHECK-NO-LAYOUT:   %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
// CHECK-NO-LAYOUT:   call @callee(%{{.*}}, %[[alloc]])
// CHECK-NO-LAYOUT:   memref.load %[[alloc]]

// CHECK-BASELINE: func @main(
// CHECK-BASELINE:   %[[call:.*]] = call @callee
// CHECK-BASELINE:   memref.load %[[call]]
func.func @main(%idx: index) -> f32 {
  %c0 = arith.constant 0 : index
  %0 = func.call @callee(%idx) : (index) -> (tensor<2x5xf32>)
  %1 = tensor.extract %0[%c0, %c0] : tensor<2x5xf32>
  return %1 : f32
}