// RUN: mlir-opt --one-shot-bufferize="dialect-filter=linalg,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -canonicalize -cse -split-input-file %s | FileCheck %s
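// Notes on the options: `dialect-filter` restricts bufferization to linalg
// and bufferization ops, `copy-before-write` skips the in-place analysis and
// bufferizes every write out-of-place (a memref.copy is inserted only when
// the previous contents are actually needed), and
// `unknown-type-conversion=identity-layout-map` gives the materialized memref
// types identity layouts.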

#map0 = affine_map<(d0) -> (d0)>

// In-depth checks for a basic case. This tests that:
// - bufferization.to_memref / bufferization.to_tensor materializations are
//   properly inserted,
// - the payload is correctly carried over, and
// - the affine maps are correctly carried over.
// Later tests do not check all of these details.

// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL:   func @basic(
// CHECK-SAME:                %[[TENSOR:.*]]: tensor<4xf32>) -> tensor<4xf32> {
// CHECK-DAG:       %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<4xf32> to memref<4xf32>
// CHECK-DAG:       %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK:           linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]}
// CHECK-SAME:      ins(%[[MEMREF]] : memref<4xf32>)
// CHECK-SAME:      outs(%[[RESULT_MEMREF]] : memref<4xf32>) {
// CHECK:           ^bb0(%[[RESULT1:.*]]: f32, %[[UNUSED:.*]]: f32):
// CHECK:             %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32
// CHECK:             linalg.yield %[[DIM1]] : f32
// CHECK:           }
// CHECK:           %[[RESULT:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<4xf32>
// CHECK:           return %[[RESULT]] : tensor<4xf32>
func.func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> {
    %0 = linalg.generic {
      indexing_maps = [#map0, #map0],
      iterator_types = ["parallel"]
    } ins(%arg0 : tensor<4xf32>)
      outs(%arg0 : tensor<4xf32>) {
      ^bb0(%gen_arg1: f32, %out: f32):
        %tmp1 = math.exp %gen_arg1 : f32
        linalg.yield %tmp1 : f32
    } -> tensor<4xf32>
    return %0 : tensor<4xf32>
}

// -----

#map0 = affine_map<(d0) -> (d0)>

// Same as above, but with a tensor.empty op.
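// tensor.empty carries no defined data, so the output buffer is a plain
// allocation of the requested dynamic size (the memref.alloc checked below)
// and nothing needs to be copied into it.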

// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @empty_tensor(
// CHECK-SAME:      %[[IN:.*]]: tensor<?xf32>, %[[SIZE:.*]]: index)
// CHECK-DAG:     %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : tensor<?xf32> to memref<?xf32>
// CHECK-DAG:     %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref<?xf32>
// CHECK:         linalg.generic
// CHECK-SAME:    ins(%[[MEMREF]] : memref<?xf32>)
// CHECK-SAME:    outs(%[[OUT_BUF]] : memref<?xf32>) {
func.func @empty_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
  %init = tensor.empty(%size) : tensor<?xf32>
  %0 = linalg.generic {
    indexing_maps = [#map0, #map0],
    iterator_types = ["parallel"]
  } ins(%in : tensor<?xf32>)
    outs(%init : tensor<?xf32>) {
    ^bb0(%gen_arg1: f32, %out: f32):
      %tmp1 = math.exp %gen_arg1 : f32
      linalg.yield %tmp1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

#map0 = affine_map<(d0) -> (d0)>

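// Each tensor result gets its own allocation, and the bufferized region takes
// one scalar block argument per operand: one for the input and one per
// output, as checked on the ^bb0 line below.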
// CHECK-LABEL:   func @multiple_results
// CHECK:           %[[RESULT0:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK:           %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK:           linalg.generic
// CHECK-SAME:      ins(%{{.*}} : memref<4xf32>)
// CHECK-SAME:      outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
// CHECK-NEXT:      ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
    %0, %1 = linalg.generic {
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel"]
    } ins(%arg0 : tensor<4xf32>)
      outs(%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) {
      ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32):
        %tmp1 = math.exp %gen_arg1 : f32
        linalg.yield %tmp1, %tmp1 : f32, f32
    } -> (tensor<4xf32>, tensor<4xf32>)
    return %0, %1 : tensor<4xf32>, tensor<4xf32>
}

// -----

#map_2d = affine_map<(d0, d1) -> (d0, d1)>

// Check that the allocs for the output operands are sized from the dynamic
// dimensions of the input: both outputs use the identity indexing map, so
// both buffers are allocated with the input's two dynamic sizes.

// CHECK-LABEL:   func @dynamic_results(
// CHECK-SAME:                          %[[ARG:.*]]: tensor<?x?xf32>
// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[DIM0:.*]] = tensor.dim %[[ARG]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG:       %[[DIM1:.*]] = tensor.dim %[[ARG]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG:       %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref<?x?xf32>
// CHECK-DAG:       %[[RESULT1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref<?x?xf32>
// CHECK-DAG:       %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : tensor<?x?xf32> to memref<?x?xf32>
// CHECK:           linalg.generic
// CHECK-SAME:      ins(%[[MEMREF_ARG]] : memref<?x?xf32>)
// CHECK-SAME:      outs(%[[RESULT0]], %[[RESULT1]] : memref<?x?xf32>, memref<?x?xf32>)
func.func @dynamic_results(%arg0: tensor<?x?xf32>)
         -> (tensor<?x?xf32>, tensor<?x?xf32>) {
    %0, %1 = linalg.generic {
      indexing_maps = [#map_2d, #map_2d, #map_2d],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg0 : tensor<?x?xf32>)
      outs(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
      ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32):
        %tmp1 = math.exp %gen_arg1 : f32
        linalg.yield %tmp1, %tmp1 : f32, f32
    } -> (tensor<?x?xf32>, tensor<?x?xf32>)
    return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
}

// -----

#accesses = [
  affine_map<(i, j, k) -> (j, i, k)>,
  affine_map<(i, j, k) -> (i, j)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}

// Check the bufferization of init tensors.
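// The input is indexed through the transposed map (j, i, k) and reduced over
// k into the 3x2 output. The region yields its output block argument (%v1),
// i.e. the init operand is read, so its contents must be copied into the
// newly allocated output buffer: this is the memref.copy checked below.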

// CHECK-LABEL:   func @generic_with_init_tensor(
// CHECK-SAME:                                   %[[ARG0_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>,
// CHECK-SAME:                                   %[[ARG1_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> {
// CHECK-DAG:       %[[INIT_BUFFER:.*]] = memref.alloc() {{.*}} : memref<3x2xf32>
// CHECK-DAG:       %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : tensor<2x3x4xvector<3x4xi4>>
// CHECK-DAG:       %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : tensor<3x2xf32>
// CHECK:           memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32>
// CHECK:           linalg.generic
// CHECK-SAME:      ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>)
// CHECK-SAME:      outs(%[[INIT_BUFFER]] : memref<3x2xf32>) {
func.func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>,
  %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {

  %0 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
   outs(%arg1 : tensor<3x2xf32>) {
    ^bb(%v0: vector<3x4xi4>, %v1: f32):
      linalg.yield %v1 : f32
  } -> tensor<3x2xf32>

  return %0 : tensor<3x2xf32>
}

// -----

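// linalg.fill overwrites its output without reading it, so the new allocation
// does not need to be initialized from %arg0's contents: no memref.copy is
// expected below.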
// CHECK-LABEL: func @bufferize_fill(
// CHECK-SAME:    %[[IN:.*]]: tensor<?xf32>
func.func @bufferize_fill(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  %c0 = arith.constant 0.0 : f32
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : memref<?xf32>)
  // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<?xf32>
  // CHECK: return %[[TENSOR]]
  %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

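// linalg.dot accumulates into its output, i.e. the init value is read, so the
// contents of %out are copied into the new allocation before the op writes to
// it.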
// CHECK-LABEL:   func @bufferize_dot
func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor<f32>) -> tensor<f32> {
  %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>)
                    outs(%out : tensor<f32>) -> tensor<f32>
  return %dot : tensor<f32>
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // TODO: The copy is not necessary.
  // CHECK: memref.copy {{.*}}, %[[ALLOC]]
  // CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
  // CHECK-SAME:       outs(%[[ALLOC]] : memref<f32>)
  // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<f32>
  // CHECK: return %[[OUT_TENSOR]]
}

// -----

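// linalg.softmax computes its result from the input alone; the output operand
// is never read, so no copy into the new allocation is needed (enforced by
// the CHECK-NOT below).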
// CHECK-LABEL: func @bufferize_softmax(
//  CHECK-SAME:     %[[arg0:.*]]: tensor<2x16x32xf32>, %[[arg1:.*]]: tensor<2x16x32xf32>
//       CHECK:   %[[m0:.*]] = bufferization.to_memref %[[arg0]]
//       CHECK:   %[[alloc:.*]] = memref.alloc()
//   CHECK-NOT:   memref.copy
//       CHECK:   linalg.softmax dimension(2) ins(%[[m0]] : {{.*}}) outs(%[[alloc]] : {{.*}})
//       CHECK:   %[[result:.*]] = bufferization.to_tensor %[[alloc]]
//       CHECK:   return %[[result]]
func.func @bufferize_softmax(%arg0: tensor<2x16x32xf32>, %arg1: tensor<2x16x32xf32>) -> tensor<2x16x32xf32> {
  %1 = linalg.softmax dimension(2)
      ins(%arg0 : tensor<2x16x32xf32>)
      outs(%arg1: tensor<2x16x32xf32>) -> tensor<2x16x32xf32>
  return %1 : tensor<2x16x32xf32>
}
209