// Note: Default is function-boundary-type-conversion=infer-layout-map
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

// Test bufferization using memref types that have fully dynamic layout maps.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-FULLY-DYNAMIC-LAYOUT-MAP


// Bufferization of bodiless function with no tensor return value.

// CHECK-LABEL: func private @private_func(memref<?xf32, strided<[?], offset: ?>>
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref<?xf32>)
func.func private @private_func(tensor<?xf32>) -> ()

// CHECK-LABEL: func private @private_func_2d(memref<?x?xf32, strided<[?, ?], offset: ?>>
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func_2d(memref<?x?xf32>)
func.func private @private_func_2d(tensor<?x?xf32>) -> ()

// CHECK-LABEL: func @empty_func() {
// CHECK-NO-LAYOUT-MAP-LABEL: func @empty_func() {
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @empty_func() {
func.func @empty_func() -> () {
  return
}

// -----

// A bodiless function that returns something that is not a tensor.

// CHECK: func private @external_func_with_return_val(memref<4xi32, strided{{.*}}>) -> f32
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func private @external_func_with_return_val(memref<4xi32,
// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: strided<[?], offset: ?>>
// CHECK-NO-LAYOUT-MAP-LABEL: func private @external_func_with_return_val(memref<4xi32>)
func.func private @external_func_with_return_val(tensor<4xi32>) -> f32

// -----

// Bufferization of bodiless function that returns a tensor.

// CHECK: func.func private @foo(memref<?xf32, strided<[?], offset: ?>>) -> (f32, memref<?xf32, strided<[?], offset: ?>>, f32)
func.func private @foo(%t : tensor<?xf32>) -> (f32, tensor<?xf32>, f32)

// CHECK: func.func @call_to_unknown_tensor_returning_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32, strided<[?], offset: ?>>) {
func.func @call_to_unknown_tensor_returning_func(%t : tensor<?xf32>) {
  // CHECK: call @foo(%[[arg0]]) : (memref<?xf32, strided<[?], offset: ?>>) -> (f32, memref<?xf32, strided<[?], offset: ?>>, f32)
  call @foo(%t) : (tensor<?xf32>) -> (f32, tensor<?xf32>, f32)
  return
}

// -----

// A function that returns a non-equivalent tensor with layout map.

// CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32, strided<[10, 1], offset: ?>>
//       CHECK:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
//       CHECK:   %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
//       CHECK:   return %[[subview]]

// CHECK-NO-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32>
//       CHECK-NO-LAYOUT-MAP:   %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
//       CHECK-NO-LAYOUT-MAP:   %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
//       CHECK-NO-LAYOUT-MAP:   %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref<2x?xf32>
//       CHECK-NO-LAYOUT-MAP:   memref.copy %[[subview]], %[[alloc_no_layout]]
// TODO: %alloc should be deallocated here, but we currently do not dealloc
// buffers that are inserted due to to_tensor/to_memref canonicalization (when
// the buffer types have different layout maps).
//       CHECK-NO-LAYOUT-MAP:   return %[[alloc_no_layout]]

// CHECK-FULLY-DYNAMIC-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32,
//  CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: strided<[?, ?], offset: ?>> {
func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>)
{
  %t = bufferization.alloc_tensor() : tensor<20x10xf32>
  %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1]
      : tensor<20x10xf32> to tensor<2x?xf32>
  return %0 : tensor<2x?xf32>
}

// -----

// CHECK-NO-LAYOUT-MAP-LABEL:   func.func @foo(
// CHECK-NO-LAYOUT-MAP-SAME:                   %[[VAL_0:.*]]: memref<3x8xf16>) -> memref<3x8xf16> {
// CHECK-NO-LAYOUT-MAP:           return %[[VAL_0]] : memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP:         }
func.func @foo(%arg0: tensor<3x8xf16>) -> tensor<3x8xf16> {
  return %arg0 : tensor<3x8xf16>
}

// CHECK-NO-LAYOUT-MAP-LABEL:   func.func @call_extract_slice(
// CHECK-NO-LAYOUT-MAP-SAME:                                  %[[VAL_0:.*]]: memref<4x8xf16>) -> memref<3x8xf16> {
// CHECK-NO-LAYOUT-MAP:           %[[VAL_1:.*]] = memref.subview %[[VAL_0]][1, 0] [3, 8] [1, 1] : memref<4x8xf16> to memref<3x8xf16, strided<[8, 1], offset: 8>>
// CHECK-NO-LAYOUT-MAP:           %[[VAL_2:.*]] = memref.alloc() {alignment = 64 : i64} : memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP:           memref.copy %[[VAL_1]], %[[VAL_2]] : memref<3x8xf16, strided<[8, 1], offset: 8>> to memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP:           %[[VAL_3:.*]] = call @foo(%[[VAL_2]]) : (memref<3x8xf16>) -> memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP:           return %[[VAL_3]] : memref<3x8xf16>
// CHECK-NO-LAYOUT-MAP:         }
func.func @call_extract_slice(%arg0: tensor<4x8xf16>) -> (tensor<3x8xf16>) {
  %0 = tensor.extract_slice %arg0[1, 0] [3, 8] [1, 1] : tensor<4x8xf16> to tensor<3x8xf16>
  %1 = call @foo(%0) : (tensor<3x8xf16>) -> tensor<3x8xf16>
  return %1 : tensor<3x8xf16>
}

// -----

// CHECK-LABEL: func private @private_func
// CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref<?xf32>) -> f32
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but that's OK because %t is writable.
// No alloc/copy should be inserted.

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//   CHECK-NOT: alloc
//   CHECK-NOT: copy
//       CHECK: call @private_func(%[[t]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = true}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// CHECK-LABEL: func private @private_func
func.func private @private_func(tensor<?xf32>) -> (f32)

// private_func may modify the buffer arg, but %t is not writable. A copy is needed.

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//       CHECK: %[[alloc:.*]] = memref.alloc
//   CHECK-DAG: memref.copy %[[t]], %[[alloc]]
//   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK: call @private_func(%[[casted]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// Test bufferization of a function without tensor args.

// CHECK-LABEL: func @func_without_tensor_args
func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
  // CHECK: %[[alloc:.*]] = memref.alloc()
  %0 = bufferization.alloc_tensor() : tensor<10xf32>

  %c0 = arith.constant 0 : index
  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
  %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32>

  %cst = arith.constant 0.0 : f32
  // CHECK: vector.transfer_read %[[alloc]]
  %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32>

  vector.print %r : vector<11xf32>
  return
}

// -----

// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
  // CHECK: return %[[call]] : f32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
  // CHECK-NOT: copy
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: %[[load:.*]] = memref.load %[[arg0]]
  %1 = tensor.extract %0[%c1] : tensor<?xf32>
  // CHECK: return %[[load]] : f32
  return %0, %1 : tensor<?xf32>, f32
}

// CHECK-LABEL: func @call_func_with_non_tensor_return(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @call_func_with_non_tensor_return(
    %t0: tensor<?xf32> {bufferization.writable = false}) -> (f32, tensor<?xf32>) {
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
  // CHECK: %[[call:.*]] = call @inner_func(%[[casted]])
  %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)

  // Note: The tensor return value cannot fold away because the CallOp
  // bufferized out-of-place.
  // CHECK: return %[[call]], %[[casted]] : f32, memref<?xf32
  return %1, %0 : f32, tensor<?xf32>
}

// -----

// A chain of function calls. The last function f0 potentially writes to the
// buffer. This becomes a problem when bufferizing main, where a copy must be
// inserted. (No copies in the other functions.)

// CHECK-LABEL: func private @f0(
func.func private @f0(tensor<?xf32>) -> (f32)

// CHECK-LABEL: func @f1(
//  CHECK-SAME:     %[[t1:.*]]: memref<?xf32
//       CHECK:   %[[r1:.*]] = call @f0(%[[t1]])
//       CHECK:   return %[[r1]]
func.func @f1(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f0(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @f2(
//  CHECK-SAME:     %[[t2:.*]]: memref<?xf32
//       CHECK:   %[[r2:.*]] = call @f1(%[[t2]])
//       CHECK:   return %[[r2]]
func.func @f2(%t: tensor<?xf32>) -> (f32) {
  %0 = call @f1(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t3:.*]]: memref<?xf32
//       CHECK: %[[alloc:.*]] = memref.alloc
//   CHECK-DAG: memref.copy %[[t3]], %[[alloc]]
//   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
//       CHECK: call @f2(%[[casted]])
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
  return %0 : f32
}

// -----

// This function does not read; it only writes. We need an alloc, but no copy.

// CHECK-LABEL: func @does_not_read(
//   CHECK-NOT:   alloc
//   CHECK-NOT:   copy
func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
  return %r : tensor<?xf32>
}

// CHECK-LABEL: func @main(
//  CHECK-SAME:     %[[t:.*]]: memref<?xf32
//       CHECK:   %[[alloc:.*]] = memref.alloc
//   CHECK-NOT:   copy
//       CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
//   CHECK-NOT:   copy
//       CHECK:   call @does_not_read(%[[casted]])
//       CHECK:   %[[r:.*]] = memref.load %[[casted]]
func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
  %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
  %idx = arith.constant 4 : index
  %r = tensor.extract %0[%idx] : tensor<?xf32>
  return %r : f32
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only.

//      CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
//      CHECK: func private @some_external_func(memref<4xi32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<4xi32>)

//      CHECK: func @main()
func.func @main() {
//  CHECK-DAG:   %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
//  CHECK-DAG:   %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, strided<[?], offset: ?>>
//  CHECK-DAG:   memref.copy %[[A]], %[[alloc]]
//      CHECK:   call @some_external_func(%[[B]]) : (memref<4xi32, strided<[?], offset: ?>>) -> ()
  call @some_external_func(%A) : (tensor<4xi32>) -> ()

  return
}

// -----

// Alloc and copy must be inserted because the arith.constant is read-only. The
// function call is inside of an scf.execute_region.

//      CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]>
//      CHECK: func private @some_external_func_within_scf_execute(memref<4xi32, strided<[?], offset: ?>>)
func.func private @some_external_func_within_scf_execute(tensor<4xi32>)

//      CHECK: func @main()
func.func @main() {
//  CHECK-DAG:   %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32>
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>

// Note: The scf.execute_region canonicalizes away.

//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
//  CHECK-DAG:   %[[B:.*]] = memref.cast %[[alloc]] : memref<4xi32> to memref<4xi32, strided<[?], offset: ?>>
//  CHECK-DAG:   memref.copy %[[A]], %[[alloc]]
//      CHECK:   call @some_external_func_within_scf_execute(%[[B]]) : (memref<4xi32, strided<[?], offset: ?>>) -> ()
  scf.execute_region {
    func.call @some_external_func_within_scf_execute(%A) : (tensor<4xi32>) -> ()
    scf.yield
  }

  return
}

// -----

// A write inside an scf.execute_region. An equivalent tensor is yielded.

// CHECK-LABEL: func @execute_region_test(
//  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
func.func @execute_region_test(%t1 : tensor<?xf32>)
    -> (f32, tensor<?xf32>, f32)
{
  %f1 = arith.constant 0.0 : f32
  %f2 = arith.constant 1.0 : f32
  %idx = arith.constant 7 : index

  // scf.execute_region is canonicalized away after bufferization. So just the
  // memref.store is left over.

  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: memref.store %{{.*}}, %[[m1]][%{{.*}}]
  %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
    %t2 = tensor.insert %f2 into %t1[%idx] : tensor<?xf32>
    scf.yield %f1, %t2, %f2 : f32, tensor<?xf32>, f32
  }

  // CHECK: return %{{.*}}, %{{.*}} : f32, f32
  return %0, %1, %2 : f32, tensor<?xf32>, f32
}

// -----

//      CHECK:  func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<?xf32>)

//      CHECK:  func @scf_for_with_tensor_insert_slice(
// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
func.func @scf_for_with_tensor_insert_slice(
    %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK-NEXT: scf.for
  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
      -> (tensor<?xf32>, tensor<?xf32>)
  {
    // CHECK-NEXT:   %[[SVA:.*]] = memref.subview %[[A]]
    // CHECK-NEXT:   memref.copy %[[C]], %[[SVA]] : memref<4xf32, strided<[?], offset: ?>> to memref<4xf32, strided<[?], offset: ?>>
    %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // CHECK-NEXT:   %[[SVB:.*]] = memref.subview %[[B]]
    // CHECK-NEXT:   memref.copy %[[C]], %[[SVB]] : memref<4xf32, strided<[?], offset: ?>> to memref<4xf32, strided<[?], offset: ?>>
    %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor<?xf32>

    // scf.yield is empty and is elided
    //  CHECK-NOT:   scf.yield
    scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
  }

  // Swapping the results requires bufferizing the whole function to figure
  // out which buffer is which.
  return %r0#1, %r0#0: tensor<?xf32>, tensor<?xf32>
}

//      CHECK:  func @bar(
// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
func.func @bar(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %C : tensor<4xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
//  CHECK-DAG:   call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]]
  %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) :
      (tensor<?xf32>, tensor<?xf32>, tensor<4xf32>, index, index, index)
        -> (tensor<?xf32>, tensor<?xf32>)

  // %r0#0 requires a copy because we have no idea what the function is doing.
//  CHECK-DAG:   %[[alloc:.*]] = memref.alloc
//  CHECK-DAG:   %[[casted:.*]] = memref.cast %[[alloc]]
//  CHECK-DAG:   memref.copy %[[B]], %[[alloc]]
// CHECK-NEXT:   call @some_external_func(%[[casted]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @some_external_func(%r0#0) : (tensor<?xf32>) -> ()

//      CHECK:   return
  return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}

// -----

//      CHECK:  func @init_and_dot(
// CHECK-SAME:    %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME:    %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME:    %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>>
func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
  // CHECK-NEXT:   %[[C0:.*]] = arith.constant 0{{.*}} : f32
  %v0 = arith.constant 0.0 : f32

  // CHECK-NEXT:   linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32, strided<[], offset: ?>>)
  %d = linalg.fill ins(%v0 : f32) outs(%c : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref<f32, strided<[], offset: ?>>)
  %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>)
    outs(%d: tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   return
  return %e : tensor<f32>
}

//      CHECK:  func @main()
func.func @main() {
  //  CHECK-DAG:   %[[C0:.*]] = arith.constant 0{{.*}} : f32
  //  CHECK-DAG:   %[[C1:.*]] = arith.constant 1{{.*}} : f32
  //  CHECK-DAG:   %[[C2:.*]] = arith.constant 2{{.*}} : f32
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32

  // CHECK-NEXT:   %[[A:.*]] = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
  // CHECK-NEXT:   %[[B:.*]] = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
  // CHECK-NEXT:   %[[C:.*]] = memref.alloc() {alignment = 64 : i64} : memref<f32>
  //  CHECK-DAG:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, strided<[?], offset: ?>>
  //  CHECK-DAG:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, strided<[?], offset: ?>>
  //  CHECK-DAG:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, strided<[], offset: ?>>
  %A = bufferization.alloc_tensor() : tensor<64xf32>
  %B = bufferization.alloc_tensor() : tensor<64xf32>
  %C = bufferization.alloc_tensor() : tensor<f32>

  //  CHECK-DAG:   linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
  //  CHECK-DAG:   linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
  //  CHECK-DAG:   linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref<f32>)
  %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
  %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
  %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
  %res = call @init_and_dot(%AA, %BB, %CC) :
    (tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>

  // CHECK-NEXT:   %[[dC:.*]] = memref.cast %[[cC]] : memref<f32, {{.*}}> to memref<*xf32>
  %res2 = tensor.cast %res: tensor<f32> to tensor<*xf32>

  // CHECK-NEXT:   call @printMemrefF32(%[[dC]]) : (memref<*xf32>) -> ()
  call @printMemrefF32(%res2) : (tensor<*xf32>) -> ()

  // CHECK-NEXT:   return
  return
}

//     CHECK:   func private @printMemrefF32(memref<*xf32>)
func.func private @printMemrefF32(tensor<*xf32>)

// -----

// CHECK: func private @external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @external_func(tensor<?xf32>)

//      CHECK: func @callee(
// CHECK-SAME:   %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME:   %[[B:[0-9a-zA-Z]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME:   %[[C:[0-9a-zA-Z]*]]: memref<?xf32, strided<[?], offset: ?>>
func.func @callee(
    %A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>},
    %B : tensor<?xf32>,
    %C : tensor<?xf32>) {
// CHECK-NEXT: %[[CASTED:.*]] = memref.cast %[[A]] : memref<?xf32> to memref<?xf32, strided<[?], offset: ?>>
// CHECK-NEXT: call @external_func(%[[CASTED]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @external_func(%A) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[B]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @external_func(%B) : (tensor<?xf32>) -> ()

// CHECK-NEXT: call @external_func(%[[C]]) : (memref<?xf32, strided<[?], offset: ?>>) -> ()
  call @external_func(%C) : (tensor<?xf32>) -> ()

  return
}

//      CHECK: func @entry(
// CHECK-SAME:   %[[A:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME:   %[[B:[0-9a-zA-Z]*]]: memref<?xf32>
// CHECK-SAME:   %[[C:[0-9a-zA-Z]*]]: memref<?xf32, strided<[?], offset: ?>>
func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %B : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i)[s0, s1] -> (i)>, bufferization.writable = false},
                 %C : tensor<?xf32> {bufferization.writable = false}) {
// Note: `callee` does not write to its bbArg directly, but `external_func`
// does. Inside `callee`, the writes via `external_func` do not cause a
// conflict. However, inside `entry`, the writes do cause a conflict because
// %A, %B and %C are not inplaceable. This test case shows that this kind of
// conflict detection has a "transitive" nature.
//  CHECK-DAG: %[[ALLOC_A:.*]] = memref.alloc
//  CHECK-DAG: %[[CASTED_A:.*]] = memref.cast %[[ALLOC_A]]
//  CHECK-DAG: %[[ALLOC_B:.*]] = memref.alloc
//  CHECK-DAG: %[[CASTED_B:.*]] = memref.cast %[[ALLOC_B]]
//  CHECK-DAG: %[[ALLOC_C:.*]] = memref.alloc
//  CHECK-DAG: %[[CASTED_C:.*]] = memref.cast %[[ALLOC_C]]
//  CHECK-DAG: memref.copy %[[A]], %[[ALLOC_A]]
//  CHECK-DAG: memref.copy %[[B]], %[[ALLOC_B]]
//  CHECK-DAG: memref.copy %[[C]], %[[ALLOC_C]]
// CHECK-NEXT: call @callee(%[[CASTED_A]], %[[CASTED_B]], %[[CASTED_C]])
  call @callee(%A, %B, %C) : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> ()
  return
}

// -----

// No alloc or copy inside of the loop.

// CHECK-LABEL: func @inner_func(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg(%t0: tensor<?xf32> {bufferization.writable = true},
                               %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK-NOT: alloc
  // CHECK-NOT: copy
  // CHECK: scf.for {{.*}} iter_args(%[[t1:.*]] = %[[arg0]])
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: call @inner_func(%[[t1]])
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    // CHECK: scf.yield %[[t1]]
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// inner_func_2 modifies the bbArg, but the loop yields the original value. A
// buffer copy must be inserted inside the loop.

// CHECK-LABEL: func @inner_func_2(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  // CHECK: memref.store %{{.*}}, %[[arg0]]
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @equivalent_func_arg_2(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32
func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = true},
                                 %c0: index, %c10: index, %c1: index) -> tensor<?xf32> {
  // CHECK: scf.for {{.*}} {
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    // CHECK: %[[alloc:.*]] = memref.alloc
    // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
    // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
    // CHECK: call @inner_func_2(%[[casted]])
    // CHECK-NOT: scf.yield
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %t1 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// Bufferize without fully dynamic layout maps.

// CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, strided{{.*}}>) -> vector<4xf32> {
// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
func.func @transfer_read(
    %A : tensor<?xf32> {bufferization.writable = false})
  -> (vector<4xf32>)
{
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

//       CHECK: %[[RES:.*]] = vector.transfer_read {{.*}} : memref<?xf32, strided{{.*}}>, vector<4xf32>
  %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32>

//       CHECK: return %[[RES]] : vector<4xf32>
  return %0 : vector<4xf32>
}

// -----

// CHECK-LABEL: func @main(
func.func @main() {
  // CHECK: %[[const:.*]] = memref.get_global
  %t = arith.constant dense<[1.0, 2.0, 3.0]> : tensor<3xf32>
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK: memref.copy %[[const]], %[[alloc]]
  // CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<3xf32> to memref<*xf32>
  %unranked = tensor.cast %t : tensor<3xf32> to tensor<*xf32>
  // CHECK: call @maybe_writing_func(%[[casted]])
  func.call @maybe_writing_func(%unranked) : (tensor<*xf32>) -> ()
  return
}

// This function may write to buffer(%ptr).
func.func private @maybe_writing_func(%ptr : tensor<*xf32>)

// -----

// Test if other callables are left intact and don't cause trouble.

llvm.func @llvm_func()

func.func @call_llvm_func() {
  llvm.call @llvm_func() : () -> ()
  return
}

// -----

// CHECK-LABEL: func @to_memref_op_unsupported(
//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32,
func.func @to_memref_op_unsupported(
    %t1: tensor<?xf32> {bufferization.writable = true}, %idx1: index,
    %idx2: index, %idx3: index, %v1: vector<5xf32>) -> (vector<5xf32>) {

  // Insert a copy because we cannot analyze what happens with the result of a
  // to_memref op.
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK: memref.copy %[[arg0]], %[[alloc]]
  %0 = bufferization.to_memref %t1 : tensor<?xf32> to memref<?xf32>
  // CHECK: "test.foo"(%[[alloc]])
  "test.foo"(%0) : (memref<?xf32>) -> ()

  // CHECK: vector.transfer_read %[[arg0]]
  %cst = arith.constant 0.0 : f32
  %r1 = vector.transfer_read %t1[%idx3], %cst : tensor<?xf32>, vector<5xf32>

  return %r1 : vector<5xf32>
}

// -----

// Note: The cf.br canonicalizes away, so there's nothing to check here. There
// is a detailed test in ControlFlow/bufferize.mlir.

// CHECK-LABEL: func @br_in_func(
func.func @br_in_func(%t: tensor<5xf32>) -> tensor<5xf32> {
  cf.br ^bb1(%t : tensor<5xf32>)
^bb1(%arg1 : tensor<5xf32>):
  func.return %arg1 : tensor<5xf32>
}

// -----

// Cyclic call graphs with tensors are not supported by One-Shot Bufferize.
// However, if a function signature does not have any tensor arguments or
// results, calls to that function are not seen as an "edge" in the function
// call graph.

// CHECK-LABEL: func.func @foo(%{{.*}}: memref<5xf32>) -> memref<5xf32>
func.func @foo(%m: memref<5xf32>) -> memref<5xf32> {
  %0 = tensor.empty() : tensor<5xf32>
  %1 = func.call @bar(%0, %m)
      : (tensor<5xf32>, memref<5xf32>) -> (memref<5xf32>)
  return %1 : memref<5xf32>
}

// CHECK: func.func @bar(%{{.*}}: memref<5xf32, strided<[?], offset: ?>>, %arg1: memref<5xf32>) -> memref<5xf32>
func.func @bar(%t: tensor<5xf32>, %m: memref<5xf32>) -> memref<5xf32> {
  %0 = func.call @foo(%m) : (memref<5xf32>) -> (memref<5xf32>)
  return %0 : memref<5xf32>
}

// -----

// A recursive function.

// CHECK-LABEL: func.func @foo(
//  CHECK-SAME:     %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>> {
func.func @foo(%t: tensor<5xf32>) -> tensor<5xf32> {
  // We are conservative around recursive functions. The analysis cannot handle
  // them, so we have to assume the op operand of the call op bufferizes to a
  // memory read and write. This causes a copy in this test case.
  // CHECK: %[[copy:.*]] = memref.alloc() {alignment = 64 : i64} : memref<5xf32>
  // CHECK: memref.copy %[[arg0]], %[[copy]]
  // CHECK: %[[cast:.*]] = memref.cast %[[copy]] : memref<5xf32> to memref<5xf32, strided<[?], offset: ?>>
  // CHECK: %[[call:.*]] = call @foo(%[[cast]])
  %0 = call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)

  // CHECK: memref.load %[[arg0]]
  %c0 = arith.constant 0 : index
  %extr = tensor.extract %t[%c0] : tensor<5xf32>
  vector.print %extr : f32

  // CHECK: return %[[call]]
  return %0 : tensor<5xf32>
}

// -----

// Two functions calling each other recursively.

// CHECK-LABEL: func.func @foo(
//  CHECK-SAME:     %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>> {
//       CHECK:   %[[call:.*]] = call @bar(%[[arg0]]) : (memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>>
//       CHECK:   return %[[call]]
//       CHECK: }
func.func @foo(%t: tensor<5xf32>) -> tensor<5xf32> {
  %0 = call @bar(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
  return %0 : tensor<5xf32>
}

// CHECK-LABEL: func.func @bar(
//  CHECK-SAME:     %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>> {
//       CHECK:   %[[call:.*]] = call @foo(%[[arg0]]) : (memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32, strided<[?], offset: ?>>
//       CHECK:   return %[[call]]
//       CHECK: }
func.func @bar(%t: tensor<5xf32>) -> tensor<5xf32>{
  %0 = call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
  return %0 : tensor<5xf32>
}

// -----

// The two func.return operands have different types after bufferization. Make
// sure that memref.cast ops are inserted.

// CHECK-LABEL: func @result_type_mismatch({{.*}}) -> memref<5xf32, strided<[?], offset: ?>>
func.func @result_type_mismatch(%c: i1) -> tensor<5xf32> {
  // CHECK: %[[alloc:.*]] = memref.alloc() {alignment = 64 : i64} : memref<10xf32>
  %t = tensor.empty() : tensor<10xf32>
  cf.cond_br %c, ^bb1, ^bb2
^bb1:
  // CHECK: %[[m0:.*]] = memref.subview %[[alloc]][0] [5] [2] : memref<10xf32> to memref<5xf32, strided<[2]>>
  // CHECK: %[[cast0:.*]] = memref.cast %[[m0]] : memref<5xf32, strided<[2]>> to memref<5xf32, strided<[?], offset: ?>>
  %0 = tensor.extract_slice %t[0][5][2] : tensor<10xf32> to tensor<5xf32>
  // CHECK: return %[[cast0]] : memref<5xf32, strided<[?], offset: ?>
  return %0 : tensor<5xf32>
^bb2:
  // CHECK: %[[m1:.*]] = memref.subview %[[alloc]][2] [5] [1] : memref<10xf32> to memref<5xf32, strided<[1], offset: 2>>
  // CHECK: %[[cast1:.*]] = memref.cast %[[m1]] : memref<5xf32, strided<[1], offset: 2>> to memref<5xf32, strided<[?], offset: ?>>
  %1 = tensor.extract_slice %t[2][5][1] : tensor<10xf32> to tensor<5xf32>
  // CHECK: return %[[cast1]] : memref<5xf32, strided<[?], offset: ?>>
  return %1 : tensor<5xf32>
}