// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Try different heuristics. Not checking the result, just make sure that we do
// not crash.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=bottom-up-from-terminators" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=top-down" -split-input-file -o /dev/null

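// A note for readers (an illustration, not output checked by this file): in
// test-analysis-only mode, One-Shot Bufferize does not rewrite the IR; it
// annotates each op with `__inplace_operands_attr__`, one entry per operand:
// "true" means the operand bufferizes in place, "false" means a buffer copy
// must be inserted, and "none" marks non-tensor operands. A hypothetical op
// mixing all three kinds of operands would be annotated like:
//   %r = "test.op"(%idx, %a, %b) {__inplace_operands_attr__ =
//       ["none", "true", "false"]} : (index, tensor<?xf32>, tensor<?xf32>)
//       -> tensor<?xf32>
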
// TODO: Extract op-specific test cases and move them to their respective
// dialects.

//===----------------------------------------------------------------------===//
// Simple cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_fun(
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
//  CHECK-SAME:              bufferization.access = "read"
                             %B : tensor<?xf32> {bufferization.writable = true})
//  CHECK-SAME:              bufferization.access = "read"
  -> (tensor<4xf32>, tensor<8xf32>)
{
  // Since tensor.extract_slice is not used in a write, it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>

  return %r0, %r1: tensor<4xf32>, tensor<8xf32>
}
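
// Note (not checked output): with bufferize-function-boundaries, the analysis
// also annotates each function argument with a `bufferization.access`
// attribute ("read", "write", "read-write" or "none") summarizing how the
// function body uses the corresponding buffer.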

// -----

// CHECK-LABEL: func @insert_slice_fun(
func.func @insert_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
//  CHECK-SAME:             bufferization.access = "read"
                            %B : tensor<?xf32> {bufferization.writable = true},
//  CHECK-SAME:             bufferization.access = "read-write"
                            %C : tensor<4xf32> {bufferization.writable = false})
//  CHECK-SAME:             bufferization.access = "read"
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // must bufferize out of place.
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
  %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // bufferizes inplace.
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}
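
// Note (an informal reading, not checked output): `__equivalent_func_args__`
// annotates the terminator with one entry per returned value: the index of
// the function argument whose buffer the returned value is equivalent to, or
// -1 if it is equivalent to none. Above, [-1, 1] says %r0 required a fresh
// buffer while %r1 reuses the buffer of %B (the insert into %B was in place).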

// -----

// CHECK-LABEL: func @conflict_on_B(
func.func @conflict_on_B(%A : tensor<4x4xf32> {bufferization.writable = true},
//  CHECK-SAME:          bufferization.access = "read"
                         %B : tensor<4x4xf32> {bufferization.writable = true})
//  CHECK-SAME:          bufferization.access = "read-write"
  -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
  // matmul output operand interferes with input operand.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %C = linalg.matmul  ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand interferes with input operand.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul  ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand does not interfere with input operand.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1]
  return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
}

//===----------------------------------------------------------------------===//
// Length-1 producer-consumer cases.
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_extract_slice(
func.func @extract_slice_extract_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
//  CHECK-SAME:         bufferization.access = "read"
    %B : tensor<?xf32> {bufferization.writable = false})
//  CHECK-SAME:         bufferization.access = "read"
  -> (tensor<2xf32>, tensor<2xf32>)
{
  // Since tensor.extract_slice is not used in a write, it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>

  return %r1, %r3: tensor<2xf32>, tensor<2xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_insert_slice(
func.func @insert_slice_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
//  CHECK-SAME:         bufferization.access = "read-write"
    %A2 : tensor<4xf32> {bufferization.writable = true},
//  CHECK-SAME:          bufferization.access = "read-write"
    %A3 : tensor<2xf32> {bufferization.writable = true},
//  CHECK-SAME:          bufferization.access = "read"
    %B : tensor<?xf32> {bufferization.writable = false},
//  CHECK-SAME:         bufferization.access = "read"
    %B2 : tensor<4xf32> {bufferization.writable = false},
//  CHECK-SAME:          bufferization.access = "read"
    %B3 : tensor<2xf32> {bufferization.writable = false})
//  CHECK-SAME:          bufferization.access = "read"
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
  %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
  %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func.func @extract_slice_nonmatching_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false},
    %idx: index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is an overlapping tensor.extract_slice that does not match, so it
  // must bufferize out of place.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r1 can bufferize inplace fine.
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r3 does not bufferize inplace because %B is not inplaceable.
  // %r2 is an overlapping tensor.extract_slice that does not match, but it
  // does not alias with the buffer coming from %r3, so it can actually
  // bufferize inplace.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r3 cannot bufferize inplace since %B is not inplaceable.
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]}
  %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_matching_insert_slice
func.func @extract_slice_matching_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false})
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is a tensor.extract_slice that matches, so it can also be bufferized
  // inplace.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r2 is a tensor.extract_slice that matches %r3, so it can be bufferized
  // inplace.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // tensor.insert_slice cannot bufferize inplace.
  // This should have been captured by a canonicalization pattern and it would
  // be unproductive to have special logic in bufferization to encode matching
  // insert_slice(extract_slice(A), A).
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: @read_of_matching_insert_slice_source
func.func @read_of_matching_insert_slice_source(
    %A : tensor<?xf32> {bufferization.writable = true},
    %idx : index,
    %idx2 : index)
  -> (tensor<?xf32>, vector<5xf32>)
{
  %cst = arith.constant 0.0 : f32
  %cst2 = arith.constant 1.0 : f32

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %2, %3 : tensor<?xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
func.func @read_of_matching_insert_slice_source_interleaved(
    %A : tensor<?xf32> {bufferization.writable = true},
    %idx : index,
    %idx2 : index,
    %idx3 : index)
  -> (tensor<?xf32>, vector<5xf32>)
{
  %cst = arith.constant 0.0 : f32
  %cst2 = arith.constant 1.0 : f32

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>

  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?xf32>) -> tensor<?xf32>

  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %6, %3 : tensor<?xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func.func @extract_slice_linalg_readonly_use(
    %A : tensor<?x?xf32> {bufferization.writable = false},
    %B : tensor<4x4xf32> {bufferization.writable = false},
    %C : tensor<4x4xf32> {bufferization.writable = true})
  ->  (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // tensor.extract_slice is only used as a read, so there is no interference
  // irrespective of the user's inplace status.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // matmul output operand is not inplaceable at the function boundary.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul  ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand is inplaceable at the function boundary.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul  ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%C: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 2]
  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = false},
    %C : tensor<?x?xf32> {bufferization.writable = true})
  ->  (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to a write in %D but it is not inplace.
  // So this is only ever read and can bufferize inplace.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 3. %sB has a read interference in %E, so the write to it in %D does
  // not bufferize inplace.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 2. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C which is inplaceable.
  // As a consequence this is bufferized inplace.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul  ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_double_extract_slice
func.func @insert_slice_double_extract_slice(
    %s1: index,
    %s2: index,
    %s3: index,
    %s4: index,
    %A: tensor<8x6xf32> {bufferization.writable = false},
    %B: tensor<6x6xf32> {bufferization.writable = false},
    %C: tensor<30x20xf32> {bufferization.writable = true})
  -> tensor<30x20xf32>
{
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]}
  %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor<?x?xf32>

  //      CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor<?x?xf32>) -> tensor<?x?xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]}
  %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<?x?xf32> into tensor<30x20xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [6]
  return %20 : tensor<30x20xf32>
}

//===----------------------------------------------------------------------===//
// Transitive cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = false},
    %C : tensor<?x?xf32> {bufferization.writable = true})
  ->  (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to an inplace write in %D.
  // %sB backward propagates to %B which is not inplaceable.
  // As a consequence this is bufferized out of place.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 3. %sB backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 2. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C which is inplaceable.
  // As a consequence this is bufferized inplace.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  //     CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @nested_extract_slice_and_insert
func.func @nested_extract_slice_and_insert(
    %A : tensor<?x?xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = true},
    %C : tensor<?x?xf32> {bufferization.writable = true},
    %idx : index,
    %sz1 : index,
    %sz2 : index)
  ->  (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
{
  %f0 = arith.constant 0.0 : f32

  // 2-level matching tensor.extract_slice / tensor.insert_slice into
  // non-inplaceable %A.
  //   - %rA is not inplaceable because %A is not inplaceable at the function
  //     boundary.
  //   - Once %rA is deemed not inplaceable, nothing prevents %rsA from being
  //     inplaceable.
  //   - This propagates to %FA and %ssA being inplaceable.
  //   - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
  //     inplaceable and so %sA is not inplaceable.
  //     CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
  %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 3-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %B.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
  %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 2-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %C with a twist.
  // Throw a wrench in the system: %rsC production sizes do not match %ssC.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  // The tensor.insert_slice that would be a candidate for matching does not
  // actually match. That tensor.insert_slice can still be bufferized inplace
  // nonetheless, but this tensor.extract_slice, which bufferizes to an
  // inplace write, cannot.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
  %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor<?x4xf32>) -> tensor<?x4xf32>
  %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor<?x4xf32> into tensor<?x?xf32>
  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2]
  return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}

// -----

//===----------------------------------------------------------------------===//
// Cross function boundary cases.
//===----------------------------------------------------------------------===//

func.func private @foo(tensor<64xf32>)

// CHECK-LABEL: dependence_through_call
func.func @dependence_through_call(%I : tensor<64xf32> {bufferization.writable = true}) {
  %f1 = arith.constant 1.000000e+00 : f32
  %f2 = arith.constant 2.000000e+00 : f32

  // 2. %B already bufferizes inplace; %A would alias it and have a different
  // value. The calls to `foo` are determined to read conservatively, so %A
  // cannot bufferize inplace.
  //     CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>

  // 1. Bufferizes inplace: no alias to %A is yet possible.
  //     CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>

  call @foo(%A) : (tensor<64xf32>) -> ()
  call @foo(%B) : (tensor<64xf32>) -> ()

  return
}

// -----

func.func private @foo(tensor<64xf32>)

func.func private @bar(%A : tensor<64xf32>) {
  call @foo(%A) : (tensor<64xf32>) -> ()
  return
}

func.func @read_dependence_through_scf_and_call(
    %I : tensor<64xf32> {bufferization.writable = true},
    %I2 : tensor<64xf32> {bufferization.writable = true}) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %f1 = arith.constant 1.000000e+00 : f32
  %f2 = arith.constant 2.000000e+00 : f32

  // 5. %B bufferizes inplace; %A would alias it and have a different value.
  // The calls to `foo` are determined to read conservatively, so %A cannot
  // bufferize inplace.
  //     CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>

  // 4. Bufferizes inplace: no alias to %A is yet possible.
  //     CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>

  // 3. The loop does not read or write, so it bufferizes inplace.
  //      CHECK: scf.for
  // CHECK-NEXT: scf.yield
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  //      CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]}
  %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B)
    -> (tensor<64xf32>, tensor<64xf32>)
  {
    scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32>
  }
  call @foo(%r#0) : (tensor<64xf32>) -> ()
  call @foo(%r#1) : (tensor<64xf32>) -> ()

  // 2. %B2 already bufferizes inplace; %A2 would alias it and have a
  // different value. The calls to `foo` are determined to read
  // conservatively, so %A2 cannot bufferize inplace.
  //     CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>

  // 1. Bufferizes inplace: no alias to %A2 is yet possible.
  //     CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>

  call @bar(%A2) : (tensor<64xf32>) -> ()
  call @bar(%B2) : (tensor<64xf32>) -> ()
  return
}
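
// Note on the loop annotations above (informal): the scf.for operand list is
// (lb, ub, step, iter_args...), so ["none", "none", "none", "true", "true"]
// marks the three index operands as non-tensors and both iter_args as
// bufferizing in place.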

// -----

//===----------------------------------------------------------------------===//
// Transitive cases through extract_slice.
//===----------------------------------------------------------------------===//

// CHECK-LABEL: func @write_into_constant_via_alias
func.func @write_into_constant_via_alias(%v : vector<5xi32>,
                                    %s1 : index, %s2 : index,
                                    %s3 : index) -> tensor<?xi32> {
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
  return %r : tensor<?xi32>
}
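
// Note (informal): %A above is an arith.constant, which bufferizes to
// read-only memory, so the slice aliasing it must be copied before the
// transfer_write; hence the "false" on the extract_slice.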

// -----

func.func @matmul_on_tensors(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
    -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %cst_1 = arith.constant 1.000000e+00 : f32

  %7 = bufferization.alloc_tensor() : tensor<256x256xf32>

  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  //      CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
  %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
  %r = linalg.matmul
         ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
        outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2]
  return %r : tensor<256x256xf32>
}

// -----

func.func @matmul_on_tensors(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
    -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %cst_1 = arith.constant 1.000000e+00 : f32

  %7 = bufferization.alloc_tensor() : tensor<256x256xf32>

  //     CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
  %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
  %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  //      CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
  %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
  %r = linalg.matmul
         ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
        outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2]
  return %r : tensor<256x256xf32>
}

// -----

//===----------------------------------------------------------------------===//
// A chain of tensor.insert_slice ops is better traversed in reverse order,
// without prioritizing the tensor.insert_slice ops.
//===----------------------------------------------------------------------===//

// CHECK-LABEL: func @insert_slice_chain(
func.func @insert_slice_chain(
    %v1: vector<32x90xf32>,
    %v2: vector<30x90xf32>,
    %arg0: tensor<62x126xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
// CHECK-SAME: bufferization.access = "none"
    %arg1: tensor<126x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
// CHECK-SAME: bufferization.access = "none"
    %arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
  -> tensor<62x90xf32> attributes {passthrough = [["prefer-vector-width", "512"]], target_cpu = "skylake-avx512"}
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32

  //      CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [4]
  return %15 : tensor<62x90xf32>
}

// -----

//===----------------------------------------------------------------------===//
// Insert point issue cases.
//===----------------------------------------------------------------------===//

// Only test IR validity wrt dominance.
// CHECK-LABEL: func @ip
func.func @ip(%t: tensor<10x20xf32> {bufferization.writable = true},
         %x: index, %y: index, %v: vector<5x6xf32>)
  -> tensor<10x20xf32>
{
  %c0 = arith.constant 0 : index
  %c256 = arith.constant 256 : index
  %c257 = arith.constant 257 : index
  %r = scf.for %arg0 = %c0 to %c257 step %c256 iter_args(%arg1 = %t) -> (tensor<10x20xf32>) {
    %t1 = tensor.extract_slice %arg1[%x, 0] [5, %y] [1, 1] : tensor<10x20xf32> to tensor<5x?xf32>
    %t11 = tensor.extract_slice %t1[0, 0] [5, %y] [1, 1] : tensor<5x?xf32> to tensor<5x?xf32>
    %t2 = vector.transfer_write %v, %t11[%c0, %c0] : vector<5x6xf32>, tensor<5x?xf32>
    %t3 = tensor.insert_slice %t2 into %arg1[%x, 0] [5, %y] [1, 1] : tensor<5x?xf32> into tensor<10x20xf32>
    scf.yield %t3 : tensor<10x20xf32>
  }

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %r : tensor<10x20xf32>
}

// -----

#accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"]
}

// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
    %t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME:          bufferization.access = "read"
    %t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME:          bufferization.access = "write"
  -> (tensor<?xf32>, tensor<?xf32>){

  //      CHECK: linalg.generic
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]
  %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
                               outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
      ^bb(%0: f32, %1: f32, %2 : f32) :
        linalg.yield %0, %0 : f32, f32
    } -> (tensor<?xf32>, tensor<?xf32>)

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [1, -1]
  return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

#accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"]
}

// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
    %t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME:          bufferization.access = "read"
    %t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME:          bufferization.access = "write"
        -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){

  //      CHECK: linalg.generic
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"]
  %o:3 = linalg.generic #trait
          ins(%t1 : tensor<?xf32>)
          outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
      ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
        linalg.yield %0, %0, %0 : f32, f32, f32
    } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [1, -1, -1]
  return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @double_insert_slice_into_alias
func.func @double_insert_slice_into_alias(
    %v1: vector<32x90xf32>,
    %v2: vector<30x90xf32>,
    %arg2: tensor<62x90xf32> {bufferization.writable = true},
    %s1: index, %s2: index, %s3: index, %s4: index)
  -> (tensor<62x90xf32>, tensor<?x?xf32>)
{
  %c0 = arith.constant 0 : index

  // This extract_slice cannot bufferize inplace because both the operand and
  // the result are modified and returned separately.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
  %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2, -1]
  return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
}

// -----

// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_1
func.func @interleaved_extract_insert_slice_chain_1(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // TODO: This should bufferize inplace once we have a proper range analysis.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>


  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>


  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_2
func.func @interleaved_extract_insert_slice_chain_2(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // The slices are overlapping, so this can never bufferize inplace.
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>


  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>


  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @extract_once_insert_twice
func.func @extract_once_insert_twice(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  //      CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  //      CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @some_use
func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
                    %v : vector<5xf32>) -> (tensor<?xf32>) {
  %idx = arith.constant 0 : index
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
  %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
  return %0 : tensor<?xf32>
}


// CHECK-LABEL: func @main_func
func.func @main_func(%A : tensor<?xf32> {bufferization.writable = true},
                     %v : vector<5xf32>) -> (tensor<?xf32>) {
  //      CHECK: call
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
  %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
  return %0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @to_tensor_op_not_writable
func.func @to_tensor_op_not_writable(%m: memref<?xf32>, %v: vector<5xf32>,
                                     %idx1: index, %idx2: index)
    -> vector<10xf32> {
  %0 = bufferization.to_tensor %m restrict : memref<?xf32> to tensor<?xf32>

  // Write to the tensor. Cannot be inplace because the buffer of the
  // to_tensor op is not writable.
  //      CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
  %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>

  // Read from the tensor and return result.
  %cst = arith.constant 0.0 : f32
  %r = vector.transfer_read %w[%idx2], %cst : tensor<?xf32>, vector<10xf32>
  return %r : vector<10xf32>
}

// -----

// CHECK-LABEL: func @inner_func
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %t : tensor<?xf32>
}

func.func @equivalent_func_arg(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
  // This test does not check IR. It just asserts there is no failure due to
  // non-equivalent scf.for yield values.
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// CHECK-LABEL: func @inner_func_2
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  //      CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %0 : tensor<?xf32>
}

func.func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
  // This test does not check IR. It just asserts there is no failure due to
  // non-equivalent scf.for yield values.
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_read_one
//  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_one(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  //      CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "false", "true"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  //      CHECK: tensor.insert
  // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  //      CHECK: tensor.extract
  // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t1[%idx] : tensor<?xf32>

  return %f, %w : f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_read_both
//  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_both(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  //      CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "false", "false"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  //      CHECK: tensor.insert
  // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  //      CHECK: tensor.extract
  // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t1[%idx] : tensor<?xf32>
  //      CHECK: tensor.extract
  // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "none"]}
  %f2 = tensor.extract %t2[%idx] : tensor<?xf32>

  return %f, %f2, %w : f32, f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_no_conflict
//  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_no_conflict(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  //      CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "true", "true"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  //      CHECK: tensor.insert
  // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  //      CHECK: tensor.extract
  // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %w[%idx] : tensor<?xf32>

  return %f, %w : f32, tensor<?xf32>
}
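
// Note on the three select tests above (informal): writing to the select
// result forces the analysis to treat both candidate operands as written.
// An operand that is still read after the write must be copied ("false");
// when the only later read goes through the written value, both operands can
// stay in place ("true").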

// -----

// CHECK-LABEL: func @write_to_same_tensor_in_loop_out_of_place(
func.func @write_to_same_tensor_in_loop_out_of_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index)
  -> (tensor<?xf32>)
{
  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // The tensor.insert is out-of-place because %B is written multiple
    // times inside the loop.
1217    //      CHECK: tensor.insert
1218    // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "false", "none"]}
1219    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
1220    //      CHECK: tensor.insert_slice
1221    // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
1222    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
1223    scf.yield %A2 : tensor<?xf32>
1224  }
1225  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
1226
1227  return %r0 : tensor<?xf32>
1228}
1229
1230// -----
1231
1232// CHECK-LABEL: func @write_to_same_alloc_tensor_in_place(
1233func.func @write_to_same_alloc_tensor_in_place(
1234    %A : tensor<?xf32> {bufferization.writable = true},
1235    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index)
1236  -> (tensor<?xf32>)
1237{
1238  %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
1239
1240  // CHECK: scf.for {{.*}} {
1241  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
1242    %i2 = arith.index_cast %i : index to i32
1243    %i3 = arith.sitofp %i2 : i32 to f32
1244    // %B is written multiple times inside a loop, but it is an alloc_tensor.
    //      CHECK: tensor.insert
    // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "true", "none"]}
    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
    //      CHECK: tensor.insert_slice
    // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_alloc_tensor_out_of_place(
func.func @write_to_same_alloc_tensor_out_of_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32)
  -> (tensor<?xf32>)
{
  %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
  %C = tensor.insert %f into %B[%lb] : tensor<?xf32>

  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // %C is written multiple times inside a loop. Even though %C aliases an
    // alloc_tensor, out-of-place bufferization is necessary because the
    // written alias (%C) is defined outside of the loop.
    //      CHECK: tensor.insert
    // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "false", "none"]}
    %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
    //      CHECK: tensor.insert_slice
    // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}
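
// Illustrative sketch, not FileCheck-verified: allocating (and initializing)
// the tensor inside the loop body removes the alias that is live across
// iterations, so the write would be expected to stay in place, at the cost of
// one allocation per iteration.
func.func @alloc_tensor_inside_loop_sketch(
    %A : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32)
  -> (tensor<?xf32>)
{
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    // Fresh allocation per iteration; no alias escapes the loop body.
    %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
    %C = tensor.insert %f into %B[%lb] : tensor<?xf32>
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  return %r0 : tensor<?xf32>
}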

// -----

// CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"})
func.func private @ext_func(%t: tensor<?xf32>)

// CHECK: func.func @private_func_read_write(%{{.*}}: tensor<5xf32> {bufferization.access = "read"})
func.func @private_func_read_write(%t: tensor<5xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // Bufferizes out-of-place because `ext_func` has no body and must
  // conservatively be assumed to modify the buffer.
  // CHECK: tensor.cast {{.*}} {__inplace_operands_attr__ = ["false"]}
  %0 = tensor.cast %t : tensor<5xf32> to tensor<?xf32>
  func.call @ext_func(%0) : (tensor<?xf32>) -> ()
  %1 = tensor.extract %t[%c0] : tensor<5xf32>
  return %1 : f32
}
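
// Illustrative sketch, not FileCheck-verified: when a private callee has a
// body, the analysis can infer the argument access from that body instead of
// assuming "read-write", so the cast below would be expected to remain in
// place.
func.func private @read_only_callee_sketch(%t: tensor<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // The argument is only read, never written.
  %0 = tensor.extract %t[%c0] : tensor<?xf32>
  return %0 : f32
}

func.func @caller_of_read_only_callee_sketch(%t: tensor<5xf32>) -> f32 {
  %0 = tensor.cast %t : tensor<5xf32> to tensor<?xf32>
  %1 = func.call @read_only_callee_sketch(%0) : (tensor<?xf32>) -> f32
  return %1 : f32
}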

// -----

// CHECK-LABEL: func.func private @print_buffer(tensor<*xf32> {bufferization.access = "read"})
func.func private @print_buffer(%t: tensor<*xf32> {bufferization.access = "read"})

// CHECK: func.func @private_func_read(%{{.*}}: tensor<5xf32> {bufferization.access = "read"})
func.func @private_func_read(%t: tensor<5xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // Bufferizes in-place because `print_buffer` is annotated as read-only.
  // CHECK: tensor.cast {{.*}} {__inplace_operands_attr__ = ["true"]}
  %0 = tensor.cast %t : tensor<5xf32> to tensor<*xf32>
  // CHECK: call @print_buffer(%cast) {__inplace_operands_attr__ = ["true"]}
  func.call @print_buffer(%0) : (tensor<*xf32>) -> ()
  %1 = tensor.extract %t[%c0] : tensor<5xf32>
  return %1 : f32
}
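
// Illustrative sketch, not FileCheck-verified, assuming the analysis honors a
// "write" access annotation on a declaration the same way it honors "read"
// above: a write-only callee does not read its operand, so no copy should be
// needed for a writable argument that is not read again afterwards.
func.func private @fill_buffer_sketch(%t: tensor<?xf32> {bufferization.access = "write"})

func.func @write_only_callee_sketch(%t: tensor<?xf32> {bufferization.writable = true}) {
  func.call @fill_buffer_sketch(%t) : (tensor<?xf32>) -> ()
  return
}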

// -----

// CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"}, tensor<?xf32> {bufferization.access = "read-write"})
func.func private @ext_func(%t1: tensor<?xf32>, %t2: tensor<?xf32>)

// CHECK: func.func @private_func_two_params_writing(%{{.*}}: tensor<?xf32> {bufferization.access = "read"})
func.func @private_func_two_params_writing(%t: tensor<?xf32>) {
  // Both operands bufferize out-of-place because both are assumed to be
  // written by `ext_func` and they alias the same tensor.
  // CHECK: call @ext_func(%{{.*}}, %{{.*}}) {__inplace_operands_attr__ = ["false", "false"]}
  func.call @ext_func(%t, %t) : (tensor<?xf32>, tensor<?xf32>) -> ()
  return
}
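
// Illustrative sketch, not FileCheck-verified: passing two distinct,
// non-aliasing tensors removes the self-conflict at the call site, so neither
// operand would be expected to require a copy.
func.func private @ext_func_pair_sketch(%t1: tensor<?xf32>, %t2: tensor<?xf32>)

func.func @two_distinct_operands_sketch(
    %t0: tensor<?xf32> {bufferization.writable = true},
    %t1: tensor<?xf32> {bufferization.writable = true}) {
  func.call @ext_func_pair_sketch(%t0, %t1) : (tensor<?xf32>, tensor<?xf32>) -> ()
  return
}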

// -----

// CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"}) -> (tensor<5xf32>, tensor<6xf32>)
func.func private @ext_func(%t: tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)

// CHECK: func.func @private_func_aliasing(%{{.*}}: tensor<?xf32> {bufferization.access = "read"})
func.func @private_func_aliasing(%t: tensor<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // Bufferizes out-of-place because either of the two results may alias the
  // argument, and one of the results is read afterwards.
  // CHECK: call @ext_func(%{{.*}}) {__inplace_operands_attr__ = ["false"]} : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
  %0, %1 = func.call @ext_func(%t) : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
  %2 = tensor.extract %1[%c0] : tensor<6xf32>
  return %2 : f32
}
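
// Illustrative sketch, not FileCheck-verified: the potential aliasing of the
// results is only a problem when a result is read after the call. If no
// result is used afterwards, a writable operand would not be expected to
// require a copy.
func.func private @ext_func_results_sketch(%t: tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)

func.func @unread_results_sketch(%t: tensor<?xf32> {bufferization.writable = true}) {
  %0, %1 = func.call @ext_func_results_sketch(%t) : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
  return
}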

// -----

// CHECK-LABEL: func @recursive_function
func.func @recursive_function(%a: tensor<?xf32>, %b: tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
  // The analysis does not support recursive function calls and is conservative
  // around them.
  // CHECK: call @recursive_function
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "false"]}
  %0:2 = call @recursive_function(%a, %b) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>)
  return %0#0, %0#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-ALIAS-SETS-LABEL: func @multiple_returns(
func.func @multiple_returns(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
  cf.cond_br %c, ^bb1, ^bb2
^bb1:
  return %t0 : tensor<5xf32>
^bb2:
  return %t1 : tensor<5xf32>
}

//       CHECK-ALIAS-SETS: func @caller(
//  CHECK-ALIAS-SETS-SAME:     %{{.*}}: i1, %[[t0:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t1:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t2:.*]]: tensor<5xf32> {bufferization.access = "none"})
func.func @caller(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) {
  // Check that alias sets are computed correctly.
  //      CHECK-ALIAS-SETS: %[[result:.*]] = call @multiple_returns
  // CHECK-ALIAS-SETS-SAME: {__inplace_operands_attr__ = ["none", "true", "true", "true"],
  // CHECK-ALIAS-SETS-SAME:  __opresult_alias_set_attr__ = [{{\[}}"%[[result]]", "%[[t0]]", "%[[t1]]"]]}
  call @multiple_returns(%c, %t0, %t1, %t2) : (i1, tensor<5xf32>, tensor<5xf32>, tensor<5xf32>) -> (tensor<5xf32>)
  return
}

// -----

// CHECK-ALIAS-SETS-LABEL: func @multiple_equivalent_returns(
func.func @multiple_equivalent_returns(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
  cf.cond_br %c, ^bb1, ^bb2
^bb1:
  return %t0 : tensor<5xf32>
^bb2:
  return %t0 : tensor<5xf32>
}

//       CHECK-ALIAS-SETS: func @caller(
//  CHECK-ALIAS-SETS-SAME:     %{{.*}}: i1, %[[t0:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t1:.*]]: tensor<5xf32> {bufferization.access = "none"}, %[[t2:.*]]: tensor<5xf32> {bufferization.access = "none"})
func.func @caller(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
  // Check that equivalence sets are computed correctly.
  //      CHECK-ALIAS-SETS: %[[result:.*]] = call @multiple_equivalent_returns
  // CHECK-ALIAS-SETS-SAME: {__inplace_operands_attr__ = ["none", "true", "true", "true"],
  // CHECK-ALIAS-SETS-SAME:  __opresult_alias_set_attr__ = [{{\[}}"%[[result]]", "%[[t0]]"]]}
  %r = call @multiple_equivalent_returns(%c, %t0, %t1, %t2) : (i1, tensor<5xf32>, tensor<5xf32>, tensor<5xf32>) -> (tensor<5xf32>)
  //      CHECK-ALIAS-SETS: return {__equivalent_func_args__ = [1], __inplace_operands_attr__ = ["true"]} %[[result]] : tensor<5xf32>
  return %r : tensor<5xf32>
}
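
// Illustrative sketch, not FileCheck-verified: the same join happens through
// block arguments. %arg0 may be %t0 or %t1, so the alias set of the returned
// value would be expected to contain both, while no single function argument
// is equivalent to the result.
func.func @join_through_block_arg_sketch(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>) -> tensor<5xf32> {
  cf.cond_br %c, ^bb1(%t0 : tensor<5xf32>), ^bb1(%t1 : tensor<5xf32>)
^bb1(%arg0: tensor<5xf32>):
  return %arg0 : tensor<5xf32>
}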