1// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only bufferize-function-boundaries" -split-input-file | FileCheck %s
2
/// All interleavings of matmul(fill(extract(alloc_tensor)), fill(extract(alloc_tensor)), %arg2).
/// In every interleaving, all ops should be inplaceable except the first one.
5
6// -----
7
8// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1234(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  // NOTE: %arg0/%arg1 are unused in this body; they only carry bufferization
  // attributes across the function boundary.
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 1234: fill, fill, extract, extract. Only the first op using the
  // shared alloc_tensor %0 must bufferize out-of-place ("false"), because %0
  // is read again by the second fill; everything after is in-place ("true").
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
32
33// -----
34
35// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1243(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 1243: fill, fill, then the two extracts swapped relative to _1234.
  // Only the first op using %0 must bufferize out-of-place ("false").
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
59
60// -----
61
62// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1324(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 1324: fill, extract, fill, extract. Only the first op using %0
  // must bufferize out-of-place ("false"); the rest are in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
86
87// -----
88
89// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1342(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 1342: here the second fill writes into a slice of %0 (%4) instead
  // of %0 itself. Only the first op using %0 must bufferize out-of-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
113
114// -----
115
116// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1423(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 1423: only the first op using the shared alloc_tensor %0 must
  // bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
140
141// -----
142
143// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_1432(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 1432: only the first op using the shared alloc_tensor %0 must
  // bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
167
168// -----
169
170// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2134(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 2134: the second fill (%2) comes first in program order, so it is
  // the op that must bufferize out-of-place ("false"); the rest are in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
194
195// -----
196
197// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2143(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 2143: only the first op using the shared alloc_tensor %0 must
  // bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
221
222// -----
223
224// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2314(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 2314: the first fill (%1) now writes into a slice of %0 (%3). Only
  // the first op using %0 must bufferize out-of-place ("false").
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
248
249// -----
250
251// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2341(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 2341: only the first op using the shared alloc_tensor %0 must
  // bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
275
276// -----
277
278// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2413(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 2413: only the first op using the shared alloc_tensor %0 must
  // bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
302
303// -----
304
305// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_2431(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 2431: only the first op using the shared alloc_tensor %0 must
  // bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
329
330// -----
331
332// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3124(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 3124: an extract_slice of %0 is now the first op, so it is the one
  // that must bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
356
357// -----
358
359// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3142(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 3142: both fills write into slices of %0. Only the first
  // extract_slice of %0 must bufferize out-of-place ("false").
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
383
384// -----
385
386// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3214(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 3214: only the first op using the shared alloc_tensor %0 (the
  // extract_slice) must bufferize out-of-place ("false"); rest is in-place.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
409
410// -----
411
412// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3241(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 3241: only the first op using the shared alloc_tensor %0 (the
  // extract_slice) must bufferize out-of-place ("false"); rest is in-place.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
436
437// -----
438
439// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3412(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 3412: both extracts of %0 first, then both fills. Only the first
  // extract_slice must bufferize out-of-place ("false").
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
463
464// -----
465
466// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_3421(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 3421: only the first op using the shared alloc_tensor %0 (the
  // extract_slice) must bufferize out-of-place ("false"); rest is in-place.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
490
491// -----
492
493// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4123(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 4123: the 16x256 extract_slice of %0 comes first, so it is the op
  // that must bufferize out-of-place ("false"); everything after is in-place.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
517
518// -----
519
520// CHECK-LABEL: func @fill_extract_matmul_
func.func @fill_extract_matmul_4132(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // Order 4132: only the first op using the shared alloc_tensor %0 (the
  // extract_slice) must bufferize out-of-place ("false"); rest is in-place.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
544
545// -----
546
// CHECK-LABEL: func @fill_extract_matmul_
/// Test case "4213": the four fill/extract ops appear in the order
/// extract(%4), fill(%2), fill(%1), extract(%3). Per the suite header, every
/// op should bufferize in-place except the first one.
func.func @fill_extract_matmul_4213(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  /// Not in-place: %0 is overwritten by the fill into %1 below while the
  /// filled slice %2 (aliasing %4) is still live, so a copy is required.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  /// %arg2 is writable, so the matmul output also bufferizes in-place.
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
571
572// -----
573
// CHECK-LABEL: func @fill_extract_matmul_
/// Test case "4231": the four fill/extract ops appear in the order
/// extract(%4), fill(%2), extract(%3), fill(%1). Here both fills write into
/// extracted slices of %0. Per the suite header, every op should bufferize
/// in-place except the first one.
func.func @fill_extract_matmul_4231(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  /// Not in-place: the 16x256 and 256x16 slices of %0 overlap in the top-left
  /// 16x16 corner, so the second fill would clobber this slice's fill result
  /// if both aliased %0; the analysis copies the first extract instead.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  /// %arg2 is writable, so the matmul output also bufferizes in-place.
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
598
599// -----
600
// CHECK-LABEL: func @fill_extract_matmul_
/// Test case "4312": the four fill/extract ops appear in the order
/// extract(%4), extract(%3), fill(%1), fill(%2). Here both fills write into
/// extracted slices of %0. Per the suite header, every op should bufferize
/// in-place except the first one.
func.func @fill_extract_matmul_4312(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  /// Not in-place: the 16x256 and 256x16 slices of %0 overlap in the top-left
  /// 16x16 corner, so the two fill results cannot both alias %0; the analysis
  /// copies the first extract instead.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  /// %arg2 is writable, so the matmul output also bufferizes in-place.
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
625
626// -----
627
// CHECK-LABEL: func @fill_extract_matmul_
/// Test case "4321": the four fill/extract ops appear in the order
/// extract(%4), extract(%3), fill(%2), fill(%1). Here both fills write into
/// extracted slices of %0. Per the suite header, every op should bufferize
/// in-place except the first one.
func.func @fill_extract_matmul_4321(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %cst_0 = arith.constant 1.000000e+00 : f32
  %0 = bufferization.alloc_tensor() : tensor<256x256xf32>

  /// Not in-place: the 16x256 and 256x16 slices of %0 overlap in the top-left
  /// 16x16 corner, so the two fill results cannot both alias %0; the analysis
  /// copies the first extract instead.
  // CHECK: {__inplace_operands_attr__ = ["false"]}
  %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
  /// %arg2 is writable, so the matmul output also bufferizes in-place.
  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
  return %5 : tensor<256x256xf32>
}
652