// RUN: mlir-opt --transform-interpreter -split-input-file -verify-diagnostics %s | FileCheck %s

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @static_sizes_output_divisible
func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
                                         %arg1: tensor<12x25xf32>,
                                         %arg2: tensor<24x25xf32>,
                                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  //      CHECK: %[[T0:.*]] = tensor.extract_slice %
  //      CHECK: %[[T1:.*]] = tensor.extract_slice %
  //      CHECK: %[[T2:.*]] = tensor.extract_slice %
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.

  //      CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
  //      CHECK: tensor.yield %[[CST]]
  //      CHECK: %[[T4:.*]] = tensor.pad %[[T1]] nofold

  //      CHECK: %[[T5:.*]] = linalg.matmul
  // CHECK-SAME:              ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
  // CHECK-SAME:              outs(%[[T2]] : tensor<4x5xf32>)

  //      CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]]
  //      CHECK: %[[T7:.*]] = bufferization.materialize_in_destination %[[T6]] in %[[T2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
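    // Pad all three iteration dims with 0.0f, requesting `nofold` pads for
    // the two inputs only. The copy-back of the result into the original
    // destination is the `bufferization.materialize_in_destination` op bound
    // to %copy_back; `num_associations` below checks exactly one was created.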
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.op<"bufferization.materialize_in_destination">)
    %p = transform.num_associations %copy_back : (!transform.op<"bufferization.materialize_in_destination">) -> !transform.param<i64>
    // expected-remark @below {{1}}
    transform.debug.emit_param_as_remark %p : !transform.param<i64>
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @pad_to_multiple
func.func @pad_to_multiple(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>,
                           %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  //      CHECK: linalg.matmul
  // CHECK-SAME:     ins(%{{.*}}, %{{.*}} : tensor<4x7xf32>, tensor<7x6xf32>)
  // CHECK-SAME:     outs(%{{.*}} : tensor<4x6xf32>)
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
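    // Each padded size is rounded up to the given multiple: m = 4 stays 4
    // (multiple of 2), n = 5 becomes 6 (multiple of 2), and k (bounded by 7
    // via the affine.min) stays 7 (multiple of 1) - hence the 4x7, 7x6 and
    // 4x6 shapes checked above.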
    %padded, %pad, %copy_back = transform.structured.pad %0 pad_to_multiple_of [2, 2, 1] {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @parametrized_pad_to_multiple
func.func @parametrized_pad_to_multiple(%arg0: tensor<24x12xf32>,
                                        %arg1: tensor<12x25xf32>,
                                        %arg2: tensor<24x25xf32>,
                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  //      CHECK: linalg.matmul
  // CHECK-SAME:     ins(%{{.*}}, %{{.*}} : tensor<4x7xf32>, tensor<7x6xf32>)
  // CHECK-SAME:     outs(%{{.*}} : tensor<4x6xf32>)
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
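    // Same as above, but the multiple for the first dim is supplied as a
    // transform parameter instead of a literal, which adds a
    // !transform.param<i64> operand to the op's functional type.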
    %c2 = transform.param.constant 2 : i64 -> !transform.param<i64>
    %padded, %pad, %copy_back = transform.structured.pad %0 pad_to_multiple_of [%c2, 2, 1] {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

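// Same transformation as in the first test, but the matmul's operands are
// produced by `tensor.empty` rather than `tensor.extract_slice`.
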
// CHECK-LABEL: @static_sizes_output_divisible_on_empty_op
func.func @static_sizes_output_divisible_on_empty_op(%arg0: tensor<24x12xf32>,
    %arg1: tensor<12x25xf32>, %arg2: tensor<24x25xf32>, %iv0: index,
    %iv1: index, %iv2: index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  //      CHECK: %[[T0:.*]] = tensor.empty
  //      CHECK: %[[T1:.*]] = tensor.empty
  //      CHECK: %[[T2:.*]] = tensor.empty
  %1 = tensor.empty(%0) : tensor<4x?xf32>
  %2 = tensor.empty(%0) : tensor<?x5xf32>
  %3 = tensor.empty() : tensor<4x5xf32>

  //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.

  //      CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
  //      CHECK: tensor.yield %[[CST]]
  //      CHECK: %[[T4:.*]] = tensor.pad %[[T1]] nofold

  //      CHECK: %[[T5:.*]] = linalg.matmul
  // CHECK-SAME:              ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
  // CHECK-SAME:              outs(%[[T2]] : tensor<4x5xf32>)
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

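// Verify the error when a padding value's type does not match the element
// type of the corresponding operand.
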
func.func @pad(%arg0: tensor<24x12xf32>,
               %arg1: tensor<12x25xf32>,
               %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
  // expected-note @below {{when applied to this op}}
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
  func.return %0 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // expected-error @below {{op expects a padding value of type 'f32', got 0 : i32}}
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0: i32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

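// Verify the error when a padding value given as a string does not parse to
// the operand's element type.
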
func.func @pad(%arg0: tensor<24x12xf32>,
               %arg1: tensor<12x25xf32>,
               %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
  // expected-note @below {{when applied to this op}}
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
  func.return %0 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // expected-error @below {{expects a padding that parses to 'f32', got "{foo}"}}
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=["{foo}", 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// With all padded dims being static, there's nothing to pad. However, with the
// `nofold` attribute set (see `nofold_flags`), the corresponding pad Ops are
// preserved.

// CHECK-LABEL: @zero_pad_static(
func.func @zero_pad_static(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {

// CHECK-SAME:      %[[ARG_0:.*]]: tensor<24x12xf32>,
// CHECK-SAME:      %[[ARG_1:.*]]: tensor<12x25xf32>,
// CHECK-SAME:      %[[ARG_2:.*]]: tensor<24x25xf32>) -> tensor<24x25xf32> {

// CHECK:           %[[PAD_ARG_0:.*]] = tensor.pad %[[ARG_0]] nofold low[0, 0] high[0, 0]
// CHECK:           %[[PAD_ARG_1:.*]] = tensor.pad %[[ARG_1]] nofold low[0, 0] high[0, 0]
// CHECK-NOT:       tensor.pad

// CHECK:           %[[MATMUL:.*]] = linalg.matmul
// CHECK-SAME:      ins(%[[PAD_ARG_0:.*]], %[[PAD_ARG_1:.*]] : tensor<24x12xf32>, tensor<12x25xf32>)
// CHECK-SAME:      outs(%[[ARG_2]]
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
  func.return %0 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
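    // Note - the init operand's nofold flag is 0 below, so its zero-size pad
    // folds away (hence the CHECK-NOT above).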
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// With all padded dims being static, there's nothing to pad. However, with the
// `nofold` attribute set (see `nofold_flags`), the corresponding pad Ops are
// preserved. Same as above, but some dims are now dynamic.

// CHECK-LABEL: @zero_pad_dynamic(
func.func @zero_pad_dynamic(%arg0: tensor<?x12xf32>,
                            %arg1: tensor<12x?xf32>,
                            %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {

// CHECK-SAME:      %[[ARG_0:.*]]: tensor<?x12xf32>,
// CHECK-SAME:      %[[ARG_1:.*]]: tensor<12x?xf32>,
// CHECK-SAME:      %[[ARG_2:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {

// CHECK:           %[[PAD_ARG_0:.*]] = tensor.pad %[[ARG_0]] nofold low[0, 0] high[0, 0]
// CHECK:           %[[PAD_ARG_1:.*]] = tensor.pad %[[ARG_1]] nofold low[0, 0] high[0, 0]
// CHECK:           %[[PAD_ARG_2:.*]] = tensor.pad %[[ARG_2]] nofold low[0, 0] high[0, 0]

// CHECK:           %[[MATMUL:.*]] = linalg.matmul
// CHECK-SAME:      ins(%[[PAD_ARG_0:.*]], %[[PAD_ARG_1:.*]] : tensor<?x12xf32>, tensor<12x?xf32>)
// CHECK-SAME:      outs(%[[PAD_ARG_2]]
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x12xf32>, tensor<12x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
  func.return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
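    // Note - dims 0 and 1 are dynamic with no computable upper bound, so
    // only dim 2 (the static k = 12) can be listed here.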
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      // Note - only the static dim is padded
      padding_dimensions=[2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// No upper bound can be computed for the padded dim - the transform fails.

func.func @negative_no_ub_estimate(%arg0: tensor<?x12xf32>,
                                   %arg1: tensor<12x?xf32>,
                                   %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {

  // expected-note @below {{target op}}
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x12xf32>, tensor<12x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
  func.return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
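    // Dim 1 (n) is dynamic in both %arg1 and %arg2, so no static padded size
    // can be derived for it.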
    // expected-error @below {{failed to pad op}}
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      // Note - attempting to pad a non-static dim
      padding_dimensions=[1],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// Check that the padding can be applied even when the output argument of the
// linalg op is not produced by an empty op or an extract_slice op.

// CHECK-DAG: #[[$MAP_MIN:.*]] = affine_map<(d0) -> (-d0 + 2044, 16)>
// CHECK-DAG: #[[$MAP_TO_16:.*]] = affine_map<(d0) -> (-d0 + 16)>
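// The first map computes the dynamic tile size min(2044 - d0, 16); the second
// computes the amount of high padding, 16 - size, needed to reach the static
// tile size.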
// CHECK-LABEL: @outs_not_produced_by_empty_or_extract_slice(
// CHECK-SAME: %[[A:[^: ]*]]: tensor<128x2044xf32>,
// CHECK-SAME: %[[B:[^: ]*]]: tensor<2044x128xf32>)
func.func @outs_not_produced_by_empty_or_extract_slice(%a : tensor<128x2044xf32>, %b : tensor<2044x128xf32>) -> tensor<128x128xf32> {
  %cst = arith.constant 0.000000e+00 : f32
  %0 = tensor.empty() : tensor<128x128xf32>
  %9 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128x128xf32>) -> tensor<128x128xf32>

  %c0 = arith.constant 0 : index
  %c16 = arith.constant 16 : index
  %c2044 = arith.constant 2044 : index
  // CHECK: scf.for %[[ARG3:.*]] = {{.*}} iter_args(%[[ARG4:.*]] = %{{.*}})
  %10 = scf.for %arg3 = %c0 to %c2044 step %c16 iter_args(%arg4 = %9) -> (tensor<128x128xf32>) {
    // CHECK: %[[MIN:.*]] = affine.min #[[$MAP_MIN]](%[[ARG3]])
    %11 = affine.min affine_map<(d0) -> (-d0 + 2044, 16)>(%arg3)
    // CHECK: %[[A_SLICE:.*]] = tensor.extract_slice %[[A]]
    // CHECK: %[[B_SLICE:.*]] = tensor.extract_slice %[[B]]
    %extracted_slice_2 = tensor.extract_slice %a[0, %arg3] [128, %11] [1, 1] : tensor<128x2044xf32> to tensor<128x?xf32>
    %extracted_slice_3 = tensor.extract_slice %b[%arg3, 0] [%11, 128] [1, 1] : tensor<2044x128xf32> to tensor<?x128xf32>
    // CHECK-DAG: %[[CST:.*]] = arith.constant 0.

    // CHECK-DAG: %[[TO_16:.*]] = affine.apply #[[$MAP_TO_16]](%[[MIN]])
    // CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[0, 0] high[0, %[[TO_16]]]
    // CHECK: tensor.yield %[[CST]]
    // CHECK: %[[PADDED_B_SLICE:.*]] = tensor.pad %[[B_SLICE]] nofold
    // The output shape is already the padded size, so no high padding is
    // added to it.
    // CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[0, 0]

    //      CHECK: %[[T5:.*]] = linalg.matmul
    // CHECK-SAME:              ins(%[[PADDED_A_SLICE]], %[[PADDED_B_SLICE]] : tensor<128x16xf32>, tensor<16x128xf32>)
    // CHECK-SAME:              outs(%[[PADDED_ARG4]] : tensor<128x128xf32>)
    %res = linalg.matmul ins(%extracted_slice_2, %extracted_slice_3 : tensor<128x?xf32>, tensor<?x128xf32>) outs(%arg4 : tensor<128x128xf32>) -> tensor<128x128xf32>
    scf.yield %res : tensor<128x128xf32>
  }
  return %10 : tensor<128x128xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @pack_everything
func.func @pack_everything(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>,
                           %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  //      CHECK: %[[T0:.*]] = tensor.extract_slice %
  //      CHECK: %[[T1:.*]] = tensor.extract_slice %
  //      CHECK: %[[T2:.*]] = tensor.extract_slice %
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  //  CHECK-DAG: %[[CST:.*]] = arith.constant 0.

  //      CHECK: %[[PAD0:.*]] = tensor.pad %[[T0]] nofold
  //      CHECK: %[[PAD1:.*]] = tensor.pad %[[T1]] nofold
  //      CHECK: %[[PAD2:.*]] = tensor.pad %[[T2]] nofold

  //      CHECK: %[[T5:.*]] = linalg.matmul
  // CHECK-SAME:              ins(%[[PAD0]], %[[PAD1]] : tensor<4x7xf32>, tensor<7x5xf32>)
  // CHECK-SAME:              outs(%[[PAD2]] : tensor<4x5xf32>)

  // Get the unpadded result (a no-op in this example).
  //      CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]]
  // Copy the result back to the original buffer so that the destination of
  // the computation does not change.
  //      CHECK: %[[T7:.*]] = bufferization.materialize_in_destination %[[T6]] in %[[T2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>

  //      CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]] into %{{.*}}
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
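    // Note - with all nofold flags set, the init operand is padded too; the
    // result is then sliced back out and copied into the original destination.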
413    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
414    %padded, %pad, %copy_back = transform.structured.pad %0 {
415      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
416      padding_dimensions=[0, 1, 2],
417      nofold_flags=[1, 1, 1]
418    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
419    transform.yield
420  }
421}
422