// RUN: mlir-opt --transform-interpreter -split-input-file -verify-diagnostics %s | FileCheck %s

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @static_sizes_output_divisible
func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
                                         %arg1: tensor<12x25xf32>,
                                         %arg2: tensor<24x25xf32>,
                                         %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  // CHECK: %[[T0:.*]] = tensor.extract_slice %
  // CHECK: %[[T1:.*]] = tensor.extract_slice %
  // CHECK: %[[T2:.*]] = tensor.extract_slice %
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  // CHECK-DAG: %[[CST:.*]] = arith.constant 0.

  // CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
  // CHECK: tensor.yield %[[CST]]
  // CHECK: %[[T4:.*]] = tensor.pad %[[T1]] nofold

  // CHECK: %[[T5:.*]] = linalg.matmul
  // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)

  // CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]]
  // CHECK: %[[T7:.*]] = bufferization.materialize_in_destination %[[T6]] in %[[T2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.op<"bufferization.materialize_in_destination">)
    %p = transform.num_associations %copy_back : (!transform.op<"bufferization.materialize_in_destination">) -> !transform.param<i64>
    // expected-remark @below {{1}}
    transform.debug.emit_param_as_remark %p : !transform.param<i64>
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @pad_to_multiple
func.func @pad_to_multiple(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>,
                           %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  // CHECK: linalg.matmul
  // CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x7xf32>, tensor<7x6xf32>)
  // CHECK-SAME: outs(%{{.*}} : tensor<4x6xf32>)
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}
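
// The `pad_to_multiple_of [2, 2, 1]` clause below rounds each padded size up
// to the requested multiple: m = 4 is already a multiple of 2, k is padded to
// its upper bound 7 (a multiple of 1), and n = 5 is rounded up to 6, which
// gives the 4x7 * 7x6 -> 4x6 shapes checked above.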
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 pad_to_multiple_of [2, 2, 1] {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @parametrized_pad_to_multiple
func.func @parametrized_pad_to_multiple(%arg0: tensor<24x12xf32>,
                                        %arg1: tensor<12x25xf32>,
                                        %arg2: tensor<24x25xf32>,
                                        %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  // CHECK: linalg.matmul
  // CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x7xf32>, tensor<7x6xf32>)
  // CHECK-SAME: outs(%{{.*}} : tensor<4x6xf32>)
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %c2 = transform.param.constant 2 : i64 -> !transform.param<i64>
    %padded, %pad, %copy_back = transform.structured.pad %0 pad_to_multiple_of [%c2, 2, 1] {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----
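
// Same transformation as in the first test, but the matmul operands are
// produced by `tensor.empty` ops rather than `tensor.extract_slice` ops.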

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @static_sizes_output_divisible_on_empty_op
func.func @static_sizes_output_divisible_on_empty_op(%arg0: tensor<24x12xf32>,
    %arg1: tensor<12x25xf32>, %arg2: tensor<24x25xf32>, %iv0: index,
    %iv1: index, %iv2: index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  // CHECK: %[[T0:.*]] = tensor.empty
  // CHECK: %[[T1:.*]] = tensor.empty
  // CHECK: %[[T2:.*]] = tensor.empty
  %1 = tensor.empty(%0) : tensor<4x?xf32>
  %2 = tensor.empty(%0) : tensor<?x5xf32>
  %3 = tensor.empty() : tensor<4x5xf32>

  // CHECK-DAG: %[[CST:.*]] = arith.constant 0.

  // CHECK: %[[T3:.*]] = tensor.pad %[[T0]] nofold
  // CHECK: tensor.yield %[[CST]]
  // CHECK: %[[T4:.*]] = tensor.pad %[[T1]] nofold

  // CHECK: %[[T5:.*]] = linalg.matmul
  // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

func.func @pad(%arg0: tensor<24x12xf32>,
               %arg1: tensor<12x25xf32>,
               %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
  // expected-note @below {{when applied to this op}}
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
  func.return %0 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // expected-error @below {{op expects a padding value of type 'f32', got 0 : i32}}
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0: i32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

func.func @pad(%arg0: tensor<24x12xf32>,
               %arg1: tensor<12x25xf32>,
               %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
  // expected-note @below {{when applied to this op}}
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
  func.return %0 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // expected-error @below {{expects a padding that parses to 'f32', got "{foo}"}}
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=["{foo}", 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// With all padded dims being static, there's nothing to pad. However, with the
// `nofold` attribute set (see `nofold_flags`), the corresponding pad Ops are
// preserved.
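
// For illustration, the preserved LHS pad checked below looks roughly as
// follows (a sketch with invented SSA names, not verified by FileCheck):
//
//   %lhs_pad = tensor.pad %arg0 nofold low[0, 0] high[0, 0] {
//   ^bb0(%i: index, %j: index):
//     tensor.yield %cst : f32
//   } : tensor<24x12xf32> to tensor<24x12xf32>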

// CHECK-LABEL: @zero_pad_static(
func.func @zero_pad_static(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {

// CHECK-SAME: %[[ARG_0:.*]]: tensor<24x12xf32>,
// CHECK-SAME: %[[ARG_1:.*]]: tensor<12x25xf32>,
// CHECK-SAME: %[[ARG_2:.*]]: tensor<24x25xf32>) -> tensor<24x25xf32> {

// CHECK: %[[PAD_ARG_0:.*]] = tensor.pad %[[ARG_0]] nofold low[0, 0] high[0, 0]
// CHECK: %[[PAD_ARG_1:.*]] = tensor.pad %[[ARG_1]] nofold low[0, 0] high[0, 0]
// CHECK-NOT: tensor.pad

// CHECK: %[[MATMUL:.*]] = linalg.matmul
// CHECK-SAME: ins(%[[PAD_ARG_0:.*]], %[[PAD_ARG_1:.*]] : tensor<24x12xf32>, tensor<12x25xf32>)
// CHECK-SAME: outs(%[[ARG_2]]
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
  func.return %0 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 0]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// With all padded dims being static, there's nothing to pad. However, with the
// `nofold` attribute set (see `nofold_flags`), the corresponding pad Ops are
// preserved. Same as above, but some dims are now dynamic.

// CHECK-LABEL: @zero_pad_dynamic(
func.func @zero_pad_dynamic(%arg0: tensor<?x12xf32>,
                            %arg1: tensor<12x?xf32>,
                            %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {

// CHECK-SAME: %[[ARG_0:.*]]: tensor<?x12xf32>,
// CHECK-SAME: %[[ARG_1:.*]]: tensor<12x?xf32>,
// CHECK-SAME: %[[ARG_2:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {

// CHECK: %[[PAD_ARG_0:.*]] = tensor.pad %[[ARG_0]] nofold low[0, 0] high[0, 0]
// CHECK: %[[PAD_ARG_1:.*]] = tensor.pad %[[ARG_1]] nofold low[0, 0] high[0, 0]
// CHECK: %[[PAD_ARG_2:.*]] = tensor.pad %[[ARG_2]] nofold low[0, 0] high[0, 0]

// CHECK: %[[MATMUL:.*]] = linalg.matmul
// CHECK-SAME: ins(%[[PAD_ARG_0:.*]], %[[PAD_ARG_1:.*]] : tensor<?x12xf32>, tensor<12x?xf32>)
// CHECK-SAME: outs(%[[PAD_ARG_2]]
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x12xf32>, tensor<12x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
  func.return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      // Note - only the static dim is padded
      padding_dimensions=[2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}
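
// Note: without `nofold` (a 0 entry in `nofold_flags`), zero-amount pads like
// the ones above would fold away into their source tensors, leaving no trace
// of the transformation.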

// -----

// Padding fails here: the padded dimension is dynamic and no constant upper
// bound can be derived for it, so there is no static size to pad to.

func.func @negative_no_ub_estimate(%arg0: tensor<?x12xf32>,
                                   %arg1: tensor<12x?xf32>,
                                   %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {

  // expected-note @below {{target op}}
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x12xf32>, tensor<12x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
  func.return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // expected-error @below {{failed to pad op}}
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      // Note - attempting to pad a non-static dim
      padding_dimensions=[1],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}
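
// To pad such an op, tile it first (as in the tests above) so that the dynamic
// sizes are bounded by `affine.min` ops, which gives the transform a constant
// upper bound to pad to.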

// -----

// Check that the padding can be applied even when the output argument of the
// linalg op is not produced by an empty op or an extract_slice op.

// CHECK-DAG: #[[$MAP_MIN:.*]] = affine_map<(d0) -> (-d0 + 2044, 16)>
// CHECK-DAG: #[[$MAP_TO_16:.*]] = affine_map<(d0) -> (-d0 + 16)>
// CHECK-LABEL: @outs_not_produced_by_empty_or_extract_slice(
// CHECK-SAME: %[[A:[^: ]*]]: tensor<128x2044xf32>,
// CHECK-SAME: %[[B:[^: ]*]]: tensor<2044x128xf32>)
func.func @outs_not_produced_by_empty_or_extract_slice(%a : tensor<128x2044xf32>, %b : tensor<2044x128xf32>) -> tensor<128x128xf32> {
  %cst = arith.constant 0.000000e+00 : f32
  %0 = tensor.empty() : tensor<128x128xf32>
  %9 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128x128xf32>) -> tensor<128x128xf32>

  %c0 = arith.constant 0 : index
  %c16 = arith.constant 16 : index
  %c2044 = arith.constant 2044 : index
  // CHECK: scf.for %[[ARG3:.*]] = {{.*}} iter_args(%[[ARG4:.*]] = %{{.*}})
  %10 = scf.for %arg3 = %c0 to %c2044 step %c16 iter_args(%arg4 = %9) -> (tensor<128x128xf32>) {
    // CHECK: %[[MIN:.*]] = affine.min #[[$MAP_MIN]](%[[ARG3]])
    %11 = affine.min affine_map<(d0) -> (-d0 + 2044, 16)>(%arg3)
    // CHECK: %[[A_SLICE:.*]] = tensor.extract_slice %[[A]]
    // CHECK: %[[B_SLICE:.*]] = tensor.extract_slice %[[B]]
    %extracted_slice_2 = tensor.extract_slice %a[0, %arg3] [128, %11] [1, 1] : tensor<128x2044xf32> to tensor<128x?xf32>
    %extracted_slice_3 = tensor.extract_slice %b[%arg3, 0] [%11, 128] [1, 1] : tensor<2044x128xf32> to tensor<?x128xf32>
    // CHECK-DAG: %[[CST:.*]] = arith.constant 0.

    // CHECK-DAG: %[[TO_16:.*]] = affine.apply #[[$MAP_TO_16]](%[[MIN]])
    // CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[0, 0] high[0, %[[TO_16]]]
    // CHECK: tensor.yield %[[CST]]
    // CHECK: %[[PADDED_B_SLICE:.*]] = tensor.pad %[[B_SLICE]] nofold
    // The output shape is already padded, so we shouldn't add anything to the
    // upper bound.
    // CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[0, 0]

    // CHECK: %[[T5:.*]] = linalg.matmul
    // CHECK-SAME: ins(%[[PADDED_A_SLICE]], %[[PADDED_B_SLICE]] : tensor<128x16xf32>, tensor<16x128xf32>)
    // CHECK-SAME: outs(%[[PADDED_ARG4]] : tensor<128x128xf32>)
    %res = linalg.matmul ins(%extracted_slice_2, %extracted_slice_3 : tensor<128x?xf32>, tensor<?x128xf32>) outs(%arg4 : tensor<128x128xf32>) -> tensor<128x128xf32>
    scf.yield %res : tensor<128x128xf32>
  }
  return %10 : tensor<128x128xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: @pack_everything
func.func @pack_everything(%arg0: tensor<24x12xf32>,
                           %arg1: tensor<12x25xf32>,
                           %arg2: tensor<24x25xf32>,
                           %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  // CHECK: %[[T0:.*]] = tensor.extract_slice %
  // CHECK: %[[T1:.*]] = tensor.extract_slice %
  // CHECK: %[[T2:.*]] = tensor.extract_slice %
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  // CHECK-DAG: %[[CST:.*]] = arith.constant 0.

  // CHECK: %[[PAD0:.*]] = tensor.pad %[[T0]] nofold
  // CHECK: %[[PAD1:.*]] = tensor.pad %[[T1]] nofold
  // CHECK: %[[PAD2:.*]] = tensor.pad %[[T2]] nofold

  // CHECK: %[[T5:.*]] = linalg.matmul
  // CHECK-SAME: ins(%[[PAD0]], %[[PAD1]] : tensor<4x7xf32>, tensor<7x5xf32>)
  // CHECK-SAME: outs(%[[PAD2]] : tensor<4x5xf32>)

  // Get the unpadded result (a no-op in this example).
  // CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]]
  // Copy the result back to the original buffer so that the destination of the
  // computation does not change.
  // CHECK: %[[T7:.*]] = bufferization.materialize_in_destination %[[T6]] in %[[T2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>

  // CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]] into %{{.*}}
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}
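
// For reference, the copy-back sequence checked above looks roughly like this
// (an illustrative sketch with invented SSA names, not part of the test):
//
//   %unpadded = tensor.extract_slice %padded_matmul[0, 0] [4, 5] [1, 1]
//       : tensor<4x5xf32> to tensor<4x5xf32>
//   %copied = bufferization.materialize_in_destination %unpadded in %dest_slice
//       : (tensor<4x5xf32>, tensor<4x5xf32>) -> tensor<4x5xf32>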