// RUN: mlir-opt --transform-interpreter -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: func @pad_to_memory_space(
// CHECK-SAME:    %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>,
// CHECK-SAME:    %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>,
// CHECK-SAME:    %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>,
func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>,
                               %arg1: tensor<12x25xf32>,
                               %arg2: tensor<24x25xf32>,
                               %iv0 : index, %iv1 : index,
                               %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  // CHECK: %[[s0:.*]] = memref.subview %[[arg0]]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  // CHECK: %[[s1:.*]] = memref.subview %[[arg1]]
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  // CHECK: %[[s2:.*]] = memref.subview %[[arg2]]
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3>
  // CHECK: linalg.fill {{.*}} outs(%[[alloc0]]
  // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1]
  // CHECK: memref.copy %[[s0]], %[[alloc0_view]]

  // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3>
  // CHECK: linalg.fill {{.*}} outs(%[[alloc1]]
  // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1]
  // CHECK: memref.copy %[[s1]], %[[alloc1_view]]

  // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3>
  // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]]
  // No subview because there is 0 padding.
  // CHECK: memref.copy %[[s2]], %[[alloc2]]

  // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}})
  // Copy back result.
  // CHECK: memref.copy %[[alloc2]], %[[s2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>

  // insert_slice bufferizes to a no-op.
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}
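
// The transform script below pads all three matmul operands to static shapes,
// materializes each resulting pad as an allocation in memory space 3, and then
// runs one-shot bufferization. For reference, each padded operand is first
// rewritten into a tensor.pad; a minimal sketch for the first operand, with a
// hypothetical %high value (not checked by this test):
//
//   %cst = arith.constant 0.000000e+00 : f32
//   %padded = tensor.pad %1 nofold low[0, 0] high[0, %high] {
//   ^bb0(%i: index, %j: index):
//     tensor.yield %cst : f32
//   } : tensor<4x?xf32> to tensor<4x7xf32>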

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.consumed}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    %buffer, %new_ops = transform.structured.bufferize_to_allocation %pad {memory_space = 3, emit_dealloc} : !transform.any_op
    %2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op

    transform.yield
  }
}

// -----

#map = affine_map<()[s0] -> (-s0 + 12, 7)>

// CHECK-LABEL: func @vectorize_and_bufferize_pad(
// CHECK-SAME:    %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>,
// CHECK-SAME:    %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>,
// CHECK-SAME:    %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>,
func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>,
                                       %arg1: tensor<12x25xf32>,
                                       %arg2: tensor<24x25xf32>,
                                       %iv0 : index, %iv1 : index,
                                       %iv2 : index) -> tensor<24x25xf32> {
  %0 = affine.min #map()[%iv2]

  // CHECK: %[[s0:.*]] = memref.subview %[[arg0]]
  %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
  // CHECK: %[[s1:.*]] = memref.subview %[[arg1]]
  %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
  // CHECK: %[[s2:.*]] = memref.subview %[[arg2]]
  %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>

  // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]]
  // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3>
  // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]]

  // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]]
  // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3>
  // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]]

  // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]]
  // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3>
  // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc2]]

  // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}})
  // Copy back result.
  // CHECK: memref.copy %[[alloc2]], %[[s2]]
  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>

  // insert_slice bufferizes to a no-op.
  %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
  func.return %5 : tensor<24x25xf32>
}
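
// The transform script below vectorizes the pads with masked transfers (needed
// because each pad source has a dynamic dimension) and then applies
// bufferize_to_allocation to the vector.mask enclosing each
// vector.transfer_write. A minimal sketch of the masked read/write pair this
// produces for one operand, with hypothetical value names (the exact IR is
// what the CHECK lines above verify):
//
//   %v = vector.mask %m { vector.transfer_read %src[...], %cst ... } ...
//   %alloc = memref.alloc() : memref<4x7xf32, 3>
//   vector.mask %m { vector.transfer_write %v, %alloc[...] ... } ...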

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.consumed}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %padded, %pad, %copy_back = transform.structured.pad %0 {
      padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
      padding_dimensions=[0, 1, 2],
      nofold_flags=[1, 1, 1]
    } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.structured.vectorize %pad vector_sizes [10, 12] : !transform.any_op
    %vector_write = transform.structured.match ops{["vector.transfer_write"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %mask_op = transform.get_parent_op %vector_write {op_name = "vector.mask"} : (!transform.any_op) -> !transform.any_op
    %buffer, %new_ops = transform.structured.bufferize_to_allocation %mask_op {memory_space = 3, emit_dealloc} : !transform.any_op
    %2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
    transform.yield
  }
}
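
// Note: in both scripts, the emit_dealloc unit attribute makes
// bufferize_to_allocation pair each buffer it creates with a dealloc, roughly
// (illustrative only, not checked above):
//
//   %alloc = memref.alloc() : memref<4x7xf32, 3>
//   ...
//   memref.dealloc %alloc : memref<4x7xf32, 3>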