// RUN: mlir-opt %s \
// RUN:   --pass-pipeline="builtin.module(transform-interpreter{ \
// RUN:        debug-bind-trailing-args=linalg.matmul,linalg.elemwise_binary},\
// RUN:        canonicalize,cse,symbol-dce)" \
// RUN:   --split-input-file --verify-diagnostics
// ****************************** IMPORTANT NOTE ******************************
//
// If you are changing this file, you may also need to change
// mlir/docs/Tutorials/Transform accordingly.
//
// ****************************************************************************

// Original function to optimize.
func.func @fc_relu(%lhs: tensor<512x512xf32>, %rhs: tensor<512x512xf32>,
                   %bias: tensor<512x512xf32>, %output: tensor<512x512xf32>)
                   -> tensor<512x512xf32> {
  // Matrix-matrix multiplication.

  // expected-note @below {{nested payload op}}
  %matmul = linalg.matmul ins(%lhs, %rhs: tensor<512x512xf32>, tensor<512x512xf32>)
                          outs(%output: tensor<512x512xf32>) -> tensor<512x512xf32>

  // Elementwise addition.

  // expected-note @below {{ancestor payload op}}
  %biased = linalg.elemwise_binary { fun = #linalg.binary_fn<add> }
    ins(%matmul, %bias : tensor<512x512xf32>, tensor<512x512xf32>)
    outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>

  // Elementwise max with 0 (ReLU).
  %c0f = arith.constant 0.0 : f32
  %relued = linalg.elemwise_binary { fun = #linalg.binary_fn<max_signed> }
    ins(%biased, %c0f : tensor<512x512xf32>, f32)
    outs(%output : tensor<512x512xf32>) -> tensor<512x512xf32>
  func.return %relued : tensor<512x512xf32>
}

// Declaration of the "microkernel" function that we will be targeting.
func.func private @microkernel(
    %lhs: tensor<4x512xf32>,
    %rhs: tensor<512x4xf32>,
    %bias: tensor<4x4xf32>,
    %init: tensor<4x4xf32>,
    %output: tensor<4x4xf32>) -> tensor<4x4xf32>

module attributes {transform.with_named_sequence} {
  // Entry point of the transform script. %arg1 and %arg2 are the trailing
  // arguments bound through the `debug-bind-trailing-args` option in the RUN
  // line above (linalg.matmul and linalg.elemwise_binary, in that order).
  transform.named_sequence @__transform_main(
      %arg0: !transform.any_op,
      %arg1: !transform.op<"linalg.matmul">,
      %arg2: !transform.op<"linalg.elemwise_binary">) {
    // Since the %arg2 handle is associated with both elementwise operations,
    // we need to split it into two handles so we can target only the second
    // elementwise operation.
    %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elemwise_binary">)
        -> (!transform.any_op, !transform.any_op)

    // The actual tiling transformation takes tile sizes as attributes. It produces a
    // handle to the loop generated during tiling.
    %tiled, %loop = transform.structured.tile_using_forall %max tile_sizes [8, 32]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

    // We can now fuse the other operations into the loop. Here, we fuse
    // operations one-by-one. This requires the operation that is being fused
    // to define the value used within the loop, so the order of such fusions
    // is important. We could also use "transform.merge_handles" to obtain
    // a single handle to all operations and give it to `fuse_into_containing_op`
    // that would take care of the ordering in this case.
    %add_fused, %loop2 = transform.structured.fuse_into_containing_op %add into %loop
        : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
    %matmul_fused, %loop3 = transform.structured.fuse_into_containing_op %arg1 into %loop2
        : (!transform.op<"linalg.matmul">, !transform.any_op) -> (!transform.any_op, !transform.any_op)

    // Tile again to get the desired size. Note that this time this tiles the
    // "add" operation and fuses matmul into the loop, but doesn't affect the
    // "max" operation. This illustrates the precise targeting with the transform
    // dialect. Otherwise, it is difficult to differentiate "add" and "max", both
    // of which have the same kind.
    %tiled_second, %loop_second = transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
    %matmul_fused_2, %loop_second_2 =
        transform.structured.fuse_into_containing_op %matmul_fused into %loop_second
        : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)

    // Since outlining is currently only implemented for region-holding operations
    // such as loops, use tiling to size 1 to materialize the outer loop that is
    // going to be outlined.
    %_0, %loop_third = transform.structured.tile_using_forall %tiled_second tile_sizes [1]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
    // expected-note @below {{handle to invalidated ops}}
    %f, %outline_target = transform.structured.fuse_into_containing_op %matmul_fused_2 into %loop_third
        : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)

    // expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them}}
    %func, %call = transform.loop.outline %outline_target {func_name = "outlined"}
        : (!transform.any_op) -> (!transform.any_op, !transform.op<"func.call">)

    // This use is deliberately invalid and is the point of the test: %f refers
    // to the operation fused into the loop that %outline_target points to.
    // Outlining consumes %outline_target, which invalidates handles to the
    // payload nested in it, including %f. The diagnostics expected in this file
    // describe exactly this chain.
    // expected-error @below {{uses a handle invalidated by a previously executed transform op}}
    transform.debug.emit_remark_at %f, "fused" : !transform.any_op

    transform.yield
  }
}