// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s

// CHECK-LABEL: func @matmul_tensors
func.func @matmul_tensors(
  %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>)
    -> tensor<8x32xf32> {
// CHECK-NOT: linalg
// CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32>
// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32>
  %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>)
                     outs(%arg2: tensor<8x32xf32>)
    -> tensor<8x32xf32>
  return %0 : tensor<8x32xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.consumed}) {
    %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op
    %1, %loops:3 = transform.structured.tile_using_for %0 tile_sizes [8, 4, 2]
      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
    %2 = transform.get_parent_op %1 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize_children_and_apply_patterns %2 : (!transform.any_op) -> !transform.any_op
    %b = transform.bufferization.one_shot_bufferize
        layout{IdentityLayoutMap} %module_op
        {bufferize_function_boundaries = true, allow_return_allocs = true}
        : (!transform.any_op) -> !transform.any_op

    %f = transform.structured.match ops{["func.func"]} in %b
      : (!transform.any_op) -> !transform.any_op

    // TODO: group these lower-level controls into various properly named vector
    // lowering TD macros.
    transform.apply_patterns to %f {
      transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.transfer_permutation_patterns
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.lower_multi_reduction lowering_strategy = "innerparallel"
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "linalg-copy"
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.transfer_to_scf max_transfer_rank = 1 full_unroll = true
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.lower_transfer max_transfer_rank = 1
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.lower_shape_cast
    } : !transform.any_op

    transform.apply_patterns to %f {
      transform.apply_patterns.vector.lower_transpose lowering_strategy = "shuffle_1d"
    } : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// CHECK-LABEL: func.func @fold_arith_extf_into_contract
// CHECK-SAME: (%[[ARG0:.*]]: vector<64x64xf16>, %[[ARG1:.*]]: vector<64x64xf16>, %[[ARG2:.*]]: vector<64x64xf32>)
// CHECK-NEXT:   %[[R:.+]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]],
// CHECK-SAME:   iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
// CHECK-SAME:   %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xf16>, vector<64x64xf16> into vector<64x64xf32>
// CHECK-NEXT:   return %[[R]] : vector<64x64xf32>
func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> {
  %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32>
  %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32>
  %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32>
  return %result : vector<64x64xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
    %func = transform.structured.match ops{["func.func"]} in %module_op : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %func {
      transform.apply_patterns.vector.fold_arith_extension
    } : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32
// CHECK-SAME:    %[[LHS:.*]]: vector<[4]xi32>,
// CHECK-SAME:    %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> {
// CHECK:           %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32>
// CHECK:           return %[[RES]] : vector<[4]x[4]xi32>
func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> {
  %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32>
  %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32>
  %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32>
  %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32>
  return %mul: vector<[4]x[4]xi32>
}

// CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32
// CHECK-SAME:    %[[LHS:.*]]: vector<16xf32>,
// CHECK-SAME:    %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> {
// CHECK:           %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32>
// CHECK:           return %[[RES]] : vector<8x16xf32>
func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> {
  %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32>
  %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32>
  %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32>
  %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32>
  return %mul: vector<8x16xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
    %func = transform.structured.match ops{["func.func"]} in %module_op : (!transform.any_op) -> !transform.any_op
    transform.apply_patterns to %func {
      transform.apply_patterns.vector.elementwise_to_vector
    } : !transform.any_op
    transform.yield
  }
}