// RUN: mlir-opt %s -split-input-file --linalg-specialize-generic-ops | FileCheck %s

// Unary elementwise generic (identity maps, all-parallel) should specialize
// to the named op linalg.exp.
#umap = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @unary_op_exp(%A: tensor<?x?x?xf32>, %Out: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
  %0 = linalg.generic
         {indexing_maps = [#umap, #umap], iterator_types = ["parallel", "parallel","parallel"]}
         ins(%A : tensor<?x?x?xf32>) outs(%Out : tensor<?x?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      %1 = math.exp %in : f32
      linalg.yield %1 : f32
  } -> tensor<?x?x?xf32>
  return %0 : tensor<?x?x?xf32>
}

// CHECK-LABEL: unary_op_exp
// CHECK-SAME: %[[A:.+]]: tensor<?x?x?xf32>, %[[Out:.+]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.exp ins(%[[A]] : tensor<?x?x?xf32>) outs(%[[Out]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>

// -----

// Binary elementwise generic (identity maps, all-parallel) should specialize
// to the named op linalg.div.
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @binary_op_div(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic
         {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]}
         ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %1 = arith.divf %in, %in_0 : f32
      linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: binary_op_div
// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.div ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>

// -----

// Mul+add contraction with matmul-shaped maps (single reduction dim d2)
// should specialize to linalg.matmul.
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
func.func @op_matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic
         {indexing_maps = [#map, #map1, #map2],
iterator_types = ["parallel", "parallel", "reduction"]} 47 ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) { 48 ^bb0(%in: f32, %in_0: f32, %out: f32): 49 %1 = arith.mulf %in, %in_0 : f32 50 %2 = arith.addf %out, %1 : f32 51 linalg.yield %2 : f32 52 } -> tensor<?x?xf32> 53 return %0 : tensor<?x?xf32> 54} 55 56// CHECK-LABEL: op_matmul 57// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32> 58// CHECK-NOT: linalg.generic 59// CHECK: linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32> 60 61// ----- 62 63#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> 64#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> 65#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> 66func.func @op_batch_matmul(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>, %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> { 67 %0 = linalg.generic 68 {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} 69 ins(%A, %B : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x16x16xf32>) { 70 ^bb0(%in: f32, %in_0: f32, %out: f32): 71 %1 = arith.mulf %in, %in_0 : f32 72 %2 = arith.addf %out, %1 : f32 73 linalg.yield %2 : f32 74 } -> tensor<2x16x16xf32> 75 return %0 : tensor<2x16x16xf32> 76} 77 78// CHECK-LABEL: op_batch_matmul 79// CHECK-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[Out:.+]]: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> 80// CHECK-NOT: linalg.generic 81// CHECK: linalg.batch_matmul ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%[[Out]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32> 82 83// ----- 84 85// This is a multi-reduction linalg.generic and cannot be lifted to matrix multiply 86#mapA = affine_map<(m, n, k1, k2) -> (m, k1, k2)> 87#mapB = affine_map<(m, n, k1, k2) -> (k2, k1, n)> 88#mapC = affine_map<(m, n, k1, k2) -> (m, n)> 
func.func @negative_op_multi_reduction(%A: tensor<10x20x30xf32>,
                                       %B: tensor<30x20x40xf32>,
                                       %C: tensor<10x40xf32>) -> tensor<10x40xf32> {
  %0 = linalg.generic
         {indexing_maps = [#mapA, #mapB, #mapC],
          iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
         ins(%A, %B : tensor<10x20x30xf32>, tensor<30x20x40xf32>)
         outs(%C : tensor<10x40xf32>) {
    ^bb0(%a: f32, %b: f32, %c: f32):
      %1 = arith.mulf %a, %b : f32
      %2 = arith.addf %c, %1 : f32
      linalg.yield %2 : f32
  } -> tensor<10x40xf32>
  return %0 : tensor<10x40xf32>
}

// CHECK-LABEL: negative_op_multi_reduction
// CHECK: linalg.generic

// -----

// TODO: matvec
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1)>
#map2 = affine_map<(d0, d1) -> (d0)>
func.func @op_matvec(%A: tensor<?x?xf32>, %B: tensor<?xf32>, %Out: tensor<?xf32>) -> tensor<?xf32> {
  %0 = linalg.generic
         {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "reduction"]}
         ins(%A, %B : tensor<?x?xf32>, tensor<?xf32>) outs(%Out : tensor<?xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %1 = arith.mulf %in, %in_0 : f32
      %2 = arith.addf %out, %1 : f32
      linalg.yield %2 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
// CHECK-LABEL: op_matvec
// CHECK: linalg.generic