// RUN: mlir-opt %s -split-input-file --linalg-specialize-generic-ops | FileCheck %s

// An elementwise generic whose payload is a single math.exp (identity maps,
// all-parallel iterators) should specialize to linalg.exp.
#umap = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @unary_op_exp(%A: tensor<?x?x?xf32>, %Out: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
  %0 = linalg.generic
          {indexing_maps = [#umap, #umap], iterator_types = ["parallel", "parallel","parallel"]}
          ins(%A : tensor<?x?x?xf32>) outs(%Out : tensor<?x?x?xf32>) {
  ^bb0(%in: f32, %out: f32):
    %1 = math.exp %in : f32
    linalg.yield %1 : f32
  } -> tensor<?x?x?xf32>
  return %0 : tensor<?x?x?xf32>
}

// CHECK-LABEL: unary_op_exp
// CHECK-SAME: %[[A:.+]]: tensor<?x?x?xf32>, %[[Out:.+]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.exp ins(%[[A]] : tensor<?x?x?xf32>) outs(%[[Out]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>

// -----

// An elementwise generic whose payload is a single arith.divf (identity maps,
// all-parallel iterators) should specialize to linalg.div.
#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @binary_op_div(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic
         {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]}
         ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
  ^bb0(%in: f32, %in_0: f32, %out: f32):
    %1 = arith.divf %in, %in_0 : f32
    linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: binary_op_div
// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.div ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>

// -----

// A mul/add contraction generic with matmul indexing maps (single reduction
// dimension d2) should specialize to linalg.matmul.
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
func.func @op_matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic
         {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
         ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
   ^bb0(%in: f32, %in_0: f32, %out: f32):
     %1 = arith.mulf %in, %in_0 : f32
     %2 = arith.addf %out, %1 : f32
     linalg.yield %2 : f32
   } -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: op_matmul
// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>

// -----

// A mul/add contraction generic with a leading parallel batch dimension (d0)
// and batch-matmul indexing maps should specialize to linalg.batch_matmul.
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
func.func @op_batch_matmul(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>, %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
  %0 = linalg.generic
           {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
           ins(%A, %B : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x16x16xf32>) {
  ^bb0(%in: f32, %in_0: f32, %out: f32):
    %1 = arith.mulf %in, %in_0 : f32
    %2 = arith.addf %out, %1 : f32
    linalg.yield %2 : f32
  } -> tensor<2x16x16xf32>
  return %0 : tensor<2x16x16xf32>
}

// CHECK-LABEL: op_batch_matmul
// CHECK-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[Out:.+]]: tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.batch_matmul ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%[[Out]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>

// -----

// Negative test: this generic reduces over two dimensions (k1 and k2), so it
// is not a matrix multiply and must NOT be specialized — it stays a
// linalg.generic.
#mapA = affine_map<(m, n, k1, k2) -> (m, k1, k2)>
#mapB = affine_map<(m, n, k1, k2) -> (k2, k1, n)>
#mapC = affine_map<(m, n, k1, k2) -> (m, n)>
func.func @negative_op_multi_reduction(%A: tensor<10x20x30xf32>,
                                       %B: tensor<30x20x40xf32>,
                                       %C: tensor<10x40xf32>) -> tensor<10x40xf32> {
  %0 = linalg.generic
           {indexing_maps = [#mapA, #mapB, #mapC],
            iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
  ins(%A, %B : tensor<10x20x30xf32>, tensor<30x20x40xf32>)
  outs(%C : tensor<10x40xf32>) {
  ^bb0(%a: f32, %b: f32, %c: f32):
    %1 = arith.mulf %a, %b : f32
    %2 = arith.addf %c, %1 : f32
    linalg.yield %2 : f32
  } -> tensor<10x40xf32>
  return %0 : tensor<10x40xf32>
}

// CHECK-LABEL: negative_op_multi_reduction
// CHECK: linalg.generic

// -----

// Matrix-vector contraction: specialization to linalg.matvec is not
// implemented yet, so the op is expected to remain a linalg.generic.
// TODO: matvec
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1)>
#map2 = affine_map<(d0, d1) -> (d0)>
func.func @op_matvec(%A: tensor<?x?xf32>, %B: tensor<?xf32>, %Out: tensor<?xf32>) -> tensor<?xf32> {
  %0 = linalg.generic
          {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "reduction"]}
          ins(%A, %B : tensor<?x?xf32>, tensor<?xf32>) outs(%Out : tensor<?xf32>) {
      ^bb0(%in: f32, %in_0: f32, %out: f32):
        %1 = arith.mulf %in, %in_0 : f32
        %2 = arith.addf %out, %1 : f32
        linalg.yield %2 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
// CHECK-LABEL: op_matvec
// CHECK: linalg.generic
127