// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-erase-unused-operands-and-results | FileCheck %s
// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-erase-unnecessary-inputs | FileCheck %s --check-prefix=CHECK-INPUT

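// The second input %1 is a tensor.empty whose block argument (%arg2) is never
// read in the body, so it is expected to be erased, leaving only %arg0 as an
// input.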
// CHECK-LABEL: func @remove_deadargs_generic_basic
//  CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf32>) -> tensor<?xf32> {
//       CHECK: %[[GENERIC_OP:.*]] = linalg.generic
//  CHECK-SAME: ins(%[[ARG0]] : tensor<?xf32>)
//  CHECK-SAME: outs({{.*}} : tensor<?xf32>) {
#map0 = affine_map<(d0) -> (d0)>
func.func @remove_deadargs_generic_basic(%arg0: tensor<?xf32>) -> (tensor<?xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 7.0 : f32
  %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
  %1 = tensor.empty(%0) : tensor<?xf32>
  %2 = tensor.empty(%0) : tensor<?xf32>
  %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} ins(%arg0, %1 : tensor<?xf32>, tensor<?xf32>) outs(%2 : tensor<?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %4 = arith.addf %arg1, %cst : f32
    linalg.yield %4 : f32
  } -> tensor<?xf32>
  return %3 : tensor<?xf32>
}

// -----

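// The body computes its result from constants alone and never reads any of
// the input block arguments, so the rewritten op is expected to have no ins
// operands at all.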
// CHECK-LABEL: func @remove_deadargs_generic_mixedaccess
//       CHECK: %[[GENERIC_OP:.*]] = linalg.generic
//   CHECK-NOT: ins
//  CHECK-SAME: outs({{.*}} : tensor<?x?xf32>) {
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>
func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor<?x?xf32>) -> (tensor<?x?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cst1 = arith.constant 7.0 : f32
  %cst2 = arith.constant 6.0 : f32
  %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %3 = tensor.empty(%1, %0) : tensor<?x?xf32>
  %4 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %6 = arith.divf %cst1, %cst2 : f32
    linalg.yield %6 : f32
  } -> tensor<?x?xf32>
  return %5 : tensor<?x?xf32>
}

// -----

// Test case: Most basic case. Adding a vector to itself.

#map = affine_map<(d0) -> (d0)>

// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @basic
func.func @basic(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]]]
  // CHECK:   attrs =  {someattr}
  // CHECK:   ^bb0(%[[BBARG:.*]]: f32, %{{.*}}: f32):
  // CHECK:     arith.addf %[[BBARG]], %[[BBARG]]
  %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]}
     ins(%arg0, %arg0 : tensor<?xf32>, tensor<?xf32>)
    outs(%arg0 : tensor<?xf32>) attrs = {someattr} {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %1 = arith.addf %arg1, %arg2 : f32
    linalg.yield %1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// Test case: Different indexing maps mean that args are not redundant, despite
// being the same Value.

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: @distinct_affine_maps
func.func @distinct_affine_maps(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]]
  %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
     ins(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
    outs(%arg0 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %1 = arith.addf %arg1, %arg2 : f32
    linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// -----

// Test case: Check rewriting mechanics for mixed redundant and
// non-redundant args.

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: @mixed_redundant_non_redundant
func.func @mixed_redundant_non_redundant(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]]
  // CHECK:   ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32):
  // CHECK:     "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]])
  %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
     ins(%arg0, %arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
    outs(%arg0 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
    %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32
    linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// -----

// Test case: Check rewriting mechanics for multiple different redundant args.

#map = affine_map<(d0) -> (d0)>

// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @multiple_different_redundant_args
func.func @multiple_different_redundant_args(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]], #[[$MAP]]]
  // CHECK:   ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32):
  // CHECK:     "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]])
  %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]}
     ins(%arg0, %arg1, %arg0, %arg1 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
    outs(%arg0 : tensor<?xf32>) {
  ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32):
    %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32
    linalg.yield %1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// Drop the dead results (#1 and #3 are unused).

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @drop_dead_results(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %arg0, %arg0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
      %1 = arith.addf %b0, %b0 : f32
      linalg.yield %1, %1, %1, %1 : f32, f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
//      CHECK: func @drop_dead_results(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[ARG0]] :
//      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Current argmax lowering to `linalg.generic`. The first result cannot be
// dropped even though it isn't used outside the op, because its out block
// argument is used inside the body.
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> ()>
func.func @argmax_lowering(%arg0 : tensor<?xf32>) -> tensor<i32> {
  %init0 = tensor.empty() : tensor<f32>
  %init1 = tensor.empty() : tensor<i32>
  %0:2 = linalg.generic {
    indexing_maps = [#map0, #map1, #map1],
    iterator_types = ["reduction"]}
    ins(%arg0 : tensor<?xf32>)
    outs(%init0, %init1 : tensor<f32>, tensor<i32>) {
  ^bb0(%b0: f32, %b1: f32, %b2: i32):
    %8 = linalg.index 0 : index
    %9 = arith.index_cast %8 : index to i32
    %10 = arith.cmpf oge, %b0, %b1 : f32
    %11 = arith.select %10, %b0, %b1 : f32
    %12 = arith.cmpf oeq, %b0, %b1 : f32
    %13 = arith.minsi %9, %b2 : i32
    %14 = arith.select %10, %9, %b2 : i32
    %15 = arith.select %12, %13, %14 : i32
    linalg.yield %11, %15 : f32, i32
  } -> (tensor<f32>, tensor<i32>)
  return %0#1 : tensor<i32>
}
//      CHECK: func @argmax_lowering(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?xf32>
//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<f32>
//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<i32>
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       outs(%[[INIT0]], %[[INIT1]] :
//      CHECK:   return %[[GENERIC]]#1

// -----

// Do not remove an input operand that is needed to compute a loop dimension.
func.func @loop_dim_operand(%arg0 : tensor<?xf32>) -> tensor<i32> {
  %cst = arith.constant 0 : i32
  %init = tensor.empty() : tensor<i32>
  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
  %0 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
      iterator_types = ["reduction"]}
      ins(%arg0 : tensor<?xf32>) outs(%fill : tensor<i32>) {
    ^bb0(%b0: f32, %b1: i32):
      %1 = linalg.index 0 : index
      %2 = arith.index_cast %1 : index to i32
      %3 = arith.addi %b1, %2 : i32
      linalg.yield %3 : i32
    } -> tensor<i32>
  return %0 : tensor<i32>
}
//      CHECK: func @loop_dim_operand(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?xf32>
//      CHECK:   linalg.generic
// CHECK-SAME:       ins(%[[ARG0]] :

// -----

// Do not remove outs operand needed for loop bound computation.
func.func @loop_dim_outs_operand(%arg0 : index) -> tensor<i32> {
  %cst = arith.constant 0 : i32
  %init1 = tensor.empty(%arg0) : tensor<?xi32>
  %init = tensor.empty() : tensor<i32>
  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
  %0:2 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
      iterator_types = ["parallel"]}
      outs(%init1, %fill : tensor<?xi32>, tensor<i32>) {
    ^bb0(%b0: i32, %b1: i32):
      %1 = linalg.index 0 : index
      %2 = arith.index_cast %1 : index to i32
      %3 = arith.addi %b1, %2 : i32
      linalg.yield %2, %3 : i32, i32
    } -> (tensor<?xi32>, tensor<i32>)
  return %0#1 : tensor<i32>
}
//      CHECK: func @loop_dim_outs_operand(
// CHECK-SAME:     %[[ARG0:.+]]: index
//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[ARG0]])
//      CHECK:   linalg.generic
// CHECK-SAME:       outs(%[[INIT]]

// -----

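// Inputs that are duplicates (same SSA value and same indexing map) are
// expected to be de-duplicated: %arg0 appears twice with #map0 and %arg3 twice
// with #map1, so one copy of each is dropped and the block arguments are
// remapped (see the CHECK lines below).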
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>
#map2 = affine_map<(d0, d1) -> (d0)>
#map3 = affine_map<(d0, d1) -> (d1)>
func.func @multiple_redundant_args(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?xi32>,
    %arg2 : tensor<?xi32>, %arg3 : tensor<?x?xi32>, %arg4 : tensor<?xi32>) -> tensor<?xi32> {
  %0 = linalg.generic {
      indexing_maps = [#map3, #map0, #map0, #map2, #map1, #map1, #map2],
      iterator_types = ["parallel", "reduction"]}
      ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3
          : tensor<?xi32>, tensor<?x?xi32>, tensor<?x?xi32>, tensor<?xi32>, tensor<?x?xi32>, tensor<?x?xi32>)
      outs(%arg2 : tensor<?xi32>) {
    ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32):
      %1 = arith.addi %b0, %b1 : i32
      %2 = arith.addi %1, %b2 : i32
      %3 = arith.addi %2, %b3 : i32
      %4 = arith.addi %3, %b4 : i32
      %5 = arith.addi %4, %b5 : i32
      %6 = arith.addi %5, %b6 : i32
      linalg.yield %6 : i32
    } -> tensor<?xi32>
  return %0 : tensor<?xi32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0)>
//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
//      CHECK: func @multiple_redundant_args(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xi32>
// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xi32>
// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
// CHECK-SAME:     %[[ARG4:[a-zA-Z0-9_]+]]: tensor<?xi32>)
//      CHECK:   %[[RETURN:.+]] = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
// CHECK-SAME:       iterator_types = ["parallel", "reduction"]
// CHECK-SAME:       ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] :
// CHECK-SAME:       outs(%[[ARG2]] :
//      CHECK:   ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32
// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: i32)
//      CHECK:     %[[T0:.+]] = arith.addi %[[B0]], %[[B1]]
//      CHECK:     %[[T1:.+]] = arith.addi %[[T0]], %[[B1]]
//      CHECK:     %[[T2:.+]] = arith.addi %[[T1]], %[[B2]]
//      CHECK:     %[[T3:.+]] = arith.addi %[[T2]], %[[B3]]
//      CHECK:     %[[T4:.+]] = arith.addi %[[T3]], %[[B3]]
//      CHECK:     %[[T5:.+]] = arith.addi %[[T4]], %[[B4]]
//      CHECK:     linalg.yield %[[T5]]
//      CHECK:  return %[[RETURN]]

// -----

// Drop redundant results.

#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @drop_redundant_results(
    %arg0 : tensor<?x?xf32>) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0:2 = linalg.generic {
    indexing_maps = [#map, #map, #map],
    iterator_types = ["parallel", "parallel"]}
    ins(%arg0 : tensor<?x?xf32>)
    outs(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
      %1 = arith.addf %b0, %b0 : f32
      linalg.yield %1, %1 : f32, f32
    } -> (tensor<?x?xf32>, tensor<?x?xf32>)
  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
//      CHECK: func @drop_redundant_results
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?xf32>
//      CHECK:   %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME:       outs(%[[ARG0]] :
//      CHECK:   return %[[GENERIC]]

// -----

// Drop dead results with different init tensors.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @drop_dead_results_with_different_tensors(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
      linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_dead_results_with_different_tensors(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[ARG0]] :
//      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Drop dead results with unused cycles.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @drop_dead_results_with_unused_cycles(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
      %1 = arith.addf %b0, %b0 : f32
      %2 = arith.addf %b0, %b3 : f32
      %3 = arith.addf %b0, %b4 : f32
      linalg.yield %1, %1, %2, %3 : f32, f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_dead_results_with_unused_cycles(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[ARG0]] :
//      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Drop only the results not used by others.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
func.func @drop_only_the_results_not_used_by_others(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:3 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) :
      linalg.yield %b2, %b1, %b3 : f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0 : tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_only_the_results_not_used_by_others(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[INIT:.+]] = tensor.empty
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[INIT]] :
//      CHECK:   return %[[GENERIC]]#0

// -----

// Drop only the cycles not used by others.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
func.func @drop_only_the_cycles_not_used_by_others(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:3 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) :
      %1 = arith.addf %b1, %b2 : f32
      %2 = arith.addf %b1, %b3 : f32
      linalg.yield %1, %b1, %2 : f32, f32, f32
    } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0 : tensor<?x?x?xf32>
}

//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
//      CHECK: func @drop_only_the_cycles_not_used_by_others(
// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>)
//      CHECK:   %[[INIT:.+]] = tensor.empty
//      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME:       indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME:       outs(%[[ARG0]], %[[INIT]] :
//      CHECK:   return %[[GENERIC]]#0

// -----

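// The second input is the same SSA value as the init operand %b and uses the
// same indexing map, so the erase-unnecessary-inputs pattern is expected to
// drop it and read the value through the output block argument instead.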
// CHECK-INPUT-LABEL: func @remove_unnecessary_input(
//  CHECK-INPUT-SAME:     %[[a:.*]]: tensor<?xf32>, %[[b:.*]]: tensor<?xf32>
#map = affine_map<(d0) -> (d0)>
func.func @remove_unnecessary_input(%a: tensor<?xf32>, %b: tensor<?xf32>)
    -> tensor<?xf32>
{
  //      CHECK-INPUT: %[[result:.*]] = linalg.generic {indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel"]}
  // CHECK-INPUT-SAME:     ins(%[[a]] : tensor<?xf32>) outs(%[[b]] : tensor<?xf32>) {
  //      CHECK-INPUT: ^bb0(%[[in:.*]]: f32, %[[out:.*]]: f32):
  //      CHECK-INPUT:   %[[add:.*]] = arith.addf %[[in]], %[[out]]
  //      CHECK-INPUT:   linalg.yield %[[add]]
  //      CHECK-INPUT: } -> tensor<?xf32>
  //      CHECK-INPUT: return %[[result]]
  %0 = linalg.generic
    {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]}
    ins(%a, %b : tensor<?xf32>, tensor<?xf32>) outs(%b : tensor<?xf32>) {
  ^bb0(%in: f32, %in_2: f32, %out: f32):
    %16 = arith.addf %in, %in_2 : f32
    linalg.yield %16 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}