xref: /llvm-project/mlir/test/Dialect/Linalg/decompose-ops.mlir (revision 0a1569a400491e264060b8a6ff7b7f64e1865496)
1// RUN: mlir-opt -test-linalg-decompose-ops -cse -split-input-file %s | FileCheck %s
2// RUN: mlir-opt -test-linalg-decompose-ops=remove-dead-args-and-results -cse -split-input-file %s | FileCheck %s --check-prefix=CANONICALIZECHECK
3
// Test input: a single linalg.generic over a 2-D all-parallel iteration space
// with three inputs and two outputs. Its body contains two payload ops, an
// arith.addf followed by an arith.mulf that consumes the addf result, and
// both values are yielded. The expectations after this function verify that
// the op is split into two linalg.generic ops, one per payload op.
4func.func @simple_op(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : tensor<?xf32>)
5    -> (tensor<?x?xf32>, tensor<?x?xf32>) {
6  %c0 = arith.constant 0 : index
7  %c1 = arith.constant 1 : index
8  %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
9  %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  // %init1 has its dynamic sizes swapped (d1, d0); it is the output accessed
  // through the transposed map (d0, d1) -> (d1, d0) below.
10  %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
11  %init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
12  %result:2 = linalg.generic {
    // Map order follows operand order: %arg0, %arg1, %arg2, %init1, %init2.
13    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
14                     affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
15                     affine_map<(d0, d1) -> (d0, d1)>],
16    iterator_types = ["parallel", "parallel"]}
17    ins(%arg0, %arg1, %arg2 : tensor<?x?xf32>, tensor<?xf32>, tensor<?xf32>)
18    outs(%init1, %init2 : tensor<?x?xf32>, tensor<?x?xf32>) {
    // %b3 and %b4 (the output block arguments) are not read by the body.
19    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
20      %0 = arith.addf %b0, %b1 : f32
      // Second payload op depends on the first one's result.
21      %1 = arith.mulf %0, %b2 : f32
22      linalg.yield %0, %1 : f32, f32
23    } -> (tensor<?x?xf32>, tensor<?x?xf32>)
24  return %result#0, %result#1 : tensor<?x?xf32>, tensor<?x?xf32>
25}
26//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
27//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0)>
28//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
29//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
30//      CHECK: func @simple_op(
31// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
32// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
33// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
34//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
35//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
36//  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
37//  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
38//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
39//  CHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
40//  CHECK-DAG:   %[[GENERIC1:.+]]:3 = linalg.generic
41// CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]]
42// CHECK-SAME:       ["parallel", "parallel"]
43// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] :
44// CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]], %[[INIT1]] :
45// CHECK-NEXT:   ^bb0(
46// CHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
47// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
48// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32
49// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: f32
50// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
51// CHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32):
52// CHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
53// CHECK-NEXT:     linalg.yield %[[S0]], %{{[a-zA-Z0-9_]+}}, %[[S0]]
54//      CHECK:   %[[GENERIC2:.+]]:2 = linalg.generic
55// CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]]]
56// CHECK-SAME:       ["parallel", "parallel"]
57// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#2 :
58// CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]] :
59// CHECK-NEXT:   ^bb0(
60// CHECK-SAME:       %[[B6:[a-zA-Z0-9_]+]]: f32
61// CHECK-SAME:       %[[B7:[a-zA-Z0-9_]+]]: f32
62// CHECK-SAME:       %[[B8:[a-zA-Z0-9_]+]]: f32
63// CHECK-SAME:       %[[B9:[a-zA-Z0-9_]+]]: f32
64// CHECK-SAME:       %[[B10:[a-zA-Z0-9_]+]]: f32
65// CHECK-SAME:       %[[B11:[a-zA-Z0-9_]+]]: f32):
66// CHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B9]], %[[B8]]
67// CHECK-NEXT:     linalg.yield %[[B9]], %[[S1]]
68//      CHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]]#1
69
70// With cse + dead operand/result removal (-test-linalg-decompose-ops=remove-dead-args-and-results)
71
72//  CANONICALIZECHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
73//  CANONICALIZECHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0)>
74//  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
75//  CANONICALIZECHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1)>
76//      CANONICALIZECHECK: func @simple_op(
77// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
78// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
79// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
80//  CANONICALIZECHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
81//  CANONICALIZECHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
82//  CANONICALIZECHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
83//  CANONICALIZECHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
84//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
85//  CANONICALIZECHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
86//  CANONICALIZECHECK-DAG:   %[[GENERIC1:.+]] = linalg.generic
87// CANONICALIZECHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
88// CANONICALIZECHECK-SAME:       ["parallel", "parallel"]
89// CANONICALIZECHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
90// CANONICALIZECHECK-SAME:       outs(%[[INIT1]] :
91// CANONICALIZECHECK-NEXT:   ^bb0(
92// CANONICALIZECHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
93// CANONICALIZECHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
94// CANONICALIZECHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32):
95// CANONICALIZECHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
96// CANONICALIZECHECK-NEXT:     linalg.yield %[[S0]]
97//      CANONICALIZECHECK:   %[[GENERIC2:.+]] = linalg.generic
98// CANONICALIZECHECK-SAME:       [#[[MAP3]], #[[MAP2]], #[[MAP0]]]
99// CANONICALIZECHECK-SAME:       ["parallel", "parallel"]
100// CANONICALIZECHECK-SAME:       ins(%[[ARG2]], %[[GENERIC1]] :
101// CANONICALIZECHECK-SAME:       outs(%[[INIT2]] :
102// CANONICALIZECHECK-NEXT:   ^bb0(
103// CANONICALIZECHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: f32
104// CANONICALIZECHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
105// CANONICALIZECHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32):
106// CANONICALIZECHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B4]], %[[B3]]
107// CANONICALIZECHECK-NEXT:     linalg.yield %[[S1]]
108//      CANONICALIZECHECK:   return %[[GENERIC1]], %[[GENERIC2]]
109
110
111// -----
112
// Test input: same payload as @simple_op (arith.addf feeding arith.mulf), but
// with three outputs. The addf result is yielded twice, as results #0 and #2,
// with the mulf result in between as result #1. The expectations after this
// function verify which decomposed op each returned value comes from.
113func.func @simple_op_permuted_outputs(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?xf32>, %arg2 : tensor<?xf32>)
114    -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) {
115  %c0 = arith.constant 0 : index
116  %c1 = arith.constant 1 : index
117  %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
118  %d1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  // %init1 has swapped sizes (d1, d0) and is accessed via the transposed map.
119  %init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
120  %init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
121  %result:3 = linalg.generic {
    // Map order follows operand order: %arg0, %arg1, %arg2, %init1, %init2, %init2.
122    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
123                     affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
124                     affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
125    iterator_types = ["parallel", "parallel"]}
126    ins(%arg0, %arg1, %arg2 : tensor<?x?xf32>, tensor<?xf32>, tensor<?xf32>)
    // %init2 is passed as the init for both the second and third outputs.
127    outs(%init1, %init2, %init2 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) {
128    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32, %b5 : f32) :
129      %0 = arith.addf %b0, %b1 : f32
130      %1 = arith.mulf %0, %b2 : f32
      // %0 is yielded in positions 0 and 2; %1 sits between them.
131      linalg.yield %0, %1, %0 : f32, f32, f32
132    } -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
133  return %result#0, %result#1, %result#2 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
134}
135//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
136//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0)>
137//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
138//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
139//      CHECK: func @simple_op_permuted_outputs(
140// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
141// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
142// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
143//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
144//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
145//  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
146//  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
147//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
148//  CHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
149//  CHECK-DAG:   %[[GENERIC1:.+]]:4 = linalg.generic
150// CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]]
151// CHECK-SAME:       ["parallel", "parallel"]
152// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] :
153// CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] :
154// CHECK-NEXT:   ^bb0(
155// CHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
156// CHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
157// CHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32
158// CHECK-SAME:       %[[B3:[a-zA-Z0-9_]+]]: f32
159// CHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
160// CHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32
161// CHECK-SAME:       %[[B6:[a-zA-Z0-9_]+]]: f32):
162// CHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
163// CHECK-NEXT:     linalg.yield %[[S0]], %{{[a-zA-Z0-9_]+}}, %[[S0]]
164//      CHECK:   %[[GENERIC2:.+]]:3 = linalg.generic
165// CHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]], #[[MAP0]]]
166// CHECK-SAME:       ["parallel", "parallel"]
167// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#3 :
168// CHECK-SAME:       outs(%[[INIT1]], %[[INIT2]], %[[INIT2]] :
169// CHECK-NEXT:   ^bb0(
170// CHECK-SAME:       %[[B7:[a-zA-Z0-9_]+]]: f32
171// CHECK-SAME:       %[[B8:[a-zA-Z0-9_]+]]: f32
172// CHECK-SAME:       %[[B9:[a-zA-Z0-9_]+]]: f32
173// CHECK-SAME:       %[[B10:[a-zA-Z0-9_]+]]: f32
174// CHECK-SAME:       %[[B11:[a-zA-Z0-9_]+]]: f32
175// CHECK-SAME:       %[[B12:[a-zA-Z0-9_]+]]: f32):
176// CHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B10]], %[[B9]]
177// CHECK-NEXT:     linalg.yield %[[B10]], %[[S1]], %[[B10]]
178//      CHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]]#1, %[[GENERIC1]]#2
179
180//  CANONICALIZECHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
181//  CANONICALIZECHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0)>
182//  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
183//  CANONICALIZECHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1)>
184//      CANONICALIZECHECK: func @simple_op_permuted_outputs(
185// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
186// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
187// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xf32>
188//  CANONICALIZECHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
189//  CANONICALIZECHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
190//  CANONICALIZECHECK-DAG:   %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]]
191//  CANONICALIZECHECK-DAG:   %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
192//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = tensor.empty(%[[D1]], %[[D0]])
193//  CANONICALIZECHECK-DAG:   %[[INIT2:.+]] = tensor.empty(%[[D0]], %[[D1]])
194//  CANONICALIZECHECK-DAG:   %[[GENERIC1:.+]]:2 = linalg.generic
195// CANONICALIZECHECK-SAME:       [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]]
196// CANONICALIZECHECK-SAME:       ["parallel", "parallel"]
197// CANONICALIZECHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
198// CANONICALIZECHECK-SAME:       outs(%[[INIT1]], %[[INIT2]] :
199// CANONICALIZECHECK-NEXT:   ^bb0(
200// CANONICALIZECHECK-SAME:       %[[B0:[a-zA-Z0-9_]+]]: f32
201// CANONICALIZECHECK-SAME:       %[[B1:[a-zA-Z0-9_]+]]: f32
202// CANONICALIZECHECK-SAME:       %[[B2:[a-zA-Z0-9_]+]]: f32):
203// CANONICALIZECHECK-NEXT:     %[[S0:.+]] = arith.addf %[[B0]], %[[B1]]
204// CANONICALIZECHECK-NEXT:     linalg.yield %[[S0]], %[[S0]]
205//      CANONICALIZECHECK:   %[[GENERIC2:.+]] = linalg.generic
206// CANONICALIZECHECK-SAME:       [#[[MAP3]], #[[MAP2]], #[[MAP0]]]
207// CANONICALIZECHECK-SAME:       ["parallel", "parallel"]
208// CANONICALIZECHECK-SAME:       ins(%[[ARG2]], %[[GENERIC1]]#0 :
209// CANONICALIZECHECK-SAME:       outs(%[[INIT2]] :
210// CANONICALIZECHECK-NEXT:   ^bb0(
211// CANONICALIZECHECK-SAME:       %[[B4:[a-zA-Z0-9_]+]]: f32
212// CANONICALIZECHECK-SAME:       %[[B5:[a-zA-Z0-9_]+]]: f32
213// CANONICALIZECHECK-SAME:       %[[B6:[a-zA-Z0-9_]+]]: f32):
214// CANONICALIZECHECK-NEXT:     %[[S1:.+]] = arith.mulf %[[B5]], %[[B4]]
215// CANONICALIZECHECK-NEXT:     linalg.yield %[[S1]]
216//      CANONICALIZECHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]], %[[GENERIC1]]#1
217
218// -----
219
// Test input: a statically shaped linalg.generic whose body has three payload
// ops (arith.sitofp, arith.extf, arith.addf) and yields only the final addf
// result. Inputs are f32 and i32; the single f64 output is accessed through
// the transposed map #map2. The expectations after this function verify a
// three-way split, one linalg.generic per payload op.
220#map0 = affine_map<(d0, d1) -> (d0, d1)>
221#map1 = affine_map<(d0, d1) -> (d0)>
222#map2 = affine_map<(d0, d1) -> (d1, d0)>
223func.func @multi_statement(%arg0 : tensor<10x20xf32>, %arg1 : tensor<10xi32>) -> tensor<20x10xf64> {
224  %init = tensor.empty() : tensor<20x10xf64>
225  %0 = linalg.generic {
226    indexing_maps = [#map0, #map1, #map2],
227    iterator_types = ["parallel", "parallel"]}
228    ins(%arg0, %arg1 : tensor<10x20xf32>, tensor<10xi32>)
229    outs(%init : tensor<20x10xf64>) {
    // %b2 (the output block argument) is not read by the body.
230    ^bb0(%b0 : f32, %b1 : i32, %b2 : f64):
      // The two conversions are independent of each other; the addf uses both.
231      %1 = arith.sitofp %b1 : i32 to f64
232      %2 = arith.extf %b0 : f32 to f64
233      %3 = arith.addf %1, %2 : f64
234      linalg.yield %3 : f64
235    } -> tensor<20x10xf64>
236  return %0 : tensor<20x10xf64>
237}
238
239//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
240//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0)>
241//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
242//      CHECK: func @multi_statement(
243// CHECK-SAME:     %[[ARG0:.+]]: tensor<10x20xf32>
244// CHECK-SAME:     %[[ARG1:.+]]: tensor<10xi32>)
245//  CHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<20x10xf64>
246//  CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<10x20xf64>
247//      CHECK:   %[[GENERIC0:.+]]:2 = linalg.generic
248// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]]
249// CHECK-SAME:       iterator_types = ["parallel", "parallel"]
250// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
251// CHECK-SAME:       outs(%[[INIT0]], %[[INIT1]] :
252// CHECK-NEXT:     ^bb0(
253// CHECK-SAME:         %[[B0:.+]]: f32
254// CHECK-SAME:         %[[B1:.+]]: i32
255// CHECK-SAME:         %[[B2:[a-zA-Z0-9_]+]]: f64
256// CHECK-SAME:         %[[B3:.+]]: f64
257// CHECK-NEXT:       %[[S0:.+]] = arith.sitofp %[[B1]] : i32 to f64
258// CHECK-NEXT:       linalg.yield %{{.+}}, %[[S0]]
259//      CHECK:   %[[GENERIC1:.+]]:2 = linalg.generic
260// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP2]], #[[MAP0]]]
261// CHECK-SAME:       iterator_types = ["parallel", "parallel"]
262// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1 :
263// CHECK-SAME:       outs(%[[INIT0]], %[[INIT1]] :
264// CHECK-NEXT:     ^bb0(
265// CHECK-SAME:         %[[B4:.+]]: f32
266// CHECK-SAME:         %[[B5:.+]]: i32
267// CHECK-SAME:         %[[B6:[a-zA-Z0-9_]+]]: f64
268// CHECK-SAME:         %[[B7:[a-zA-Z0-9_]+]]: f64
269// CHECK-SAME:         %[[B8:.+]]: f64
270// CHECK-NEXT:       %[[S1:.+]] = arith.extf %[[B4]] : f32 to f64
271// CHECK-NEXT:       linalg.yield %{{.+}}, %[[S1]]
272//      CHECK:   %[[GENERIC2:.+]] = linalg.generic
273// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP0]], #[[MAP2]]]
274// CHECK-SAME:       iterator_types = ["parallel", "parallel"]
275// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1, %[[GENERIC1]]#1 :
276// CHECK-SAME:       outs(%[[INIT0]] :
277// CHECK-NEXT:     ^bb0(
278// CHECK-SAME:         %[[B9:.+]]: f32
279// CHECK-SAME:         %[[B10:.+]]: i32
280// CHECK-SAME:         %[[B11:[a-zA-Z0-9_]+]]: f64
281// CHECK-SAME:         %[[B12:[a-zA-Z0-9_]+]]: f64
282// CHECK-SAME:         %[[B13:.+]]: f64
283// CHECK-NEXT:       %[[S2:.+]] = arith.addf %[[B11]], %[[B12]] : f64
284// CHECK-NEXT:       linalg.yield %[[S2]]
285//      CHECK:   return %[[GENERIC2]]
286
287//  CANONICALIZECHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0)>
288//  CANONICALIZECHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
289//  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1, d0)>
290//      CANONICALIZECHECK: func @multi_statement(
291// CANONICALIZECHECK-SAME:     %[[ARG0:.+]]: tensor<10x20xf32>
292// CANONICALIZECHECK-SAME:     %[[ARG1:.+]]: tensor<10xi32>)
293//  CANONICALIZECHECK-DAG:   %[[INIT0:.+]] = tensor.empty() : tensor<20x10xf64>
294//  CANONICALIZECHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<10x20xf64>
295//      CANONICALIZECHECK:   %[[GENERIC0:.+]] = linalg.generic
296// CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
297// CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
298// CANONICALIZECHECK-SAME:       ins(%[[ARG1]] :
299// CANONICALIZECHECK-SAME:       outs(%[[INIT1]] :
300// CANONICALIZECHECK-NEXT:     ^bb0(
301// CANONICALIZECHECK-SAME:         %[[B0:.+]]: i32
302// CANONICALIZECHECK-SAME:         %[[B1:.+]]: f64
303// CANONICALIZECHECK-NEXT:       %[[S0:.+]] = arith.sitofp %[[B0]] : i32 to f64
304// CANONICALIZECHECK-NEXT:       linalg.yield %[[S0]]
305//      CANONICALIZECHECK:   %[[GENERIC1:.+]] = linalg.generic
306// CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP1]], #[[MAP1]]]
307// CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
308// CANONICALIZECHECK-SAME:       ins(%[[ARG0]] :
309// CANONICALIZECHECK-SAME:       outs(%[[INIT1]] :
310// CANONICALIZECHECK-NEXT:     ^bb0(
311// CANONICALIZECHECK-SAME:         %[[B2:.+]]: f32
312// CANONICALIZECHECK-SAME:         %[[B3:.+]]: f64
313// CANONICALIZECHECK-NEXT:       %[[S1:.+]] = arith.extf %[[B2]] : f32 to f64
314// CANONICALIZECHECK-NEXT:       linalg.yield %[[S1]]
315//      CANONICALIZECHECK:   %[[GENERIC2:.+]] = linalg.generic
316// CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP1]], #[[MAP1]], #[[MAP2]]]
317// CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
318// CANONICALIZECHECK-SAME:       ins(%[[GENERIC0]], %[[GENERIC1]] :
319// CANONICALIZECHECK-SAME:       outs(%[[INIT0]] :
320// CANONICALIZECHECK-NEXT:     ^bb0(
321// CANONICALIZECHECK-SAME:         %[[B4:[a-zA-Z0-9_]+]]: f64
322// CANONICALIZECHECK-SAME:         %[[B5:[a-zA-Z0-9_]+]]: f64
323// CANONICALIZECHECK-SAME:         %[[B6:.+]]: f64
324// CANONICALIZECHECK-NEXT:       %[[S2:.+]] = arith.addf %[[B4]], %[[B5]] : f64
325// CANONICALIZECHECK-NEXT:       linalg.yield %[[S2]]
326//      CANONICALIZECHECK:   return %[[GENERIC2]]
327
328// -----
329
// Test input: a linalg.generic whose outs operands are function arguments
// (%arg2, %arg3) rather than tensor.empty values, and whose body reads the
// output block arguments (%b2, %b3). The two payload ops, arith.addf and
// arith.mulf, do not depend on each other. The expectations after this
// function verify that the decomposed ops keep %arg2 and %arg3 as their outs.
330#map0 = affine_map<(d0, d1) -> (d0)>
331#map1 = affine_map<(d0, d1) -> (d1)>
332#map2 = affine_map<(d0, d1) -> (d0, d1)>
333#map3 = affine_map<(d0, d1) -> (d1, d0)>
334func.func @destination_passing_style(
335    %arg0 : tensor<?xf32>, %arg1 : tensor<?xf32>,
336    %arg2 : tensor<?x?xf32>, %arg3 : tensor<?x?xf32>)
337    -> (tensor<?x?xf32>, tensor<?x?xf32>) {
338  %0:2 = linalg.generic {
339      indexing_maps = [#map0, #map1, #map2, #map3],
340      iterator_types = ["parallel", "parallel"]}
341      ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
342      outs(%arg2, %arg3 : tensor<?x?xf32>, tensor<?x?xf32>) {
343      ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) :
        // Each payload op combines one input element with the current value
        // of one output (%b2 for the first output, %b3 for the second).
344        %1 = arith.addf %b0, %b2 : f32
345        %2 = arith.mulf %b1, %b3 : f32
346        linalg.yield %1, %2 : f32, f32
347    } -> (tensor<?x?xf32>, tensor<?x?xf32>)
348  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
349}
350//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0)>
351//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d1)>
352//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
353//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
354//      CHECK: func.func @destination_passing_style(
355// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?xf32>
356// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
357// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
358// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xf32>)
359//      CHECK:   %[[GENERIC1:.+]]:3 = linalg.generic
360// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
361// CHECK-SAME:       iterator_types = ["parallel", "parallel"]
362// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]] :
363// CHECK-SAME:       outs(%[[ARG2]], %[[ARG3]], %[[ARG2]] :
364// CHECK-NEXT:   ^bb0(
365// CHECK-SAME:       %[[ARG4:[a-zA-Z0-9_]+]]: f32
366// CHECK-SAME:       %[[ARG5:[a-zA-Z0-9_]+]]: f32
367// CHECK-SAME:       %[[ARG6:[a-zA-Z0-9_]+]]: f32
368// CHECK-SAME:       %[[ARG7:[a-zA-Z0-9_]+]]: f32
369// CHECK-SAME:       %[[ARG8:[a-zA-Z0-9_]+]]: f32
370// CHECK-NEXT:     %[[S1:.+]] = arith.addf %[[ARG4]], %[[ARG6]]
371// CHECK-NEXT:     linalg.yield %[[S1]], %{{.+}}, %[[S1]]
372//      CHECK:   %[[GENERIC2:.+]]:2 = linalg.generic
373// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP2]], #[[MAP3]]]
374// CHECK-SAME:       iterator_types = ["parallel", "parallel"]
375// CHECK-SAME:       ins(%[[ARG0]], %[[ARG1]], %[[GENERIC1]]#2 :
376// CHECK-SAME:       outs(%[[ARG2]], %[[ARG3]] :
377// CHECK-NEXT:   ^bb0(
378// CHECK-SAME:       %[[ARG9:[a-zA-Z0-9_]+]]: f32
379// CHECK-SAME:       %[[ARG10:[a-zA-Z0-9_]+]]: f32
380// CHECK-SAME:       %[[ARG11:[a-zA-Z0-9_]+]]: f32
381// CHECK-SAME:       %[[ARG12:[a-zA-Z0-9_]+]]: f32
382// CHECK-SAME:       %[[ARG13:[a-zA-Z0-9_]+]]: f32
383// CHECK-NEXT:     %[[S2:.+]] = arith.mulf %[[ARG10]], %[[ARG12]]
384// CHECK-NEXT:     linalg.yield %[[ARG11]], %[[S2]]
385//      CHECK:   return %[[GENERIC1]]#0, %[[GENERIC2]]#1
386
387//  CANONICALIZECHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0)>
388//  CANONICALIZECHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
389//  CANONICALIZECHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d1)>
390//  CANONICALIZECHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
391//      CANONICALIZECHECK: func.func @destination_passing_style(
392// CANONICALIZECHECK-SAME:     %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?xf32>
393// CANONICALIZECHECK-SAME:     %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
394// CANONICALIZECHECK-SAME:     %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
395// CANONICALIZECHECK-SAME:     %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xf32>)
396//      CANONICALIZECHECK:   %[[GENERIC1:.+]] = linalg.generic
397// CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
398// CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
399// CANONICALIZECHECK-SAME:       ins(%[[ARG0]] :
400// CANONICALIZECHECK-SAME:       outs(%[[ARG2]] :
401// CANONICALIZECHECK-NEXT:   ^bb0(
402// CANONICALIZECHECK-SAME:       %[[ARG4:[a-zA-Z0-9_]+]]: f32
403// CANONICALIZECHECK-SAME:       %[[ARG5:[a-zA-Z0-9_]+]]: f32
404// CANONICALIZECHECK-NEXT:     %[[S1:.+]] = arith.addf %[[ARG4]], %[[ARG5]]
405// CANONICALIZECHECK-NEXT:     linalg.yield %[[S1]]
406//      CANONICALIZECHECK:   %[[GENERIC2:.+]]:2 = linalg.generic
407// CANONICALIZECHECK-SAME:       indexing_maps = [#[[MAP2]], #[[MAP1]], #[[MAP1]], #[[MAP3]]]
408// CANONICALIZECHECK-SAME:       iterator_types = ["parallel", "parallel"]
409// CANONICALIZECHECK-SAME:       ins(%[[ARG1]], %[[GENERIC1]] :
410// CANONICALIZECHECK-SAME:       outs(%[[ARG2]], %[[ARG3]] :
411// CANONICALIZECHECK-NEXT:   ^bb0(
412// CANONICALIZECHECK-SAME:       %[[ARG4:[a-zA-Z0-9_]+]]: f32
413// CANONICALIZECHECK-SAME:       %[[ARG5:[a-zA-Z0-9_]+]]: f32
414// CANONICALIZECHECK-SAME:       %[[ARG6:[a-zA-Z0-9_]+]]: f32
415// CANONICALIZECHECK-SAME:       %[[ARG7:[a-zA-Z0-9_]+]]: f32
416// CANONICALIZECHECK-NEXT:     %[[S2:.+]] = arith.mulf %[[ARG4]], %[[ARG6]]
417// CANONICALIZECHECK-NEXT:     linalg.yield %[[ARG5]], %[[S2]]
418//      CANONICALIZECHECK:   return %[[GENERIC1]], %[[GENERIC2]]#1
419