xref: /llvm-project/mlir/test/Dialect/Linalg/loops.mlir (revision 0d4efa27252cbbea4b5672d4d8ffc15a3ba51d83)
1// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s
2// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s
3
4// Test that we can lower all the way to LLVM without crashing; results are not checked here.
5// RUN: mlir-opt %s -convert-linalg-to-loops -test-lower-to-llvm -o=/dev/null 2>&1
6
7// CHECK: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
8
9// CHECKPARALLEL: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
10// Matmul over three dynamically sized f32 views of one i8 buffer; expected to lower to an (M, N, K) loop nest.
11func.func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
12  %c0 = arith.constant 0 : index  // Byte shift shared by all three views.
13  %c1 = arith.constant 1 : index
14  %A = memref.view %arg0[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32>
15  %B = memref.view %arg0[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32>
16  %C = memref.view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32>
17  linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
18               outs(%C: memref<?x?xf32>)
19  return
20}
21// CHECK-LABEL: func @matmul(%{{.*}}: memref<?xi8>,
22//  CHECK-SAME: [[M:arg[0-9]+]]: index
23//  CHECK-SAME: [[N:arg[0-9]+]]: index
24//  CHECK-SAME: [[K:arg[0-9]+]]: index
25//       CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
26//       CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
27//       CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
28//       CHECK: scf.for {{.*}} to %[[M]]
29//       CHECK:   scf.for {{.*}} to %[[N]]
30//       CHECK:     scf.for {{.*}} to %[[K]]
31//   CHECK-DAG:       %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
32//   CHECK-DAG:       %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
33//   CHECK-DAG:       %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
34//   CHECK-DAG:       %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
35//   CHECK-DAG:       %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
36//       CHECK:       store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
37
38// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref<?xi8>,
39//  CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index
40//  CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index
41//  CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index
42//       CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
43//       CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
44//       CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
45//       CHECKPARALLEL: scf.parallel {{.*}} to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}}) {
46//       CHECKPARALLEL:   scf.for {{.*}} to %[[K]]
47//   CHECKPARALLEL-DAG:     %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
48//   CHECKPARALLEL-DAG:     %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
49//   CHECKPARALLEL-DAG:     %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
50//   CHECKPARALLEL-DAG:     %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
51//   CHECKPARALLEL-DAG:     %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
52//       CHECKPARALLEL:     store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
53
54
55// Matvec y(m) += A(m, n) * x(n) on dynamically sized f32 views of one i8 buffer.
56func.func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) {
57  %c0 = arith.constant 0 : index
58  %c1 = arith.constant 1 : index
59  %2 = memref.view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32>
60  %3 = memref.view %arg0[%c0][%N] : memref<?xi8> to memref<?xf32>  // x: one entry per column of %2.
61  %4 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32>  // y: one entry per row of %2.
62  linalg.matvec ins(%2, %3: memref<?x?xf32>, memref<?xf32>)
63               outs(%4 : memref<?xf32>)
64  return
65}
66// CHECK-LABEL: func @matvec(%{{.*}}: memref<?xi8>,
67//  CHECK-SAME: [[M:arg[0-9]+]]: index
68//  CHECK-SAME: [[K:arg[0-9]+]]: index
69//       CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
70//       CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
71//       CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
72//       CHECK: scf.for {{.*}} to %[[M]]
73//       CHECK:   scf.for {{.*}} to %[[K]]
74//   CHECK-DAG:     %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
75//   CHECK-DAG:     %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
76//   CHECK-DAG:     %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
77//   CHECK-DAG:     %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32>
78//   CHECK-DAG:     %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
79//       CHECK:     store %[[res]], %[[C]][%{{.*}}] : memref<?xf32>
80
81// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref<?xi8>,
82//  CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index
83//  CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index
84//       CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
85//       CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
86//       CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32>
87//       CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) {
88//       CHECKPARALLEL:   scf.for {{.*}} to %[[K]]
89//   CHECKPARALLEL-DAG:     %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
90//   CHECKPARALLEL-DAG:     %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
91//   CHECKPARALLEL-DAG:     %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
92//   CHECKPARALLEL-DAG:     %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32>
93//   CHECKPARALLEL-DAG:     %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
94//       CHECKPARALLEL:     store %[[res]], %[[C]][%{{.*}}] : memref<?xf32>
95
96// Dot product of two length-M f32 views, accumulated into a rank-0 view of the same i8 buffer.
97func.func @dot(%arg0: memref<?xi8>, %M: index) {
98  %shift = arith.constant 0 : index
99  %one = arith.constant 1 : index
100  %lhs = memref.view %arg0[%shift][%M] : memref<?xi8> to memref<?xf32>
101  %rhs = memref.view %arg0[%shift][%M] : memref<?xi8> to memref<?xf32>
102  %acc = memref.view %arg0[%shift][] : memref<?xi8> to memref<f32>
103  linalg.dot
104      ins(%lhs, %rhs : memref<?xf32>, memref<?xf32>) outs(%acc : memref<f32>)
105  return
106}
107// CHECK-LABEL: func @dot(%{{.*}}: memref<?xi8>,
108//  CHECK-SAME: [[K:arg[0-9]+]]: index
109//       CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
110//       CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
111//       CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
112//       CHECK: scf.for {{.*}} to %[[K]]
113//   CHECK-DAG:   %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32>
114//   CHECK-DAG:   %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
115//   CHECK-DAG:   %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
116//   CHECK-DAG:   %[[c:.*]] = memref.load %[[C]][] : memref<f32>
117//   CHECK-DAG:   %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
118//       CHECK:   store %[[res]], %[[C]][] : memref<f32>
119
120// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref<?xi8>,
121//  CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index
122//       CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
123//       CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32>
124//       CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32>
125//       CHECKPARALLEL: scf.for {{.*}} to %[[K]]
126//   CHECKPARALLEL-DAG:   %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32>
127//   CHECKPARALLEL-DAG:   %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32>
128//   CHECKPARALLEL-DAG:   %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
129//   CHECKPARALLEL-DAG:   %[[c:.*]] = memref.load %[[C]][] : memref<f32>
130//   CHECKPARALLEL-DAG:   %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
131//       CHECKPARALLEL:   store %[[res]], %[[C]][] : memref<f32>
132
133// Integer dot product; the expectations below look for arith.muli followed by arith.addi.
134func.func @dot_int(%lhs: memref<?xi32>, %rhs: memref<?xi32>,
135                   %acc: memref<i32>) {
136  // i32 element type: the lowering has to select the integer arith operations.
137  linalg.dot
138      ins(%lhs, %rhs : memref<?xi32>, memref<?xi32>) outs(%acc : memref<i32>)
139  return
140}
141// CHECK-LABEL: func @dot_int(
142//       CHECK:   %[[inc:.*]] = arith.muli {{.*}} : i32
143//  CHECK-NEXT:   %[[res:.*]] = arith.addi {{.*}}, %[[inc]] : i32
144//  CHECK-NEXT:   store %[[res]], {{.*}} : memref<i32>
145
146// Boolean dot product; the expectations below look for arith.andi followed by arith.ori.
147func.func @dot_bool(%lhs: memref<?xi1>, %rhs: memref<?xi1>,
148                    %acc: memref<i1>) {
149  // i1 element type: the lowering has to select the (saturating) boolean arith operations.
150  linalg.dot
151      ins(%lhs, %rhs : memref<?xi1>, memref<?xi1>) outs(%acc : memref<i1>)
152  return
153}
154// CHECK-LABEL: func @dot_bool(
155//       CHECK:   %[[inc:.*]] = arith.andi {{.*}} : i1
156//  CHECK-NEXT:   %[[res:.*]] = arith.ori {{.*}}, %[[inc]] : i1
157//  CHECK-NEXT:   store %[[res]], {{.*}} : memref<i1>
158
159// Dot product on strided 1-D views; the loop bound is expected to come from memref.dim of the first operand.
160func.func @dot_view(%arg0: memref<?xf32, strided<[1], offset: ?>>, %arg1: memref<?xf32, strided<[1], offset: ?>>, %arg2: memref<f32>) {
161  linalg.dot ins(%arg0, %arg1 : memref<?xf32, strided<[1], offset: ?>>,
162                                memref<?xf32, strided<[1], offset: ?>>)
163            outs(%arg2:  memref<f32>)
164  return
165}
166// CHECK-LABEL: func @dot_view(
167//       CHECK:   %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<f32>) {
168//       CHECK: %[[K:.*]] = memref.dim %[[ARG0]], %{{.*}} : memref<?xf32, strided<[1], offset: ?>>
169//       CHECK: scf.for {{.*}} to %[[K]]
170//   CHECK-DAG:   %[[a:.*]] = memref.load %[[ARG0]][%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
171//   CHECK-DAG:   %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
172//   CHECK-DAG:   %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
173//   CHECK-DAG:   %[[c:.*]] = memref.load %{{.*}}[] : memref<f32>
174//   CHECK-DAG:   %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
175//       CHECK:   store %[[res]], %{{.*}}[] : memref<f32>
176
177// CHECKPARALLEL-LABEL: func @dot_view(
178//       CHECKPARALLEL:   %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<f32>) {
179//       CHECKPARALLEL: %[[K:.*]] = memref.dim %[[ARG0]], %{{.*}} : memref<?xf32, strided<[1], offset: ?>>
180//       CHECKPARALLEL: scf.for {{.*}} to %[[K]]
181//   CHECKPARALLEL-DAG:   %[[a:.*]] = memref.load %[[ARG0]][%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
182//   CHECKPARALLEL-DAG:   %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
183//   CHECKPARALLEL-DAG:   %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32
184//   CHECKPARALLEL-DAG:   %[[c:.*]] = memref.load %{{.*}}[] : memref<f32>
185//   CHECKPARALLEL-DAG:   %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32
186//       CHECKPARALLEL:   store %[[res]], %{{.*}}[] : memref<f32>
187// Fills a strided 1-D view with a scalar; expected to become one loop containing one store.
188func.func @fill_view(%dst: memref<?xf32, strided<[1], offset: ?>>, %value: f32) {
189  linalg.fill ins(%value : f32) outs(%dst : memref<?xf32, strided<[1], offset: ?>>)
190  return
191}
192// CHECK-LABEL: func @fill_view(
193//       CHECK: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: f32) {
194//       CHECK:   scf.for {{.*}} to %{{.*}}
195//       CHECK:     store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
196
197// CHECKPARALLEL-LABEL: func @fill_view(
198//       CHECKPARALLEL: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: f32) {
199//       CHECKPARALLEL:   scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
200//       CHECKPARALLEL:     store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
201// Fills a rank-0 memref with a scalar; the expectations below contain only a store.
202func.func @fill_view0(%dst: memref<f32>, %value: f32) {
203  linalg.fill ins(%value : f32) outs(%dst : memref<f32>)
204  return
205}
206// CHECK-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) {
207//       CHECK:   store %{{.*}}, %{{.*}}[] : memref<f32>
208
209// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) {
210//       CHECKPARALLEL:   store %{{.*}}, %{{.*}}[] : memref<f32>
211// Fills a strided 3-D view with a scalar; expected to become a three-deep loop nest around one store.
212func.func @fill_view3(%dst: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %value: f32) {
213  linalg.fill ins(%value : f32) outs(%dst : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>)
214  return
215}
216// CHECK-LABEL: func @fill_view3(
217//       CHECK: %{{.*}}: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %{{.*}}: f32) {
218//       CHECK:   scf.for {{.*}} to %{{.*}}
219//       CHECK:     scf.for {{.*}} to %{{.*}}
220//       CHECK:       scf.for {{.*}} to %{{.*}}
221//       CHECK:         store %{{.*}}, {{.*}} : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
222
223// CHECKPARALLEL-LABEL: func @fill_view3(
224//       CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %{{.*}}: f32) {
225//       CHECKPARALLEL:   scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) {
226//       CHECKPARALLEL:     store %{{.*}}, {{.*}} : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
227// Element-wise copy between two strided 1-D views, written as a one-dimensional parallel generic.
228func.func @copy_view(%src: memref<?xf32, strided<[1], offset: ?>>, %dst: memref<?xf32, strided<[1], offset: ?>>) {
229  linalg.generic {
230    indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
231    iterator_types = ["parallel"] }
232    ins(%src : memref<?xf32, strided<[1], offset: ?>>)
233    outs(%dst : memref<?xf32, strided<[1], offset: ?>>) {
234    ^bb0(%in: f32, %out: f32):
235      linalg.yield %in : f32
236  }
237  return
238}
239// CHECK-LABEL: func @copy_view(
240//       CHECK: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>) {
241//       CHECK:   scf.for {{.*}} to %{{.*}}
242//       CHECK:     %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
243//       CHECK:     store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
244
245// CHECKPARALLEL-LABEL: func @copy_view(
246//       CHECKPARALLEL: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>) {
247//       CHECKPARALLEL:   scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
248//       CHECKPARALLEL:     %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
249//       CHECKPARALLEL:     store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>>
250// Three parallel dimensions, one 2-D input and two 3-D outputs; the second output is indexed with j and k swapped.
251#accesses = [
252  affine_map<(i, j, k) -> (i, j)>,
253  affine_map<(i, j, k) -> (i, j, k)>,
254  affine_map<(i, j, k) -> (i, k, j)>
255]
256#trait2 = {
257  indexing_maps = #accesses,
258  iterator_types = ["parallel", "parallel", "parallel"],
259  doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))",
260  library_call = "some_external_function_name_2"
261}
262func.func @generic_region(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>, %B: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %C: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) {
263  linalg.generic #trait2
264    ins(%A : memref<?x?xf32, strided<[?, 1], offset: ?>>)
265    outs(%B, %C : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>,
266                  memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) {
267    ^bb0(%a: f32, %b: f32, %c: f32):
268      %prod = arith.mulf %a, %b : f32
269      %sum = arith.addf %c, %prod : f32
270      linalg.yield %prod, %sum : f32, f32
271  }
272  return
273}
274// CHECK-LABEL: @generic_region
275//       CHECK: scf.for %[[i:.*]] = {{.*}}
276//       CHECK:   scf.for %[[j:.*]] = {{.*}}
277//       CHECK:     scf.for %[[k:.*]] = {{.*}}
278//       CHECK:       %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, strided<[?, 1], offset: ?>>
279//       CHECK:       %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
280//       CHECK:       %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
281//       CHECK:       %[[d:.*]] = arith.mulf %[[a]], %[[b]] : f32
282//       CHECK:       %[[e:.*]] = arith.addf %[[c]], %[[d]] : f32
283//       CHECK:       store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
284//       CHECK:       store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
285
286// CHECKPARALLEL-LABEL: @generic_region
287//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
288//       CHECKPARALLEL:   %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, strided<[?, 1], offset: ?>>
289//       CHECKPARALLEL:   %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
290//       CHECKPARALLEL:   %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
291//       CHECKPARALLEL:   %[[d:.*]] = arith.mulf %[[a]], %[[b]] : f32
292//       CHECKPARALLEL:   %[[e:.*]] = arith.addf %[[c]], %[[d]] : f32
293//       CHECKPARALLEL:   store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
294//       CHECKPARALLEL:   store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>
295// Same access pattern as #trait2, but the region also reads the iteration indices through linalg.index.
296#trait4 = {
297  iterator_types = ["parallel", "parallel", "parallel"],
298  indexing_maps = #accesses,
299  library_call = "some_external_function_name_2",
300  doc = "B(i,j,k), C(i,k,j) = foo(A(i, j) * B(i,j,k), i + j + k + C(i,k,j))"
301}
302func.func @generic_index_region(
303        %arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>,
304        %arg1: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>,
305        %arg2: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) {
306  linalg.generic #trait4
307      ins(%arg0 : memref<?x?xf32, strided<[?, 1], offset: ?>>)
308     outs(%arg1, %arg2 : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>,
309                         memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) {
310    ^bb0(%a: f32, %b: f32, %c: f32):
311      %i = linalg.index 0 : index
312      %j = linalg.index 1 : index
313      %k = linalg.index 2 : index
314      %result_1 = arith.mulf %a, %b : f32
315      // Sum (not product) of the three iteration indices, converted to f32.
316      %ij = arith.addi %i, %j : index
317      %ijk = arith.addi %ij, %k : index
318      %ijk_int = arith.index_cast %ijk : index to i32
319      %ijk_float = arith.sitofp %ijk_int : i32 to f32
320
321      %result_2 = arith.addf %c, %ijk_float : f32
322      linalg.yield %result_1, %result_2 : f32, f32
323  }
324  return
325}
326
327// CHECK-LABEL: @generic_index_region
328//       CHECK: scf.for %[[i:.*]] = {{.*}}
329//       CHECK:   scf.for %[[j:.*]] = {{.*}}
330//       CHECK:     scf.for %[[k:.*]] = {{.*}}
331//       CHECK:       %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]]
332//       CHECK:       %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]]
333//       CHECK:       %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]]
334//       CHECK:       %[[result_1:.*]] = arith.mulf %[[a]], %[[b]] : f32
335//       CHECK:       %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index
336//       CHECK:       %[[ijk:.*]] = arith.addi %[[ij]], %[[k]] : index
337//       CHECK:       %[[ijk_int:.*]] = arith.index_cast %[[ijk]] : index to i32
338//       CHECK:       %[[ijk_float:.*]] = arith.sitofp %[[ijk_int]] : i32 to f32
339//       CHECK:       %[[result_2:.*]] = arith.addf %[[c]], %[[ijk_float]] : f32
340//       CHECK:       store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
341//       CHECK:       store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
342
343// CHECKPARALLEL-LABEL: @generic_index_region
344//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
345//       CHECKPARALLEL:   %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]]
346//       CHECKPARALLEL:   %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]]
347//       CHECKPARALLEL:   %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]]
348//       CHECKPARALLEL:   %[[result_1:.*]] = arith.mulf %[[a]], %[[b]] : f32
349//       CHECKPARALLEL:   %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index
350//       CHECKPARALLEL:   %[[ijk:.*]] = arith.addi %[[ij]], %[[k]] : index
351//       CHECKPARALLEL:   %[[ijk_int:.*]] = arith.index_cast %[[ijk]] : index to i32
352//       CHECKPARALLEL:   %[[ijk_float:.*]] = arith.sitofp %[[ijk_int]] : i32 to f32
353//       CHECKPARALLEL:   %[[result_2:.*]] = arith.addf %[[c]], %[[ijk_float]] : f32
354//       CHECKPARALLEL:   store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]]
355//       CHECKPARALLEL:   store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
356
357// -----
358// Broadcast trait: an input with an empty access map is written to every (i, j) element of the output.
359#broadcast_access = [
360  affine_map<(d0, d1) -> ()>,
361  affine_map<(d0, d1) -> (d0, d1)>
362]
363
364#trait_broadcast = {
365  iterator_types = ["parallel", "parallel"],
366  indexing_maps = #broadcast_access,
367  library_call = "some_broadcast_external_fn"
368}
369// Rank-0 memref input broadcast into a 3x4 memref.
370func.func @generic_op_zero_rank(%src: memref<f32>,
371                                %dst: memref<3x4xf32>) {
372  linalg.generic #trait_broadcast
373      ins(%src : memref<f32>)
374      outs(%dst : memref<3x4xf32>) {
375    ^bb0(%in: f32, %out: f32):
376      linalg.yield %in : f32
377  }
378  return
379}
380
381// CHECK-LABEL: @generic_op_zero_rank
382//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
383//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
384//       CHECK: scf.for %[[i:.*]] = {{.*}}
385//       CHECK:   scf.for %[[j:.*]] = {{.*}}
386//       CHECK:     %[[a:.*]] = memref.load %[[ARG0]][]
387//       CHECK:     store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
388
389// CHECKPARALLEL-LABEL: @generic_op_zero_rank
390//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
391//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
392//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
393//       CHECKPARALLEL:   %[[a:.*]] = memref.load %[[ARG0]][]
394//       CHECKPARALLEL:   store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
395// Scalar f32 input broadcast into a 3x4 memref; the expectations store the function argument directly.
396func.func @generic_op_scalar(%value: f32,
397                             %dst: memref<3x4xf32>) {
398  linalg.generic #trait_broadcast
399      ins(%value : f32)
400      outs(%dst : memref<3x4xf32>) {
401    ^bb0(%in: f32, %out: f32):
402      linalg.yield %in : f32
403  }
404  return
405}
406
407// CHECK-LABEL: @generic_op_scalar
408//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: f32
409//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
410//       CHECK: scf.for %[[i:.*]] = {{.*}}
411//       CHECK:   scf.for %[[j:.*]] = {{.*}}
412//       CHECK:     store %[[ARG0]], %[[ARG1]][%[[i]], %[[j]]]
413
414// CHECKPARALLEL-LABEL: @generic_op_scalar
415//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: f32
416//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
417//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
418//       CHECKPARALLEL:   store %[[ARG0]], %[[ARG1]][%[[i]], %[[j]]]
419// Rank-0 i32 input broadcast into a 3x4 memref, with i + j added to every element through linalg.index.
420func.func @generic_index_op_zero_rank(%arg0: memref<i32>, %arg1: memref<3x4xi32>)
421{
422  linalg.generic #trait_broadcast
423      ins(%arg0 : memref<i32>)
424     outs(%arg1 : memref<3x4xi32>) {
425    ^bb(%a: i32, %b: i32) :
426      %i = linalg.index 0 : index
427      %j = linalg.index 1 : index
428      %ij = arith.addi %i, %j : index
429      %ij_int = arith.index_cast %ij : index to i32
430      %result = arith.addi %a, %ij_int : i32
431      linalg.yield %result : i32
432  }
433  return
434}
435
436// CHECK-LABEL: @generic_index_op_zero_rank
437//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
438//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
439//       CHECK: scf.for %[[i:.*]] = {{.*}}
440//       CHECK:   scf.for %[[j:.*]] = {{.*}}
441//       CHECK:     %[[a:.*]] = memref.load %[[ARG0]][]
442//       CHECK:     %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index
443//       CHECK:     %[[ij_int:.*]] = arith.index_cast %[[ij]] : index to i32
444//       CHECK:     %[[result:.*]] = arith.addi %[[a]], %[[ij_int]] : i32
445//       CHECK:     store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
446
447// CHECKPARALLEL-LABEL: @generic_index_op_zero_rank
448//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32>
449//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
450//       CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
451//       CHECKPARALLEL:   %[[a:.*]] = memref.load %[[ARG0]][]
452//       CHECKPARALLEL:   %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index
453//       CHECKPARALLEL:   %[[ij_int:.*]] = arith.index_cast %[[ij]] : index to i32
454//       CHECKPARALLEL:   %[[result:.*]] = arith.addi %[[a]], %[[ij_int]] : i32
455//       CHECKPARALLEL:   store %[[result]], %[[ARG1]][%[[i]], %[[j]]]
456// 1-D sum reduction into a rank-0 memref; both pipelines are expected to emit a sequential scf.for.
457#reduce_1D_access = [
458  affine_map<(d0) -> (d0)>,
459  affine_map<(d0) -> ()>
460]
461
462#trait_reduce_1D = {
463  iterator_types = ["reduction"],
464  indexing_maps = #reduce_1D_access,
465  library_call = "some_reduce_external_fn"
466}
467
468func.func @generic_op_1D_reduce(%data: memref<?xf32>,
469                                %acc: memref<f32>) {
470  linalg.generic #trait_reduce_1D
471      ins(%data : memref<?xf32>)
472      outs(%acc : memref<f32>) {
473    ^bb0(%elem: f32, %running: f32):
474      %sum = arith.addf %elem, %running : f32
475      linalg.yield %sum : f32
476  }
477  return
478}
479// CHECK-LABEL: @generic_op_1D_reduce
480//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
481//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
482//       CHECK: scf.for %[[i:.*]] = {{.*}}
483//       CHECK:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
484//       CHECK:   %[[b:.*]] = memref.load %[[ARG1]][]
485//       CHECK:   %[[c:.*]] = arith.addf %[[a]], %[[b]] : f32
486//       CHECK:   store %[[c]], %[[ARG1]][]
487
488// CHECKPARALLEL-LABEL: @generic_op_1D_reduce
489//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
490//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
491//       CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
492//       CHECKPARALLEL:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
493//       CHECKPARALLEL:   %[[b:.*]] = memref.load %[[ARG1]][]
494//       CHECKPARALLEL:   %[[c:.*]] = arith.addf %[[a]], %[[b]] : f32
495//       CHECKPARALLEL:   store %[[c]], %[[ARG1]][]
496
497// 1-D reduction with a separate rank-0 init operand, selected when linalg.index equals zero.
498#reduce_init_1D_access = [
499  affine_map<(d0) -> (d0)>,
500  affine_map<(d0) -> ()>,
501  affine_map<(d0) -> ()>
502]
503
504#trait_reduce_init_1D = {
505  iterator_types = ["reduction"],
506  indexing_maps = #reduce_init_1D_access,
507  library_call = "some_reduce_external_fn"
508}
509
510func.func @generic_index_op_1D_reduce(
511    %data: memref<?xf32>,
512    %init: memref<f32>,
513    %acc: memref<f32>) {
514  linalg.generic #trait_reduce_init_1D
515      ins(%data, %init : memref<?xf32>, memref<f32>)
516      outs(%acc : memref<f32>) {
517    ^bb0(%elem: f32, %seed: f32, %running: f32):
518      %idx = linalg.index 0 : index
519      %zero = arith.constant 0 : index
520      %is_first = arith.cmpi eq, %zero, %idx : index
521      %base = arith.select %is_first, %seed, %running : f32
522      %sum = arith.addf %elem, %base : f32
523      linalg.yield %sum : f32
524  }
525  return
526}
527// CHECK-LABEL: @generic_index_op_1D_reduce
528//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
529//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
530//  CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
531//       CHECK: scf.for %[[i:.*]] = {{.*}}
532//       CHECK:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
533//       CHECK:   %[[b:.*]] = memref.load %[[ARG1]][]
534//       CHECK:   %[[c:.*]] = memref.load %[[ARG2]][]
535//       CHECK:   %[[d:.*]] = arith.select %{{.*}}, %[[b]], %[[c]]
536//       CHECK:   %[[e:.*]] = arith.addf %[[a]], %[[d]]
537//       CHECK:   store %[[e]], %[[ARG2]][]
538
539// CHECKPARALLEL-LABEL: @generic_index_op_1D_reduce
540//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32>
541//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
542//  CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
543//       CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
544//       CHECKPARALLEL:   %[[a:.*]] = memref.load %[[ARG0]][%[[i]]]
545//       CHECKPARALLEL:   %[[b:.*]] = memref.load %[[ARG1]][]
546//       CHECKPARALLEL:   %[[c:.*]] = memref.load %[[ARG2]][]
547//       CHECKPARALLEL:   %[[d:.*]] = arith.select %{{.*}}, %[[b]], %[[c]]
548//       CHECKPARALLEL:   %[[e:.*]] = arith.addf %[[a]], %[[d]]
549//       CHECKPARALLEL:   store %[[e]], %[[ARG2]][]
550// Output-only generic whose region yields a constant defined outside the region.
551#trait_const_fill = {
552  iterator_types = ["parallel"],
553  indexing_maps = [affine_map<(d0) -> (d0)>],
554  library_call = "some_external_fn"
555}
556func.func @generic_const_init(%dst: memref<?xf32>) {
557  %one = arith.constant 1.000000e+00 : f32
558  linalg.generic #trait_const_fill outs(%dst : memref<?xf32>) {
559    ^bb0(%out: f32):
560      linalg.yield %one : f32
561  }
562  return
563}
564// CHECK-LABEL: @generic_const_init
565//  CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>
566//       CHECK: %[[CONST:.*]] = arith.constant 1.000000e+00 : f32
567//       CHECK: scf.for %[[i:.*]] = {{.*}}
568//       CHECK:   store %[[CONST]], %[[ARG0]]
569
570// CHECKPARALLEL-LABEL: @generic_const_init
571//  CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref<?xf32>
572//       CHECKPARALLEL: %[[CONST:.*]] = arith.constant 1.000000e+00 : f32
573//       CHECKPARALLEL: scf.parallel (%[[i:.*]])
574//       CHECKPARALLEL:   store %[[CONST]], %[[ARG0]]
575// Zero-dimensional generic on rank-0 memrefs with an scf.if in the region; no loop is expected in the output.
576#scalar_access = [
577  affine_map<() -> ()>,
578  affine_map<() -> ()>,
579  affine_map<() -> ()>
580]
581#scalar_trait = {
582  indexing_maps = #scalar_access,
583  iterator_types = [],
584  library_call = "some_external_fn"
585}
586func.func @scalar_code(%in0: memref<f32>, %in1: memref<f32>,
587                       %dst: memref<f32>, %cond: i1) {
588  linalg.generic #scalar_trait
589    ins(%in0, %in1 : memref<f32>, memref<f32>)
590    outs(%dst : memref<f32>) {
591  ^bb0(%lhs: f32, %rhs: f32, %out: f32):
592    %picked = scf.if %cond -> (f32) {
593      scf.yield %lhs : f32
594    } else {
595      scf.yield %rhs : f32
596    }
597    linalg.yield %picked : f32
598  }
599  return
600}
601// CHECK-LABEL: @scalar_code
602//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
603//  CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
604//  CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
605//   CHECK-NOT: scf.for
606//       CHECK: memref.load %[[ARG0]][]
607//       CHECK: memref.load %[[ARG1]][]
608//       CHECK: scf.if
609//       CHECK: scf.yield
610//       CHECK: else
611//       CHECK: scf.yield
612//       CHECK: store %{{.*}}, %[[ARG2]][]
613
614// CHECKPARALLEL-LABEL: @scalar_code
615//  CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32>
616//  CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32>
617//  CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32>
618//   CHECKPARALLEL-NOT: scf.for
619//       CHECKPARALLEL: memref.load %[[ARG0]][]
620//       CHECKPARALLEL: memref.load %[[ARG1]][]
621//       CHECKPARALLEL: scf.if
622//       CHECKPARALLEL: scf.yield
623//       CHECKPARALLEL: else
624//       CHECKPARALLEL: scf.yield
625//       CHECKPARALLEL: store %{{.*}}, %[[ARG2]][]
626
627//----------------------------------------------------------------------------//
628// Named ops to loops.
629//----------------------------------------------------------------------------//
630func.func @batch_reduce_matmul_as_contract(
631    %lhs: memref<?x?x?xf32>, %rhs: memref<?x?x?xf32>, %acc: memref<?x?xf32>) {
632  linalg.contract  // Per the maps: acc(m, n) is combined from lhs(b, m, k) and rhs(b, k, n).
633      indexing_maps = [affine_map<(b, m, n, k) -> (b, m, k)>,
634                       affine_map<(b, m, n, k) -> (b, k, n)>,
635                       affine_map<(b, m, n, k) -> (m, n)>]
636      ins(%lhs, %rhs : memref<?x?x?xf32>, memref<?x?x?xf32>)
637      outs(%acc : memref<?x?xf32>)
638  return
639}
640// CHECK-LABEL: @batch_reduce_matmul_as_contract
641//  CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
642//  CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
643//  CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?xf32>
644//       CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
645//       CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
646//       CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
647//       CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
648//       CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]]
649//       CHECK:   scf.for %[[m:.*]] = %{{.*}} to %[[M]]
650//       CHECK:     scf.for %[[n:.*]] = %{{.*}} to %[[N]]
651//       CHECK:       scf.for %[[k:.*]] = %{{.*}} to %[[K]]
652//       CHECK:       %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
653//       CHECK:       %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
654//       CHECK:       %[[vc:.*]] = memref.load %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32>
655//       CHECK:       %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32
656//       CHECK:       %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
657//       CHECK:       store %[[res]], %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32>
658
659// CHECKPARALLEL-LABEL: @batch_reduce_matmul_as_contract
660//  CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
661//  CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
662//  CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?xf32>
663//       CHECKPARALLEL: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
664//       CHECKPARALLEL: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
665//       CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
666//       CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
667//       CHECKPARALLEL: scf.for %[[b:.*]] = %{{.*}} to %[[B]]
668//       CHECKPARALLEL:   scf.parallel (%[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[M]], %[[N]]) step ({{.*}}) {
669//       CHECKPARALLEL:     scf.for %[[k:.*]] = %{{.*}} to %[[K]]
670//       CHECKPARALLEL:         %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
671//       CHECKPARALLEL:         %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
672//       CHECKPARALLEL:         %[[vc:.*]] = memref.load %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32>
673//       CHECKPARALLEL:         %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32
674//       CHECKPARALLEL:         %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
675//       CHECKPARALLEL:         store %[[res]], %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32>
676
// Test input: the named op linalg.batch_matmul on fully dynamic 3-D f32
// memrefs. The directives following this function expect loop bounds taken
// from memref.dim of %A (dims 0, 1, 2) and %B (dim 2): a sequential
// (b, m, n, k) nest for the loops lowering, and an scf.parallel over
// (b, m, n) with an inner scf.for over k for the parallel-loops lowering.
677func.func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?x?xf32>) {
678  linalg.batch_matmul ins(%A, %B : memref<?x?x?xf32>, memref<?x?x?xf32>)
679                     outs(%C : memref<?x?x?xf32>)
680  return
681}
682// CHECK-LABEL: @named_batch_matmul
683//  CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
684//  CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
685//  CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
686//       CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
687//       CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
688//       CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
689//       CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
690//       CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]]
691//       CHECK:   scf.for %[[m:.*]] = %{{.*}} to %[[M]]
692//       CHECK:     scf.for %[[n:.*]] = %{{.*}} to %[[N]]
693//       CHECK:       scf.for %[[k:.*]] = %{{.*}} to %[[K]]
694//       CHECK:       %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
695//       CHECK:       %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
696//       CHECK:       %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
697//       CHECK:       %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32
698//       CHECK:       %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
699//       CHECK:       store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
700
701// CHECKPARALLEL-LABEL: @named_batch_matmul
702//  CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
703//  CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
704//  CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
705//       CHECKPARALLEL: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32>
706//       CHECKPARALLEL: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
707//       CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
708//       CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
709//       CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
710//       CHECKPARALLEL:   scf.for %[[k:.*]] = %{{.*}} to %[[K]]
711//       CHECKPARALLEL:       %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
712//       CHECKPARALLEL:       %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
713//       CHECKPARALLEL:       %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
714//       CHECKPARALLEL:       %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32
715//       CHECKPARALLEL:       %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
716//       CHECKPARALLEL:       store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
717
718
// Test input: linalg.conv_1d on dynamic 1-D f32 memrefs (input, filter,
// output). The directives following this function expect the outer loop to
// run over the output extent and the inner loop over the filter extent, with
// the input read at the index produced by the (d0 + d1) affine map declared
// at the top of the file, applied to the output and filter indices.
719func.func @conv1d_no_symbols(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>) -> () {
720  linalg.conv_1d ins(%in, %filter : memref<?xf32>, memref<?xf32>)
721                outs(%out : memref<?xf32>)
722  return
723}
724
725// CHECK-LABEL: @conv1d_no_symbols
726//  CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
727//  CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
728//  CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
729//   CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
730//   CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
731//       CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32>
732//       CHECK: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32>
733//       CHECK: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
734//       CHECK:   scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
735//       CHECK:     %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
736//       CHECK:     %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32>
737//       CHECK:     %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32>
738//       CHECK:     %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32>
739//       CHECK:     %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32
740//       CHECK:     %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
741//       CHECK:     store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32>
742
743// CHECKPARALLEL-LABEL: @conv1d_no_symbols
744//  CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32>
745//  CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
746//  CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
747//   CHECKPARALLEL-DAG: %[[c0:.*]] = arith.constant 0 : index
748//   CHECKPARALLEL-DAG: %[[c1:.*]] = arith.constant 1 : index
749//       CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32>
750//       CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32>
751//       CHECKPARALLEL: scf.parallel (%[[b:.*]]) = (%[[c0]]) to (%[[dim1]]) step (%[[c1]]) {
752//       CHECKPARALLEL:   scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
753//       CHECKPARALLEL:     %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]])
754//       CHECKPARALLEL:     %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32>
755//       CHECKPARALLEL:     %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32>
756//       CHECKPARALLEL:     %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32>
757//       CHECKPARALLEL:     %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32
758//       CHECKPARALLEL:     %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
759//       CHECKPARALLEL:     store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32>
760
761
// Test input: linalg.conv_2d on dynamic 2-D f32 memrefs (input, filter,
// output). The directives following this function expect two outer loops
// over the output dims and two inner loops over the filter dims; each input
// coordinate is computed by applying the (d0 + d1) affine map to the matching
// output and filter indices.
762func.func @conv2d_no_symbols(%in : memref<?x?xf32>, %filter : memref<?x?xf32>, %out : memref<?x?xf32>) -> () {
763  linalg.conv_2d ins(%in, %filter : memref<?x?xf32>, memref<?x?xf32>)
764                outs(%out: memref<?x?xf32>)
765  return
766}
767// CHECK-LABEL: @conv2d_no_symbols
768//  CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
769//  CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32>
770//  CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
771//   CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
772//   CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
773//       CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32>
774//       CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32>
775//       CHECK: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32>
776//       CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32>
777//       CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
778//       CHECK:   scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
779//       CHECK:     scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
780//       CHECK:       scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
781//       CHECK:         %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
782//       CHECK:         %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
783//       CHECK:         %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32>
784
785//       CHECK:         %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32>
786//       CHECK:         %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
787
788//       CHECK:         %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32
789//       CHECK:         %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
790//       CHECK:         store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
791
792// CHECKPARALLEL-LABEL: @conv2d_no_symbols
793//  CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32>
794//  CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32>
795//  CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
796//   CHECKPARALLEL-DAG: %[[c0:.*]] = arith.constant 0 : index
797//   CHECKPARALLEL-DAG: %[[c1:.*]] = arith.constant 1 : index
798//       CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32>
799//       CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32>
800//       CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32>
801//       CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32>
802//       CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]]) step (%[[c1]], %[[c1]]) {
803//       CHECKPARALLEL:   scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
804//       CHECKPARALLEL:     scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
805//       CHECKPARALLEL:       %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]])
806//       CHECKPARALLEL:       %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]])
807//       CHECKPARALLEL:       %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32>
808//       CHECKPARALLEL:       %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32>
809//       CHECKPARALLEL:       %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
810//       CHECKPARALLEL:       %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32
811//       CHECKPARALLEL:       %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
812//       CHECKPARALLEL:       store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32>
813
814
// Test input: linalg.conv_3d on dynamic 3-D f32 memrefs (input, filter,
// output). The directives following this function expect three outer loops
// over the output dims and three inner loops over the filter dims; each input
// coordinate is computed by applying the (d0 + d1) affine map to the matching
// output and filter indices.
815func.func @conv3d_no_symbols(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %out : memref<?x?x?xf32>) -> () {
816  linalg.conv_3d ins(%in, %filter : memref<?x?x?xf32>, memref<?x?x?xf32>)
817                outs(%out : memref<?x?x?xf32>)
818  return
819}
820
821// CHECK-LABEL: @conv3d_no_symbols
822//  CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
823//  CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
824//  CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
825//       CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index
826//       CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
827//       CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
828//       CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
829//       CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
830//       CHECK: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
831//       CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
832//       CHECK: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
833//       CHECK: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
834//       CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] {
835//       CHECK:   scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] {
836//       CHECK:     scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] {
837//       CHECK:       scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
838//       CHECK:         scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
839//       CHECK:           scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
840//       CHECK:             %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
841//       CHECK:             %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
842//       CHECK:             %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
843//       CHECK:             %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32>
844
845//       CHECK:             %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32>
846//       CHECK:             %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
847
848//       CHECK:             %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32
849//       CHECK:             %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
850//       CHECK:             store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
851
852// CHECKPARALLEL-LABEL: @conv3d_no_symbols
853//  CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
854//  CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
855//  CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
856//       CHECKPARALLEL-DAG: %[[c2:.*]] = arith.constant 2 : index
857//       CHECKPARALLEL-DAG: %[[c0:.*]] = arith.constant 0 : index
858//       CHECKPARALLEL-DAG: %[[c1:.*]] = arith.constant 1 : index
859//       CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
860//       CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
861//       CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
862//       CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
863//       CHECKPARALLEL: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
864//       CHECKPARALLEL: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
865//       CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]]) step (%[[c1]], %[[c1]], %[[c1]]) {
866//       CHECKPARALLEL:   scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] {
867//       CHECKPARALLEL:     scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] {
868//       CHECKPARALLEL:       scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] {
869//       CHECKPARALLEL:         %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]])
870//       CHECKPARALLEL:         %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]])
871//       CHECKPARALLEL:         %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]])
872//       CHECKPARALLEL:         %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32>
873//       CHECKPARALLEL:         %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32>
874//       CHECKPARALLEL:         %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
875//       CHECKPARALLEL:         %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32
876//       CHECKPARALLEL:         %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32
877//       CHECKPARALLEL:         store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
878
879// -----
880
// Test input: a 1-D elementwise copy (linalg.generic with one parallel
// iterator and identity indexing maps) whose operands are memref.subview
// results with strided layouts. The directives following this function expect
// a single loop containing one memref.load and one memref.store that share
// the induction variable.
881func.func @lower_to_loops_with_rank_reducing_subviews(
882    %arg0 : memref<?xi32>, %arg1 : memref<?x?xi32>, %arg2 : index,
883    %arg3 : index, %arg4 : index) {
884  %0 = memref.subview %arg0[%arg2] [%arg3] [1]
885      : memref<?xi32> to memref<?xi32, strided<[1], offset: ?>>
  // Rank-reducing subview: a [1, %arg3] slice of the 2-D memref is given a
  // 1-D result type.
886  %1 = memref.subview %arg1[0, %arg4] [1, %arg3] [1, 1]
887      : memref<?x?xi32> to memref<?xi32, strided<[1], offset: ?>>
  // The payload yields the input element %a unchanged; %b is not used.
888  linalg.generic {
889    iterator_types = ["parallel"],
890    indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]}
891    ins(%0: memref<?xi32, strided<[1], offset: ?>>)
892   outs(%1: memref<?xi32, strided<[1], offset: ?>>) {
893    ^bb0(%a: i32, %b: i32):
894      linalg.yield %a : i32
895  }
896  return
897}
898// CHECK-LABEL: func @lower_to_loops_with_rank_reducing_subviews
899//       CHECK:   scf.for %[[IV:.+]] = %{{.+}} to %{{.+}} step %{{.+}} {
900//       CHECK:     %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]]
901//       CHECK:     memref.store %[[VAL]], %{{.+}}[%[[IV]]]
902//       CHECK:   }
903
904// CHECKPARALLEL-LABEL: func @lower_to_loops_with_rank_reducing_subviews
905//       CHECKPARALLEL:   scf.parallel (%[[IV:.+]]) = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
906//       CHECKPARALLEL:     %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]]
907//       CHECKPARALLEL:     memref.store %[[VAL]], %{{.+}}[%[[IV]]]
908//       CHECKPARALLEL:   }
909
910// -----
911
// Test input: linalg.transpose on 1-D dynamic f32 memrefs with
// permutation = [0]. The directives following this function expect a single
// loop bounded by memref.dim of the input that loads from the input and
// stores to the init operand at the same index (scf.for for the loops
// lowering, scf.parallel for the parallel-loops lowering).
912func.func @transpose(%input: memref<?xf32>,
913                     %init: memref<?xf32>) {
914  linalg.transpose ins(%input:memref<?xf32>)
915                   outs(%init:memref<?xf32>)
916                   permutation = [0]
917  return
918}
919// CHECK-LABEL:   func.func @transpose(
920// CHECK-SAME:                         %[[VAL_0:.*]]: memref<?xf32>,
921// CHECK-SAME:                         %[[VAL_1:.*]]: memref<?xf32>) {
922//      CHECK:      %[[VAL_2:.*]] = arith.constant 1 : index
923//      CHECK:      %[[VAL_3:.*]] = arith.constant 0 : index
924//      CHECK:      %[[VAL_4:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : memref<?xf32>
925//      CHECK:      scf.for %[[VAL_5:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_2]] {
926//      CHECK:        %[[VAL_6:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_5]]] : memref<?xf32>
927//      CHECK:        memref.store %[[VAL_6]], %[[VAL_1]]{{\[}}%[[VAL_5]]] : memref<?xf32>
928//      CHECK:      }
929//      CHECK:      return
930//      CHECK:    }
931
932// CHECKPARALLEL-LABEL:   func.func @transpose(
933// CHECKPARALLEL-SAME:                         %[[VAL_0:.*]]: memref<?xf32>,
934// CHECKPARALLEL-SAME:                         %[[VAL_1:.*]]: memref<?xf32>) {
935//      CHECKPARALLEL:      %[[VAL_2:.*]] = arith.constant 1 : index
936//      CHECKPARALLEL:      %[[VAL_3:.*]] = arith.constant 0 : index
937//      CHECKPARALLEL:      %[[VAL_4:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : memref<?xf32>
938//      CHECKPARALLEL:      scf.parallel (%[[VAL_5:.*]]) = (%[[VAL_3]]) to (%[[VAL_4]]) step (%[[VAL_2]]) {
939//      CHECKPARALLEL:        %[[VAL_6:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_5]]] : memref<?xf32>
940//      CHECKPARALLEL:        memref.store %[[VAL_6]], %[[VAL_1]]{{\[}}%[[VAL_5]]] : memref<?xf32>
941//      CHECKPARALLEL:        scf.reduce
942//      CHECKPARALLEL:      }
943//      CHECKPARALLEL:      return
944//      CHECKPARALLEL:    }
945