1// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck %s 2// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s 3 4// Test that we can lower all the way to LLVM without crashing, don't check results here. 5// RUN: mlir-opt %s -convert-linalg-to-loops -test-lower-to-llvm -o=/dev/null 2>&1 6 7// CHECK: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> 8 9// CHECKPARALLEL: #[[$stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> 10 11func.func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) { 12 %c0 = arith.constant 0 : index 13 %c1 = arith.constant 1 : index 14 %A = memref.view %arg0[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32> 15 %B = memref.view %arg0[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32> 16 %C = memref.view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32> 17 linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>) 18 outs(%C: memref<?x?xf32>) 19 return 20} 21// CHECK-LABEL: func @matmul(%{{.*}}: memref<?xi8>, 22// CHECK-SAME: [[M:arg[0-9]+]]: index 23// CHECK-SAME: [[N:arg[0-9]+]]: index 24// CHECK-SAME: [[K:arg[0-9]+]]: index 25// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 26// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 27// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 28// CHECK: scf.for {{.*}} to %[[M]] 29// CHECK: scf.for {{.*}} to %[[N]] 30// CHECK: scf.for {{.*}} to %[[K]] 31// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 32// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 33// CHECK-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 34// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 35// CHECK-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 36// CHECK: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 37 38// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref<?xi8>, 39// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index 40// CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index 41// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index 42// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 43// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 44// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 45// CHECKPARALLEL: scf.parallel {{.*}} to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { 46// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 47// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 48// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 49// CHECKPARALLEL-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 50// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 51// CHECKPARALLEL-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 52// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 53 54 55 56func.func @matvec(%arg0: memref<?xi8>, %M: index, %N: index) { 57 %c0 = arith.constant 0 : index 58 %c1 = arith.constant 1 : index 59 %2 = memref.view %arg0[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32> 60 %3 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32> 61 %4 = memref.view %arg0[%c0][%N] : memref<?xi8> to memref<?xf32> 62 linalg.matvec ins(%2, %3: memref<?x?xf32>, memref<?xf32>) 63 outs(%4 : memref<?xf32>) 64 return 65} 66// CHECK-LABEL: func @matvec(%{{.*}}: memref<?xi8>, 67// CHECK-SAME: [[M:arg[0-9]+]]: index 68// CHECK-SAME: [[K:arg[0-9]+]]: index 69// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 70// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 71// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 72// CHECK: scf.for {{.*}} to %[[M]] 73// CHECK: scf.for {{.*}} to %[[K]] 74// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 75// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 76// CHECK-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 77// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32> 78// CHECK-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 79// CHECK: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32> 80 81// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref<?xi8>, 82// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index 83// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index 84// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32> 85// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 86// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?xf32> 87// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { 88// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 89// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32> 90// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 91// CHECKPARALLEL-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 92// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref<?xf32> 93// CHECKPARALLEL-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 94// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}] : memref<?xf32> 95 96 97func.func @dot(%arg0: memref<?xi8>, %M: index) { 98 %c0 = arith.constant 0 : index 99 %c1 = arith.constant 1 : index 100 %1 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32> 101 %2 = memref.view %arg0[%c0][%M] : memref<?xi8> to memref<?xf32> 102 %3 = memref.view %arg0[%c0][] : memref<?xi8> to memref<f32> 103 linalg.dot ins(%1, %2 : memref<?xf32>, memref<?xf32>) 104 outs(%3 : memref<f32>) 105 return 106} 107// CHECK-LABEL: func @dot(%{{.*}}: memref<?xi8>, 108// CHECK-SAME: [[K:arg[0-9]+]]: index 109// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 110// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 111// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32> 112// CHECK: scf.for {{.*}} to %[[K]] 113// CHECK-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32> 114// CHECK-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 115// CHECK-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 116// CHECK-DAG: %[[c:.*]] = memref.load %[[C]][] : memref<f32> 117// CHECK-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 118// CHECK: store %[[res]], %[[C]][] : memref<f32> 119 120// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref<?xi8>, 121// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index 122// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 123// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref<?xi8> to memref<?xf32> 124// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref<?xi8> to memref<f32> 125// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 126// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref<?xf32> 127// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref<?xf32> 128// CHECKPARALLEL-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 129// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][] : memref<f32> 130// CHECKPARALLEL-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 131// CHECKPARALLEL: store %[[res]], %[[C]][] : memref<f32> 132 133 134func.func @dot_int(%arg0: memref<?xi32>, %arg1: memref<?xi32>, 135 %arg3: memref<i32>) { 136 // Verifies that we use the correct arith operations for integers. 137 linalg.dot ins(%arg0, %arg1 : memref<?xi32>, memref<?xi32>) 138 outs(%arg3 : memref<i32>) 139 return 140} 141// CHECK-LABEL: func @dot_int( 142// CHECK: %[[inc:.*]] = arith.muli {{.*}} : i32 143// CHECK-NEXT: %[[res:.*]] = arith.addi {{.*}}, %[[inc]] : i32 144// CHECK-NEXT: store %[[res]], {{.*}} : memref<i32> 145 146 147func.func @dot_bool(%arg0: memref<?xi1>, %arg1: memref<?xi1>, 148 %arg3: memref<i1>) { 149 // Verifies that we use the correct (saturating) arith operations for booleans. 150 linalg.dot ins(%arg0, %arg1 : memref<?xi1>, memref<?xi1>) 151 outs(%arg3 : memref<i1>) 152 return 153} 154// CHECK-LABEL: func @dot_bool( 155// CHECK: %[[inc:.*]] = arith.andi {{.*}} : i1 156// CHECK-NEXT: %[[res:.*]] = arith.ori {{.*}}, %[[inc]] : i1 157// CHECK-NEXT: store %[[res]], {{.*}} : memref<i1> 158 159 160func.func @dot_view(%arg0: memref<?xf32, strided<[1], offset: ?>>, %arg1: memref<?xf32, strided<[1], offset: ?>>, %arg2: memref<f32>) { 161 linalg.dot ins(%arg0, %arg1 : memref<?xf32, strided<[1], offset: ?>>, 162 memref<?xf32, strided<[1], offset: ?>>) 163 outs(%arg2: memref<f32>) 164 return 165} 166// CHECK-LABEL: func @dot_view( 167// CHECK: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<f32>) { 168// CHECK: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, strided<[1], offset: ?>> 169// CHECK: scf.for {{.*}} to %[[K]] 170// CHECK-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 171// CHECK-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 172// CHECK-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 173// CHECK-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref<f32> 174// CHECK-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 175// CHECK: store %[[res]], %{{.*}}[] : memref<f32> 176 177// CHECKPARALLEL-LABEL: func @dot_view( 178// CHECKPARALLEL: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<f32>) { 179// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c0 : memref<?xf32, strided<[1], offset: ?>> 180// CHECKPARALLEL: scf.for {{.*}} to %[[K]] 181// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 182// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 183// CHECKPARALLEL-DAG: %[[inc:.*]] = arith.mulf %[[a]], %[[b]] : f32 184// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref<f32> 185// CHECKPARALLEL-DAG: %[[res:.*]] = arith.addf %[[c]], %[[inc]] : f32 186// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref<f32> 187 188func.func @fill_view(%arg0: memref<?xf32, strided<[1], offset: ?>>, %arg1: f32) { 189 linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<?xf32, strided<[1], offset: ?>>) 190 return 191} 192// CHECK-LABEL: func @fill_view( 193// CHECK: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: f32) { 194// CHECK: scf.for {{.*}} to %{{.*}} 195// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 196 197// CHECKPARALLEL-LABEL: func @fill_view( 198// CHECKPARALLEL: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: f32) { 199// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 200// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 201 202func.func @fill_view0(%arg0: memref<f32>, %arg1: f32) { 203 linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<f32>) 204 return 205} 206// CHECK-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) { 207// CHECK: store %{{.*}}, %{{.*}}[] : memref<f32> 208 209// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref<f32>, %{{.*}}: f32) { 210// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref<f32> 211 212func.func @fill_view3(%arg0: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %arg1: f32) { 213 linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) 214 return 215} 216// CHECK-LABEL: func @fill_view3( 217// CHECK: %{{.*}}: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %{{.*}}: f32) { 218// CHECK: scf.for {{.*}} to %{{.*}} 219// CHECK: scf.for {{.*}} to %{{.*}} 220// CHECK: scf.for {{.*}} to %{{.*}} 221// CHECK: store %{{.*}}, {{.*}} : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 222 223// CHECKPARALLEL-LABEL: func @fill_view3( 224// CHECKPARALLEL: %{{.*}}: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %{{.*}}: f32) { 225// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { 226// CHECKPARALLEL: store %{{.*}}, {{.*}} : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 227 228func.func @copy_view(%arg0: memref<?xf32, strided<[1], offset: ?>>, %arg1: memref<?xf32, strided<[1], offset: ?>>) { 229 linalg.generic { 230 iterator_types = ["parallel"], 231 indexing_maps = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>] } 232 ins(%arg0: memref<?xf32, strided<[1], offset: ?>>) 233 outs(%arg1: memref<?xf32, strided<[1], offset: ?>>) { 234 ^bb0(%a: f32, %b: f32): 235 linalg.yield %a : f32 236 } 237 return 238} 239// CHECK-LABEL: func @copy_view( 240// CHECK: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>) { 241// CHECK: scf.for {{.*}} to %{{.*}} 242// CHECK: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 243// CHECK: store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 244 245// CHECKPARALLEL-LABEL: func @copy_view( 246// CHECKPARALLEL: %{{.*}}: memref<?xf32, strided<[1], offset: ?>>, %{{.*}}: memref<?xf32, strided<[1], offset: ?>>) { 247// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 248// CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 249// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref<?xf32, strided<[1], offset: ?>> 250 251#accesses = [ 252 affine_map<(i, j, k) -> (i, j)>, 253 affine_map<(i, j, k) -> (i, j, k)>, 254 affine_map<(i, j, k) -> (i, k, j)> 255] 256#trait2 = { 257 iterator_types = ["parallel", "parallel", "parallel"], 258 indexing_maps = #accesses, 259 library_call = "some_external_function_name_2", 260 doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))" 261} 262func.func @generic_region(%arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>, %arg1: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, %arg2: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) { 263 linalg.generic #trait2 264 ins(%arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>) 265 outs(%arg1, %arg2 : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, 266 memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) { 267 ^bb0(%a: f32, %b: f32, %c: f32): 268 %d = arith.mulf %a, %b : f32 269 %e = arith.addf %c, %d : f32 270 linalg.yield %d, %e : f32, f32 271 } 272 return 273} 274// CHECK-LABEL: @generic_region 275// CHECK: scf.for %[[i:.*]] = {{.*}} 276// CHECK: scf.for %[[j:.*]] = {{.*}} 277// CHECK: scf.for %[[k:.*]] = {{.*}} 278// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, strided<[?, 1], offset: ?>> 279// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 280// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 281// CHECK: %[[d:.*]] = arith.mulf %[[a]], %[[b]] : f32 282// CHECK: %[[e:.*]] = arith.addf %[[c]], %[[d]] : f32 283// CHECK: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 284// CHECK: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 285 286// CHECKPARALLEL-LABEL: @generic_region 287// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) 288// CHECKPARALLEL: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref<?x?xf32, strided<[?, 1], offset: ?>> 289// CHECKPARALLEL: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 290// CHECKPARALLEL: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 291// CHECKPARALLEL: %[[d:.*]] = arith.mulf %[[a]], %[[b]] : f32 292// CHECKPARALLEL: %[[e:.*]] = arith.addf %[[c]], %[[d]] : f32 293// CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 294// CHECKPARALLEL: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>> 295 296#trait4 = { 297 iterator_types = ["parallel", "parallel", "parallel"], 298 indexing_maps = #accesses, 299 library_call = "some_external_function_name_2", 300 doc = "B(i,j,k), C(i,k,j) = foo(A(i, j) * B(i,j,k), i * j * k + C(i,k,j))" 301} 302func.func @generic_index_region( 303 %arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>, 304 %arg1: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, 305 %arg2: memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) { 306 linalg.generic #trait4 307 ins(%arg0 : memref<?x?xf32, strided<[?, 1], offset: ?>>) 308 outs(%arg1, %arg2 : memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>, 309 memref<?x?x?xf32, strided<[?, ?, 1], offset: ?>>) { 310 ^bb0(%a: f32, %b: f32, %c: f32): 311 %i = linalg.index 0 : index 312 %j = linalg.index 1 : index 313 %k = linalg.index 2 : index 314 %result_1 = arith.mulf %a, %b : f32 315 316 %ij = arith.addi %i, %j : index 317 %ijk = arith.addi %ij, %k : index 318 %ijk_int = arith.index_cast %ijk : index to i32 319 %ijk_float = arith.sitofp %ijk_int : i32 to f32 320 321 %result_2 = arith.addf %c, %ijk_float : f32 322 linalg.yield %result_1, %result_2 : f32, f32 323 } 324 return 325} 326 327// CHECK-LABEL: @generic_index_region 328// CHECK: scf.for %[[i:.*]] = {{.*}} 329// CHECK: scf.for %[[j:.*]] = {{.*}} 330// CHECK: scf.for %[[k:.*]] = {{.*}} 331// CHECK: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] 332// CHECK: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] 333// CHECK: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] 334// CHECK: %[[result_1:.*]] = arith.mulf %[[a]], %[[b]] : f32 335// CHECK: %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index 336// CHECK: %[[ijk:.*]] = arith.addi %[[ij]], %[[k]] : index 337// CHECK: %[[ijk_int:.*]] = arith.index_cast %[[ijk]] : index to i32 338// CHECK: %[[ijk_float:.*]] = arith.sitofp %[[ijk_int]] : i32 to f32 339// CHECK: %[[result_2:.*]] = arith.addf %[[c]], %[[ijk_float]] : f32 340// CHECK: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] 341// CHECK: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] 342 343// CHECKPARALLEL-LABEL: @generic_index_region 344// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) 345// CHECKPARALLEL: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] 346// CHECKPARALLEL: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] 347// CHECKPARALLEL: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] 348// CHECKPARALLEL: %[[result_1:.*]] = arith.mulf %[[a]], %[[b]] : f32 349// CHECKPARALLEL: %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index 350// CHECKPARALLEL: %[[ijk:.*]] = arith.addi %[[ij]], %[[k]] : index 351// CHECKPARALLEL: %[[ijk_int:.*]] = arith.index_cast %[[ijk]] : index to i32 352// CHECKPARALLEL: %[[ijk_float:.*]] = arith.sitofp %[[ijk_int]] : i32 to f32 353// CHECKPARALLEL: %[[result_2:.*]] = arith.addf %[[c]], %[[ijk_float]] : f32 354// CHECKPARALLEL: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] 355// CHECKPARALLEL: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] 356 357// ----- 358 359#broadcast_access = [ 360 affine_map<(i, j) -> ()>, 361 affine_map<(i, j) -> (i, j)> 362] 363 364#trait_broadcast = { 365 indexing_maps = #broadcast_access, 366 iterator_types = ["parallel", "parallel"], 367 library_call = "some_broadcast_external_fn" 368} 369 370func.func @generic_op_zero_rank(%arg0: memref<f32>, %arg1: memref<3x4xf32>) 371{ 372 linalg.generic #trait_broadcast 373 ins(%arg0 : memref<f32>) 374 outs(%arg1 : memref<3x4xf32>) { 375 ^bb(%a: f32, %b: f32) : 376 linalg.yield %a : f32 377 } 378 return 379} 380 381// CHECK-LABEL: @generic_op_zero_rank 382// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32> 383// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 384// CHECK: scf.for %[[i:.*]] = {{.*}} 385// CHECK: scf.for %[[j:.*]] = {{.*}} 386// CHECK: %[[a:.*]] = memref.load %[[ARG0]][] 387// CHECK: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] 388 389// CHECKPARALLEL-LABEL: @generic_op_zero_rank 390// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32> 391// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 392// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) 393// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][] 394// CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] 395 396func.func @generic_op_scalar(%arg0: f32, %arg1: memref<3x4xf32>) 397{ 398 linalg.generic #trait_broadcast 399 ins(%arg0 : f32) 400 outs(%arg1 : memref<3x4xf32>) { 401 ^bb(%a: f32, %b: f32) : 402 linalg.yield %a : f32 403 } 404 return 405} 406 407// CHECK-LABEL: @generic_op_scalar 408// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: f32 409// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 410// CHECK: scf.for %[[i:.*]] = {{.*}} 411// CHECK: scf.for %[[j:.*]] = {{.*}} 412// CHECK: store %[[ARG0]], %[[ARG1]][%[[i]], %[[j]]] 413 414// CHECKPARALLEL-LABEL: @generic_op_scalar 415// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: f32 416// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> 417// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) 418// CHECKPARALLEL: store %[[ARG0]], %[[ARG1]][%[[i]], %[[j]]] 419 420func.func @generic_index_op_zero_rank(%arg0: memref<i32>, %arg1: memref<3x4xi32>) 421{ 422 linalg.generic #trait_broadcast 423 ins(%arg0 : memref<i32>) 424 outs(%arg1 : memref<3x4xi32>) { 425 ^bb(%a: i32, %b: i32) : 426 %i = linalg.index 0 : index 427 %j = linalg.index 1 : index 428 %ij = arith.addi %i, %j : index 429 %ij_int = arith.index_cast %ij : index to i32 430 %result = arith.addi %a, %ij_int : i32 431 linalg.yield %result : i32 432 } 433 return 434} 435 436// CHECK-LABEL: @generic_index_op_zero_rank 437// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32> 438// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> 439// CHECK: scf.for %[[i:.*]] = {{.*}} 440// CHECK: scf.for %[[j:.*]] = {{.*}} 441// CHECK: %[[a:.*]] = memref.load %[[ARG0]][ 442// CHECK: %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index 443// CHECK: %[[ij_int:.*]] = arith.index_cast %[[ij]] : index to i32 444// CHECK: %[[result:.*]] = arith.addi %[[a]], %[[ij_int]] : i32 445// CHECK: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] 446 447// CHECKPARALLEL-LABEL: @generic_index_op_zero_rank 448// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<i32> 449// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> 450// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) 451// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][ 452// CHECKPARALLEL: %[[ij:.*]] = arith.addi %[[i]], %[[j]] : index 453// CHECKPARALLEL: %[[ij_int:.*]] = arith.index_cast %[[ij]] : index to i32 454// CHECKPARALLEL: %[[result:.*]] = arith.addi %[[a]], %[[ij_int]] : i32 455// CHECKPARALLEL: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] 456 457#reduce_1D_access = [ 458 affine_map<(i) -> (i)>, 459 affine_map<(i) -> ()> 460] 461 462#trait_reduce_1D = { 463 indexing_maps = #reduce_1D_access, 464 iterator_types = ["reduction"], 465 library_call = "some_reduce_external_fn" 466} 467 468func.func @generic_op_1D_reduce(%arg0: memref<?xf32>, %arg1: memref<f32>) 469{ 470 linalg.generic #trait_reduce_1D 471 ins(%arg0 : memref<?xf32>) 472 outs(%arg1 : memref<f32>) { 473 ^bb(%a: f32, %b: f32) : 474 %0 = arith.addf %a, %b : f32 475 linalg.yield %0 : f32 476 } 477 return 478} 479// CHECK-LABEL: @generic_op_1D_reduce 480// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32> 481// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32> 482// CHECK: scf.for %[[i:.*]] = {{.*}} 483// CHECK: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] 484// CHECK: %[[b:.*]] = memref.load %[[ARG1]][] 485// CHECK: %[[c:.*]] = arith.addf %[[a]], %[[b]] : f32 486// CHECK: store %[[c]], %[[ARG1]][] 487 488// CHECKPARALLEL-LABEL: @generic_op_1D_reduce 489// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32> 490// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32> 491// CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}} 492// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] 493// CHECKPARALLEL: %[[b:.*]] = memref.load %[[ARG1]][] 494// CHECKPARALLEL: %[[c:.*]] = arith.addf %[[a]], %[[b]] : f32 495// CHECKPARALLEL: store %[[c]], %[[ARG1]][] 496 497 498#reduce_init_1D_access = [ 499 affine_map<(i) -> (i)>, 500 affine_map<(i) -> ()>, 501 affine_map<(i) -> ()> 502] 503 504#trait_reduce_init_1D = { 505 indexing_maps = #reduce_init_1D_access, 506 iterator_types = ["reduction"], 507 library_call = "some_reduce_external_fn" 508} 509 510func.func @generic_index_op_1D_reduce(%arg0: memref<?xf32>, 511 %arg1: memref<f32>, 512 %arg2: memref<f32>) 513{ 514 linalg.generic #trait_reduce_init_1D 515 ins(%arg0, %arg1 : memref<?xf32>, memref<f32>) 516 outs(%arg2 : memref<f32>) { 517 ^bb(%a: f32, %b: f32, %c: f32) : 518 %i = linalg.index 0 : index 519 %0 = arith.constant 0 : index 520 %1 = arith.cmpi eq, %0, %i : index 521 %2 = arith.select %1, %b, %c : f32 522 %3 = arith.addf %a, %2 : f32 523 linalg.yield %3 : f32 524 } 525 return 526} 527// CHECK-LABEL: @generic_index_op_1D_reduce 528// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32> 529// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32> 530// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32> 531// CHECK: scf.for %[[i:.*]] = {{.*}} 532// CHECK: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] 533// CHECK: %[[b:.*]] = memref.load %[[ARG1]][] 534// CHECK: %[[c:.*]] = memref.load %[[ARG2]][] 535// CHECK: %[[d:.*]] = arith.select %{{.*}}, %[[b]], %[[c]] 536// CHECK: %[[e:.*]] = arith.addf %[[a]], %[[d]] 537// CHECK: store %[[e]], %[[ARG2]][] 538 539// CHECKPARALLEL-LABEL: @generic_index_op_1D_reduce 540// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?xf32> 541// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32> 542// CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32> 543// CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}} 544// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] 545// CHECKPARALLEL: %[[b:.*]] = memref.load %[[ARG1]][] 546// CHECKPARALLEL: %[[c:.*]] = memref.load %[[ARG2]][] 547// CHECKPARALLEL: %[[d:.*]] = arith.select %{{.*}}, %[[b]], %[[c]] 548// CHECKPARALLEL: %[[e:.*]] = arith.addf %[[a]], %[[d]] 549// CHECKPARALLEL: store %[[e]], %[[ARG2]][] 550 551#trait_const_fill = { 552 indexing_maps = [affine_map<(i) -> (i)>], 553 iterator_types = ["parallel"], 554 library_call = "some_external_fn" 555} 556func.func @generic_const_init(%arg0: memref<?xf32>) { 557 %cst = arith.constant 1.0 : f32 558 linalg.generic #trait_const_fill outs(%arg0 : memref<?xf32>) { 559 ^bb0(%arg1: f32): 560 linalg.yield %cst : f32 561 } 562 return 563} 564// CHECK-LABEL: @generic_const_init 565// CHECK-SAME: %[[ARG0:.*]]: memref<?xf32> 566// CHECK: %[[CONST:.*]] = arith.constant 1.000000e+00 : f32 567// CHECK: scf.for %[[i:.*]] = {{.*}} 568// CHECK: store %[[CONST]], %[[ARG0]] 569 570// CHECKPARALLEL-LABEL: @generic_const_init 571// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref<?xf32> 572// CHECKPARALLEL: %[[CONST:.*]] = arith.constant 1.000000e+00 : f32 573// CHECKPARALLEL: scf.parallel (%[[i:.*]]) 574// CHECKPARALLEL: store %[[CONST]], %[[ARG0]] 575 576#scalar_access = [ 577 affine_map<() -> ()>, 578 affine_map<() -> ()>, 579 affine_map<() -> ()> 580] 581#scalar_trait = { 582 iterator_types = [], 583 indexing_maps = #scalar_access, 584 library_call = "some_external_fn" 585} 586func.func @scalar_code(%arg0: memref<f32>, %arg1 : memref<f32>, %arg2 : memref<f32>, %arg3 : i1) 587{ 588 linalg.generic #scalar_trait 589 ins(%arg0, %arg1 : memref<f32>, memref<f32>) 590 outs(%arg2 : memref<f32>) { 591 ^bb(%a : f32, %b : f32, %c : f32) : 592 %result = scf.if %arg3 -> (f32) { 593 scf.yield %a : f32 594 } else { 595 scf.yield %b : f32 596 } 597 linalg.yield %result : f32 598 } 599 return 600} 601// CHECK-LABEL: @scalar_code 602// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32> 603// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32> 604// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32> 605// CHECK-NOT: scf.for 606// CHECK: memref.load %[[ARG0]][] 607// CHECK: memref.load %[[ARG1]][] 608// CHECK: scf.if 609// CHECK: scf.yield 610// CHECK: else 611// CHECK: scf.yield 612// CHECK: store %{{.*}}, %[[ARG2]][] 613 614// CHECKPARALLEL-LABEL: @scalar_code 615// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<f32> 616// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<f32> 617// CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<f32> 618// CHECKPARALLEL-NOT: scf.for 619// CHECKPARALLEL: memref.load %[[ARG0]][] 620// CHECKPARALLEL: memref.load %[[ARG1]][] 621// CHECKPARALLEL: scf.if 622// CHECKPARALLEL: scf.yield 623// CHECKPARALLEL: else 624// CHECKPARALLEL: scf.yield 625// CHECKPARALLEL: store %{{.*}}, %[[ARG2]][] 626 627//----------------------------------------------------------------------------// 628// Named ops to loops. 629//----------------------------------------------------------------------------// 630func.func @batch_reduce_matmul_as_contract( 631 %A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?xf32>) { 632 linalg.contract 633 indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, 634 affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>, 635 affine_map<(d0, d1, d2, d3) -> (d1, d2)>] 636 ins(%A, %B : memref<?x?x?xf32>, memref<?x?x?xf32>) 637 outs(%C : memref<?x?xf32>) 638 return 639} 640// CHECK-LABEL: @batch_reduce_matmul_as_contract 641// CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 642// CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 643// CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?xf32> 644// CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32> 645// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32> 646// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32> 647// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32> 648// CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]] 649// CHECK: scf.for %[[m:.*]] = %{{.*}} to %[[M]] 650// CHECK: scf.for %[[n:.*]] = %{{.*}} to %[[N]] 651// CHECK: scf.for %[[k:.*]] = %{{.*}} to %[[K]] 652// CHECK: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32> 653// CHECK: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32> 654// CHECK: %[[vc:.*]] = memref.load %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32> 655// CHECK: %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32 656// CHECK: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 657// CHECK: store %[[res]], %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32> 658 659// CHECKPARALLEL-LABEL: @batch_reduce_matmul_as_contract 660// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 661// CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 662// CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?xf32> 663// CHECKPARALLEL: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32> 664// CHECKPARALLEL: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32> 665// CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32> 666// CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32> 667// CHECKPARALLEL: scf.for %[[b:.*]] = %{{.*}} to %[[B]] 668// CHECKPARALLEL: scf.parallel (%[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[M]], %[[N]]) step ({{.*}}) { 669// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]] 670// CHECKPARALLEL: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32> 671// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32> 672// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32> 673// CHECKPARALLEL: %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32 674// CHECKPARALLEL: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 675// CHECKPARALLEL: store %[[res]], %[[mC]][%[[m]], %[[n]]] : memref<?x?xf32> 676 677func.func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memref<?x?x?xf32>) { 678 linalg.batch_matmul ins(%A, %B : memref<?x?x?xf32>, memref<?x?x?xf32>) 679 outs(%C : memref<?x?x?xf32>) 680 return 681} 682// CHECK-LABEL: @named_batch_matmul 683// CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 684// CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 685// CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 686// CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32> 687// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32> 688// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32> 689// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32> 690// CHECK: scf.for %[[b:.*]] = %{{.*}} to %[[B]] 691// CHECK: scf.for %[[m:.*]] = %{{.*}} to %[[M]] 692// CHECK: scf.for %[[n:.*]] = %{{.*}} to %[[N]] 693// CHECK: scf.for %[[k:.*]] = %{{.*}} to %[[K]] 694// CHECK: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32> 695// CHECK: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32> 696// CHECK: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32> 697// CHECK: %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32 698// CHECK: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 699// CHECK: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32> 700 701// CHECKPARALLEL-LABEL: @named_batch_matmul 702// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 703// CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 704// CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 705// CHECKPARALLEL: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref<?x?x?xf32> 706// CHECKPARALLEL: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32> 707// CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32> 708// CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32> 709// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) { 710// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]] 711// CHECKPARALLEL: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32> 712// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32> 713// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32> 714// CHECKPARALLEL: %[[inc:.*]] = arith.mulf %[[va]], %[[vb]] : f32 715// CHECKPARALLEL: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 716// CHECKPARALLEL: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32> 717 718 719func.func @conv1d_no_symbols(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>) -> () { 720 linalg.conv_1d ins(%in, %filter : memref<?xf32>, memref<?xf32>) 721 outs(%out : memref<?xf32>) 722 return 723} 724 725// CHECK-LABEL: @conv1d_no_symbols 726// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32> 727// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32> 728// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32> 729// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index 730// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index 731// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32> 732// CHECK: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32> 733// CHECK: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { 734// CHECK: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { 735// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) 736// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32> 737// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32> 738// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32> 739// CHECK: %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32 740// CHECK: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 741// CHECK: store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32> 742 743// CHECKPARALLEL-LABEL: @conv1d_no_symbols 744// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?xf32> 745// CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32> 746// CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32> 747// CHECKPARALLEL-DAG: %[[c0:.*]] = arith.constant 0 : index 748// CHECKPARALLEL-DAG: %[[c1:.*]] = arith.constant 1 : index 749// CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?xf32> 750// CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?xf32> 751// CHECKPARALLEL: scf.parallel (%[[b:.*]]) = (%[[c0]]) to (%[[dim1]]) step (%[[c1]]) { 752// CHECKPARALLEL: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { 753// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) 754// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref<?xf32> 755// CHECKPARALLEL: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref<?xf32> 756// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref<?xf32> 757// CHECKPARALLEL: %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32 758// CHECKPARALLEL: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 759// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[b]]] : memref<?xf32> 760 761 762func.func @conv2d_no_symbols(%in : memref<?x?xf32>, %filter : memref<?x?xf32>, %out : memref<?x?xf32>) -> () { 763 linalg.conv_2d ins(%in, %filter : memref<?x?xf32>, memref<?x?xf32>) 764 outs(%out: memref<?x?xf32>) 765 return 766} 767// CHECK-LABEL: @conv2d_no_symbols 768// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32> 769// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32> 770// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32> 771// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index 772// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index 773// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32> 774// CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32> 775// CHECK: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32> 776// CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32> 777// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] { 778// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] { 779// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { 780// CHECK: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { 781// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) 782// CHECK: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) 783// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32> 784 785// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32> 786// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32> 787 788// CHECK: %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32 789// CHECK: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 790// CHECK: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32> 791 792// CHECKPARALLEL-LABEL: @conv2d_no_symbols 793// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?xf32> 794// CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?xf32> 795// CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32> 796// CHECKPARALLEL-DAG: %[[c0:.*]] = arith.constant 0 : index 797// CHECKPARALLEL-DAG: %[[c1:.*]] = arith.constant 1 : index 798// CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?xf32> 799// CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?xf32> 800// CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?xf32> 801// CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?xf32> 802// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]]) step (%[[c1]], %[[c1]]) { 803// CHECKPARALLEL: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { 804// CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { 805// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) 806// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) 807// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref<?x?xf32> 808// CHECKPARALLEL: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref<?x?xf32> 809// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32> 810// CHECKPARALLEL: %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32 811// CHECKPARALLEL: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 812// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref<?x?xf32> 813 814 815func.func @conv3d_no_symbols(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %out : memref<?x?x?xf32>) -> () { 816 linalg.conv_3d ins(%in, %filter : memref<?x?x?xf32>, memref<?x?x?xf32>) 817 outs(%out : memref<?x?x?xf32>) 818 return 819} 820 821// CHECK-LABEL: @conv3d_no_symbols 822// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 823// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 824// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 825// CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index 826// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index 827// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index 828// CHECK: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32> 829// CHECK: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32> 830// CHECK: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32> 831// CHECK: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32> 832// CHECK: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32> 833// CHECK: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32> 834// CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] { 835// CHECK: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] { 836// CHECK: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] { 837// CHECK: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { 838// CHECK: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { 839// CHECK: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] { 840// CHECK: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) 841// CHECK: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) 842// CHECK: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) 843// CHECK: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32> 844 845// CHECK: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32> 846// CHECK: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32> 847 848// CHECK: %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32 849// CHECK: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 850// CHECK: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32> 851 852// CHECKPARALLEL-LABEL: @conv3d_no_symbols 853// CHECKPARALLEL-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 854// CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 855// CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32> 856// CHECKPARALLEL-DAG: %[[c2:.*]] = arith.constant 2 : index 857// CHECKPARALLEL-DAG: %[[c0:.*]] = arith.constant 0 : index 858// CHECKPARALLEL-DAG: %[[c1:.*]] = arith.constant 1 : index 859// CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref<?x?x?xf32> 860// CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref<?x?x?xf32> 861// CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref<?x?x?xf32> 862// CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref<?x?x?xf32> 863// CHECKPARALLEL: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref<?x?x?xf32> 864// CHECKPARALLEL: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref<?x?x?xf32> 865// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]]) step (%[[c1]], %[[c1]], %[[c1]]) { 866// CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { 867// CHECKPARALLEL: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { 868// CHECKPARALLEL: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] { 869// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) 870// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) 871// CHECKPARALLEL: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) 872// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref<?x?x?xf32> 873// CHECKPARALLEL: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref<?x?x?xf32> 874// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32> 875// CHECKPARALLEL: %[[inc:.*]] = arith.mulf %[[vb]], %[[va]] : f32 876// CHECKPARALLEL: %[[res:.*]] = arith.addf %[[vc]], %[[inc]] : f32 877// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32> 878 879// ----- 880 881func.func @lower_to_loops_with_rank_reducing_subviews( 882 %arg0 : memref<?xi32>, %arg1 : memref<?x?xi32>, %arg2 : index, 883 %arg3 : index, %arg4 : index) { 884 %0 = memref.subview %arg0[%arg2] [%arg3] [1] 885 : memref<?xi32> to memref<?xi32, strided<[1], offset: ?>> 886 %1 = memref.subview %arg1[0, %arg4] [1, %arg3] [1, 1] 887 : memref<?x?xi32> to memref<?xi32, strided<[1], offset: ?>> 888 linalg.generic { 889 iterator_types = ["parallel"], 890 indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]} 891 ins(%0: memref<?xi32, strided<[1], offset: ?>>) 892 outs(%1: memref<?xi32, strided<[1], offset: ?>>) { 893 ^bb0(%a: i32, %b: i32): 894 linalg.yield %a : i32 895 } 896 return 897} 898// CHECK-LABEL: func @lower_to_loops_with_rank_reducing_subviews 899// CHECK: scf.for %[[IV:.+]] = %{{.+}} to %{{.+}} step %{{.+}} { 900// CHECK: %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]] 901// CHECK: memref.store %[[VAL]], %{{.+}}[%[[IV]]] 902// CHECK: } 903 904// CHECKPARALLEL-LABEL: func @lower_to_loops_with_rank_reducing_subviews 905// CHECKPARALLEL: scf.parallel (%[[IV:.+]]) = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { 906// CHECKPARALLEL: %[[VAL:.+]] = memref.load %{{.+}}[%[[IV]]] 907// CHECKPARALLEL: memref.store %[[VAL]], %{{.+}}[%[[IV]]] 908// CHECKPARALLEL: } 909 910// ----- 911 912func.func @transpose(%input: memref<?xf32>, 913 %init: memref<?xf32>) { 914 linalg.transpose ins(%input:memref<?xf32>) 915 outs(%init:memref<?xf32>) 916 permutation = [0] 917 return 918} 919// CHECK-LABEL: func.func @transpose( 920// CHECK-SAME: %[[VAL_0:.*]]: memref<?xf32>, 921// CHECK-SAME: %[[VAL_1:.*]]: memref<?xf32>) { 922// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index 923// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index 924// CHECK: %[[VAL_4:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : memref<?xf32> 925// CHECK: scf.for %[[VAL_5:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_2]] { 926// CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_5]]] : memref<?xf32> 927// CHECK: memref.store %[[VAL_6]], %[[VAL_1]]{{\[}}%[[VAL_5]]] : memref<?xf32> 928// CHECK: } 929// CHECK: return 930// CHECK: } 931 932// CHECKPARALLEL-LABEL: func.func @transpose( 933// CHECKPARALLEL-SAME: %[[VAL_0:.*]]: memref<?xf32>, 934// CHECKPARALLEL-SAME: %[[VAL_1:.*]]: memref<?xf32>) { 935// CHECKPARALLEL: %[[VAL_2:.*]] = arith.constant 1 : index 936// CHECKPARALLEL: %[[VAL_3:.*]] = arith.constant 0 : index 937// CHECKPARALLEL: %[[VAL_4:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : memref<?xf32> 938// CHECKPARALLEL: scf.parallel (%[[VAL_5:.*]]) = (%[[VAL_3]]) to (%[[VAL_4]]) step (%[[VAL_2]]) { 939// CHECKPARALLEL: %[[VAL_6:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_5]]] : memref<?xf32> 940// CHECKPARALLEL: memref.store %[[VAL_6]], %[[VAL_1]]{{\[}}%[[VAL_5]]] : memref<?xf32> 941// CHECKPARALLEL: scf.reduce 942// CHECKPARALLEL: } 943// CHECKPARALLEL: return 944// CHECKPARALLEL: } 945