// RUN: mlir-opt --one-shot-bufferize="dialect-filter=linalg,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -canonicalize -cse -split-input-file %s | FileCheck %s

#map0 = affine_map<(d0) -> (d0)>

// In-depth checking of a basic case, this is testing
// - bufferization.to_memref / bufferization.to_tensor materializations are
//   properly inserted
// - payload is correctly carried over
// - affine maps are correctly carried over
// Later tests will not check all these details.

// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @basic(
// CHECK-SAME: %[[TENSOR:.*]]: tensor<4xf32>) -> tensor<4xf32> {
// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : tensor<4xf32> to memref<4xf32>
// CHECK-DAG: %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]}
// CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>)
// CHECK-SAME: outs(%[[RESULT_MEMREF]] : memref<4xf32>) {
// CHECK: ^bb0(%[[RESULT1:.*]]: f32, %[[UNUSED:.*]]: f32):
// CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32
// CHECK: linalg.yield %[[DIM1]] : f32
// CHECK: }
// CHECK: %[[RESULT:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<4xf32>
// CHECK: return %[[RESULT]] : tensor<4xf32>
func.func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> {
  %0 = linalg.generic {
      indexing_maps = [#map0, #map0],
      iterator_types = ["parallel"]
    } ins(%arg0 : tensor<4xf32>)
      outs(%arg0 : tensor<4xf32>) {
    ^bb0(%gen_arg1: f32, %out: f32):
      %tmp1 = math.exp %gen_arg1 : f32
      linalg.yield %tmp1 : f32
  } -> tensor<4xf32>
  return %0 : tensor<4xf32>
}


// -----

#map0 = affine_map<(d0) -> (d0)>

// Same as above but with tensor.empty op.
// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @empty_tensor(
// CHECK-SAME: %[[IN:.*]]: tensor<?xf32>, %[[SIZE:.*]]: index)
// CHECK-DAG: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : tensor<?xf32> to memref<?xf32>
// CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref<?xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF]] : memref<?xf32>)
// CHECK-SAME: outs(%[[OUT_BUF]] : memref<?xf32>) {
func.func @empty_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
  %init = tensor.empty(%size) : tensor<?xf32>
  %0 = linalg.generic {
    indexing_maps = [#map0, #map0],
    iterator_types = ["parallel"]
  } ins(%in : tensor<?xf32>)
    outs(%init : tensor<?xf32>) {
    ^bb0(%gen_arg1: f32, %out: f32):
      %tmp1 = math.exp %gen_arg1 : f32
      linalg.yield %tmp1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}


// -----

#map0 = affine_map<(d0) -> (d0)>

// CHECK-LABEL: func @multiple_results
// CHECK: %[[RESULT0:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}} : memref<4xf32>)
// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
  %0, %1 = linalg.generic {
    indexing_maps = [#map0, #map0, #map0],
    iterator_types = ["parallel"]
  } ins(%arg0 : tensor<4xf32>)
    outs (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) {
    ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32):
      %tmp1 = math.exp %gen_arg1 : f32
      linalg.yield %tmp1, %tmp1 : f32, f32
  } -> (tensor<4xf32>, tensor<4xf32>)
  return %0, %1 : tensor<4xf32>, tensor<4xf32>
}

// -----

#map_2d = affine_map<(d0, d1) -> (d0, d1)>

// Check that the allocs properly consider the different shapes of the output
// operands. The permuted indexing maps translate to different output shapes.

// CHECK-LABEL: func @dynamic_results(
// CHECK-SAME: %[[ARG:.*]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[DIM0:.*]] = tensor.dim %[[ARG]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[DIM1:.*]] = tensor.dim %[[ARG]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[RESULT1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : tensor<?x?xf32> to memref<?x?xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF_ARG]] : memref<?x?xf32>)
// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<?x?xf32>, memref<?x?xf32>)
func.func @dynamic_results(%arg0: tensor<?x?xf32>)
    -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0, %1 = linalg.generic {
    indexing_maps = [#map_2d, #map_2d, #map_2d],
    iterator_types = ["parallel", "parallel"]
  } ins(%arg0 : tensor<?x?xf32>)
    outs (%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
    ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32):
      %tmp1 = math.exp %gen_arg1 : f32
      linalg.yield %tmp1, %tmp1 : f32, f32
  } -> (tensor<?x?xf32>, tensor<?x?xf32>)
  return %0, %1 : tensor<?x?xf32>, tensor<?x?xf32>
}

// -----

#accesses = [
  affine_map<(i, j, k) -> (j, i, k)>,
  affine_map<(i, j, k) -> (i, j)>
]

#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}

// Check the bufferization of init tensors.
// CHECK-LABEL: func @generic_with_init_tensor(
// CHECK-SAME: %[[ARG0_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>,
// CHECK-SAME: %[[ARG1_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> {
// CHECK-DAG: %[[INIT_BUFFER:.*]] = memref.alloc() {{.*}} : memref<3x2xf32>
// CHECK-DAG: %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : tensor<2x3x4xvector<3x4xi4>>
// CHECK-DAG: %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : tensor<3x2xf32>
// CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>)
// CHECK-SAME: outs(%[[INIT_BUFFER]] : memref<3x2xf32>) {
func.func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>,
                                    %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {

  %0 = linalg.generic #trait
    ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
    outs(%arg1 : tensor<3x2xf32>) {
    ^bb(%v0: vector<3x4xi4>, %v1: f32) :
      linalg.yield %v1 : f32
  } -> tensor<3x2xf32>

  return %0 : tensor<3x2xf32>
}

// -----

// CHECK-LABEL: func @bufferize_fill(
// CHECK-SAME: %[[IN:.*]]: tensor<?xf32>
func.func @bufferize_fill(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  %c0 = arith.constant 0.0 : f32
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : memref<?xf32>)
  // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<?xf32>
  // CHECK: return %[[TENSOR]]
  %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @bufferize_dot
func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor<f32>) -> tensor<f32> {
  %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>)
                    outs(%out : tensor<f32>) -> tensor<f32>
  return %dot : tensor<f32>
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // TODO: The copy is not necessary.
  // CHECK: memref.copy {{.*}}, %[[ALLOC]]
  // CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
  // CHECK-SAME: outs(%[[ALLOC:.*]] : memref<f32>)
  // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<f32>
  // CHECK: return %[[OUT_TENSOR]]
}

// -----

// CHECK-LABEL: func @bufferize_softmax(
// CHECK-SAME: %[[arg0:.*]]: tensor<2x16x32xf32>, %[[arg1:.*]]: tensor<2x16x32xf32>
// CHECK: %[[m0:.*]] = bufferization.to_memref %[[arg0]]
// CHECK: %[[alloc:.*]] = memref.alloc()
// CHECK-NOT: memref.copy
// CHECK: linalg.softmax dimension(2) ins(%[[m0]] : {{.*}}) outs(%[[alloc:.*]] : {{.*}})
// CHECK: %[[result:.*]] = bufferization.to_tensor %[[alloc]]
// CHECK: return %[[result]]
func.func @bufferize_softmax(%arg0: tensor<2x16x32xf32>, %arg1: tensor<2x16x32xf32>) -> tensor<2x16x32xf32> {
  %1 = linalg.softmax dimension(2)
    ins(%arg0 : tensor<2x16x32xf32>)
    outs(%arg1: tensor<2x16x32xf32>) -> tensor<2x16x32xf32>
  return %1 : tensor<2x16x32xf32>
}