// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-erase-unused-operands-and-results | FileCheck %s
// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-erase-unnecessary-inputs | FileCheck %s --check-prefix=CHECK-INPUT

// CHECK-LABEL: func @remove_deadargs_generic_basic
// CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]] : tensor<?xf32>)
// CHECK-SAME: outs({{.*}} : tensor<?xf32>) {
#map0 = affine_map<(d0) -> (d0)>
func.func @remove_deadargs_generic_basic(%arg0: tensor<?xf32>) -> (tensor<?xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 7.0 : f32
  %0 = tensor.dim %arg0, %c0 : tensor<?xf32>
  %1 = tensor.empty(%0) : tensor<?xf32>
  %2 = tensor.empty(%0) : tensor<?xf32>
  %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"]} ins(%arg0, %1 : tensor<?xf32>, tensor<?xf32>) outs(%2 : tensor<?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %4 = arith.addf %arg1, %cst : f32
    linalg.yield %4 : f32
  } -> tensor<?xf32>
  return %3 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @remove_deadargs_generic_mixedaccess
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-NOT: ins
// CHECK-SAME: outs({{.*}} : tensor<?x?xf32>) {
#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>
func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor<?x?xf32>) -> (tensor<?x?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cst1 = arith.constant 7.0 : f32
  %cst2 = arith.constant 6.0 : f32
  %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %3 = tensor.empty(%1, %0) : tensor<?x?xf32>
  %4 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) {
  ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
    %6 = arith.divf %cst1, %cst2 : f32
    linalg.yield %6 : f32
  } -> tensor<?x?xf32>
  return %5 : tensor<?x?xf32>
}

// -----

// Test case: Most basic case. Adding a vector to itself.

#map = affine_map<(d0) -> (d0)>

// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @basic
func.func @basic(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]]]
  // CHECK: attrs = {someattr}
  // CHECK: ^bb0(%[[BBARG:.*]]: f32, %{{.*}}: f32):
  // CHECK: arith.addf %[[BBARG]], %[[BBARG]]
  %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]}
      ins(%arg0, %arg0 : tensor<?xf32>, tensor<?xf32>)
      outs(%arg0 : tensor<?xf32>) attrs = {someattr} {
    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
      %1 = arith.addf %arg1, %arg2 : f32
      linalg.yield %1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// Test case: Different indexing maps mean that args are not redundant, despite
// being the same Value.
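// Deduplicating the two ins operands would force both block arguments to read
// through the same map and change which element each one sees, so both are kept.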

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: @distinct_affine_maps
func.func @distinct_affine_maps(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]]
  %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
      ins(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%arg0 : tensor<?x?xf32>) {
    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
      %1 = arith.addf %arg1, %arg2 : f32
      linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// -----

// Test case: Check rewriting mechanics for mixed redundant and
// non-redundant args.

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK-LABEL: @mixed_redundant_non_redundant
func.func @mixed_redundant_non_redundant(%arg0: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]]
  // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32):
  // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]])
  %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
      ins(%arg0, %arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%arg0 : tensor<?x?xf32>) {
    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
      %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32
      linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// -----

// Test case: Check rewriting mechanics for multiple different redundant args.

#map = affine_map<(d0) -> (d0)>

// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @multiple_different_redundant_args
func.func @multiple_different_redundant_args(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: linalg.generic{{.*}}[#[[$MAP]], #[[$MAP]], #[[$MAP]]]
  // CHECK: ^bb0(%[[BBARG0:.*]]: f32, %[[BBARG1:.*]]: f32, %{{[a-zA-Z0-9]+}}: f32):
  // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]])
  %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]}
      ins(%arg0, %arg1, %arg0, %arg1 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
      outs(%arg0 : tensor<?xf32>) {
    ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32):
      %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32
      linalg.yield %1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// -----

// Drop dead result.
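// Only results #0 and #2 are returned, so outputs #1 and #3 (and the
// corresponding yield values) are expected to be erased.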

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @drop_dead_results(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %arg0, %arg0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32):
      %1 = arith.addf %b0, %b0 : f32
      linalg.yield %1, %1, %1, %1 : f32, f32, f32, f32
  } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
// CHECK: func @drop_dead_results(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>)
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] :
// CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Current argmax lowering to `linalg.generic`. Cannot drop the
// first result even though it isn't used, since it has an internal
// use.
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> ()>
func.func @argmax_lowering(%arg0 : tensor<?xf32>) -> tensor<i32> {
  %init0 = tensor.empty() : tensor<f32>
  %init1 = tensor.empty() : tensor<i32>
  %0:2 = linalg.generic {
      indexing_maps = [#map0, #map1, #map1],
      iterator_types = ["reduction"]}
      ins(%arg0 : tensor<?xf32>)
      outs(%init0, %init1 : tensor<f32>, tensor<i32>) {
    ^bb0(%b0: f32, %b1: f32, %b2: i32):
      %8 = linalg.index 0 : index
      %9 = arith.index_cast %8 : index to i32
      %10 = arith.cmpf oge, %b0, %b1 : f32
      %11 = arith.select %10, %b0, %b1 : f32
      %12 = arith.cmpf oeq, %b0, %b1 : f32
      %13 = arith.minsi %9, %b2 : i32
      %14 = arith.select %10, %9, %b2 : i32
      %15 = arith.select %12, %13, %14 : i32
      linalg.yield %11, %15 : f32, i32
  } -> (tensor<f32>, tensor<i32>)
  return %0#1 : tensor<i32>
}
// CHECK: func @argmax_lowering(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?xf32>
// CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor<f32>
// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<i32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] :
// CHECK: return %[[GENERIC]]#1

// -----

// Do not remove operand needed for loop dim.
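// The value of %arg0 is never read in the body, but its dynamic size drives
// the reduction loop bound, so the ins operand has to stay.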
func.func @loop_dim_operand(%arg0 : tensor<?xf32>) -> tensor<i32> {
  %cst = arith.constant 0 : i32
  %init = tensor.empty() : tensor<i32>
  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
  %0 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
      iterator_types = ["reduction"]}
      ins(%arg0 : tensor<?xf32>) outs(%fill : tensor<i32>) {
    ^bb0(%b0: f32, %b1: i32):
      %1 = linalg.index 0 : index
      %2 = arith.index_cast %1 : index to i32
      %3 = arith.addi %b1, %2 : i32
      linalg.yield %3 : i32
  } -> tensor<i32>
  return %0 : tensor<i32>
}
// CHECK: func @loop_dim_operand(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]] :

// -----

// Do not remove outs operand needed for loop bound computation.
func.func @loop_dim_outs_operand(%arg0 : index) -> tensor<i32> {
  %cst = arith.constant 0 : i32
  %init1 = tensor.empty(%arg0) : tensor<?xi32>
  %init = tensor.empty() : tensor<i32>
  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<i32>) -> tensor<i32>
  %0:2 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>],
      iterator_types = ["parallel"]}
      outs(%init1, %fill : tensor<?xi32>, tensor<i32>) {
    ^bb0(%b0: i32, %b1: i32):
      %1 = linalg.index 0 : index
      %2 = arith.index_cast %1 : index to i32
      %3 = arith.addi %b1, %2 : i32
      linalg.yield %2, %3 : i32, i32
  } -> (tensor<?xi32>, tensor<i32>)
  return %0#1 : tensor<i32>
}
// CHECK: func @loop_dim_outs_operand(
// CHECK-SAME: %[[ARG0:.+]]: index
// CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG0]])
// CHECK: linalg.generic
// CHECK-SAME: outs(%[[INIT]]

// -----

#map0 = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1, d0)>
#map2 = affine_map<(d0, d1) -> (d0)>
#map3 = affine_map<(d0, d1) -> (d1)>
func.func @multiple_redundant_args(%arg0 : tensor<?x?xi32>, %arg1 : tensor<?xi32>,
    %arg2 : tensor<?xi32>, %arg3 : tensor<?x?xi32>, %arg4 : tensor<?xi32>) -> tensor<?xi32> {
  %0 = linalg.generic {
      indexing_maps = [#map3, #map0, #map0, #map2, #map1, #map1, #map2],
      iterator_types = ["parallel", "reduction"]}
      ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3
          : tensor<?xi32>, tensor<?x?xi32>, tensor<?x?xi32>, tensor<?xi32>, tensor<?x?xi32>, tensor<?x?xi32>)
      outs(%arg2 : tensor<?xi32>) {
    ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32):
      %1 = arith.addi %b0, %b1 : i32
      %2 = arith.addi %1, %b2 : i32
      %3 = arith.addi %2, %b3 : i32
      %4 = arith.addi %3, %b4 : i32
      %5 = arith.addi %4, %b5 : i32
      %6 = arith.addi %5, %b6 : i32
      linalg.yield %6 : i32
  } -> tensor<?xi32>
  return %0 : tensor<?xi32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK: func @multiple_redundant_args(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xi32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?xi32>
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xi32>
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: tensor<?xi32>)
// CHECK: %[[RETURN:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "reduction"]
// CHECK-SAME: ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] :
// CHECK-SAME: outs(%[[ARG2]] :
// CHECK: ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32
// CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME: %[[B2:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME: %[[B3:[a-zA-Z0-9_]+]]: i32
// CHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: i32)
// CHECK: %[[T0:.+]] = arith.addi %[[B0]], %[[B1]]
// CHECK: %[[T1:.+]] = arith.addi %[[T0]], %[[B1]]
// CHECK: %[[T2:.+]] = arith.addi %[[T1]], %[[B2]]
// CHECK: %[[T3:.+]] = arith.addi %[[T2]], %[[B3]]
// CHECK: %[[T4:.+]] = arith.addi %[[T3]], %[[B3]]
// CHECK: %[[T5:.+]] = arith.addi %[[T4]], %[[B4]]
// CHECK: linalg.yield %[[T5]]
// CHECK: return %[[RETURN]]

// -----

// Drop redundant results.

#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @drop_redundant_results(
    %arg0 : tensor<?x?xf32>) -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %0:2 = linalg.generic {
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel"]}
      ins(%arg0 : tensor<?x?xf32>)
      outs(%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
      %1 = arith.addf %b0, %b0 : f32
      linalg.yield %1, %1 : f32, f32
  } -> (tensor<?x?xf32>, tensor<?x?xf32>)
  return %0#0, %0#1 : tensor<?x?xf32>, tensor<?x?xf32>
}
// CHECK: func @drop_redundant_results
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: outs(%[[ARG0]] :
// CHECK: return %[[GENERIC]]

// -----

// Drop dead result with different tensors.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @drop_dead_results_with_different_tensors(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32):
      linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32
  } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}

// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
// CHECK: func @drop_dead_results_with_different_tensors(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>)
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] :
// CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Drop dead result with unused cycles.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
#map4 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @drop_dead_results_with_unused_cycles(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:4 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3, #map4],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32):
      %1 = arith.addf %b0, %b0 : f32
      %2 = arith.addf %b0, %b3 : f32
      %3 = arith.addf %b0, %b4 : f32
      linalg.yield %1, %1, %2, %3 : f32, f32, f32, f32
  } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}

// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
// CHECK: func @drop_dead_results_with_unused_cycles(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>)
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] :
// CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----

// Drop only the results not used by others.
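// Result #2 only feeds its own yield and is dropped; result #1 is kept even
// though it is not returned, because its block argument (%b2) is yielded as
// result #0.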

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
func.func @drop_only_the_results_not_used_by_others(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:3 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
      linalg.yield %b2, %b1, %b3 : f32, f32, f32
  } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0 : tensor<?x?x?xf32>
}

// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
// CHECK: func @drop_only_the_results_not_used_by_others(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] :
// CHECK: return %[[GENERIC]]#0

// -----

// Drop only the cycles not used by others.

#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0, d2)>
func.func @drop_only_the_cycles_not_used_by_others(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
  %c1 = arith.constant 1 : index
  %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
  %c2 = arith.constant 2 : index
  %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
  %init0 = tensor.empty(%d0, %d1, %d2) : tensor<?x?x?xf32>
  %0:3 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2, #map3],
      iterator_types = ["parallel", "parallel", "parallel"]}
      ins(%arg0 : tensor<?x?x?xf32>)
      outs(%arg0, %init0, %init0
          : tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
      %1 = arith.addf %b1, %b2 : f32
      %2 = arith.addf %b1, %b3 : f32
      linalg.yield %1, %b1, %2 : f32, f32, f32
  } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
  return %0#0 : tensor<?x?x?xf32>
}

// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1, d2, d0)>
// CHECK: func @drop_only_the_cycles_not_used_by_others(
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]]
// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] :
// CHECK: return %[[GENERIC]]#0

// -----

// CHECK-INPUT-LABEL: func @remove_unnecessary_input(
// CHECK-INPUT-SAME: %[[a:.*]]: tensor<?xf32>, %[[b:.*]]: tensor<?xf32>
#map = affine_map<(d0) -> (d0)>
func.func @remove_unnecessary_input(%a: tensor<?xf32>, %b: tensor<?xf32>)
    -> tensor<?xf32>
{
  // CHECK-INPUT: %[[result:.*]] = linalg.generic {indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel"]}
  // CHECK-INPUT-SAME: ins(%[[a]] : tensor<?xf32>) outs(%[[b]] : tensor<?xf32>) {
  // CHECK-INPUT: ^bb0(%[[in:.*]]: f32, %[[out:.*]]: f32):
  // CHECK-INPUT: %[[add:.*]] = arith.addf %[[in]], %[[out]]
  // CHECK-INPUT: linalg.yield %[[add]]
  // CHECK-INPUT: } -> tensor<?xf32>
  // CHECK-INPUT: return %[[result]]
  %0 = linalg.generic
      {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]}
      ins(%a, %b : tensor<?xf32>, tensor<?xf32>) outs(%b : tensor<?xf32>) {
    ^bb0(%in: f32, %in_2: f32, %out: f32):
      %16 = arith.addf %in, %in_2 : f32
      linalg.yield %16 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}