// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// Try different heuristics. Not checking the result, just make sure that we do
// not crash.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=bottom-up-from-terminators" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-heuristic=top-down" -split-input-file -o /dev/null

// Dump alias sets for the CHECK-ALIAS-SETS prefix used below. (This RUN line
// was missing; the `dump-alias-sets` option name is assumed here.)
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only dump-alias-sets" -split-input-file | FileCheck %s --check-prefix=CHECK-ALIAS-SETS

// TODO: Extract op-specific test cases and move them to their respective
// dialects.

//===----------------------------------------------------------------------===//
// Simple cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_fun(
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
                             %B : tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "read"
  -> (tensor<4xf32>, tensor<8xf32>)
{
  // tensor.extract_slice is not used in a write, so it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>

  return %r0, %r1: tensor<4xf32>, tensor<8xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_fun(
func.func @insert_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
                            %B : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
                            %C : tensor<4xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // must bufferize out of place.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
  %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // bufferizes inplace.
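  // (Added explanatory sketch, not checked by FileCheck: the analysis
  // annotates every op with one entry per operand, e.g.
  //   tensor.insert_slice ... {__inplace_operands_attr__ = ["true", "true"]}
  // where entry i is "true" if operand i bufferizes in place, "false" if it
  // requires an out-of-place copy, and "none" if it is not a tensor.)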
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @conflict_on_B(
func.func @conflict_on_B(%A : tensor<4x4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
                         %B : tensor<4x4xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "read-write"
  -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
  // matmul output operand interferes with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand interferes with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand does not interfere with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1]
  return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
}

//===----------------------------------------------------------------------===//
// Length-1 producer-consumer cases.
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_extract_slice(
func.func @extract_slice_extract_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %B : tensor<?xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
  -> (tensor<2xf32>, tensor<2xf32>)
{
  // tensor.extract_slice is not used in a write, so it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>

  return %r1, %r3: tensor<2xf32>, tensor<2xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_insert_slice(
func.func @insert_slice_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
    %A2 : tensor<4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
    %A3 : tensor<2xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %B : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
    %B2 : tensor<4xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
    %B3 : tensor<2xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
  %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
  %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func.func @extract_slice_nonmatching_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false},
    %idx: index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is an overlapping tensor.extract_slice that does not match, so it
  // must bufferize out of place.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r1 can bufferize inplace fine.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r3 does not bufferize inplace because %B is not inplaceable.
  // %r2 is an overlapping tensor.extract_slice that does not match, but does
  // not alias with the buffer coming from %r3, so it can actually bufferize
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r3 cannot bufferize inplace since %B is not inplaceable.
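  // (Added sketch of the rule applied here: writing into a buffer that is
  // read-only at the function boundary, declared as
  //   %B : tensor<?xf32> {bufferization.writable = false}
  // always forces a copy of the destination operand, which is what the
  // "false" entry below encodes.)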
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]}
  %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_matching_insert_slice
func.func @extract_slice_matching_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false})
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is a tensor.extract_slice that matches, so it can also be bufferized
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r2 is a tensor.extract_slice that matches %r3, so it can be bufferized
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // tensor.insert_slice cannot bufferize inplace.
  // This should have been captured by a canonicalization pattern and it would
  // be unproductive to have special logic in bufferization to encode matching
  // insert_slice(extract_slice(A), A).
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: @read_of_matching_insert_slice_source
func.func @read_of_matching_insert_slice_source(
    %A : tensor<?xf32> {bufferization.writable = true},
    %idx : index,
    %idx2 : index)
  -> (tensor<?xf32>, vector<5xf32>)
{
  %cst = arith.constant 0.0 : f32
  %cst2 = arith.constant 1.0 : f32

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %2, %3 : tensor<?xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
func.func @read_of_matching_insert_slice_source_interleaved(
    %A : tensor<?xf32> {bufferization.writable = true},
    %idx : index,
    %idx2 : index,
    %idx3 : index)
  -> (tensor<?xf32>, vector<5xf32>)
{
  %cst = arith.constant 0.0 : f32
  %cst2 = arith.constant 1.0 : f32

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?xf32>) -> tensor<?xf32>

  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %6, %3 : tensor<?xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func.func @extract_slice_linalg_readonly_use(
    %A : tensor<?x?xf32> {bufferization.writable = false},
    %B : tensor<4x4xf32> {bufferization.writable = false},
    %C : tensor<4x4xf32> {bufferization.writable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // tensor.extract_slice is only used as a read, so there is no interference
  // irrespective of the user's inplace status.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // matmul output operand is not inplaceable at the function boundary.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand is inplaceable at the function boundary.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%C: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 2]
  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = false},
    %C : tensor<?x?xf32> {bufferization.writable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to a write in %D, but that write is not
  // inplace. So %sB is only ever read and can bufferize inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 3. %sB has a read interference in %E, so the write to %sB in %D does
  // not bufferize inplace.
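  // (Explanatory note, added: the "Step" numbers run 4..1 from top to bottom
  // because the default analysis heuristic visits ops bottom-up, so decisions
  // are made in reverse program order.)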
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 2. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C which is inplaceable.
  // As a consequence this is bufferized inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_double_extract_slice
func.func @insert_slice_double_extract_slice(
    %s1: index,
    %s2: index,
    %s3: index,
    %s4: index,
    %A: tensor<8x6xf32> {bufferization.writable = false},
    %B: tensor<6x6xf32> {bufferization.writable = false},
    %C: tensor<30x20xf32> {bufferization.writable = true})
  -> tensor<30x20xf32>
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]}
  %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor<?x?xf32>

  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor<?x?xf32>) -> tensor<?x?xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]}
  %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<?x?xf32> into tensor<30x20xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [6]
  return %20 : tensor<30x20xf32>
}

//===----------------------------------------------------------------------===//
// Transitive cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = false},
    %C : tensor<?x?xf32> {bufferization.writable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to an inplace write in %D.
  // %sB backward propagates to %B which is not inplaceable.
  // As a consequence this is bufferized out of place.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 3. %sB backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 2. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C which is inplaceable.
  // As a consequence this is bufferized inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @nested_extract_slice_and_insert
func.func @nested_extract_slice_and_insert(
    %A : tensor<?x?xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = true},
    %C : tensor<?x?xf32> {bufferization.writable = true},
    %idx : index,
    %sz1 : index,
    %sz2 : index)
  -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
{
  %f0 = arith.constant 0.0 : f32

  // 2-level matching tensor.extract_slice / tensor.insert_slice into non
  // inplaceable %A.
  // - %rA is not inplaceable because %A is not inplaceable at function boundary.
  // - once %rA is deemed not inplaceable, nothing prevents %rsA from being
  //   inplaceable.
  // - this propagates to %FA and %ssA being inplaceable.
  // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
  //   inplaceable and so %sA is not inplaceable.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
  %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 3-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %B.
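  // (Added illustration of the matching pattern: each extract_slice pairs
  // with the insert_slice that writes back through the same offsets/sizes,
  // e.g. %ssB below pairs with %rsB (both use [0, 0][4, %idx][1, 1]), so the
  // whole nest can bufferize in place when the root %B is writable.)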
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
  %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 2-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %C with a twist.
  // Throw a wrench in the system: %rsC production sizes do not match %ssC.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  // The tensor.insert_slice that would be a candidate for matching does not
  // actually match. That tensor.insert_slice can still be bufferized inplace,
  // but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
  %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor<?x4xf32>) -> tensor<?x4xf32>
  %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor<?x4xf32> into tensor<?x?xf32>
  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2]
  return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}

// -----

//===----------------------------------------------------------------------===//
// Cross function boundary cases.
571//===----------------------------------------------------------------------===// 572 573func.func private @foo(tensor<64xf32>) 574 575// CHECK-LABEL: dependence_through_call 576func.func @dependence_through_call(%I : tensor<64xf32> {bufferization.writable = true}) { 577 %f1 = arith.constant 1.000000e+00 : f32 578 %f2 = arith.constant 2.000000e+00 : f32 579 580 // 2. %B already bufferizes inplace, %A would alias and have a different 581 // value. The calls to `foo` are determined to read conservatively, so %A 582 // cannot bufferize inplace. 583 // CHECK: fill 584 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} 585 %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 586 587 // 1. Bufferizes inplace: no alias to %A is yet possible. 588 // CHECK: fill 589 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} 590 %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 591 592 call @foo(%A) : (tensor<64xf32>) -> () 593 call @foo(%B) : (tensor<64xf32>) -> () 594 595 return 596} 597 598// ----- 599 600func.func private @foo(tensor<64xf32>) 601 602func.func private @bar(%A : tensor<64xf32>) { 603 call @foo(%A) : (tensor<64xf32>) -> () 604 return 605} 606 607func.func @read_dependence_through_scf_and_call( 608 %I : tensor<64xf32> {bufferization.writable = true}, 609 %I2 : tensor<64xf32> {bufferization.writable = true}) { 610 %c0 = arith.constant 0 : index 611 %c1 = arith.constant 1 : index 612 %c10 = arith.constant 10 : index 613 %f1 = arith.constant 1.000000e+00 : f32 614 %f2 = arith.constant 2.000000e+00 : f32 615 616 // 5. %B bufferizes inplace, %A would alias and have a different value. 617 // The calls to `foo` are determined to read conservatively, so %A cannot 618 // bufferize inplace. 619 // CHECK: fill 620 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} 621 %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 622 623 // 4. Bufferizes inplace: no alias to %A is yet possible. 624 // CHECK: fill 625 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} 626 %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 627 628 // 3. Does not read or write, bufferizes inplace. 629 // CHECK: scf.for 630 // CHECK-NEXT: scf.yield 631 // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} 632 // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]} 633 %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B) 634 -> (tensor<64xf32>, tensor<64xf32>) 635 { 636 scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32> 637 } 638 call @foo(%r#0) : (tensor<64xf32>) -> () 639 call @foo(%r#1) : (tensor<64xf32>) -> () 640 641 // 2. %B2 already bufferizes inplace, %A2 would alias and have a different 642 // value. The calls to `foo` are determined to read conservatively, so %A2 643 // cannot bufferize inplace. 644 // CHECK: fill 645 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} 646 %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> 647 648 // 1. Bufferizes inplace: no alias to %A2 is yet possible. 
  // CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>

  call @bar(%A2) : (tensor<64xf32>) -> ()
  call @bar(%B2) : (tensor<64xf32>) -> ()
  return
}

// -----

//===----------------------------------------------------------------------===//
// Transitive cases through extract_slice.
//===----------------------------------------------------------------------===//

// CHECK-LABEL: func @write_into_constant_via_alias
func.func @write_into_constant_via_alias(%v : vector<5xi32>,
                                         %s1 : index, %s2 : index,
                                         %s3 : index) -> tensor<?xi32> {
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
  return %r : tensor<?xi32>
}

// -----

func.func @matmul_on_tensors(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %cst_1 = arith.constant 1.000000e+00 : f32

  %7 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
  %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
  %r = linalg.matmul
         ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
         outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2]
  return %r : tensor<256x256xf32>
}

// -----

func.func @matmul_on_tensors(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %cst_1 = arith.constant 1.000000e+00 : f32

  %7 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
  %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
  %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
  %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
  %r = linalg.matmul
         ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
         outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2]
  return %r : tensor<256x256xf32>
}

// -----

//===----------------------------------------------------------------------===//
// A chain of tensor.insert_slice ops is better traversed in reverse order,
// without prioritizing the tensor.insert_slice ops.
768//===----------------------------------------------------------------------===// 769 770// CHECK-LABEL: func @insert_slice_chain( 771func.func @insert_slice_chain( 772 %v1: vector<32x90xf32>, 773 %v2: vector<30x90xf32>, 774 %arg0: tensor<62x126xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false}, 775// CHECK-SAME: bufferization.access = "none" 776 %arg1: tensor<126x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false}, 777// CHECK-SAME: bufferization.access = "none" 778 %arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true}) 779// CHECK-SAME: bufferization.access = "write" 780 -> tensor<62x90xf32> attributes {passthrough = [["prefer-vector-width", "512"]], target_cpu = "skylake-avx512"} 781{ 782 %c0 = arith.constant 0 : index 783 %cst = arith.constant 0.000000e+00 : f32 784 785 // CHECK: linalg.fill 786 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] 787 %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> 788 789 // CHECK: tensor.extract_slice 790 // CHECK-SAME: {__inplace_operands_attr__ = ["true"] 791 %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32> 792 // CHECK: vector.transfer_write 793 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] 794 %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32> 795 // CHECK: tensor.insert_slice 796 // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"] 797 %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32> 798 799 // CHECK: tensor.extract_slice 800 // CHECK-SAME: {__inplace_operands_attr__ = ["true"] 801 %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32> 802 // CHECK: vector.transfer_write 803 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] 804 %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32> 805 // CHECK: tensor.insert_slice 806 // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"] 807 %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32> 808 809 // CHECK: return 810 // CHECK-SAME: __equivalent_func_args__ = [4] 811 return %15 : tensor<62x90xf32> 812} 813 814// ----- 815 816//===----------------------------------------------------------------------===// 817// Insert point issue cases. 818//===----------------------------------------------------------------------===// 819 820// Only test IR validity wrt dominance. 
// CHECK-LABEL: func @ip
func.func @ip(%t: tensor<10x20xf32> {bufferization.writable = true},
              %x: index, %y: index, %v: vector<5x6xf32>)
  -> tensor<10x20xf32>
{
  %c0 = arith.constant 0 : index
  %c256 = arith.constant 256 : index
  %c257 = arith.constant 257 : index
  %r = scf.for %arg0 = %c0 to %c257 step %c256 iter_args(%arg1 = %t) -> (tensor<10x20xf32>) {
    %t1 = tensor.extract_slice %arg1[%x, 0] [5, %y] [1, 1] : tensor<10x20xf32> to tensor<5x?xf32>
    %t11 = tensor.extract_slice %t1[0, 0] [5, %y] [1, 1] : tensor<5x?xf32> to tensor<5x?xf32>
    %t2 = vector.transfer_write %v, %t11[%c0, %c0] : vector<5x6xf32>, tensor<5x?xf32>
    %t3 = tensor.insert_slice %t2 into %arg1[%x, 0] [5, %y] [1, 1] : tensor<5x?xf32> into tensor<10x20xf32>
    scf.yield %t3 : tensor<10x20xf32>
  }

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %r : tensor<10x20xf32>
}

// -----

#accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"]
}

// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
    %t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
  -> (tensor<?xf32>, tensor<?xf32>){

  // CHECK: linalg.generic
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]
  %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
                               outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
      ^bb(%0: f32, %1: f32, %2 : f32) :
        linalg.yield %0, %0 : f32, f32
    } -> (tensor<?xf32>, tensor<?xf32>)

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [1, -1]
  return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

#accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"]
}

// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
    %t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
  -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){

  // CHECK: linalg.generic
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"]
  %o:3 = linalg.generic #trait
          ins(%t1 : tensor<?xf32>)
          outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
      ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
        linalg.yield %0, %0, %0 : f32, f32, f32
    } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [1, -1, -1]
  return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @double_insert_slice_into_alias
func.func @double_insert_slice_into_alias(
    %v1: vector<32x90xf32>,
    %v2: vector<30x90xf32>,
    %arg2: tensor<62x90xf32> {bufferization.writable = true},
    %s1: index, %s2: index, %s3: index, %s4: index)
  -> (tensor<62x90xf32>, tensor<?x?xf32>)
{
  %c0 = arith.constant 0 : index

  // This extract_slice cannot bufferize inplace because both the operand and
  // the result are modified and returned separately.
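  // (Added note: %e aliases %arg2, %8 writes into %arg2 in place, %15 writes
  // into %e, and both results are returned; an in-place %e would let the two
  // returned tensors clobber each other.)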
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
  %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2, -1]
  return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
}

// -----

// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_1
func.func @interleaved_extract_insert_slice_chain_1(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // TODO: This should bufferize inplace once we have a proper range analysis.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_2
func.func @interleaved_extract_insert_slice_chain_2(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // The slices are overlapping, so this can never bufferize inplace.
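  // (Concretely: %2 covers rows [0, 32) and %10 covers rows [31, 61), which
  // share row 31.)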
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @extract_once_insert_twice
func.func @extract_once_insert_twice(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @some_use
func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
                    %v : vector<5xf32>) -> (tensor<?xf32>) {
  %idx = arith.constant 0 : index
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
  %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @main_func
func.func @main_func(%A : tensor<?xf32> {bufferization.writable = true},
                     %v : vector<5xf32>) -> (tensor<?xf32>) {
  // CHECK: call
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
  %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
  return %0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @to_tensor_op_not_writable
func.func @to_tensor_op_not_writable(%m: memref<?xf32>, %v: vector<5xf32>,
                                     %idx1: index, %idx2: index)
  -> vector<10xf32> {
  %0 = bufferization.to_tensor %m restrict : memref<?xf32> to tensor<?xf32>

  // Write to the tensor. Cannot be inplace because the tensor was created by
  // a to_tensor op that is not writable.
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
  %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>

  // Read from the tensor and return the result.
  %cst = arith.constant 0.0 : f32
  %r = vector.transfer_read %w[%idx2], %cst : tensor<?xf32>, vector<10xf32>
  return %r : vector<10xf32>
}

// -----

// CHECK-LABEL: func @inner_func
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %t : tensor<?xf32>
}

func.func @equivalent_func_arg(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
  // This test does not check IR. It just asserts there is no failure due to
  // non-equivalent scf.for yield values.
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// CHECK-LABEL: func @inner_func_2
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %0 : tensor<?xf32>
}

func.func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
  // This test does not check IR. It just asserts there is no failure due to
  // non-equivalent scf.for yield values.
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_one(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  // CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "true"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: tensor.insert
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t1[%idx] : tensor<?xf32>

  return %f, %w : f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_read_both
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_both(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  // CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "false"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: tensor.insert
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t1[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f2 = tensor.extract %t2[%idx] : tensor<?xf32>

  return %f, %f2, %w : f32, f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_no_conflict
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_no_conflict(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  // CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "true"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: tensor.insert
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %w[%idx] : tensor<?xf32>

  return %f, %w : f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_tensor_in_loop_out_of_place(
func.func @write_to_same_tensor_in_loop_out_of_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index)
  -> (tensor<?xf32>)
{
  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // The tensor.insert is out-of-place because %B is written multiple times
    // inside the loop.
    // CHECK: tensor.insert
    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]}
    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
    // CHECK: tensor.insert_slice
    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_alloc_tensor_in_place(
func.func @write_to_same_alloc_tensor_in_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index)
  -> (tensor<?xf32>)
{
  %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>

  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // %B is written multiple times inside the loop, but it is an alloc_tensor.
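    // (Added note: the repeated in-place writes are safe because the
    // alloc_tensor carries no defined contents that anything else reads; in
    // contrast, see @write_to_same_alloc_tensor_out_of_place below.)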
    // CHECK: tensor.insert
    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
    // CHECK: tensor.insert_slice
    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_alloc_tensor_out_of_place(
func.func @write_to_same_alloc_tensor_out_of_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32)
  -> (tensor<?xf32>)
{
  %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
  %C = tensor.insert %f into %B[%lb] : tensor<?xf32>

  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // %C is written multiple times inside the loop. Even though %C aliases
    // with an alloc_tensor, out-of-place bufferization is necessary because
    // there is another alias (%C itself) outside of the loop.
    // CHECK: tensor.insert
    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]}
    %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
    // CHECK: tensor.insert_slice
    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"})
func.func private @ext_func(%t: tensor<?xf32>)

// CHECK: func.func @private_func_read_write(%{{.*}}: tensor<5xf32> {bufferization.access = "read"})
func.func @private_func_read_write(%t: tensor<5xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // Bufferizes out-of-place because `ext_func` may modify the buffer.
  // CHECK: tensor.cast {{.*}} {__inplace_operands_attr__ = ["false"]}
  %0 = tensor.cast %t : tensor<5xf32> to tensor<?xf32>
  func.call @ext_func(%0) : (tensor<?xf32>) -> ()
  %1 = tensor.extract %t[%c0] : tensor<5xf32>
  return %1 : f32
}

// -----

// CHECK-LABEL: func.func private @print_buffer(tensor<*xf32> {bufferization.access = "read"})
func.func private @print_buffer(%t: tensor<*xf32> {bufferization.access = "read"})

// CHECK: func.func @private_func_read(%{{.*}}: tensor<5xf32> {bufferization.access = "read"})
func.func @private_func_read(%t: tensor<5xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // Bufferizes in-place because `print_buffer` is read-only.
  // CHECK: tensor.cast {{.*}} {__inplace_operands_attr__ = ["true"]}
  %0 = tensor.cast %t : tensor<5xf32> to tensor<*xf32>
  // CHECK: call @print_buffer(%cast) {__inplace_operands_attr__ = ["true"]}
  func.call @print_buffer(%0) : (tensor<*xf32>) -> ()
  %1 = tensor.extract %t[%c0] : tensor<5xf32>
  return %1 : f32
}

// -----

// CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"}, tensor<?xf32> {bufferization.access = "read-write"})
func.func private @ext_func(%t1: tensor<?xf32>, %t2: tensor<?xf32>)

// CHECK: func.func @private_func_two_params_writing(%{{.*}}: tensor<?xf32> {bufferization.access = "read"})
func.func @private_func_two_params_writing(%t: tensor<?xf32>) {
  // Both operands bufferize out-of-place because both bufferize to a memory
  // write.
  // CHECK: call @ext_func(%{{.*}}, %{{.*}}) {__inplace_operands_attr__ = ["false", "false"]}
  func.call @ext_func(%t, %t) : (tensor<?xf32>, tensor<?xf32>) -> ()
  return
}

// -----

// CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"}) -> (tensor<5xf32>, tensor<6xf32>)
func.func private @ext_func(%t: tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)

// CHECK: func.func @private_func_aliasing(%{{.*}}: tensor<?xf32> {bufferization.access = "read"})
func.func @private_func_aliasing(%t: tensor<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  // Bufferizes out-of-place because either one of the two results may alias
  // with the argument and one of the results is read afterwards.
  // CHECK: call @ext_func(%{{.*}}) {__inplace_operands_attr__ = ["false"]} : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
  %0, %1 = func.call @ext_func(%t) : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
  %2 = tensor.extract %1[%c0] : tensor<6xf32>
  return %2 : f32
}

// -----

// CHECK-LABEL: func @recursive_function
func.func @recursive_function(%a: tensor<?xf32>, %b: tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
  // The analysis does not support recursive function calls and is conservative
  // around them.
  // CHECK: call @recursive_function
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "false"]}
  %0:2 = call @recursive_function(%a, %b) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>)
  return %0#0, %0#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-ALIAS-SETS-LABEL: func @multiple_returns(
func.func @multiple_returns(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
  cf.cond_br %c, ^bb1, ^bb2
^bb1:
  return %t0 : tensor<5xf32>
^bb2:
  return %t1 : tensor<5xf32>
}

// CHECK-ALIAS-SETS: func @caller(
// CHECK-ALIAS-SETS-SAME: %{{.*}}: i1, %[[t0:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t1:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t2:.*]]: tensor<5xf32> {bufferization.access = "none"})
func.func @caller(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) {
  // Check that alias sets are computed correctly.
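  // (Added encoding sketch: __opresult_alias_set_attr__ lists, per op result,
  // the values the result may alias; the call result below may alias %t0 or
  // %t1 because @multiple_returns returns either one.)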
  // CHECK-ALIAS-SETS: %[[result:.*]] = call @multiple_returns
  // CHECK-ALIAS-SETS-SAME: {__inplace_operands_attr__ = ["none", "true", "true", "true"],
  // CHECK-ALIAS-SETS-SAME: __opresult_alias_set_attr__ = [{{\[}}"%[[result]]", "%[[t0]]", "%[[t1]]"]]}
  call @multiple_returns(%c, %t0, %t1, %t2) : (i1, tensor<5xf32>, tensor<5xf32>, tensor<5xf32>) -> (tensor<5xf32>)
  return
}

// -----

// CHECK-ALIAS-SETS-LABEL: func @multiple_equivalent_returns(
func.func @multiple_equivalent_returns(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
  cf.cond_br %c, ^bb1, ^bb2
^bb1:
  return %t0 : tensor<5xf32>
^bb2:
  return %t0 : tensor<5xf32>
}

// CHECK-ALIAS-SETS: func @caller(
// CHECK-ALIAS-SETS-SAME: %{{.*}}: i1, %[[t0:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t1:.*]]: tensor<5xf32> {bufferization.access = "none"}, %[[t2:.*]]: tensor<5xf32> {bufferization.access = "none"})
func.func @caller(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
  // Check that equivalence sets are computed correctly.
  // CHECK-ALIAS-SETS: %[[result:.*]] = call @multiple_equivalent_returns
  // CHECK-ALIAS-SETS-SAME: {__inplace_operands_attr__ = ["none", "true", "true", "true"],
  // CHECK-ALIAS-SETS-SAME: __opresult_alias_set_attr__ = [{{\[}}"%[[result]]", "%[[t0]]"]]}
  %r = call @multiple_equivalent_returns(%c, %t0, %t1, %t2) : (i1, tensor<5xf32>, tensor<5xf32>, tensor<5xf32>) -> (tensor<5xf32>)
  // CHECK-ALIAS-SETS: return
  // CHECK-ALIAS-SETS-SAME: {__equivalent_func_args__ = [1], __inplace_operands_attr__ = ["true"]} %[[result]] : tensor<5xf32>
  return %r : tensor<5xf32>
}
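// -----

// Added minimal sketch (hypothetical function, not part of the original
// test): a pure read never forces an out-of-place bufferization, even when
// the argument is not writable.

// CHECK-LABEL: func @sketch_read_only
func.func @sketch_read_only(%t: tensor<8xf32> {bufferization.writable = false},
                            %idx: index) -> f32 {
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t[%idx] : tensor<8xf32>
  return %f : f32
}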