one-shot-module-bufferize-analysis.mlir - OpenGrok cross reference for /llvm-project/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir

Lines Matching full:tensor
23 func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
25                              %B : tensor<?xf32> {bufferization.writable = true})
27   -> (tensor<4xf32>, tensor<8xf32>)
29   // tensor.extract_slice is not used in a write, it is not compelled to
33   //     CHECK: tensor.extract_slice
35   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
37   //     CHECK: tensor.extract_slice
39   %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
41   return %r0, %r1: tensor<4xf32>, tensor<8xf32>
47 func.func @insert_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
49                             %B : tensor<?xf32> {bufferization.writable = true},
51                             %C : tensor<4xf32> {bufferization.writable = false})
53   -> (tensor<?xf32>, tensor<?xf32>)
56   //      CHECK: tensor.insert_slice
58   %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
61   //      CHECK: tensor.insert_slice
63   %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
67   return %r0, %r1: tensor<?xf32>, tensor<?xf32>
73 func.func @conflict_on_B(%A : tensor<4x4xf32> {bufferization.writable = true},
75                          %B : tensor<4x4xf32> {bufferization.writable = true})
77   -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
82   %C = linalg.matmul  ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
83                      outs(%B: tensor<4x4xf32>)
84     -> tensor<4x4xf32>
89   %D = linalg.matmul  ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
90                      outs(%B: tensor<4x4xf32>)
91     -> tensor<4x4xf32>
96   %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
97                      outs(%B: tensor<4x4xf32>)
98     -> tensor<4x4xf32>
102   return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
113     %A : tensor<?xf32> {bufferization.writable = true},
115     %B : tensor<?xf32> {bufferization.writable = false})
117   -> (tensor<2xf32>, tensor<2xf32>)
119   // tensor.extract_slice is not used in a write, it is not compelled to
124   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
127   %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
130   %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
133   %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
135   return %r1, %r3: tensor<2xf32>, tensor<2xf32>
142     %A : tensor<?xf32> {bufferization.writable = true},
144     %A2 : tensor<4xf32> {bufferization.writable = true},
146     %A3 : tensor<2xf32> {bufferization.writable = true},
148     %B : tensor<?xf32> {bufferization.writable = false},
150     %B2 : tensor<4xf32> {bufferization.writable = false},
152     %B3 : tensor<2xf32> {bufferization.writable = false})
154   -> (tensor<?xf32>, tensor<?xf32>)
157   %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
160   %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
163   %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
166   %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
170   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
177     %A : tensor<?xf32> {bufferization.writable = true},
178     %B : tensor<?xf32> {bufferization.writable = false},
180   -> (tensor<?xf32>, tensor<?xf32>)
183   // %r0 is an overlapping tensor.extract_slice that does not match, it must be
185   //      CHECK: tensor.extract_slice
187   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
190   //      CHECK: tensor.insert_slice
192   %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
195   // %r2 is an overlapping tensor.extract_slice that does not match, but does
198   //      CHECK: tensor.extract_slice
200   %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
203   //      CHECK: tensor.insert_slice
205   %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
209   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
216     %A : tensor<?xf32> {bufferization.writable = true},
217     %B : tensor<?xf32> {bufferization.writable = false})
218   -> (tensor<?xf32>, tensor<?xf32>)
221   // %r0 is a tensor.extract_slice that matches, it can also be bufferized
223   //      CHECK: tensor.extract_slice
225   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
227   //      CHECK: tensor.insert_slice
229   %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
231   // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized
233   //      CHECK: tensor.extract_slice
235   %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
237   // tensor.insert_slice cannot bufferize inplace.
241   //      CHECK: tensor.insert_slice
243   %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
247   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
254     %A : tensor<?xf32> {bufferization.writable = true},
257   -> (tensor<?xf32>, vector<5xf32>)
262   //      CHECK: tensor.extract_slice
264   %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
268   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>
270   //      CHECK: tensor.insert_slice
272   %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
274   %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>
278   return %2, %3 : tensor<?xf32>, vector<5xf32>
285     %A : tensor<?xf32> {bufferization.writable = true},
289   -> (tensor<?xf32>, vector<5xf32>)
294   //      CHECK: tensor.extract_slice
296   %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
300   %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>
302   //      CHECK: tensor.insert_slice
304   %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
306   //      CHECK: tensor.extract_slice
308   %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>
312   %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?xf32>) -> tensor<?xf32>
314   %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>
316   //      CHECK: tensor.insert_slice
318   %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
322   return %6, %3 : tensor<?xf32>, vector<5xf32>
329     %A : tensor<?x?xf32> {bufferization.writable = false},
330     %B : tensor<4x4xf32> {bufferization.writable = false},
331     %C : tensor<4x4xf32> {bufferization.writable = true})
332   ->  (tensor<4x4xf32>, tensor<4x4xf32>)
334   // tensor.extract_slice is only used as a read, no interference irrespective
336   //     CHECK: tensor.extract_slice
338   %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
343   %D = linalg.matmul  ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
344                      outs(%B: tensor<4x4xf32>)
345     -> tensor<4x4xf32>
350   %E = linalg.matmul  ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
351                      outs(%C: tensor<4x4xf32>)
352     -> tensor<4x4xf32>
356   return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
363     %A : tensor<4x4xf32> {bufferization.writable = false},
364     %B : tensor<?x?xf32> {bufferization.writable = false},
365     %C : tensor<?x?xf32> {bufferization.writable = true})
366   ->  (tensor<4x4xf32>, tensor<4x4xf32>)
370   //     CHECK: tensor.extract_slice
372   %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
377   %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
378                      outs(%sB: tensor<4x4xf32>)
379     -> tensor<4x4xf32>
384   //     CHECK: tensor.extract_slice
386   %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
388   // Step 1. %sC backprops to the tensor.extract_slice producer which is not
392   %E = linalg.matmul  ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
393                      outs(%sC: tensor<4x4xf32>)
394     -> tensor<4x4xf32>
396   return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
407     %A: tensor<8x6xf32> {bufferization.writable = false},
408     %B: tensor<6x6xf32> {bufferization.writable = false},
409     %C: tensor<30x20xf32> {bufferization.writable = true})
410   -> tensor<30x20xf32>
412   //      CHECK: tensor.extract_slice
414   %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor<?x?xf32>
418   %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor<?x?xf32>) -> tensor<?x?xf32>
420   //      CHECK: tensor.extract_slice
422   %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
424   //      CHECK: tensor.insert_slice
426   %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<?x?xf32> into tensor<30x20xf32>
430   return %20 : tensor<30x20xf32>
441     %A : tensor<4x4xf32> {bufferization.writable = false},
442     %B : tensor<?x?xf32> {bufferization.writable = false},
443     %C : tensor<?x?xf32> {bufferization.writable = true})
444   ->  (tensor<4x4xf32>, tensor<4x4xf32>)
449   //     CHECK: tensor.extract_slice
451   %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
453   // Step 3. %sB backprops to the tensor.extract_slice producer which is not
457   %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
458                      outs(%sB: tensor<4x4xf32>)
459     -> tensor<4x4xf32>
464   //     CHECK: tensor.extract_slice
466   %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
468   // Step 1. %sC backprops to the tensor.extract_slice producer which is not
472   %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
473                      outs(%sC: tensor<4x4xf32>)
474     -> tensor<4x4xf32>
476   return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
483     %A : tensor<?x?xf32> {bufferization.writable = false},
484     %B : tensor<?x?xf32> {bufferization.writable = true},
485     %C : tensor<?x?xf32> {bufferization.writable = true},
489   ->  (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
493   // 2-level matching tensor.extract_slice / tensor.insert_slice into non
500   //     CHECK: tensor.extract_slice
502   // CHECK-NEXT: tensor.extract_slice
506   // CHECK-NEXT: tensor.insert_slice
508   // CHECK-NEXT: tensor.insert_slice
510   %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
511   %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
512   %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
513   %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
514   %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
516   // 3-level matching tensor.extract_slice / tensor.insert_slice into
518   // CHECK-NEXT: tensor.extract_slice
520   // CHECK-NEXT: tensor.extract_slice
522   // CHECK-NEXT: tensor.extract_slice
526   // CHECK-NEXT: tensor.insert_slice
528   // CHECK-NEXT: tensor.insert_slice
530   // CHECK-NEXT: tensor.insert_slice
532   %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
533   %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
534   %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
535   %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
536   %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
537   %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
538   %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
540   // 2-level matching tensor.extract_slice / tensor.insert_slice into
543   // CHECK-NEXT: tensor.extract_slice
545   // The tensor.insert_slice that would be candidate for matching does not actually
546   // match. That tensor.insert_slice can still be bufferized inplace nonetheless
547   // but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
548   // CHECK-NEXT: tensor.extract_slice
552   // CHECK-NEXT: tensor.insert_slice
554   // CHECK-NEXT: tensor.insert_slice
556   %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
557   %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
558   %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor<?x4xf32>) -> tensor<?x4xf32>
559   %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor<?x4xf32> into tensor<?x?xf32>
560   %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
564   return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
573 func.func private @foo(tensor<64xf32>)
576 func.func @dependence_through_call(%I : tensor<64xf32> {bufferization.writable = true}) {
585   %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
590   %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
592   call @foo(%A) : (tensor<64xf32>) -> ()
593   call @foo(%B) : (tensor<64xf32>) -> ()
600 func.func private @foo(tensor<64xf32>)
602 func.func private @bar(%A : tensor<64xf32>) {
603   call @foo(%A) : (tensor<64xf32>) -> ()
608     %I : tensor<64xf32> {bufferization.writable = true},
609     %I2 : tensor<64xf32> {bufferization.writable = true}) {
621   %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
626   %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
634     -> (tensor<64xf32>, tensor<64xf32>)
636     scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32>
638   call @foo(%r#0) : (tensor<64xf32>) -> ()
639   call @foo(%r#1) : (tensor<64xf32>) -> ()
646   %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
651   %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
653   call @bar(%A2) : (tensor<64xf32>) -> ()
654   call @bar(%B2) : (tensor<64xf32>) -> ()
667                                     %s3 : index) -> tensor<?xi32> {
668   %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
669   //      CHECK: tensor.extract_slice
671   %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
674   %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
675   return %r : tensor<?xi32>
681     %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
682     %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
683     %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
684     -> tensor<256x256xf32>
690   %7 = bufferization.alloc_tensor() : tensor<256x256xf32>
696   %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
697   %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
699   //      CHECK: tensor.extract_slice
701   //      CHECK: tensor.extract_slice
705   %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
706   %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
708          ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
709         outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
713   return %r : tensor<256x256xf32>
719     %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
720     %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
721     %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
722     -> tensor<256x256xf32>
728   %7 = bufferization.alloc_tensor() : tensor<256x256xf32>
734   %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
735   %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
736   %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
742   %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
743   %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
744   %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
746   //      CHECK: tensor.extract_slice
748   //      CHECK: tensor.extract_slice
752   %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
753   %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
755          ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
756         outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
760   return %r : tensor<256x256xf32>
766 // Chain of tensor.insert_slice is better traversed in reverse order without
767 // prioritizing the tensor.insert_slice ops.
774     %arg0: tensor<62x126xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
776     %arg1: tensor<126x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
778     %arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
780   -> tensor<62x90xf32> attributes {passthrough = [["prefer-vector-width", "512"]], target_cpu = "skylake-avx512"}
787   %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>
789   //      CHECK: tensor.extract_slice
791   %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
794   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
795   //      CHECK: tensor.insert_slice
797   %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
799   //      CHECK: tensor.extract_slice
801   %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
804   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
805   //      CHECK: tensor.insert_slice
807   %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
811   return %15 : tensor<62x90xf32>
822 func.func @ip(%t: tensor<10x20xf32> {bufferization.writable = true},
824   -> tensor<10x20xf32>
829   %r = scf.for %arg0 = %c0 to %c257 step %c256 iter_args(%arg1 = %t) -> (tensor<10x20xf32>) {
830     %t1 = tensor.extract_slice %arg1[%x, 0] [5, %y] [1, 1] : tensor<10x20xf32> to tensor<5x?xf32>
831     %t11 = tensor.extract_slice %t1[0, 0] [5, %y] [1, 1] : tensor<5x?xf32> to tensor<5x?xf32>
832     %t2 = vector.transfer_write %v, %t11[%c0, %c0] : vector<5x6xf32>, tensor<5x?xf32>
833     %t3 = tensor.insert_slice %t2 into %arg1[%x, 0] [5, %y] [1, 1] : tensor<5x?xf32> into tensor<10x20xf32>
834     scf.yield %t3 : tensor<10x20xf32>
839  return %r : tensor<10x20xf32>
856     %t1: tensor<?xf32> {bufferization.writable = true},
858     %t2: tensor<?xf32> {bufferization.writable = true})
860   -> (tensor<?xf32>, tensor<?xf32>){
864   %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
865                                outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
868     } -> (tensor<?xf32>, tensor<?xf32>)
872   return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
890     %t1: tensor<?xf32> {bufferization.writable = true},
892     %t2: tensor<?xf32> {bufferization.writable = true})
894         -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
899           ins(%t1 : tensor<?xf32>)
900           outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
903     } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
907   return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
916     %arg2: tensor<62x90xf32> {bufferization.writable = true},
918   -> (tensor<62x90xf32>, tensor<?x?xf32>)
924   //      CHECK: tensor.extract_slice
926   %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>
928   //      CHECK: tensor.extract_slice
930   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
933   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
934   //      CHECK: tensor.insert_slice
936   %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
938   //      CHECK: tensor.extract_slice
940   %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
943   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
944   //      CHECK: tensor.insert_slice
946   %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>
950   return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
957     %arg2: tensor<62x90xf32> {bufferization.writable = true})
958   -> (tensor<62x90xf32>)
960   //      CHECK: tensor.extract_slice
962   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
965   //      CHECK: tensor.extract_slice
967   %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
970   //      CHECK: tensor.insert_slice
972   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
975   //      CHECK: tensor.insert_slice
977   %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
981   return %15 : tensor<62x90xf32>
988     %arg2: tensor<62x90xf32> {bufferization.writable = true})
989   -> (tensor<62x90xf32>)
991   //      CHECK: tensor.extract_slice
993   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
996   //      CHECK: tensor.extract_slice
998   %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
1001   //      CHECK: tensor.insert_slice
1003   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
1006   //      CHECK: tensor.insert_slice
1008   %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
1012   return %15 : tensor<62x90xf32>
1019     %arg2: tensor<62x90xf32> {bufferization.writable = true})
1020   -> (tensor<62x90xf32>)
1022   //      CHECK: tensor.extract_slice
1024   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
1026   //      CHECK: tensor.insert_slice
1028   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
1030   //      CHECK: tensor.insert_slice
1032   %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
1036   return %15 : tensor<62x90xf32>
1042 func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
1043                     %v : vector<5xf32>) -> (tensor<?xf32>) {
1047   %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
1048   return %0 : tensor<?xf32>
1053 func.func @main_func(%A : tensor<?xf32> {bufferization.writable = true},
1054                      %v : vector<5xf32>) -> (tensor<?xf32>) {
1057   %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
1058   return %0 : tensor<?xf32>
1067   %0 = bufferization.to_tensor %m restrict : memref<?xf32> to tensor<?xf32>
1069   // Write to the tensor. Cannot be inplace due to bufferization.to_tensor.
1072   %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>
1074   // Read from the tensor and return result.
1076   %r = vector.transfer_read %w[%idx2], %cst : tensor<?xf32>, vector<10xf32>
1083 func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
1086   return %t : tensor<?xf32>
1089 func.func @equivalent_func_arg(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
1092   %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
1093     %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
1094     scf.yield %3 : tensor<?xf32>
1096   return %1: tensor<?xf32>
1102 func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
1105   %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
1108   return %0 : tensor<?xf32>
1111 func.func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
1114   %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
1115     %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
1116     scf.yield %3 : tensor<?xf32>
1118   return %1: tensor<?xf32>
1124 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
1126     %t1 : tensor<?xf32> {bufferization.writable = true},
1127     %t2 : tensor<?xf32> {bufferization.writable = true},
1129   -> (f32, tensor<?xf32>)
1136   %s = arith.select %c, %t1, %t2 : tensor<?xf32>
1137   //      CHECK: tensor.insert
1139   %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
1140   //      CHECK: tensor.extract
1142   %f = tensor.extract %t1[%idx] : tensor<?xf32>
1144   return %f, %w : f32, tensor<?xf32>
1150 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
1152     %t1 : tensor<?xf32> {bufferization.writable = true},
1153     %t2 : tensor<?xf32> {bufferization.writable = true},
1155   -> (f32, f32, tensor<?xf32>)
1162   %s = arith.select %c, %t1, %t2 : tensor<?xf32>
1163   //      CHECK: tensor.insert
1165   %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
1166   //      CHECK: tensor.extract
1168   %f = tensor.extract %t1[%idx] : tensor<?xf32>
1169   //      CHECK: tensor.extract
1171   %f2 = tensor.extract %t2[%idx] : tensor<?xf32>
1173   return %f, %f2, %w : f32, f32, tensor<?xf32>
1179 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
1181     %t1 : tensor<?xf32> {bufferization.writable = true},
1182     %t2 : tensor<?xf32> {bufferization.writable = true},
1184   -> (f32, tensor<?xf32>)
1191   %s = arith.select %c, %t1, %t2 : tensor<?xf32>
1192   //      CHECK: tensor.insert
1194   %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
1195   //      CHECK: tensor.extract
1197   %f = tensor.extract %w[%idx] : tensor<?xf32>
1199   return %f, %w : f32, tensor<?xf32>
1206     %A : tensor<?xf32> {bufferization.writable = true},
1207     %B : tensor<?xf32> {bufferization.writable = true},
1209   -> (tensor<?xf32>)
1212   %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
1215     // The tensor.insert is out-of-place because %B is written multiple
1217     //      CHECK: tensor.insert
1219     %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
1220     //      CHECK: tensor.insert_slice
1222     %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
1223     scf.yield %A2 : tensor<?xf32>
1227   return %r0 : tensor<?xf32>
1234     %A : tensor<?xf32> {bufferization.writable = true},
1236   -> (tensor<?xf32>)
1238   %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
1241   %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
1245     //      CHECK: tensor.insert
1247     %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
1248     //      CHECK: tensor.insert_slice
1250     %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
1251     scf.yield %A2 : tensor<?xf32>
1255   return %r0 : tensor<?xf32>
1262     %A : tensor<?xf32> {bufferization.writable = true},
1264   -> (tensor<?xf32>)
1266   %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
1267   %C = tensor.insert %f into %B[%lb] : tensor<?xf32>
1270   %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
1276     //      CHECK: tensor.insert
1278     %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
1279     //      CHECK: tensor.insert_slice
1281     %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
1282     scf.yield %A2 : tensor<?xf32>
1286   return %r0 : tensor<?xf32>
1291 // CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"})
1292 func.func private @ext_func(%t: tensor<?xf32>)
1294 // CHECK: func.func @private_func_read_write(%{{.*}}: tensor<5xf32> {bufferization.access = "read"})
1295 func.func @private_func_read_write(%t: tensor<5xf32>) -> f32 {
1298   // CHECK: tensor.cast {{.*}} {__inplace_operands_attr__ = ["false"]}
1299   %0 = tensor.cast %t : tensor<5xf32> to tensor<?xf32>
1300   func.call @ext_func(%0) : (tensor<?xf32>) -> ()
1301   %1 = tensor.extract %t[%c0] : tensor<5xf32>
1307 // CHECK-LABEL: func.func private @print_buffer(tensor<*xf32> {bufferization.access = "read"})
1308 func.func private @print_buffer(%t: tensor<*xf32> {bufferization.access = "read"})
1310 // CHECK: func.func @private_func_read(%{{.*}}: tensor<5xf32> {bufferization.access = "read"})
1311 func.func @private_func_read(%t: tensor<5xf32>) -> f32 {
1314   // CHECK: tensor.cast {{.*}} {__inplace_operands_attr__ = ["true"]}
1315   %0 = tensor.cast %t : tensor<5xf32> to tensor<*xf32>
1317   func.call @print_buffer(%0) : (tensor<*xf32>) -> ()
1318   %1 = tensor.extract %t[%c0] : tensor<5xf32>
1324 // CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"}, tensor<?xf32> {bufferization.access = "read-write"})
1325 func.func private @ext_func(%t1: tensor<?xf32>, %t2: tensor<?xf32>)
1327 // CHECK: func.func @private_func_two_params_writing(%{{.*}}: tensor<?xf32> {bufferization.access = "read"})
1328 func.func @private_func_two_params_writing(%t: tensor<?xf32>) {
1332   func.call @ext_func(%t, %t) : (tensor<?xf32>, tensor<?xf32>) -> ()
1338 // CHECK-LABEL: func.func private @ext_func(tensor<?xf32> {bufferization.access = "read-write"}) -> (tensor<5xf32>, tensor<6xf32>)
1339 func.func private @ext_func(%t: tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
1341 // CHECK: func.func @private_func_aliasing(%{{.*}}: tensor<?xf32> {bufferization.access = "read"})
1342 func.func @private_func_aliasing(%t: tensor<?xf32>) -> f32 {
1346   // CHECK: call @ext_func(%{{.*}}) {__inplace_operands_attr__ = ["false"]} : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
1347   %0, %1 = func.call @ext_func(%t) : (tensor<?xf32>) -> (tensor<5xf32>, tensor<6xf32>)
1348   %2 = tensor.extract %1[%c0] : tensor<6xf32>
1355 func.func @recursive_function(%a: tensor<?xf32>, %b: tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>) {
1360   %0:2 = call @recursive_function(%a, %b) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>, tensor<?xf32>)
1361   return %0#0, %0#1 : tensor<?xf32>, tensor<?xf32>
1367 func.func @multiple_returns(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
1370   return %t0 : tensor<5xf32>
1372   return %t1 : tensor<5xf32>
1376 //  CHECK-ALIAS-SETS-SAME:     %{{.*}}: i1, %[[t0:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t1:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t2:.*]]: tensor<5xf32> {bufferization.access = "none"})
1377 func.func @caller(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) {
1382   call @multiple_returns(%c, %t0, %t1, %t2) : (i1, tensor<5xf32>, tensor<5xf32>, tensor<5xf32>) -> (tensor<5xf32>)
1389 func.func @multiple_equivalent_returns(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
1392   return %t0 : tensor<5xf32>
1394   return %t0 : tensor<5xf32>
1398 //  CHECK-ALIAS-SETS-SAME:     %{{.*}}: i1, %[[t0:.*]]: tensor<5xf32> {bufferization.access = "read"}, %[[t1:.*]]: tensor<5xf32> {bufferization.access = "none"}, %[[t2:.*]]: tensor<5xf32> {bufferization.access = "none"})
1399 func.func @caller(%c: i1, %t0: tensor<5xf32>, %t1: tensor<5xf32>, %t2: tensor<5xf32>) -> tensor<5xf32> {
1404   %r = call @multiple_equivalent_returns(%c, %t0, %t1, %t2) : (i1, tensor<5xf32>, tensor<5xf32>, tensor<5xf32>) -> (tensor<5xf32>)
1405   // CHECK-ALIAS-SETS-SAME: {__equivalent_func_args__ = [1], __inplace_operands_attr__ = ["true"]} %[[result]] : tensor<5xf32>
1406   return %r : tensor<5xf32>