canonicalize.mlir - OpenGrok cross reference for /llvm-project/mlir/test/Dialect/Linalg/canonicalize.mlir

Lines Matching full:tensor
30 func.func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf32> {
34   // tensor<0xf32> cannot be dce'ed
35   %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) {
38   } -> tensor<0xf32>
40   return %1: tensor<0xf32>
44 //  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<0xf32>
60 // CHECK-LABEL: func @tensor.cast(
61 func.func @tensor.cast(%a : tensor<3x4xf32>, %b : tensor<4x?xf32>, %c : tensor<3x?xf32>)
62   -> tensor<3x?xf32>
64   %ta = tensor.cast %a : tensor<3x4xf32> to tensor<?x?xf32>
65   %tb = tensor.cast %b : tensor<4x?xf32> to tensor<?x?xf32>
66   %tc = tensor.cast %c : tensor<3x?xf32> to tensor<?x?xf32>
68   //      CHECK:  linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>)
69   // CHECK-SAME:    outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
70   %0 = linalg.matmul ins(%ta, %tb: tensor<?x?xf32>, tensor<?x?xf32>)
71                     outs(%tc: tensor<?x?xf32>) -> tensor<?x?xf32>
73   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<3x?xf32>
75   return %1: tensor<3x?xf32>
80 // CHECK-LABEL: func @tensor.cast.unranked(
81 func.func @tensor.cast.unranked(%a : tensor<*xf32>, %b : tensor<*xf32>, %c : tensor<*xf32>)
82   -> tensor<*xf32>
84   //      CHECK:  tensor.cast
85   //      CHECK:  tensor.cast
86   //      CHECK:  tensor.cast
87   %ta = tensor.cast %a : tensor<*xf32> to tensor<?x?xf32>
88   %tb = tensor.cast %b : tensor<*xf32> to tensor<?x?xf32>
89   %tc = tensor.cast %c : tensor<*xf32> to tensor<?x?xf32>
91   //      CHECK:  linalg.matmul ins({{.*}}tensor<?x?xf32>, tensor<?x?xf32>)
92   // CHECK-SAME:    outs({{.*}}tensor<?x?xf32>) -> tensor<?x?xf32>
93   %0 = linalg.matmul ins(%ta, %tb: tensor<?x?xf32>, tensor<?x?xf32>)
94                     outs(%tc: tensor<?x?xf32>) -> tensor<?x?xf32>
96   //      CHECK:  tensor.cast
97   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<*xf32>
99   return %1: tensor<*xf32>
106     %a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : tensor<?x?xf32>,
109   %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, tensor<?x?xf32>)
110                     outs(%c : tensor<?x?xf32>) -> tensor<?x?xf32>
121 func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
122   -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
126   %0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
127   %1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
128   %2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
129   %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
133   } ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
134     outs(%3, %3 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
137   } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
138   return %4, %5 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
141 //  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
142 //  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
148 func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
149   -> tensor<1x2x3xf32> {
150   %out = tensor.empty() : tensor<1x2x3xf32>
154   } ins(%arg0 : tensor<?x?x?xf32>)
155     outs(%out : tensor<1x2x3xf32>) {
158   } -> (tensor<1x2x3xf32>)
159   return %g : tensor<1x2x3xf32>
162 //  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
163 //       CHECK:     %[[CAST:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<1x2x3xf32>
169 func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
170   %out = tensor.empty() : tensor<f32>
175     outs(%out : tensor<f32>) {
178   } -> (tensor<f32>)
179   return %g : tensor<f32>
187 func.func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
191   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
192   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
193   %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
199     ins(%arg0 : tensor<?x?xf32>) outs(%2 : tensor<?x?xf32>) {
202     } -> tensor<?x?xf32>
203   return %3 : tensor<?x?xf32>
212 func.func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
213   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
217   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
218   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
219   %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
226     ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
227     outs(%2, %2 : tensor<?x?xf32>, tensor<?x?xf32>) {
230     } -> (tensor<?x?xf32>, tensor<?x?xf32>)
231   return %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
252 //   CHECK-NOT:   tensor.pad
254 func.func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
255                          %arg2: tensor<?x?xf32>, %high : index) {
259   %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32>
260   %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>)
261                      outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
262   %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) {
265   } -> tensor<7x7xi32>
266   %3 = tensor.pad %arg2 low[%c0, %c0] high[%high, %high] {
268           tensor.yield %cst : f32
269   } : tensor<?x?xf32> to tensor<2x4xf32>
275 func.func @propagate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
276     %arg3 : index) -> tensor<?x?xf32> {
281   %0 = tensor.empty(%c21, %c42) : tensor<?x?xf32>
282   %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
283   %2 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
284   %3 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
285   %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
286   return %4 : tensor<?x?xf32>
289 //       CHECK:   %[[INIT:.+]] = tensor.empty
291 //       CHECK:   %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
292 //       CHECK:   %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
309 func.func @fold_fill_reshape() -> tensor<6x4xf32> {
311   %empty = tensor.empty() : tensor<1x2x3x4xf32>
312   // CHECK:      %[[COLLAPSE:.+]] = tensor.collapse_shape
314   // CHECK-SAME:   outs(%[[COLLAPSE]] : tensor<6x4xf32>)
315   %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
316   %reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]]
317       : tensor<1x2x3x4xf32> into tensor<6x4xf32>
318   // CHECK: return %[[FILL]] : tensor<6x4xf32>
319   return %reshape : tensor<6x4xf32>
325 //  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?x?x?x?xf32>
326 func.func @fold_fill_reshape_dynamic(%arg0 : tensor<?x?x?x?x?xf32>) -> tensor<?x?xf32> {
328   // CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]]
329   %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
331   %1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]]
332       : tensor<?x?x?x?x?xf32> into tensor<?x?xf32>
334   return %1 : tensor<?x?xf32>
344   %empty_dynamic = tensor.empty(%c1) : tensor<1x2x3x?xi1>
345   %filled = linalg.fill ins(%arg0 : i1) outs(%empty_dynamic : tensor<1x2x3x?xi1>) -> tensor<1x2x3x?xi1>
347   %extracted = tensor.extract %filled[%c0, %c0, %c0, %c0] : tensor<1x2x3x?xi1>
355 func.func @fill_pack() -> tensor<24x32x16x16xf32> {
356   %dest = tensor.empty() : tensor<384x512xf32>
358   %0 = tensor.empty() : tensor<24x32x16x16xf32>
359   %1 = linalg.fill ins(%cst : f32) outs(%dest : tensor<384x512xf32>) -> tensor<384x512xf32>
360   %pack = tensor.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32>
361   return %pack : tensor<24x32x16x16xf32>
364 // CHECK:         %[[PACKED_EMPTY:.+]] = tensor.empty() : tensor<24x32x16x16xf32>
370 func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{
373   %9 = tensor.empty() : tensor<1x1x16x64xi32>
374   %extracted_slice_15 = tensor.extract_slice %9[0, 0, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32>
375   %16 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_15 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32>
376   %0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> to tensor<1x1x8x4x4x8xi32>
377   %pack_18 = tensor.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32>
378   return %pack_18 : tensor<1x1x8x4x4x8xi32>
383 // CHECK:         %[[TENSOR:.+]] = bufferization.to_tensor %[[ALLOC]]
384 // CHECK:         %[[FILL:.+]] = linalg.fill ins(%{{.+}}) outs(%[[TENSOR]]
390 func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> {
394   %0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
395   %dim = tensor.dim %0, %c0 : tensor<?x?xf32>
396   %dim_0 = tensor.dim %0, %c1 : tensor<?x?xf32>
399   %3 = tensor.empty(%1, %2) : tensor<?x?x16x16xf32>
400   %pack = tensor.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32>
401   return %pack : tensor<?x?x16x16xf32>
408 // CHECK:         %[[D0:.+]] = tensor.dim %[[DEST]], %[[C0]]
409 // CHECK:         %[[D1:.+]] = tensor.dim %[[DEST]], %[[C1]]
412 // CHECK:         %[[PACKED_EMPTY:.+]] = tensor.empty(%[[PACKED_D0]], %[[PACKED_D1]]) : tensor<?x?x16x16xf32>
436 //       CHECK:   %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32>
439 func.func @fold_static_pad_fill() -> tensor<412x276xf32> {
441   %empty = tensor.empty() : tensor<400x273xf32>
442   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
443   %pad = tensor.pad %fill low[4, 1] high[8, 2] {
445     tensor.yield %f0 : f32
446   } : tensor<400x273xf32> to tensor<412x276xf32>
447   return %pad : tensor<412x276xf32>
458 // CHECK-SAME: %[[SRC:.+]]: tensor<8x?x16x32xf32>, %[[LOW0:.+]]: index, %[[LOW3:.+]]: index, %[[HIGH2:.+]]: index, %[[HIGH3:.+]]: index
463 //      CHECK:   %[[DIM1:.+]] = tensor.dim %[[SRC]], %[[I1]] : tensor<8x?x16x32xf32>
467 //      CHECK:   %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor<?x?x?x?xf32>
470 func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor<?x?x?x?xf32> {
472   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
473   %pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] {
475     tensor.yield %f0 : f32
476   } : tensor<8x?x16x32xf32> to tensor<?x?x?x?xf32>
477   return %pad : tensor<?x?x?x?xf32>
483 func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
486   %empty = tensor.empty() : tensor<400x273xf32>
487   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
488   // CHECK: tensor.pad
489   %pad = tensor.pad %fill low[4, 1] high[8, 2] {
491     tensor.yield %f1 : f32
492   } : tensor<400x273xf32> to tensor<412x276xf32>
493   return %pad : tensor<412x276xf32>
504 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
505 func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
509   %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
510   %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
511   %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
512   %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
516   } ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor<?x?x?xf32>)
517     outs(%3 : tensor<?x?x?xf32>) {
521   } -> (tensor<?x?x?xf32>)
522   %5 = tensor.cast %4 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
523   return %5 : tensor<2x3x4xf32>
524     //  CHECK:      %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
526     //  CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
527     //  CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
534 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
535 func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
539   %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
540   %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
541   %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
542   %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
543   %4 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
547   } ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>)
548     outs(%3 : tensor<?x?x?xf32>) {
552   } -> (tensor<?x?x?xf32>)
553   %6 = tensor.cast %5 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
554   return %6: tensor<2x3x4xf32>
555     //  CHECK:      %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
557     //  CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
558     //  CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
565 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>, %[[ARG2:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
566 func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
570   %0 = tensor.dim %arg2, %c0 : tensor<2x3x4xf32>
571   %1 = tensor.dim %arg2, %c1 : tensor<2x3x4xf32>
572   %2 = tensor.dim %arg2, %c2 : tensor<2x3x4xf32>
573   %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
574   %4 = tensor.cast %3 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
575   %5 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
579   } ins(%arg0, %5 : tensor<?x?x?xf32>, tensor<2x?x?xf32>)
580     outs(%4 : tensor<2x3x4xf32>) {
584   } -> (tensor<2x3x4xf32>)
585   return %6: tensor<2x3x4xf32>
586     //  CHECK:      %[[CAST_ARG0:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
587     //  CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
589     //  CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
590     //  CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
595 // This test checks the folding of tensor.cast operation when the source value of cast
599 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
600 func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
604   %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
605   %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
606   %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
607   %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
608   %4 = tensor.cast %arg0 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
609   %5 = tensor.cast %arg1 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
613   } ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>)
614     outs(%3 : tensor<?x?x?xf32>) {
618   } -> (tensor<?x?x?xf32>)
619   %7 = tensor.cast %6 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
620   return %7: tensor<2x3x4xf32>
622     //  CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
623     //  CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
630 // CHECK-SAME:  (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
631 func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
632   %0 = tensor.empty(%arg2, %arg3, %arg4) : tensor<?x?x?xf32>
633   %1 = tensor.cast %arg1 : tensor<1x?x?xf32> to tensor<?x?x?xf32>
637   } ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<1x?x?xf32>)
638     outs(%0 : tensor<?x?x?xf32>) {
642   } -> tensor<?x?x?xf32>
643   return %2 : tensor<?x?x?xf32>
645 // CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
646 // CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>)
647 // CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor<?x?x?xf32>
654 //  CHECK-SAME: (%[[INPUT:.+]]: tensor<?x?x?xf32>, %[[LOW0:.+]]: index, %[[LOW1:.+]]: index, %{{.+}}: index, %{{.+}}: index)
659 //       CHECK: %[[INIT:.+]] = tensor.empty()
662 //       CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?xf32>
663 //       CHECK: %[[D1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x?xf32>
664 //       CHECK: %[[D2:.+]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<?x?x?xf32>
665 //       CHECK: tensor.insert_slice %[[INPUT]] into %[[FILL]][%[[LOW0]], %[[OFFSET1]], 2] [%[[D0]], %[[D1]], %[[D2]]] [1, 1, 1]
666 func.func @insert_pad_into_fill(%input: tensor<?x?x?xf32>, %low0: index, %low1: index, %high1: index, %high2: index) -> tensor<8x384x384xf32> {
669   %pad = tensor.pad %input low[%low0, %low1, %c0] high[%c0, %high1, %high2] {
671     tensor.yield %f0 : f32
672   } : tensor<?x?x?xf32> to tensor<8x128x128xf32>
673   %empty = tensor.empty() : tensor<8x384x384xf32>
674   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
675   %0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
676   return %0: tensor<8x384x384xf32>
682 //  CHECK-SAME: (%[[INPUT:.+]]: tensor<7x123x124xf32>, %[[A:.+]]: tensor<8x128x128xf32>, %[[OFFSET:.+]]: index)
684 //       CHECK:   %[[INSERT0:.+]] = tensor.insert_slice %[[A]] into %[[FILL]][%[[OFFSET]], 0, 0] [8, 128, 128] [1, 1, 1]
685 //       CHECK:   %[[INSERT1:.+]] = tensor.insert_slice %[[A]] into %[[INSERT0]][0, 128, %[[OFFSET]]] [8, 128, 128] [1, 1, 1]
686 //       CHECK:                  tensor.insert_slice %[[INPUT]] into %[[INSERT1]][1, 2, 256] [7, 123, 124] [1, 1, 1]
687 func.func @multi_insert_pad_into_fill(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
690   %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
692     tensor.yield %f0 : f32
693   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
694   %empty = tensor.empty() : tensor<8x384x384xf32>
695   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
696   %0 = tensor.insert_slice %a   into %fill[%offset, 0, 0]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
697   %1 = tensor.insert_slice %a   into %0   [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
698   %2 = tensor.insert_slice %pad into %1   [0, 0, 256]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
699   return %2: tensor<8x384x384xf32>
705 func.func @multi_insert_pad_into_fill_overlap(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
708   // CHECK: tensor.pad
709   %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
711     tensor.yield %f0 : f32
712   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
713   %empty = tensor.empty() : tensor<8x384x384xf32>
714   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
715   %0 = tensor.insert_slice %a   into %fill[%offset, 0, 0]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
716   %1 = tensor.insert_slice %a   into %0   [0, 0, 129]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
718   %2 = tensor.insert_slice %pad into %1   [0, 0, 256]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
719   return %2: tensor<8x384x384xf32>
725 func.func @multi_insert_pad_into_fill_overlap(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
728   // CHECK: tensor.pad
729   %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
731     tensor.yield %f0 : f32
732   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
733   %empty = tensor.empty() : tensor<8x384x384xf32>
734   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
735   %0 = tensor.insert_slice %a   into %fill[0, 0, %offset]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
736   %1 = tensor.insert_slice %a   into %0   [0, 128, 255]    [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
738   %2 = tensor.insert_slice %pad into %1   [0, 0, 256]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
739   return %2: tensor<8x384x384xf32>
745 func.func @multi_insert_pad_into_fill(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
748   // CHECK-NOT: tensor.pad
749   %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
751     tensor.yield %f0 : f32
752   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
753   %empty = tensor.empty() : tensor<8x384x384xf32>
754   %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
756   // CHECK-COUNT-3: tensor.insert_slice
757   %0 = tensor.insert_slice %a   into %fill[0, 0, %offset]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
758   %1 = tensor.insert_slice %a   into %0   [0, 1, %offset]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
759   %2 = tensor.insert_slice %pad into %1   [0, 256, 256]    [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
760   return %2: tensor<8x384x384xf32>
766 func.func @multi_insert_pad_into_fill_mismatch(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
770   // CHECK: tensor.pad
771   %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
773     tensor.yield %f0 : f32
774   } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
775   %empty = tensor.empty() : tensor<8x384x384xf32>
777   %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
778   %0 = tensor.insert_slice %a   into %fill[%offset, 0, 0]  [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
779   %1 = tensor.insert_slice %a   into %0   [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
780   %2 = tensor.insert_slice %pad into %1   [0, 0, 256]      [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
781   return %2: tensor<8x384x384xf32>
786 func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
787     %arg2 : tensor<?x?xf32>) -> (tensor<4x8xf32>, tensor<?x?xf32>) {
788   %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
789       outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
790   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x8xf32>
791   return %1, %0 : tensor<4x8xf32>, tensor<?x?xf32>
794 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
795 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
796 //  CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
797 //   CHECK-DAG:  %[[LHS_CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?xf32> to tensor<4x?xf32>
798 //   CHECK-DAG:  %[[RHS_CAST:.+]] = tensor.cast %[[ARG1]] : tensor<?x?xf32> to tensor<?x8xf32>
799 //   CHECK-DAG:  %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor<?x?xf32> to tensor<4x8xf32>
803 //       CHECK:  %[[RESULT_CAST:.+]] = tensor.cast %[[MATMUL]]
808 func.func private @some_use(%0 : tensor<4x8xf32>)
810 func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
811     %arg2 : tensor<?x?xf32>, %arg3 : i1) -> tensor<?x?xf32> {
812   %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
813       outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
815     %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x8xf32>
816     func.call @some_use(%1) : (tensor<4x8xf32>) -> ()
818   return %0 : tensor<?x?xf32>
823 //  CHECK-SAME:     (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>, %[[ARG2:.*]]: tensor<?x?xf32>, %[[ARG3:.*]]: i1)
824 //       CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
825 //  CHECK-SAME:      outs(%[[ARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
827 //       CHECK:   %[[CAST:.*]] = tensor.cast %[[RES]] : tensor<?x?xf32> to tensor<4x8xf32>
828 //       CHECK:   func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> ()
830 //       CHECK: return %[[RES]] : tensor<?x?xf32>
835 func.func @fold_conv_op_with_cast_consumer(%arg0 : tensor<?x?x?x?xf32>,
836     %arg1 : tensor<?x?x?x?xf32>,  %arg2 : tensor<?x?x?x?xf32>) ->
837     (tensor<4x8x12x16xf32>, tensor<?x?x?x?xf32>) {
838   %0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
839       outs(%arg2 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
840   %1 = tensor.cast %0 : tensor<?x?x?x?xf32> to tensor<4x8x12x16xf32>
841   return %1, %0 : tensor<4x8x12x16xf32>, tensor<?x?x?x?xf32>
844 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
845 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
846 //  CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>)
847 //       CHECK:  %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor<?x?x?x?xf32> to tensor<4x8x12x16xf32>
851 //       CHECK:  %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]]
856 func.func @fold_multi_use_generic_op_with_consumer(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<2x3x4xf32>) {
860   %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
861   %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
862   %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
863   %empty1 = tensor.empty(%d1, %d2, %d0) : tensor<?x?x?xf32>
864   %empty2 = tensor.empty(%d2, %d1, %d0) : tensor<?x?x?xf32>
870       ins(%arg0 : tensor<?x?x?xf32>) outs(%empty1, %empty2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
873     } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
874   %1 = tensor.cast %0#1 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
875   return %0#0, %1 : tensor<?x?x?xf32>, tensor<2x3x4xf32>
878 //  CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x?xf32>
879 //   CHECK-DAG:   %[[INIT1:.+]] = tensor.empty() : tensor<2x3x4xf32>
880 //   CHECK-DAG:   %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<4x3x2xf32>
881 //   CHECK-DAG:   %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
885 //       CHECK:   %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor<?x?x?xf32>
915 func.func @erase_non_identity_noop(%arg0 : tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
919   } ins(%arg0 : tensor<?x?xf32>)
920     outs(%arg1 : tensor<?x?xf32>) {
923   } -> tensor<?x?xf32>
924   return %0 : tensor<?x?xf32>
929 //  CHECK-SAME:   (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>)
930 //       CHECK:   return %[[ARG0]] : tensor<?x?xf32>
937 func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: tensor<4xf32>) {
942     ins(%1, %1 : tensor<4xf32>, tensor<4xf32>)
943     outs(%0 : tensor<4xf32>) {
946   } -> tensor<4xf32>
949                         outs(%36 : tensor<4xf32>) {
952   } -> tensor<4xf32>
959 func.func @dead_softmax(%arg0: tensor<16x64x256xf32>) -> tensor<16x64x256xf32> {
960   %0 = tensor.empty() : tensor<16x64x256xf32>
963     ins(%arg0 : tensor<16x64x256xf32>) outs(%0 : tensor<16x64x256xf32>) -> tensor<16x64x256xf32>
964   return %arg0 : tensor<16x64x256xf32>
970 //       CHECK: tensor.dim
971 //       CHECK: tensor.dim
972 //   CHECK-NOT: tensor.dim
974 func.func @canonicalize_dim_of_dest_style_op(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
977   %dim0_0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
978   %dim1_0 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
979   %0 = tensor.empty(%dim0_0, %dim1_0) : tensor<?x?xf32>
980   %1 = linalg.copy ins(%arg0 : tensor<?x?xf32>) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
981   %dim0_1 = tensor.dim %1, %c0 : tensor<?x?xf32>
982   %dim1_1 = tensor.dim %1, %c1 : tensor<?x?xf32>
983   %2 = tensor.empty(%dim0_1, %dim1_1) : tensor<?x?xf32>
984   %3 = linalg.copy ins(%1 : tensor<?x?xf32>) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
985   return %3: tensor<?x?xf32>
990 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
991 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
993 //       CHECK:   linalg.fill ins(%[[ZERO]] : f32) outs(%[[ARG1]] : tensor<?x?xf32>)
994 func.func @canonicalize_fill_to_copy_input(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
996   %fill = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
997   %copy = linalg.copy ins(%fill : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) -> tensor<?x?xf32>
998   return %copy : tensor<?x?xf32>
1004 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
1005 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
1006 //       CHECK:   linalg.copy ins(%[[ARG1]] : tensor<?x?xf32>) outs(%[[ARG0]] : tensor<?x?xf32>)
1007 func.func @canonicalize_fill_to_copy_dest(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
1009   %fill = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
1010   %copy = linalg.copy ins(%arg1 : tensor<?x?xf32>) outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
1011   return %copy : tensor<?x?xf32>
1017 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
1018 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
1020 //       CHECK:   linalg.fill ins(%[[ZERO]] : f32) outs(%[[ARG1]] : tensor<?x?xf32>)
1021 func.func @canonicalize_fill_to_transpose_input(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
1023   %fill = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
1024   %transpose = linalg.transpose ins(%fill : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) permutation = [1, 0]
1025   return %transpose : tensor<?x?xf32>
1031 //  CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<2x3xf32>
1032 //  CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<2x3xf32>)
1034 //       CHECK:       return %[[ARG0]] : tensor<2x3xf32>
1035 func.func @broadcast_same_shape(%input: tensor<2x3xf32>, %init: tensor<2x3xf32>) -> tensor<2x3xf32> {
1036   %0 = linalg.broadcast ins(%input: tensor<2x3xf32>) outs(%init: tensor<2x3xf32>) dimensions = []
1037   return %0 : tensor<2x3xf32>
1042 func.func @transpose_1d(%input: tensor<16xf32>,
1043                         %init: tensor<16xf32>) -> tensor<16xf32> {
1045       ins(%input:tensor<16xf32>)
1046       outs(%init:tensor<16xf32>)
1048   func.return %transpose : tensor<16xf32>
1052 //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<16xf32>,
1053 //  CHECK-SAME:     %[[INIT:[a-zA-Z0-9]+]]: tensor<16xf32>)
1055 //       CHECK:   return %[[INPUT]] : tensor<16xf32>
1059 func.func @transpose_identity_perm(%input: tensor<16x32x64xf32>,
1060                                    %init: tensor<16x32x64xf32>) -> tensor<16x32x64xf32> {
1062       ins(%input:tensor<16x32x64xf32>)
1063       outs(%init:tensor<16x32x64xf32>)
1065   func.return %transpose : tensor<16x32x64xf32>
1069 //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<16x32x64xf32>,
1070 //  CHECK-SAME:     %[[INIT:[a-zA-Z0-9]+]]: tensor<16x32x64xf32>)
1072 //       CHECK:   return %[[INPUT]] : tensor<16x32x64xf32>
1076 func.func @transpose_transpose_cancel(%input: tensor<5x4x3xf32>,
1077                                       %init1: tensor<4x3x5xf32>,
1078                                       %init2: tensor<5x4x3xf32>) -> tensor<5x4x3xf32> {
1080   //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<5x4x3xf32>
1081   //  CHECK-SAME:     %[[INIT1:[a-zA-Z0-9]+]]: tensor<4x3x5xf32>
1082   //  CHECK-SAME:     %[[INIT2:[a-zA-Z0-9]+]]: tensor<5x4x3xf32>
1084   //       CHECK:   return %[[INPUT]] : tensor<5x4x3xf32>
1086       ins(%input:tensor<5x4x3xf32>)
1087       outs(%init1:tensor<4x3x5xf32>)
1090       ins(%transpose1:tensor<4x3x5xf32>)
1091       outs(%init2:tensor<5x4x3xf32>)
1093   func.return %transpose2 : tensor<5x4x3xf32>
1098 func.func @transpose_transpose_fold(%input: tensor<5x4x3xf32>,
1099                                     %init1: tensor<4x3x5xf32>,
1100                                     %init2: tensor<3x4x5xf32>) -> tensor<3x4x5xf32> {
1102   //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<5x4x3xf32>
1103   //  CHECK-SAME:     %[[INIT1:[a-zA-Z0-9]+]]: tensor<4x3x5xf32>
1104   //  CHECK-SAME:     %[[INIT2:[a-zA-Z0-9]+]]: tensor<3x4x5xf32>
1105   //       CHECK:   %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<5x4x3xf32>) outs(%[[INIT2]] : tensor<3x4x5xf32>) permutation = [2, 1, 0]
1107   //       CHECK:   return %[[TRANSPOSE]] : tensor<3x4x5xf32>
1109       ins(%input:tensor<5x4x3xf32>)
1110       outs(%init1:tensor<4x3x5xf32>)
1113       ins(%transpose1:tensor<4x3x5xf32>)
1114       outs(%init2:tensor<3x4x5xf32>)
1116   func.return %transpose2 : tensor<3x4x5xf32>
1121 func.func @broadcast_transpose_fold(%input: tensor<2x4x5xf32>,
1122                                     %init1: tensor<1x2x3x4x5x6xf32>,
1123                                     %init2: tensor<1x6x2x3x5x4xf32>) -> tensor<1x6x2x3x5x4xf32> {
1125   //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<2x4x5xf32>
1126   //  CHECK-SAME:     %[[INIT1:[a-zA-Z0-9]+]]: tensor<1x2x3x4x5x6xf32>
1127   //  CHECK-SAME:     %[[INIT2:[a-zA-Z0-9]+]]: tensor<1x6x2x3x5x4xf32>
1128   //       CHECK:   %[[TMP_INIT:.+]] = tensor.empty() : tensor<2x5x4xf32>
1129   //       CHECK:   %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<2x4x5xf32>) outs(%[[TMP_INIT]] : tensor<2x5x4xf32>) permutation = [0, 2, 1]
1130   //       CHECK:   %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<2x5x4xf32>) outs(%[[INIT2]] : tensor<1x6x2x3x5x4xf32>) dimensions = [0, 3, 1]
1131   //       CHECK:   return %[[BROADCAST]] : tensor<1x6x2x3x5x4xf32>
1133       ins(%input : tensor<2x4x5xf32>)
1134       outs(%init1 : tensor<1x2x3x4x5x6xf32>)
1137       ins(%broadcast : tensor<1x2x3x4x5x6xf32>)
1138       outs(%init2 : tensor<1x6x2x3x5x4xf32>)
1140   func.return %transpose : tensor<1x6x2x3x5x4xf32>
1145 func.func @broadcast_transpose_fold_dynamic(%input: tensor<?x?x5xf32>,
1146                                             %init1: tensor<1x?x3x?x5x6xf32>,
1147                                             %init2: tensor<1x3x?x6x5x?xf32>) -> tensor<1x3x?x6x5x?xf32> {
1149   //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<?x?x5xf32>
1150   //  CHECK-SAME:     %[[INIT1:[a-zA-Z0-9]+]]: tensor<1x?x3x?x5x6xf32>
1151   //  CHECK-SAME:     %[[INIT2:[a-zA-Z0-9]+]]: tensor<1x3x?x6x5x?xf32>
1154   //       CHECK:   %[[DIM0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x5xf32>
1155   //       CHECK:   %[[DIM1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x5xf32>
1156   //       CHECK:   %[[TMP_INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) : tensor<?x5x?xf32>
1157   //       CHECK:   %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<?x?x5xf32>) outs(%[[TMP_INIT]] : tensor<?x5x?xf32>) permutation = [1, 2, 0]
1158   //       CHECK:   %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<?x5x?xf32>) outs(%[[INIT2]] : tensor<1x3x?x6x5x?xf32>) dimensions = [0, 1, 3]
1159   //       CHECK:   return %[[BROADCAST]] : tensor<1x3x?x6x5x?xf32>
1161       ins(%input : tensor<?x?x5xf32>)
1162       outs(%init1 : tensor<1x?x3x?x5x6xf32>)
1165       ins(%broadcast : tensor<1x?x3x?x5x6xf32>)
1166       outs(%init2 : tensor<1x3x?x6x5x?xf32>)
1168   func.return %transpose : tensor<1x3x?x6x5x?xf32>
1173 func.func @broadcast_transpose_fold_2dim(%input: tensor<2xf32>,
1174                                          %init1: tensor<2x4xf32>,
1175                                          %init2: tensor<4x2xf32>) -> tensor<4x2xf32> {
1177   //  CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<2xf32>
1178   //  CHECK-SAME:     %[[INIT1:[a-zA-Z0-9]+]]: tensor<2x4xf32>
1179   //  CHECK-SAME:     %[[INIT2:[a-zA-Z0-9]+]]: tensor<4x2xf32>
1180   //       CHECK:   %[[BROADCAST:.+]] = linalg.broadcast ins(%[[INPUT]] : tensor<2xf32>) outs(%[[INIT2]] : tensor<4x2xf32>) dimensions = [0]
1181   //       CHECK:   return %[[BROADCAST]] : tensor<4x2xf32>
1183       ins(%input : tensor<2xf32>)
1184       outs(%init1 : tensor<2x4xf32>)
1187       ins(%broadcast : tensor<2x4xf32>)
1188       outs(%init2 : tensor<4x2xf32>)
1190   func.return %transpose : tensor<4x2xf32>
1197     -> tensor<5x?x?xf32>
1201   %0 = tensor.empty(%arg0, %arg1) : tensor<5x?x?xf32>
1202   %1 = linalg.fill ins(%cst0 : f32) outs(%0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
1203   %2 = tensor.empty(%arg2, %arg3) : tensor<5x?x?xf32>
1204   %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
1205   %4 = tensor.concat dim(1) %1, %3 : (tensor<5x?x?xf32>, tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
1206   return %4 : tensor<5x?x?xf32>
1214 //   CHECK-DAG:   %[[EMPTY0:.+]] = tensor.empty(%[[ARG0]], %[[ARG1]])
1215 //   CHECK-DAG:   %[[EMPTY1:.+]] = tensor.empty(%[[ARG2]], %[[ARG3]])
1216 //       CHECK:   %[[CONCAT:.+]] = tensor.concat dim(1) %[[EMPTY0]], %[[EMPTY1]]
1240 func.func @recursive_effect(%arg : tensor<1xf32>) {
1241   %init = arith.constant dense<0.0> : tensor<1xf32>
1242   %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>)