Lines Matching full:tensor

30 func.func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf32> {
34 // tensor<0xf32> cannot be dce'ed
35 %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) {
38 } -> tensor<0xf32>
40 return %1: tensor<0xf32>
44 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<0xf32>
60 // CHECK-LABEL: func @tensor.cast(
61 func.func @tensor.cast(%a : tensor<3x4xf32>, %b : tensor<4x?xf32>, %c : tensor<3x?xf32>)
62 -> tensor<3x?xf32>
64 %ta = tensor.cast %a : tensor<3x4xf32> to tensor<?x?xf32>
65 %tb = tensor.cast %b : tensor<4x?xf32> to tensor<?x?xf32>
66 %tc = tensor.cast %c : tensor<3x?xf32> to tensor<?x?xf32>
68 // CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>)
69 // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
70 %0 = linalg.matmul ins(%ta, %tb: tensor<?x?xf32>, tensor<?x?xf32>)
71 outs(%tc: tensor<?x?xf32>) -> tensor<?x?xf32>
73 %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<3x?xf32>
75 return %1: tensor<3x?xf32>
80 // CHECK-LABEL: func @tensor.cast.unranked(
81 func.func @tensor.cast.unranked(%a : tensor<*xf32>, %b : tensor<*xf32>, %c : tensor<*xf32>)
82 -> tensor<*xf32>
84 // CHECK: tensor.cast
85 // CHECK: tensor.cast
86 // CHECK: tensor.cast
87 %ta = tensor.cast %a : tensor<*xf32> to tensor<?x?xf32>
88 %tb = tensor.cast %b : tensor<*xf32> to tensor<?x?xf32>
89 %tc = tensor.cast %c : tensor<*xf32> to tensor<?x?xf32>
91 // CHECK: linalg.matmul ins({{.*}}tensor<?x?xf32>, tensor<?x?xf32>)
92 // CHECK-SAME: outs({{.*}}tensor<?x?xf32>) -> tensor<?x?xf32>
93 %0 = linalg.matmul ins(%ta, %tb: tensor<?x?xf32>, tensor<?x?xf32>)
94 outs(%tc: tensor<?x?xf32>) -> tensor<?x?xf32>
96 // CHECK: tensor.cast
97 %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<*xf32>
99 return %1: tensor<*xf32>
106 %a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : tensor<?x?xf32>,
109 %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, tensor<?x?xf32>)
110 outs(%c : tensor<?x?xf32>) -> tensor<?x?xf32>
121 func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
122 -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
126 %0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
127 %1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
128 %2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
129 %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
133 } ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
134 outs(%3, %3 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
137 } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
138 return %4, %5 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
141 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
142 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
148 func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
149 -> tensor<1x2x3xf32> {
150 %out = tensor.empty() : tensor<1x2x3xf32>
154 } ins(%arg0 : tensor<?x?x?xf32>)
155 outs(%out : tensor<1x2x3xf32>) {
158 } -> (tensor<1x2x3xf32>)
159 return %g : tensor<1x2x3xf32>
162 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
163 // CHECK: %[[CAST:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<1x2x3xf32>
169 func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
170 %out = tensor.empty() : tensor<f32>
175 outs(%out : tensor<f32>) {
178 } -> (tensor<f32>)
179 return %g : tensor<f32>
187 func.func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
191 %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
192 %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
193 %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
199 ins(%arg0 : tensor<?x?xf32>) outs(%2 : tensor<?x?xf32>) {
202 } -> tensor<?x?xf32>
203 return %3 : tensor<?x?xf32>
212 func.func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
213 -> (tensor<?x?xf32>, tensor<?x?xf32>) {
217 %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
218 %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
219 %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
226 ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
227 outs(%2, %2 : tensor<?x?xf32>, tensor<?x?xf32>) {
230 } -> (tensor<?x?xf32>, tensor<?x?xf32>)
231 return %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
252 // CHECK-NOT: tensor.pad
254 func.func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
255 %arg2: tensor<?x?xf32>, %high : index) {
259 %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32>
260 %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>)
261 outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
262 %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) {
265 } -> tensor<7x7xi32>
266 %3 = tensor.pad %arg2 low[%c0, %c0] high[%high, %high] {
268 tensor.yield %cst : f32
269 } : tensor<?x?xf32> to tensor<2x4xf32>
275 func.func @propagate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
276 %arg3 : index) -> tensor<?x?xf32> {
281 %0 = tensor.empty(%c21, %c42) : tensor<?x?xf32>
282 %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
283 %2 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
284 %3 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
285 %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
286 return %4 : tensor<?x?xf32>
289 // CHECK: %[[INIT:.+]] = tensor.empty
291 // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
292 // CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
309 func.func @fold_fill_reshape() -> tensor<6x4xf32> {
311 %empty = tensor.empty() : tensor<1x2x3x4xf32>
312 // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
314 // CHECK-SAME: outs(%[[COLLAPSE]] : tensor<6x4xf32>)
315 %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
316 %reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]]
317 : tensor<1x2x3x4xf32> into tensor<6x4xf32>
318 // CHECK: return %[[FILL]] : tensor<6x4xf32>
319 return %reshape : tensor<6x4xf32>
325 // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?x?x?xf32>
326 func.func @fold_fill_reshape_dynamic(%arg0 : tensor<?x?x?x?x?xf32>) -> tensor<?x?xf32> {
328 // CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]]
329 %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
331 %1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]]
332 : tensor<?x?x?x?x?xf32> into tensor<?x?xf32>
334 return %1 : tensor<?x?xf32>
344 %empty_dynamic = tensor.empty(%c1) : tensor<1x2x3x?xi1>
345 %filled = linalg.fill ins(%arg0 : i1) outs(%empty_dynamic : tensor<1x2x3x?xi1>) -> tensor<1x2x3x?xi1>
347 %extracted = tensor.extract %filled[%c0, %c0, %c0, %c0] : tensor<1x2x3x?xi1>
355 func.func @fill_pack() -> tensor<24x32x16x16xf32> {
356 %dest = tensor.empty() : tensor<384x512xf32>
358 %0 = tensor.empty() : tensor<24x32x16x16xf32>
359 %1 = linalg.fill ins(%cst : f32) outs(%dest : tensor<384x512xf32>) -> tensor<384x512xf32>
360 %pack = tensor.pack %1 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %0 : tensor<384x512xf32> -> tensor<24x32x16x16xf32>
361 return %pack : tensor<24x32x16x16xf32>
364 // CHECK: %[[PACKED_EMPTY:.+]] = tensor.empty() : tensor<24x32x16x16xf32>
370 func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{
373 %9 = tensor.empty() : tensor<1x1x16x64xi32>
374 %extracted_slice_15 = tensor.extract_slice %9[0, 0, 0, 0] [1, 1, 16, 64] [1, 1, 1, 1] : tensor<1x1x16x64xi32> to tensor<1x1x16x64xi32>
375 %16 = linalg.fill ins(%c0_i32 : i32) outs(%extracted_slice_15 : tensor<1x1x16x64xi32>) -> tensor<1x1x16x64xi32>
376 %0 = bufferization.to_tensor %alloc restrict writable : memref<1x1x8x4x4x8xi32> to tensor<1x1x8x4x4x8xi32>
377 %pack_18 = tensor.pack %16 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %0 : tensor<1x1x16x64xi32> -> tensor<1x1x8x4x4x8xi32>
378 return %pack_18 : tensor<1x1x8x4x4x8xi32>
383 // CHECK: %[[TENSOR:.+]] = bufferization.to_tensor %[[ALLOC]]
384 // CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}) outs(%[[TENSOR]]
390 func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> {
394 %0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
395 %dim = tensor.dim %0, %c0 : tensor<?x?xf32>
396 %dim_0 = tensor.dim %0, %c1 : tensor<?x?xf32>
399 %3 = tensor.empty(%1, %2) : tensor<?x?x16x16xf32>
400 %pack = tensor.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %3 : tensor<?x?xf32> -> tensor<?x?x16x16xf32>
401 return %pack : tensor<?x?x16x16xf32>
408 // CHECK: %[[D0:.+]] = tensor.dim %[[DEST]], %[[C0]]
409 // CHECK: %[[D1:.+]] = tensor.dim %[[DEST]], %[[C1]]
412 // CHECK: %[[PACKED_EMPTY:.+]] = tensor.empty(%[[PACKED_D0]], %[[PACKED_D1]]) : tensor<?x?x16x16xf32>
436 // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32>
439 func.func @fold_static_pad_fill() -> tensor<412x276xf32> {
441 %empty = tensor.empty() : tensor<400x273xf32>
442 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
443 %pad = tensor.pad %fill low[4, 1] high[8, 2] {
445 tensor.yield %f0 : f32
446 } : tensor<400x273xf32> to tensor<412x276xf32>
447 return %pad : tensor<412x276xf32>
458 // CHECK-SAME: %[[SRC:.+]]: tensor<8x?x16x32xf32>, %[[LOW0:.+]]: index, %[[LOW3:.+]]: index, %[[HIGH2:.+]]: index, %[[HIGH3:.+]]: index
463 // CHECK: %[[DIM1:.+]] = tensor.dim %[[SRC]], %[[I1]] : tensor<8x?x16x32xf32>
467 // CHECK: %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor<?x?x?x?xf32>
470 func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor<?x?x?x?xf32> {
472 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
473 %pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] {
475 tensor.yield %f0 : f32
476 } : tensor<8x?x16x32xf32> to tensor<?x?x?x?xf32>
477 return %pad : tensor<?x?x?x?xf32>
483 func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
486 %empty = tensor.empty() : tensor<400x273xf32>
487 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
488 // CHECK: tensor.pad
489 %pad = tensor.pad %fill low[4, 1] high[8, 2] {
491 tensor.yield %f1 : f32
492 } : tensor<400x273xf32> to tensor<412x276xf32>
493 return %pad : tensor<412x276xf32>
504 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
505 func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
509 %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
510 %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
511 %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
512 %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
516 } ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor<?x?x?xf32>)
517 outs(%3 : tensor<?x?x?xf32>) {
521 } -> (tensor<?x?x?xf32>)
522 %5 = tensor.cast %4 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
523 return %5 : tensor<2x3x4xf32>
524 // CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
526 // CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
527 // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
534 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
535 func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
539 %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
540 %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
541 %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
542 %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
543 %4 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
547 } ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>)
548 outs(%3 : tensor<?x?x?xf32>) {
552 } -> (tensor<?x?x?xf32>)
553 %6 = tensor.cast %5 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
554 return %6: tensor<2x3x4xf32>
555 // CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
557 // CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
558 // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
565 // CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>, %[[ARG2:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
566 func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
570 %0 = tensor.dim %arg2, %c0 : tensor<2x3x4xf32>
571 %1 = tensor.dim %arg2, %c1 : tensor<2x3x4xf32>
572 %2 = tensor.dim %arg2, %c2 : tensor<2x3x4xf32>
573 %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
574 %4 = tensor.cast %3 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
575 %5 = tensor.cast %arg1 : tensor<?x?x?xf32> to tensor<2x?x?xf32>
579 } ins(%arg0, %5 : tensor<?x?x?xf32>, tensor<2x?x?xf32>)
580 outs(%4 : tensor<2x3x4xf32>) {
584 } -> (tensor<2x3x4xf32>)
585 return %6: tensor<2x3x4xf32>
586 // CHECK: %[[CAST_ARG0:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
587 // CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
589 // CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
590 // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
595 // This test checks the folding of tensor.cast operation when the source value of cast
599 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
600 func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
604 %0 = tensor.dim %arg0, %c0 : tensor<2x3x4xf32>
605 %1 = tensor.dim %arg0, %c1 : tensor<2x3x4xf32>
606 %2 = tensor.dim %arg0, %c2 : tensor<2x3x4xf32>
607 %3 = tensor.empty(%0, %1, %2) : tensor<?x?x?xf32>
608 %4 = tensor.cast %arg0 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
609 %5 = tensor.cast %arg1 : tensor<2x3x4xf32> to tensor<2x?x?xf32>
613 } ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>)
614 outs(%3 : tensor<?x?x?xf32>) {
618 } -> (tensor<?x?x?xf32>)
619 %7 = tensor.cast %6 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
620 return %7: tensor<2x3x4xf32>
622 // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
623 // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
630 // CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
631 func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
632 %0 = tensor.empty(%arg2, %arg3, %arg4) : tensor<?x?x?xf32>
633 %1 = tensor.cast %arg1 : tensor<1x?x?xf32> to tensor<?x?x?xf32>
637 } ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<1x?x?xf32>)
638 outs(%0 : tensor<?x?x?xf32>) {
642 } -> tensor<?x?x?xf32>
643 return %2 : tensor<?x?x?xf32>
645 // CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
646 // CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>)
647 // CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor<?x?x?xf32>
654 // CHECK-SAME: (%[[INPUT:.+]]: tensor<?x?x?xf32>, %[[LOW0:.+]]: index, %[[LOW1:.+]]: index, %{{.+}}: index, %{{.+}}: index)
659 // CHECK: %[[INIT:.+]] = tensor.empty()
662 // CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?xf32>
663 // CHECK: %[[D1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x?xf32>
664 // CHECK: %[[D2:.+]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<?x?x?xf32>
665 // CHECK: tensor.insert_slice %[[INPUT]] into %[[FILL]][%[[LOW0]], %[[OFFSET1]], 2] [%[[D0]], %[[D1]], %[[D2]]] [1, 1, 1]
666 func.func @insert_pad_into_fill(%input: tensor<?x?x?xf32>, %low0: index, %low1: index, %high1: index, %high2: index) -> tensor<8x384x384xf32> {
669 %pad = tensor.pad %input low[%low0, %low1, %c0] high[%c0, %high1, %high2] {
671 tensor.yield %f0 : f32
672 } : tensor<?x?x?xf32> to tensor<8x128x128xf32>
673 %empty = tensor.empty() : tensor<8x384x384xf32>
674 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
675 %0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
676 return %0: tensor<8x384x384xf32>
682 // CHECK-SAME: (%[[INPUT:.+]]: tensor<7x123x124xf32>, %[[A:.+]]: tensor<8x128x128xf32>, %[[OFFSET:.+]]: index)
684 // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[A]] into %[[FILL]][%[[OFFSET]], 0, 0] [8, 128, 128] [1, 1, 1]
685 // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[A]] into %[[INSERT0]][0, 128, %[[OFFSET]]] [8, 128, 128] [1, 1, 1]
686 // CHECK: tensor.insert_slice %[[INPUT]] into %[[INSERT1]][1, 2, 256] [7, 123, 124] [1, 1, 1]
687 func.func @multi_insert_pad_into_fill(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
690 %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
692 tensor.yield %f0 : f32
693 } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
694 %empty = tensor.empty() : tensor<8x384x384xf32>
695 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
696 %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
697 %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
698 %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
699 return %2: tensor<8x384x384xf32>
705 func.func @multi_insert_pad_into_fill_overlap(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
708 // CHECK: tensor.pad
709 %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
711 tensor.yield %f0 : f32
712 } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
713 %empty = tensor.empty() : tensor<8x384x384xf32>
714 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
715 %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
716 %1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
718 %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
719 return %2: tensor<8x384x384xf32>
725 func.func @multi_insert_pad_into_fill_overlap(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
728 // CHECK: tensor.pad
729 %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
731 tensor.yield %f0 : f32
732 } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
733 %empty = tensor.empty() : tensor<8x384x384xf32>
734 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
735 %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
736 %1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
738 %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
739 return %2: tensor<8x384x384xf32>
745 func.func @multi_insert_pad_into_fill(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
748 // CHECK-NOT: tensor.pad
749 %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
751 tensor.yield %f0 : f32
752 } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
753 %empty = tensor.empty() : tensor<8x384x384xf32>
754 %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
756 // CHECK-COUNT-3: tensor.insert_slice
757 %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
758 %1 = tensor.insert_slice %a into %0 [0, 1, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
759 %2 = tensor.insert_slice %pad into %1 [0, 256, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
760 return %2: tensor<8x384x384xf32>
766 func.func @multi_insert_pad_into_fill_mismatch(%input: tensor<7x123x124xf32>, %a: tensor<8x128x128xf32>, %offset: index) -> tensor<8x384x384xf32> {
770 // CHECK: tensor.pad
771 %pad = tensor.pad %input low[1, 2, 0] high[0, 3, 4] {
773 tensor.yield %f0 : f32
774 } : tensor<7x123x124xf32> to tensor<8x128x128xf32>
775 %empty = tensor.empty() : tensor<8x384x384xf32>
777 %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
778 %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
779 %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
780 %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
781 return %2: tensor<8x384x384xf32>
786 func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
787 %arg2 : tensor<?x?xf32>) -> (tensor<4x8xf32>, tensor<?x?xf32>) {
788 %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
789 outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
790 %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x8xf32>
791 return %1, %0 : tensor<4x8xf32>, tensor<?x?xf32>
794 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
795 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
796 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
797 // CHECK-DAG: %[[LHS_CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?xf32> to tensor<4x?xf32>
798 // CHECK-DAG: %[[RHS_CAST:.+]] = tensor.cast %[[ARG1]] : tensor<?x?xf32> to tensor<?x8xf32>
799 // CHECK-DAG: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor<?x?xf32> to tensor<4x8xf32>
803 // CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[MATMUL]]
808 func.func private @some_use(%0 : tensor<4x8xf32>)
810 func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>,
811 %arg2 : tensor<?x?xf32>, %arg3 : i1) -> tensor<?x?xf32> {
812 %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
813 outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
815 %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x8xf32>
816 func.call @some_use(%1) : (tensor<4x8xf32>) -> ()
818 return %0 : tensor<?x?xf32>
823 // CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>, %[[ARG2:.*]]: tensor<?x?xf32>, %[[ARG3:.*]]: i1)
824 // CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
825 // CHECK-SAME: outs(%[[ARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
827 // CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor<?x?xf32> to tensor<4x8xf32>
828 // CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> ()
830 // CHECK: return %[[RES]] : tensor<?x?xf32>
835 func.func @fold_conv_op_with_cast_consumer(%arg0 : tensor<?x?x?x?xf32>,
836 %arg1 : tensor<?x?x?x?xf32>, %arg2 : tensor<?x?x?x?xf32>) ->
837 (tensor<4x8x12x16xf32>, tensor<?x?x?x?xf32>) {
838 %0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
839 outs(%arg2 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
840 %1 = tensor.cast %0 : tensor<?x?x?x?xf32> to tensor<4x8x12x16xf32>
841 return %1, %0 : tensor<4x8x12x16xf32>, tensor<?x?x?x?xf32>
844 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
845 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
846 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>)
847 // CHECK: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor<?x?x?x?xf32> to tensor<4x8x12x16xf32>
851 // CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]]
856 func.func @fold_multi_use_generic_op_with_consumer(%arg0 : tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<2x3x4xf32>) {
860 %d0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
861 %d1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
862 %d2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
863 %empty1 = tensor.empty(%d1, %d2, %d0) : tensor<?x?x?xf32>
864 %empty2 = tensor.empty(%d2, %d1, %d0) : tensor<?x?x?xf32>
870 ins(%arg0 : tensor<?x?x?xf32>) outs(%empty1, %empty2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
873 } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
874 %1 = tensor.cast %0#1 : tensor<?x?x?xf32> to tensor<2x3x4xf32>
875 return %0#0, %1 : tensor<?x?x?xf32>, tensor<2x3x4xf32>
878 // CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
879 // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<2x3x4xf32>
880 // CHECK-DAG: %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<4x3x2xf32>
881 // CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
885 // CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor<?x?x?xf32>
915 func.func @erase_non_identity_noop(%arg0 : tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
919 } ins(%arg0 : tensor<?x?xf32>)
920 outs(%arg1 : tensor<?x?xf32>) {
923 } -> tensor<?x?xf32>
924 return %0 : tensor<?x?xf32>
929 // CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>)
930 // CHECK: return %[[ARG0]] : tensor<?x?xf32>
937 func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: tensor<4xf32>) {
942 ins(%1, %1 : tensor<4xf32>, tensor<4xf32>)
943 outs(%0 : tensor<4xf32>) {
946 } -> tensor<4xf32>
949 outs(%36 : tensor<4xf32>) {
952 } -> tensor<4xf32>
959 func.func @dead_softmax(%arg0: tensor<16x64x256xf32>) -> tensor<16x64x256xf32> {
960 %0 = tensor.empty() : tensor<16x64x256xf32>
963 ins(%arg0 : tensor<16x64x256xf32>) outs(%0 : tensor<16x64x256xf32>) -> tensor<16x64x256xf32>
964 return %arg0 : tensor<16x64x256xf32>
970 // CHECK: tensor.dim
971 // CHECK: tensor.dim
972 // CHECK-NOT: tensor.dim
974 func.func @canonicalize_dim_of_dest_style_op(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
977 %dim0_0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
978 %dim1_0 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
979 %0 = tensor.empty(%dim0_0, %dim1_0) : tensor<?x?xf32>
980 %1 = linalg.copy ins(%arg0 : tensor<?x?xf32>) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
981 %dim0_1 = tensor.dim %1, %c0 : tensor<?x?xf32>
982 %dim1_1 = tensor.dim %1, %c1 : tensor<?x?xf32>
983 %2 = tensor.empty(%dim0_1, %dim1_1) : tensor<?x?xf32>
984 %3 = linalg.copy ins(%1 : tensor<?x?xf32>) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
985 return %3: tensor<?x?xf32>
990 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
991 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
993 // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[ARG1]] : tensor<?x?xf32>)
994 func.func @canonicalize_fill_to_copy_input(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
996 %fill = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
997 %copy = linalg.copy ins(%fill : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) -> tensor<?x?xf32>
998 return %copy : tensor<?x?xf32>
1004 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
1005 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
1006 // CHECK: linalg.copy ins(%[[ARG1]] : tensor<?x?xf32>) outs(%[[ARG0]] : tensor<?x?xf32>)
1007 func.func @canonicalize_fill_to_copy_dest(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
1009 %fill = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
1010 %copy = linalg.copy ins(%arg1 : tensor<?x?xf32>) outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
1011 return %copy : tensor<?x?xf32>
1017 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
1018 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>)
1020 // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[ARG1]] : tensor<?x?xf32>)
1021 func.func @canonicalize_fill_to_transpose_input(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) -> tensor<?x?xf32> {
1023 %fill = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
1024 %transpose = linalg.transpose ins(%fill : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) permutation = [1, 0]
1025 return %transpose : tensor<?x?xf32>
1031 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<2x3xf32>
1032 // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<2x3xf32>)
1034 // CHECK: return %[[ARG0]] : tensor<2x3xf32>
1035 func.func @broadcast_same_shape(%input: tensor<2x3xf32>, %init: tensor<2x3xf32>) -> tensor<2x3xf32> {
1036 %0 = linalg.broadcast ins(%input: tensor<2x3xf32>) outs(%init: tensor<2x3xf32>) dimensions = []
1037 return %0 : tensor<2x3xf32>
1042 func.func @transpose_1d(%input: tensor<16xf32>,
1043 %init: tensor<16xf32>) -> tensor<16xf32> {
1045 ins(%input:tensor<16xf32>)
1046 outs(%init:tensor<16xf32>)
1048 func.return %transpose : tensor<16xf32>
1052 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<16xf32>,
1053 // CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: tensor<16xf32>)
1055 // CHECK: return %[[INPUT]] : tensor<16xf32>
1059 func.func @transpose_identity_perm(%input: tensor<16x32x64xf32>,
1060 %init: tensor<16x32x64xf32>) -> tensor<16x32x64xf32> {
1062 ins(%input:tensor<16x32x64xf32>)
1063 outs(%init:tensor<16x32x64xf32>)
1065 func.return %transpose : tensor<16x32x64xf32>
1069 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<16x32x64xf32>,
1070 // CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: tensor<16x32x64xf32>)
1072 // CHECK: return %[[INPUT]] : tensor<16x32x64xf32>
1076 func.func @transpose_transpose_cancel(%input: tensor<5x4x3xf32>,
1077 %init1: tensor<4x3x5xf32>,
1078 %init2: tensor<5x4x3xf32>) -> tensor<5x4x3xf32> {
1080 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<5x4x3xf32>
1081 // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<4x3x5xf32>
1082 // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<5x4x3xf32>
1084 // CHECK: return %[[INPUT]] : tensor<5x4x3xf32>
1086 ins(%input:tensor<5x4x3xf32>)
1087 outs(%init1:tensor<4x3x5xf32>)
1090 ins(%transpose1:tensor<4x3x5xf32>)
1091 outs(%init2:tensor<5x4x3xf32>)
1093 func.return %transpose2 : tensor<5x4x3xf32>
1098 func.func @transpose_transpose_fold(%input: tensor<5x4x3xf32>,
1099 %init1: tensor<4x3x5xf32>,
1100 %init2: tensor<3x4x5xf32>) -> tensor<3x4x5xf32> {
1102 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<5x4x3xf32>
1103 // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<4x3x5xf32>
1104 // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<3x4x5xf32>
1105 // CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<5x4x3xf32>) outs(%[[INIT2]] : tensor<3x4x5xf32>) permutation = [2, 1, 0]
1107 // CHECK: return %[[TRANSPOSE]] : tensor<3x4x5xf32>
1109 ins(%input:tensor<5x4x3xf32>)
1110 outs(%init1:tensor<4x3x5xf32>)
1113 ins(%transpose1:tensor<4x3x5xf32>)
1114 outs(%init2:tensor<3x4x5xf32>)
1116 func.return %transpose2 : tensor<3x4x5xf32>
1121 func.func @broadcast_transpose_fold(%input: tensor<2x4x5xf32>,
1122 %init1: tensor<1x2x3x4x5x6xf32>,
1123 %init2: tensor<1x6x2x3x5x4xf32>) -> tensor<1x6x2x3x5x4xf32> {
1125 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<2x4x5xf32>
1126 // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<1x2x3x4x5x6xf32>
1127 // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<1x6x2x3x5x4xf32>
1128 // CHECK: %[[TMP_INIT:.+]] = tensor.empty() : tensor<2x5x4xf32>
1129 // CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<2x4x5xf32>) outs(%[[TMP_INIT]] : tensor<2x5x4xf32>) permutation = [0, 2, 1]
1130 // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<2x5x4xf32>) outs(%[[INIT2]] : tensor<1x6x2x3x5x4xf32>) dimensions = [0, 3, 1]
1131 // CHECK: return %[[BROADCAST]] : tensor<1x6x2x3x5x4xf32>
1133 ins(%input : tensor<2x4x5xf32>)
1134 outs(%init1 : tensor<1x2x3x4x5x6xf32>)
1137 ins(%broadcast : tensor<1x2x3x4x5x6xf32>)
1138 outs(%init2 : tensor<1x6x2x3x5x4xf32>)
1140 func.return %transpose : tensor<1x6x2x3x5x4xf32>
1145 func.func @broadcast_transpose_fold_dynamic(%input: tensor<?x?x5xf32>,
1146 %init1: tensor<1x?x3x?x5x6xf32>,
1147 %init2: tensor<1x3x?x6x5x?xf32>) -> tensor<1x3x?x6x5x?xf32> {
1149 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<?x?x5xf32>
1150 // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<1x?x3x?x5x6xf32>
1151 // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<1x3x?x6x5x?xf32>
1154 // CHECK: %[[DIM0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x5xf32>
1155 // CHECK: %[[DIM1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x5xf32>
1156 // CHECK: %[[TMP_INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) : tensor<?x5x?xf32>
1157 // CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<?x?x5xf32>) outs(%[[TMP_INIT]] : tensor<?x5x?xf32>) permutation = [1, 2, 0]
1158 // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<?x5x?xf32>) outs(%[[INIT2]] : tensor<1x3x?x6x5x?xf32>) dimensions = [0, 1, 3]
1159 // CHECK: return %[[BROADCAST]] : tensor<1x3x?x6x5x?xf32>
1161 ins(%input : tensor<?x?x5xf32>)
1162 outs(%init1 : tensor<1x?x3x?x5x6xf32>)
1165 ins(%broadcast : tensor<1x?x3x?x5x6xf32>)
1166 outs(%init2 : tensor<1x3x?x6x5x?xf32>)
1168 func.return %transpose : tensor<1x3x?x6x5x?xf32>
1173 func.func @broadcast_transpose_fold_2dim(%input: tensor<2xf32>,
1174 %init1: tensor<2x4xf32>,
1175 %init2: tensor<4x2xf32>) -> tensor<4x2xf32> {
1177 // CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<2xf32>
1178 // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<2x4xf32>
1179 // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<4x2xf32>
1180 // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[INPUT]] : tensor<2xf32>) outs(%[[INIT2]] : tensor<4x2xf32>) dimensions = [0]
1181 // CHECK: return %[[BROADCAST]] : tensor<4x2xf32>
1183 ins(%input : tensor<2xf32>)
1184 outs(%init1 : tensor<2x4xf32>)
1187 ins(%broadcast : tensor<2x4xf32>)
1188 outs(%init2 : tensor<4x2xf32>)
1190 func.return %transpose : tensor<4x2xf32>
1197 -> tensor<5x?x?xf32>
1201 %0 = tensor.empty(%arg0, %arg1) : tensor<5x?x?xf32>
1202 %1 = linalg.fill ins(%cst0 : f32) outs(%0 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
1203 %2 = tensor.empty(%arg2, %arg3) : tensor<5x?x?xf32>
1204 %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
1205 %4 = tensor.concat dim(1) %1, %3 : (tensor<5x?x?xf32>, tensor<5x?x?xf32>) -> tensor<5x?x?xf32>
1206 return %4 : tensor<5x?x?xf32>
1214 // CHECK-DAG: %[[EMPTY0:.+]] = tensor.empty(%[[ARG0]], %[[ARG1]])
1215 // CHECK-DAG: %[[EMPTY1:.+]] = tensor.empty(%[[ARG2]], %[[ARG3]])
1216 // CHECK: %[[CONCAT:.+]] = tensor.concat dim(1) %[[EMPTY0]], %[[EMPTY1]]
1240 func.func @recursive_effect(%arg : tensor<1xf32>) {
1241 %init = arith.constant dense<0.0> : tensor<1xf32>
1242 %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>)