// RUN: mlir-opt %s -transform-interpreter -canonicalize -cse -split-input-file | FileCheck %s

// Dynamically shaped tensor.pad tiled with sizes [2, 3]. The expected IR is a
// two-deep scf.for nest whose body is an scf.if: one branch builds the tile
// with tensor.generate, the other pads a slice extracted from the input.
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
// CHECK: func @dynamic_pad_tensor_3_4(
// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]]
// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
// CHECK: %[[SWAP_RESULT:.*]] = scf.if
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]

func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor<?x?xf32>,
                                  %pad_value: f32) -> tensor<?x?xf32> {
  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : f32
    } : tensor<?x?xf32> to tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// Same dynamically shaped pad, tiled with sizes [0, 3]: a single scf.for over
// dimension 1 is expected, while dimension 0 keeps offset 0 and its full
// padded extent in the final tensor.insert_slice.
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK: func @dynamic_pad_tensor_0_3(
// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]]
// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
// CHECK: %[[SWAP_RESULT:.*]] = scf.if
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]

func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor<?x?xf32>,
                                  %pad_value: f32) -> tensor<?x?xf32> {
  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : f32
    } : tensor<?x?xf32> to tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loop = transform.structured.tile_using_for %0 tile_sizes [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// Static tensor.pad (7x9 -> 15x16) tiled with sizes [2, 3]; both loop upper
// bounds are expected to be constants (15 and 16).
// CHECK-LABEL: func @static_pad_tensor_3_4(
// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
// CHECK: %[[SWAP_RESULT:.*]] = scf.if
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]

func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
                                 %pad_value: f32) -> tensor<15x16xf32> {
  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : f32
    } : tensor<7x9xf32> to tensor<15x16xf32>
  return %0 : tensor<15x16xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// A linalg.copy that consumes the padded tensor is tiled with [2, 3] through
// transform.structured.fuse; the pad is expected inside the loop nest, ahead
// of the tiled copy.
// CHECK-LABEL: func @fuse_static_pad_tensor_3_4(
// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
// CHECK: %[[SWAP_RESULT:.*]] = scf.if
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
// The copy operand is a use of the scf.if result captured above, not a new
// capture, so the producer/consumer link is actually verified.
// CHECK: %[[COPY:.*]] = linalg.copy ins(%[[SWAP_RESULT]]
// CHECK: tensor.insert_slice %[[COPY]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]

func.func @fuse_static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
                                      %pad_value: f32) -> tensor<15x16xf32> {
  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : f32
    } : tensor<7x9xf32> to tensor<15x16xf32>
  %empty = tensor.empty() : tensor<15x16xf32>
  %1 = linalg.copy ins(%0 : tensor<15x16xf32>) outs(%empty : tensor<15x16xf32>) -> tensor<15x16xf32>
  return %1 : tensor<15x16xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
    %copy = transform.structured.match ops{["linalg.copy"]} in %arg1
      : (!transform.any_op) -> !transform.any_op
    %a, %b, %c = transform.structured.fuse %copy [2, 3]
      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// Static pad tiled with sizes [0, 3]: one loop over dimension 1; dimension 0
// keeps offset 0 and its full extents (7 in the source slice, 15 in the
// result).
// CHECK-LABEL: func @static_pad_tensor_0_3(
// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
// CHECK: %[[SWAP_RESULT:.*]] = scf.if
// CHECK: tensor.generate
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]

func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
                                 %pad_value: f32) -> tensor<15x16xf32> {
  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : f32
    } : tensor<7x9xf32> to tensor<15x16xf32>
  return %0 : tensor<15x16xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loop = transform.structured.tile_using_for %0 tile_sizes [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// Pad 7x9 -> 14x15 with tile sizes [0, 3]; every tile is expected to have the
// static type tensor<14x3xf32>.
// CHECK-LABEL: func @static_pad_tile_evenly_0_3(
// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
// CHECK: %[[R2:.*]] = scf.if
// CHECK: %[[GEN:.*]] = tensor.generate
// CHECK: scf.yield %[[GEN]] : tensor<14x3xf32>
// CHECK: else
// The slice source is matched through the captured argument rather than a
// hard-coded SSA name.
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
// CHECK: scf.yield %[[PAD]] : tensor<14x3xf32>
// CHECK: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
// CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
// CHECK: return %[[RESULT]] : tensor<14x15xf32>

func.func @static_pad_tile_evenly_0_3(%input_tensor: tensor<7x9xf32>,
                                      %output_tensor: tensor<14x15xf32>,
                                      %pad_value: f32) -> tensor<14x15xf32> {
  %0 = tensor.pad %input_tensor low[0, 0] high[7, 6] {
    ^bb0(%arg1: index, %arg2: index):
      tensor.yield %pad_value : f32
    } : tensor<7x9xf32> to tensor<14x15xf32>
  return %0 : tensor<14x15xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loop = transform.structured.tile_using_for %0 tile_sizes [0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.pack 128x256 -> 4x8x32x32 tiled with [2, 4] on the outer dimensions;
// each iteration packs a 64x128 input slice into a 2x4x32x32 output slice.
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
// CHECK: func.func @NC_to_NCnc
// CHECK-SAME: %[[IN:.*]]: tensor<128x256xf32>,
// CHECK-SAME: %[[OUT:.*]]: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[RES0:.*]] = scf.for %[[N:.*]] = %[[C0]] to %[[C4]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<4x8x32x32xf32>) {
// CHECK: %[[RES1:.+]] = scf.for %[[C:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<4x8x32x32xf32>) {
// CHECK-DAG: %[[IN_N:.+]] = affine.apply #[[MAP0]](%[[N]])
// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_N]], %[[IN_C]]] [64, 128] [1, 1] : tensor<128x256xf32> to tensor<64x128xf32>
// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[N]], %[[C]], 0, 0] [2, 4, 32, 32] [1, 1, 1, 1] : tensor<4x8x32x32xf32> to tensor<2x4x32x32xf32>
// CHECK: %[[SUB_RES:.*]] = tensor.pack
// CHECK-SAME: %[[SUB_IN]] inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %[[SUB_OUT]]
// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
// CHECK: scf.yield %[[INSERT]] : tensor<4x8x32x32xf32>
// CHECK: }
// The outer yield and the return are uses of the loop results captured above.
// CHECK: scf.yield %[[RES1]] : tensor<4x8x32x32xf32>
// CHECK: }
// CHECK: return %[[RES0]] : tensor<4x8x32x32xf32>
// CHECK: }
func.func @NC_to_NCnc(%arg0: tensor<128x256xf32>, %arg1: tensor<4x8x32x32xf32>) -> tensor<4x8x32x32xf32> {
  %0 = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [32, 32] into %arg1 : tensor<128x256xf32> -> tensor<4x8x32x32xf32>
  return %0 : tensor<4x8x32x32xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.pack with outer_dims_perm = [1, 0], tiled with [2, 4]; a single loop
// (0 to 32, step 2) is matched.
// CHECK: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 8)>
// CHECK: func.func @KC_to_CKkc
// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
// CHECK: scf.for %[[C:.+]] = %[[C0]] to %[[C32]] step %[[C2]]
// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP0]](%[[C]])
// CHECK: %[[INPUT_SLICE:.+]] = tensor.extract_slice %[[IN]]
// CHECK-SAME: [0, %[[IN_C]]] [128, 16]
// CHECK: %[[OUTPUT_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[C]], 0, 0, 0] [2, 4, 32, 8]
// CHECK: tensor.pack
// CHECK-SAME: %[[INPUT_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
// CHECK-SAME: into %[[OUTPUT_SLICE]]
func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> {
  %0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
  return %0 : tensor<32x4x32x8xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.pack with a padding value on static shapes (13x15 -> 2x8x8x2), tiled
// with [2, 4]; a single loop (0 to 8, step 4) is matched.
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -2 + 15, 8)>
// CHECK: func.func @pad_and_pack_static(
// CHECK-SAME: %[[IN:.*]]: tensor<13x15xf32>,
// CHECK-SAME: %[[OUT:.*]]: tensor<2x8x8x2xf32>,
// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<2x8x8x2xf32> {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// The loop is matched in order (not as part of the unordered group) so that it
// anchors the body checks that follow.
// CHECK: %[[RES0:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[C8]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[OUT]]) -> (tensor<2x8x8x2xf32>) {
// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP0]](%[[J]])
// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])
// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][0, %[[IN_J]]] [13, %[[IN_J_SZ]]] [1, 1]
// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][0, %[[J]], 0, 0] [2, 4, 8, 2] [1, 1, 1, 1]
// CHECK: %[[SUB_RES:.*]] = tensor.pack
// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
// CHECK-SAME: into %[[SUB_OUT]]
// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
// CHECK: scf.yield %[[INSERT]] : tensor<2x8x8x2xf32>
// CHECK: }
// CHECK: return %[[RES0]] : tensor<2x8x8x2xf32>
// CHECK: }
func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> {
  %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
  return %0 : tensor<2x8x8x2xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.pack with a padding value, dynamic outer dimensions and static inner
// tiles [8, 2], tiled with [2, 4].
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -8 + s0, d0 * 8)>
// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 * 2)>
// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1)[s0] -> (d1 * -2 + s0, d0 * 2)>
// CHECK: func.func @pad_and_pack_partially_dynamic(
// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x8x2xf32>,
// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<?x?x8x2xf32> {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x8x2xf32>
// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x8x2xf32>
// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x8x2xf32>) {
// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x8x2xf32>) {
// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
// CHECK-DAG: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])
// CHECK-DAG: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]]
// CHECK-DAG: %[[IN_J:.*]] = affine.apply #[[MAP4]](%[[J]])
// CHECK-DAG: %[[IN_J_SZ:.*]] = affine.min #[[MAP5]]
// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], 8, 2] [1, 1, 1, 1] : tensor<?x?x8x2xf32> to tensor<?x?x8x2xf32>
// CHECK: %[[SUB_RES:.*]] = tensor.pack
// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2]
// CHECK-SAME: into %[[SUB_OUT]]
// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[SUB_RES]] into %[[ITER1]]
// CHECK: scf.yield %[[INSERT]] : tensor<?x?x8x2xf32>
// CHECK: }
// The outer yield and the return are uses of the loop results captured above.
// CHECK: scf.yield %[[RES1]] : tensor<?x?x8x2xf32>
// CHECK: }
// CHECK: return %[[RES0]] : tensor<?x?x8x2xf32>
// CHECK: }
func.func @pad_and_pack_partially_dynamic(%input: tensor<?x?xf32>, %output: tensor<?x?x8x2xf32>, %pad: f32) -> tensor<?x?x8x2xf32> {
  %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<?x?xf32> -> tensor<?x?x8x2xf32>
  return %0 : tensor<?x?x8x2xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.pack with a padding value where the inner tile sizes are SSA values
// (function arguments), tiled with [2, 4].
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 * s0)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s0, -(d1 * s0) + s1)>
// CHECK: func.func @pad_and_pack_fully_dynamic(
// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[OUT:.*]]: tensor<?x?x?x?xf32>,
// CHECK-SAME: %[[PAD:.*]]: f32,
// CHECK-SAME: %[[TILE_0:.*]]: index,
// CHECK-SAME: %[[TILE_1:.*]]: index) -> tensor<?x?x?x?xf32> {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[OUT_D0:.*]] = tensor.dim %[[OUT]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[OUT_D1:.*]] = tensor.dim %[[OUT]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK: %[[RES0:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[OUT_D0]] step %[[C2]] iter_args(%[[ITER0:.*]] = %[[OUT]]) -> (tensor<?x?x?x?xf32>) {
// CHECK: %[[RES1:.*]] = scf.for %[[J:.*]] = %[[C0]] to %[[OUT_D1]] step %[[C4]] iter_args(%[[ITER1:.*]] = %[[ITER0]]) -> (tensor<?x?x?x?xf32>) {
// CHECK-DAG: %[[OUT_I_SZ:.*]] = affine.min #[[MAP0]](%[[I]])[%[[OUT_D0]]]
// CHECK-DAG: %[[OUT_J_SZ:.*]] = affine.min #[[MAP1]](%[[J]])[%[[OUT_D1]]]
// CHECK-DAG: %[[IN_D0:.*]] = tensor.dim %[[IN]], %[[C0]]
// CHECK-DAG: %[[IN_D1:.*]] = tensor.dim %[[IN]], %[[C1]]
// CHECK: %[[IN_I:.*]] = affine.apply #[[MAP2]](%[[I]])[%[[TILE_0]]]
// CHECK: %[[IN_I_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_I_SZ]], %[[I]])[%[[TILE_0]], %[[IN_D0]]]
// CHECK: %[[IN_J:.*]] = affine.apply #[[MAP2]](%[[J]])[%[[TILE_1]]]
// CHECK: %[[IN_J_SZ:.*]] = affine.min #[[MAP3]](%[[OUT_J_SZ]], %[[J]])[%[[TILE_1]], %[[IN_D1]]]
// CHECK: %[[SUB_IN:.*]] = tensor.extract_slice %[[IN]][%[[IN_I]], %[[IN_J]]] [%[[IN_I_SZ]], %[[IN_J_SZ]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[OUT_D2:.+]] = tensor.dim %[[ITER1]], %[[C2]]
// CHECK: %[[OUT_D3:.+]] = tensor.dim %[[ITER1]], %[[C3]]
// CHECK: %[[SUB_OUT:.*]] = tensor.extract_slice %[[ITER1]][%[[I]], %[[J]], 0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]], %[[OUT_D2]], %[[OUT_D3]]] [1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
// CHECK: %[[PACK:.*]] = tensor.pack
// CHECK-SAME: %[[SUB_IN]] padding_value(%[[PAD]] : f32) inner_dims_pos = [0, 1] inner_tiles = [%[[TILE_0]], %[[TILE_1]]]
// CHECK-SAME: into %[[SUB_OUT]]
// CHECK: %[[INSERT:.*]] = tensor.insert_slice %[[PACK]] into %[[ITER1]]
// CHECK: scf.yield %[[INSERT]] : tensor<?x?x?x?xf32>
// CHECK: }
// The outer yield and the return are uses of the loop results captured above.
// CHECK: scf.yield %[[RES1]] : tensor<?x?x?x?xf32>
// CHECK: }
// CHECK: return %[[RES0]] : tensor<?x?x?x?xf32>
// CHECK: }
func.func @pad_and_pack_fully_dynamic(%source: tensor<?x?xf32>, %dest: tensor<?x?x?x?xf32>, %pad: f32, %tile_n : index, %tile_m : index) -> tensor<?x?x?x?xf32> {
  %0 = tensor.pack %source padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32>
  return %0 : tensor<?x?x?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.unpack 8x8x32x16 -> 256x128 tiled with [2, 4]; the expected IR
// unpacks a source slice into a tensor.empty and then extracts the 2x4 tile
// from that result.
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)>
// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)>
// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)>
// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)>
// CHECK: func.func @NCnc_to_NC
// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]]
// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]]
// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]])
// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]])
// CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]])
// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]])
// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]])
// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]])
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]]
// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16]
// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor<?x?x32x16xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty
// CHECK: %[[UNPACK:.+]] = tensor.unpack
// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[EMPTY]]
// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4]
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]]
// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4]
// CHECK: scf.yield %[[RES]]
func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
  %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.unpack with outer_dims_perm = [1, 0] (32x4x32x8 -> 128x256), tiled
// with [2, 4]; the source slice offsets and sizes appear in permuted order.
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)>
// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 8)>
// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 8)>
// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 8 - d0 floordiv 8 + 1)>
// CHECK: func.func @CKkc_to_KC
// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C128]] step %[[C2]]
// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C256]] step %[[C4]]
// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
// CHECK-DAG: %[[OFFSET_K:.+]] = affine.apply #[[MAP1]](%[[K]])
// CHECK-DAG: %[[IN_K_SZ:.+]] = affine.apply #[[MAP2]](%[[K]])
// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP4]](%[[C]])
// CHECK-DAG: %[[OFFSET_C:.+]] = affine.apply #[[MAP5]](%[[C]])
// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP6]](%[[C]])
// The offsets/sizes are pinned to the extract_slice line itself.
// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
// CHECK-SAME: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], %[[IN_K_SZ]], 32, 8]
// CHECK: %[[EMPTY:.+]] = tensor.empty
// CHECK: %[[UNPACK:.+]] = tensor.unpack
// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8]
// CHECK-SAME: into %[[EMPTY]]
// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
// CHECK-SAME: [%[[OFFSET_K]], %[[OFFSET_C]]] [2, 4]
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]]
// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4]
// CHECK: scf.yield %[[RES]]
func.func @CKkc_to_KC(%source: tensor<32x4x32x8xf32>, %dest: tensor<128x256xf32>) -> tensor<128x256xf32> {
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %dest : tensor<32x4x32x8xf32> -> tensor<128x256xf32>
  return %0 : tensor<128x256xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.unpack where the tile sizes [2, 4] equal the inner tiles [2, 4]; the
// unpack is expected to write straight into a slice of the loop-carried
// destination.
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 4)>
// CHECK: func.func @perfect_CKkc_to_KC
// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C2]]
// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[C128]] step %[[C4]]
// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP1]](%[[C]])
// The offsets/sizes are pinned to the extract_slice line itself.
// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
// CHECK-SAME: [%[[IN_C]], %[[IN_K]], 0, 0] [1, 1, 2, 4]
// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [2, 4]
// CHECK: %[[UNPACK:.+]] = tensor.unpack
// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4]
// CHECK-SAME: into %[[ITER_SLICE]]
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [2, 4]
// CHECK: scf.yield %[[RES]]
func.func @perfect_CKkc_to_KC(%source: tensor<32x4x2x4xf32>, %dest: tensor<8x128xf32>) -> tensor<8x128xf32> {
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 4] into %dest : tensor<32x4x2x4xf32> -> tensor<8x128xf32>
  return %0 : tensor<8x128xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.unpack with dynamic outer dimensions and static inner tiles [2, 2],
// tiled with [2, 4].
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 ceildiv 2)>
// CHECK: func.func @dynamic_perfect_CKkc_to_KC
// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]:
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// The step constant of the outer loop is captured in this test instead of
// relying on a binding left over from an earlier test in the file.
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[DIM_0:.+]] = tensor.dim %[[OUT]], %[[C0]]
// CHECK-DAG: %[[DIM_1:.+]] = tensor.dim %[[OUT]], %[[C1]]
// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[DIM_0]] step %[[C2]]
// CHECK: %{{.+}} = scf.for %[[C:.+]] = %[[C0]] to %[[DIM_1]] step %[[C4]]
// CHECK-DAG: %[[OUT_K_SZ:.+]] = affine.min #[[MAP0]](%[[K]])[%[[DIM_0]]]
// CHECK-DAG: %[[OUT_C_SZ:.+]] = affine.min #[[MAP1]](%[[C]])[%[[DIM_1]]]
// CHECK-DAG: %[[IN_K:.+]] = affine.apply #[[MAP2]](%[[K]])
// CHECK-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]])
// CHECK-DAG: %[[IN_C_SZ:.+]] = affine.apply #[[MAP3]](%[[OUT_C_SZ]])
// The offsets/sizes are pinned to the extract_slice line itself.
// CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[IN]]
// CHECK-SAME: [%[[IN_C]], %[[IN_K]], 0, 0] [%[[IN_C_SZ]], 1, 2, 2]
// CHECK: %[[ITER_SLICE:.+]] = tensor.extract_slice %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]]
// CHECK: %[[UNPACK:.+]] = tensor.unpack
// CHECK-SAME: %[[IN_SLICE]] outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2]
// CHECK-SAME: into %[[ITER_SLICE]]
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
// CHECK-SAME: into %{{.+}}[%[[K]], %[[C]]] [%[[OUT_K_SZ]], %[[OUT_C_SZ]]]
// CHECK: scf.yield %[[RES]]

func.func @dynamic_perfect_CKkc_to_KC(%source: tensor<?x?x2x2xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [2, 2] into %dest : tensor<?x?x2x2xf32> -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// tensor.unpack of a 5-D source with outer_dims_perm = [0, 3, 1, 2] and a
// single inner tile on dimension 3, tiled with [1, 1, 1, 4]; three loops are
// matched.
// CHECK: #[[MAP:.+]] = affine_map<(d0) -> (d0 floordiv 2)>
// CHECK: func.func @perfect_NKPQk_to_NPQK(
// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x4x6x6x2xf32>,
// CHECK-SAME: %{{.+}}: tensor<1x6x6x8xf32>)
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK: %{{.+}} = scf.for %[[P:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
// CHECK: %{{.+}} = scf.for %[[Q:.+]] = %[[C0]] to %[[C6]] step %[[C1]]
// CHECK: %{{.+}} = scf.for %[[K:.+]] = %[[C0]] to %[[C8]] step %[[C4]]
// CHECK: %[[K_SZ:.+]] = affine.apply #[[MAP]](%[[K]])
// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[K_SZ]], %[[P]], %[[Q]], 0]
// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[P]], %[[Q]], %[[K]]]
// CHECK: %[[UNPACK:.+]] = tensor.unpack
// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2]
// CHECK-SAME: into %[[SLICE_DEST]]
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK]]
// CHECK-SAME: into %{{.+}}[0, %[[P]], %[[Q]], %[[K]]]
// CHECK: scf.yield %[[RES]]

func.func @perfect_NKPQk_to_NPQK(%source: tensor<1x4x6x6x2xf32>, %dest: tensor<1x6x6x8xf32>) -> tensor<1x6x6x8xf32> {
  %0 = tensor.unpack %source outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x4x6x6x2xf32> -> tensor<1x6x6x8xf32>
  return %0 : tensor<1x6x6x8xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

func.func private @get_dynamic_tile_size() -> index

// CHECK-LABEL: func.func @fully_dynamic_unpack
// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
// CHECK-SAME: %[[DST:[0-9a-zA-Z]+]]
// CHECK: %[[INNER_TS:.+]] = call @get_dynamic_tile_size() : () -> index
// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[DST]])
// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]])
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SRC]]
// CHECK: %[[EMPTY:.+]] = tensor.empty
// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[SLICE]]
// CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [%[[INNER_TS]], %[[INNER_TS]]] into %[[EMPTY]]
func.func @fully_dynamic_unpack(%source: tensor<?x?x?x?xf32>, %dest: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = func.call @get_dynamic_tile_size() : () -> index
  %1 = tensor.unpack %source inner_dims_pos = [1, 0] inner_tiles = [%0, %0] into
%dest : tensor<?x?x?x?xf32> -> tensor<?x?xf32> 673 return %1 : tensor<?x?xf32> 674} 675 676module attributes {transform.with_named_sequence} { 677 transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { 678 %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op 679 %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 8] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) 680 transform.yield 681 } 682} 683 684// ----- 685 686// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * 2)> 687// CHECK: func.func @perfect_NPQK_to_NKPQk 688// CHECK-SAME: %[[SOURCE:.+]]: tensor<1x6x6x8xf32>, 689// CHECK-SAME: %{{.+}}: tensor<1x4x6x6x2xf32>) 690// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index 691// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index 692// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index 693// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index 694// CHECK: %{{.+}} = scf.for %[[ARG2:.+]] = %[[C0]] to %[[C4]] step %[[C1]] 695// CHECK: %{{.+}} = scf.for %[[ARG4:.+]] = %[[C0]] to %[[C6]] step %[[C1]] 696// CHECK: %{{.+}} = scf.for %[[ARG6:.+]] = %[[C0]] to %[[C6]] step %[[C1]] 697// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP1]](%[[ARG2]]) 698// CHECK: %[[SLICE_SOURCE:.+]] = tensor.extract_slice %[[SOURCE]][0, %[[ARG4]], %[[ARG6]], %[[APPLY]]] 699// CHECK: %[[SLICE_DEST:.+]] = tensor.extract_slice %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] 700// CHECK: %[[PACK:.+]] = tensor.pack 701// CHECK-SAME: %[[SLICE_SOURCE]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] 702// CHECK-SAME: into %[[SLICE_DEST]] 703// CHECK: %[[RES:.+]] = tensor.insert_slice %[[PACK]] 704// CHECK-SAME: into %{{.+}}[0, %[[ARG2]], %[[ARG4]], %[[ARG6]], 0] 705// CHECK: scf.yield %[[RES]] 706 707func.func @perfect_NPQK_to_NKPQk(%source: tensor<1x6x6x8xf32>, %dest: tensor<1x4x6x6x2xf32>) -> tensor<1x4x6x6x2xf32> { 708 %0 = tensor.pack %source 
outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [2] into %dest : tensor<1x6x6x8xf32> -> tensor<1x4x6x6x2xf32> 709 return %0 : tensor<1x4x6x6x2xf32> 710} 711 712module attributes {transform.with_named_sequence} { 713 transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { 714 %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op 715 %1, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 1, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op) 716 transform.yield 717 } 718} 719