// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s

func.func @vectorize_dynamic_identity(%arg0: tensor<?xf32>,
                                      %arg1: tensor<?xf32>,
                                      %arg2: tensor<?xf32>) -> tensor<?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>],
                        iterator_types = ["parallel"] }
    ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
    outs(%arg2 : tensor<?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: @vectorize_dynamic_identity
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1>
// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32>
// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4] : !transform.any_op
    transform.yield
  }
}

// -----
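
// Same as @vectorize_dynamic_identity, but the vector size is taken from an
// arith.constant in the payload IR and passed to the transform as an op handle
// rather than as a static attribute.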
func.func @vectorize_dynamic_identity_with_constant(%arg0: tensor<?xf32>,
                                                    %arg1: tensor<?xf32>,
                                                    %arg2: tensor<?xf32>) -> tensor<?xf32> {
  %c4 = arith.constant 4 : index
  %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>],
                        iterator_types = ["parallel"] }
    ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
    outs(%arg2 : tensor<?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: @vectorize_dynamic_identity_with_constant
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1>
// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32>
// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %size = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [%size] : !transform.any_op, !transform.any_op
    transform.yield
  }
}

// -----

func.func @vectorize_dynamic_identity_with_param(%arg0: tensor<?xf32>,
                                                 %arg1: tensor<?xf32>,
                                                 %arg2: tensor<?xf32>) -> tensor<?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>],
                        iterator_types = ["parallel"] }
    ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
    outs(%arg2 : tensor<?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: @vectorize_dynamic_identity_with_param
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1>
// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32>
// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %vector_size = transform.param.constant 4 : i64 -> !transform.param<i64>
    transform.structured.vectorize %0 vector_sizes [%vector_size] : !transform.any_op, !transform.param<i64>
    transform.yield
  }
}

// -----

func.func @vectorize_dynamic_1d_broadcast(%arg0: tensor<?xf32>,
                                          %arg1: tensor<?xf32>,
                                          %arg2: tensor<?xf32>) -> tensor<?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (0)>,
                                         affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> (d0)>],
                        iterator_types = ["parallel"] }
    ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
    outs(%arg2 : tensor<?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: @vectorize_dynamic_1d_broadcast
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?xf32>
// CHECK: %[[VAL_7:.*]] = vector.transfer_read %{{.*}} {permutation_map = #{{.*}}} : tensor<?xf32>, vector<4xf32>
// CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_7]], %[[VAL_10]] : vector<4xf32>
// CHECK: %[[VAL_14:.*]] = vector.mask %{{.*}} { vector.transfer_write %[[VAL_13]], {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4] : !transform.any_op
    transform.yield
  }
}

// -----

#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d0, 0)>

func.func @dynamic_generic_with_reduction_and_broadcast(%arg0: tensor<?x?xf32>, %init: tensor<?x?xf32>) -> (tensor<?x?xf32>) {
  %0 = linalg.generic { indexing_maps = [#map, #map1],
                        iterator_types = ["parallel", "reduction"]}
    ins(%arg0 : tensor<?x?xf32>)
    outs(%init : tensor<?x?xf32>) {
  ^bb0(%in: f32, %out: f32):
    %1 = arith.addf %in, %out : f32
    linalg.yield %1 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}
// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1) -> (d0)>

// CHECK-LABEL: func.func @dynamic_generic_with_reduction_and_broadcast(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?xf32>
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]] : vector<4x4xi1>
// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_7]] {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x4xf32> } : vector<4x4xi1> -> vector<4x4xf32>
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_11:.*]] = vector.create_mask %[[VAL_3]] : vector<4xi1>
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{\[}}%[[VAL_6]], %[[VAL_6]]], %[[VAL_10]] {in_bounds = [true], permutation_map = #[[$MAP]]} : tensor<?x?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.multi_reduction <add>, %[[VAL_9]], %[[VAL_12]] [1] : vector<4x4xf32> to vector<4xf32> } : vector<4x4xi1> -> vector<4xf32>
// CHECK: %[[VAL_14:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{\[}}%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true], permutation_map = #[[$MAP]]} : vector<4xf32>, tensor<?x?xf32> } : vector<4xi1> -> tensor<?x?xf32>
// CHECK: return %[[VAL_15]] : tensor<?x?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 4] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @vectorize_dynamic_2d_transpose(%arg0: tensor<?x?xf32>,
                                          %arg1: tensor<?x?xf32>,
                                          %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
                                         affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"] }
    ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
    outs(%arg2 : tensor<?x?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: @vectorize_dynamic_2d_transpose
// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?x?xf32>
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_6:.*]] = tensor.dim %{{.*}}, %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_6]], %[[VAL_4]] : vector<8x4xi1>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<4x8xf32> } : vector<8x4xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_12:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<4x8xi1>
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_14:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_10]], %[[VAL_13]] : vector<4x8xf32>
// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %[[VAL_16]], %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, tensor<?x?xf32> } : vector<4x8xi1> -> tensor<?x?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 8] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @vectorize_dynamic_generic_2d_broadcast(%arg0: tensor<?x?xf32>,
                                                  %arg1: tensor<?x?xf32>,
                                                  %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"] }
    ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
    outs(%arg2 : tensor<?x?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: @vectorize_dynamic_generic_2d_broadcast
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor<?x?xf32>
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_6:.*]] = tensor.dim %{{.*}}, %[[VAL_5]] : tensor<?x?xf32>
// CHECK: %[[VAL_9:.*]] = vector.create_mask %[[VAL_6]] : vector<8xi1>
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_9]] { vector.transfer_read %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<4x8xf32> } : vector<8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_12:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<4x8xi1>
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_12]] { vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_10]], %[[VAL_13]] : vector<4x8xf32>
// CHECK: %[[VAL_18:.*]] = vector.mask %[[VAL_12]] { vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, tensor<?x?xf32> } : vector<4x8xi1> -> tensor<?x?xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 8] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @vectorize_dynamic_reduction(%arg0: tensor<?x?xf32>,
                                       %arg1: tensor<?xf32>) -> tensor<?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0)>],
                        iterator_types = ["parallel", "reduction"] }
    ins(%arg0 : tensor<?x?xf32>)
    outs(%arg1 : tensor<?xf32>) {
  ^bb(%in: f32, %out: f32) :
    %0 = arith.addf %in, %out : f32
    linalg.yield %0 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 8] : !transform.any_op
    transform.yield
  }
}

// CHECK-LABEL: @vectorize_dynamic_reduction(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?xf32>
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?xf32>
// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]] : vector<4x8xi1>
// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<4x8xf32> } : vector<4x8xi1> -> vector<4x8xf32>
// CHECK: %[[VAL_11:.*]] = vector.create_mask %[[VAL_3]] : vector<4xi1>
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_11]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true]} : tensor<?xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.multi_reduction <add>, %[[VAL_9]], %[[VAL_12]] [1] : vector<4x8xf32> to vector<4xf32> } : vector<4x8xi1> -> vector<4xf32>
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_11]] { vector.transfer_write %[[VAL_13]], %[[VAL_1]]{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<?xf32> } : vector<4xi1> -> tensor<?xf32>
// CHECK: return %[[VAL_15]] : tensor<?xf32>
// CHECK: }

// -----

func.func @vectorize_dynamic_transpose_reduction(%arg0: tensor<?x?x?xf32>,
                                                 %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                                         affine_map<(d0, d1, d2) -> (d2, d1)>],
                        iterator_types = ["reduction", "parallel", "parallel"] }
    ins(%arg0 : tensor<?x?x?xf32>)
    outs(%arg1 : tensor<?x?xf32>) {
  ^bb(%in: f32, %out: f32) :
    %0 = arith.addf %in, %out : f32
    linalg.yield %0 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 8, 16] : !transform.any_op
    transform.yield
  }
}

// CHECK-LABEL: @vectorize_dynamic_transpose_reduction(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]], %[[VAL_7]] : vector<4x8x16xi1>
// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true, true]} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
// CHECK: %[[VAL_13:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_5]] : vector<16x8xi1>
// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_10]] { vector.multi_reduction <add>, %[[VAL_11]], %[[VAL_14]] [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32>
// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>

// -----

func.func @vectorize_dynamic_transpose_reduction_with_params(%arg0: tensor<?x?x?xf32>,
                                                             %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                                         affine_map<(d0, d1, d2) -> (d2, d1)>],
                        iterator_types = ["reduction", "parallel", "parallel"] }
    ins(%arg0 : tensor<?x?x?xf32>)
    outs(%arg1 : tensor<?x?xf32>) {
  ^bb(%in: f32, %out: f32) :
    %0 = arith.addf %in, %out : f32
    linalg.yield %0 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %vector_size_0 = transform.param.constant 4 : i64 -> !transform.param<i64>
    %vector_size_2 = transform.param.constant 16 : i64 -> !transform.param<i64>
    transform.structured.vectorize %0 vector_sizes
      [%vector_size_0, 8, %vector_size_2] : !transform.any_op, !transform.param<i64>, !transform.param<i64>
    transform.yield
  }
}

// CHECK-LABEL: @vectorize_dynamic_transpose_reduction_with_params(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]], %[[VAL_7]] : vector<4x8x16xi1>
// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true, true]} : tensor<?x?x?xf32>, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32>
// CHECK: %[[VAL_13:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_5]] : vector<16x8xi1>
// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor<?x?xf32>, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32>
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_10]] { vector.multi_reduction <add>, %[[VAL_11]], %[[VAL_14]] [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32>
// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<8x16xf32>, tensor<?x?xf32> } : vector<16x8xi1> -> tensor<?x?xf32>

// -----

func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>,
                                              %arg1: tensor<8x?xf32>,
                                              %arg2: tensor<8x?xf32>) -> tensor<8x?xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"] }
    ins(%arg0, %arg1 : tensor<8x?xf32>, tensor<8x?xf32>)
    outs(%arg2 : tensor<8x?xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<8x?xf32>
  return %0 : tensor<8x?xf32>
}

// CHECK-LABEL: func.func @vectorize_partial_dynamic_identity(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x?xf32>, %[[VAL_1:.*]]: tensor<8x?xf32>, %[[VAL_2:.*]]: tensor<8x?xf32>) -> tensor<8x?xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<8x?xf32>
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 8 : index
// CHECK: %[[VAL_8:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_4]] : vector<8x32xi1>
// CHECK: %[[VAL_9:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_0]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_6]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_1]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_10]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_13:.*]] = vector.mask %[[VAL_8]] { vector.transfer_read %[[VAL_2]][%[[VAL_5]], %[[VAL_5]]], %[[VAL_12]] {in_bounds = [true, true]} : tensor<8x?xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] : vector<8x32xf32>
// CHECK: %[[VAL_15:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_16:.*]] = vector.mask %[[VAL_8]] { vector.transfer_write %[[VAL_14]], %[[VAL_2]][%[[VAL_15]], %[[VAL_15]]] {in_bounds = [true, true]} : vector<8x32xf32>, tensor<8x?xf32> } : vector<8x32xi1> -> tensor<8x?xf32>


module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [8, 32] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @do_not_generate_masks(%arg0: tensor<8x32xf32>,
                                 %arg1: tensor<8x32xf32>,
                                 %arg2: tensor<8x32xf32>) -> tensor<8x32xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"] }
    ins(%arg0, %arg1 : tensor<8x32xf32>, tensor<8x32xf32>)
    outs(%arg2 : tensor<8x32xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<8x32xf32>
  return %0 : tensor<8x32xf32>
}

// CHECK-LABEL: func.func @do_not_generate_masks
// CHECK-NOT: vector.mask

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [8, 32] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @vectorize_static_shape_with_mask(%arg0: tensor<8x30xf32>,
                                            %arg1: tensor<8x30xf32>,
                                            %arg2: tensor<8x30xf32>) -> tensor<8x30xf32> {
  %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"] }
    ins(%arg0, %arg1 : tensor<8x30xf32>, tensor<8x30xf32>)
    outs(%arg2 : tensor<8x30xf32>) {
  ^bb(%in0: f32, %in1: f32, %out: f32) :
    %0 = arith.addf %in0, %in1 : f32
    linalg.yield %0 : f32
  } -> tensor<8x30xf32>
  return %0 : tensor<8x30xf32>
}

// CHECK-LABEL: func.func @vectorize_static_shape_with_mask(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x30xf32>, %[[VAL_1:.*]]: tensor<8x30xf32>, %[[VAL_2:.*]]: tensor<8x30xf32>) -> tensor<8x30xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index
// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_6]] : vector<8x32xi1>
// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_0]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_4]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_1]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_11:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %[[VAL_2]][%[[VAL_3]], %[[VAL_3]]], %[[VAL_11]] {in_bounds = [true, true]} : tensor<8x30xf32>, vector<8x32xf32> } : vector<8x32xi1> -> vector<8x32xf32>
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<8x32xf32>
// CHECK: %[[VAL_14:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %[[VAL_13]], %[[VAL_2]][%[[VAL_14]], %[[VAL_14]]] {in_bounds = [true, true]} : vector<8x32xf32>, tensor<8x30xf32> } : vector<8x32xi1> -> tensor<8x30xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [8, 32] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @vectorize_dynamic_fill(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32> {
  %0 = linalg.fill ins(%arg0 : f32) outs(%A : tensor<?x?xf32>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// CHECK-LABEL: func.func @vectorize_dynamic_fill
// CHECK: %[[DIM0:.*]] = tensor.dim
// CHECK: %[[DIM1:.*]] = tensor.dim
// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<8x16xi1>
// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<8x16xf32>
// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<8x16xf32>, tensor<?x?xf32> } : vector<8x16xi1>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [8, 16] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK: #[[MAP:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK: func @test_masked_vectorize_linalg_transpose
func.func @test_masked_vectorize_linalg_transpose(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  // CHECK-DAG: %[[D0:.*]] = tensor.dim %arg0, %[[C0]]
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
  // CHECK-DAG: %[[D1:.*]] = tensor.dim %arg0, %[[C1]]
  // CHECK: %[[MASK0:.*]] = vector.create_mask %[[D0]], %[[D1]]
  // CHECK: %[[LOAD:.*]] = vector.mask %[[MASK0]] { vector.transfer_read %arg0{{.+}} permutation_map = #[[MAP]]{{.+}} }
  // CHECK-SAME: vector<4x2xi1> -> vector<2x4xf32>
  // CHECK: %[[MASK1:.*]] = vector.create_mask %[[D1]], %[[D0]]
  // CHECK: %[[WRITE:.*]] = vector.mask %[[MASK1]] { vector.transfer_write %[[LOAD]], %arg1{{.+}} }
  // CHECK-SAME: vector<2x4xi1> -> tensor<?x?xf32>
  // CHECK: return %[[WRITE]]
  %0 = linalg.transpose ins(%arg0 : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) permutation = [1, 0]
  return %0 : tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func @test_masked_vectorize_linalg_copy
func.func @test_masked_vectorize_linalg_copy(%A : memref<?x?xf32>, %B : memref<?x?xf32>) {
  // CHECK: %[[c0:.*]] = arith.constant 0 : index
  // CHECK: %[[d0:.*]] = memref.dim %{{.*}}, %[[c0]] : memref<?x?xf32>
  // CHECK: %[[c1:.*]] = arith.constant 1 : index
  // CHECK: %[[d1:.*]] = memref.dim %{{.*}}, %[[c1]] : memref<?x?xf32>
  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x4xf32> } : vector<2x4xi1> -> vector<2x4xf32>
  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<2x4xf32>, memref<?x?xf32> } : vector<2x4xi1>
  linalg.copy ins(%A : memref<?x?xf32>) outs(%B : memref<?x?xf32>)
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func @test_masked_vectorize_pad
func.func @test_masked_vectorize_pad(
  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
    -> tensor<2x4xf32>
{
  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // CHECK-DAG: %[[c0_0:.*]] = arith.constant 0 : index
  // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
  // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_0]], %[[c0_0]]], %[[c42]]
  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
  // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
  // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4xf32>
  // CHECK: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_1]], %[[c0_1]]]
  // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<2x4xf32>
  %cst = arith.constant 42.43 : f32
  %c0 = arith.constant 0 : index
  %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
    ^bb0(%hh1: index, %hh2: index):
      tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<2x4xf32>
  return %1: tensor<2x4xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
      : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
// CHECK: func @test_masked_vectorize_dynamic_pad
func.func @test_masked_vectorize_dynamic_pad(
  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
    -> tensor<?x?xf32>
{
  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // CHECK-DAG: %[[res_d0:.+]] = affine.apply #[[MAP]]()
  // CHECK-DAG: %[[res_d1:.+]] = affine.apply #[[MAP]]()
  // CHECK: %[[c0_2:.*]] = arith.constant 0 : index
  // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
  // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
  // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
  // CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[res_d0]], %[[res_d1]]) : tensor<?x?xf32>
  // CHECK-DAG: %[[c0_3:.*]] = arith.constant 0 : index
  // CHECK: %[[mask_2:.*]] = vector.create_mask %[[res_d0]], %[[res_d1]] : vector<2x4xi1>
  // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_2]] {
  // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_3]], %[[c0_3]]]
  // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<?x?xf32>
  // CHECK: return %[[masked_write]] : tensor<?x?xf32>
  %cst = arith.constant 42.43 : f32
  %c0 = arith.constant 0 : index
  %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
    ^bb0(%hh1: index, %hh2: index):
      tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<?x?xf32>
  return %1: tensor<?x?xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
      : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
    transform.yield
  }
}

// -----

// Input identical to the test in vectorization-with-patterns.mlir. Output is
// different - vector sizes are inferred (rather than user-specified) and hence
// masking was used.
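// Only the three outer dimensions of the packed tensor are given vector sizes
// here ([4, 1, 32]); the trailing 16x2 of the write vector matches inner_tiles.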

func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
  %pack = tensor.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
  return %pack : tensor<4x1x32x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
// CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<4x1x32x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32>
// CHECK: return %[[write]] : tensor<4x1x32x16x2xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op
    transform.yield
  }
}

// -----

// Input identical to the test in vectorization-with-patterns.mlir. Output is
// different - vector sizes are inferred (rather than user-specified) and hence
// masking was used.
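// The 32x7x15 source is read into a 32x8x16 vector, so the masked read uses the
// original source sizes (32, 7, 15) as the mask bounds.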

func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
  %pad = arith.constant 0.000000e+00 : f32
  %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
  return %pack : tensor<32x4x1x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c32:.*]] = arith.constant 32 : index
// CHECK-DAG: %[[c7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[c15:.*]] = arith.constant 15 : index
// CHECK: %[[mask:.*]] = vector.create_mask %[[c32]], %[[c7]], %[[c15]] : vector<32x8x16xi1>
// CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
// CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
// CHECK-SAME: } : vector<32x8x16xi1> -> vector<32x8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [32, 4, 1] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @test_vectorize_dynamic_pack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
  %pack = tensor.pack %arg0 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg1 : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
  return %pack : tensor<?x?x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} %[[c0]] : tensor<?x?x16x2xf32>
// CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} %[[c1]] : tensor<?x?x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c0_0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[d0_0:.*]] = tensor.dim {{.*}} %[[c0_0]] : tensor<?x?xf32>
// CHECK-DAG: %[[d1_0:.*]] = tensor.dim {{.*}} %[[c1_0]] : tensor<?x?xf32>
// CHECK: %[[mask:.*]] = vector.create_mask %[[d0_0]], %[[d1_0]] : vector<8x16xi1>
// CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[cst]]
// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32>
// CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<8x16xf32> to vector<4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
// CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[d0]], %[[d1]]) : tensor<?x?x16x2xf32>
// CHECK: %[[mask_0:.*]] = vector.create_mask %[[d0]], %[[d1]], %[[c16]], %[[c2]] : vector<4x1x16x2xi1>
// CHECK: %[[masked_write:.*]] = vector.mask %[[mask_0]] {
// CHECK-SAME: vector.transfer_write %[[transpose]], %[[empty]][%[[c0_2]], %[[c0_2]], %[[c0_2]], %[[c0_2]]]
// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x16x2xf32>
// CHECK: return %[[masked_write]] : tensor<?x?x16x2xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 1] : !transform.any_op
    transform.yield
  }
}

// -----

func.func @matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
  linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
                outs(%C: memref<?x?xf32>)
  return
}

// CHECK-LABEL: func.func @matmul(
// CHECK-SAME: %[[A:.*]]: memref<?x?xf32>, %[[B:.*]]: memref<?x?xf32>, %[[C:.*]]: memref<?x?xf32>) {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = memref.dim %[[A]], %[[VAL_3]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = memref.dim %[[B]], %[[VAL_5]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = memref.dim %[[A]], %[[VAL_7]] : memref<?x?xf32>
// CHECK: %[[MASK_A:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_8]] : vector<8x4xi1>
// CHECK: %[[LOAD_A:.*]] = vector.mask %[[MASK_A]] { vector.transfer_read %[[A]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x16x4xf32> } : vector<8x4xi1> -> vector<8x16x4xf32>
// CHECK: %[[MASK_B:.*]] = vector.create_mask %[[VAL_8]], %[[VAL_6]] : vector<4x16xi1>
// CHECK: %[[LOAD_B:.*]] = vector.mask %[[MASK_B]] { vector.transfer_read %[[B]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x16x4xf32> } : vector<4x16xi1> -> vector<8x16x4xf32>
// CHECK: %[[MASK_C:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<8x16xi1>
// CHECK: %[[LOAD_C:.*]] = vector.mask %[[MASK_C]] { vector.transfer_read %[[C]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<8x16xf32> } : vector<8x16xi1> -> vector<8x16xf32>
// CHECK: %[[MULF:.*]] = arith.mulf %[[LOAD_A]], %[[LOAD_B]] : vector<8x16x4xf32>
// CHECK: %[[MASK_MULIT_RED:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]], %[[VAL_8]] : vector<8x16x4xi1>
// CHECK: %[[MULTI_RED:.*]] = vector.mask %[[MASK_MULIT_RED]] { vector.multi_reduction <add>, %[[MULF]], %[[LOAD_C]] [2] : vector<8x16x4xf32> to vector<8x16xf32> } : vector<8x16x4xi1> -> vector<8x16xf32>
// CHECK: %[[C2:.*]] = arith.constant 0 : index
// CHECK: vector.mask %[[MASK_C]] { vector.transfer_write %[[MULTI_RED]], %[[C]]{{\[}}%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<8x16xf32>, memref<?x?xf32> } : vector<8x16xi1>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %matmul vector_sizes [8, 16, 4] : !transform.any_op
    transform.yield
  }
}

// -----
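
// linalg.mmt4d with fully static shapes: no vector sizes are passed to the
// transform, so vectorization uses the static sizes and generates no masks.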
func.func @mmt4d(%A: memref<16x16x8x1xf32>, %B: memref<16x16x8x1xf32>, %C_in: memref<16x16x8x8xf32>) {
  linalg.mmt4d ins(%A, %B: memref<16x16x8x1xf32>, memref<16x16x8x1xf32>)
               outs(%C_in: memref<16x16x8x8xf32>)
  return
}

// CHECK-LABEL: func.func @mmt4d(
// CHECK-SAME: %[[A:.*]]: memref<16x16x8x1xf32>, %[[B:.*]]: memref<16x16x8x1xf32>, %[[C:.*]]: memref<16x16x8x8xf32>) {
// CHECK: %[[VEC_A:.*]] = vector.transfer_read %[[A]]{{.*}} : memref<16x16x8x1xf32>, vector<16x16x16x8x8x1xf32>
// CHECK: %[[VEC_B:.*]] = vector.transfer_read %[[B]]{{.*}} : memref<16x16x8x1xf32>, vector<16x16x16x8x8x1xf32>
// CHECK: %[[VEC_C:.*]] = vector.transfer_read %[[C]]{{.*}} : memref<16x16x8x8xf32>, vector<16x16x8x8xf32>
// CHECK: %[[MUL:.*]] = arith.mulf %[[VEC_A]], %[[VEC_B]] : vector<16x16x16x8x8x1xf32>
// CHECK: %[[RED:.*]] = vector.multi_reduction <add>, %[[MUL]], %[[VEC_C]] [2, 5] : vector<16x16x16x8x8x1xf32> to vector<16x16x8x8xf32>
// CHECK: vector.transfer_write %[[RED]], %[[C]]{{.*}} : vector<16x16x8x8xf32>, memref<16x16x8x8xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %mmt4d = transform.structured.match ops{["linalg.mmt4d"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %mmt4d : !transform.any_op
    transform.yield
  }
}

// -----

func.func @matmul_scalable(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
  linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
                outs(%C: memref<?x?xf32>)
  return
}

// CHECK-LABEL: func.func @matmul_scalable(
// CHECK-SAME: %[[A:.*]]: memref<?x?xf32>, %[[B:.*]]: memref<?x?xf32>, %[[C:.*]]: memref<?x?xf32>) {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = memref.dim %[[A]], %[[VAL_3]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = memref.dim %[[B]], %[[VAL_5]] : memref<?x?xf32>
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[VAL_8:.*]] = memref.dim %[[A]], %[[VAL_7]] : memref<?x?xf32>
// CHECK: %[[MASK_A:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_8]] : vector<8x4xi1>
// CHECK: %[[LOAD_A:.*]] = vector.mask %[[MASK_A]] { vector.transfer_read %[[A]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x[16]x4xf32> } : vector<8x4xi1> -> vector<8x[16]x4xf32>
// CHECK: %[[MASK_B:.*]] = vector.create_mask %[[VAL_8]], %[[VAL_6]] : vector<4x[16]xi1>
// CHECK: %[[LOAD_B:.*]] = vector.mask %[[MASK_B]] { vector.transfer_read %[[B]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true, true], permutation_map = #{{.*}}} : memref<?x?xf32>, vector<8x[16]x4xf32> } : vector<4x[16]xi1> -> vector<8x[16]x4xf32>
// CHECK: %[[MASK_C:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]] : vector<8x[16]xi1>
// CHECK: %[[LOAD_C:.*]] = vector.mask %[[MASK_C]] { vector.transfer_read %[[C]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<8x[16]xf32> } : vector<8x[16]xi1> -> vector<8x[16]xf32>
// CHECK: %[[MULF:.*]] = arith.mulf %[[LOAD_A]], %[[LOAD_B]] : vector<8x[16]x4xf32>
// CHECK: %[[MASK_MULIT_RED:.*]] = vector.create_mask %[[VAL_4]], %[[VAL_6]], %[[VAL_8]] : vector<8x[16]x4xi1>
// CHECK: %[[MULTI_RED:.*]] = vector.mask %[[MASK_MULIT_RED]] { vector.multi_reduction <add>, %[[MULF]], %[[LOAD_C]] [2] : vector<8x[16]x4xf32> to vector<8x[16]xf32> } : vector<8x[16]x4xi1> -> vector<8x[16]xf32>
// CHECK: %[[C2:.*]] = arith.constant 0 : index
// CHECK: vector.mask %[[MASK_C]] { vector.transfer_write %[[MULTI_RED]], %[[C]]{{\[}}%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<8x[16]xf32>, memref<?x?xf32> } : vector<8x[16]xi1>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %matmul vector_sizes [8, [16], 4] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func @test_vectorize_dynamic_shapes_unpack
func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>) -> tensor<?x?xf32> {
// CHECK: %[[C0:.*]] = arith.constant 0
// CHECK: %[[DIM:.*]] = tensor.dim %arg0, %[[C0]] : tensor<?x?xf32>
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK: %[[DIM0:.*]] = tensor.dim %arg0, %[[C1]] : tensor<?x?xf32>
// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00
// CHECK: %[[C01:.*]] = arith.constant 0
// CHECK: %[[C02:.*]] = arith.constant 0
// CHECK: %[[DIM4:.*]] = tensor.dim %arg1, %[[C02]] : tensor<?x?x16x2xf32>
// CHECK: %[[CNST14:.*]] = arith.constant 1
// CHECK: %[[DIM6:.*]] = tensor.dim %arg1, %[[CNST14]] : tensor<?x?x16x2xf32>
// CHECK: %[[CNST16:.*]] = arith.constant 16 : index
// CHECK: %[[CNST2:.*]] = arith.constant 2 : index
// CHECK: %[[readMsk0:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x16x2xi1>
// CHECK: %[[read0:.*]] = vector.mask %[[readMsk0]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x16x2xf32>, vector<2x1x16x2xf32> } : vector<2x1x16x2xi1> -> vector<2x1x16x2xf32>
// CHECK: %[[trans0:.*]] = vector.transpose %[[read0]], [0, 3, 1, 2] : vector<2x1x16x2xf32> to vector<2x2x1x16xf32>
// CHECK: %[[sc0:.*]] = vector.shape_cast %[[trans0]] : vector<2x2x1x16xf32> to vector<4x16xf32>
// CHECK: %[[empt0:.*]] = tensor.empty
// CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1>
// CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[empt0]]
// CHECK: return %[[write0]]
  %ret = tensor.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor<?x?x16x2xf32> -> tensor<?x?xf32>
  return %ret : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op
    transform.yield
  }
}

// -----
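
// tensor.unpack with user-specified vector sizes (512x128) that exceed the
// 256x128 result shape, so both the read and the write are masked.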
// CHECK-LABEL: func @test_vectorize_unpack
func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C8:.*]] = arith.constant 8 : index
  // CHECK: %[[C80:.*]] = arith.constant 8 : index
  // CHECK: %[[C32:.*]] = arith.constant 32 : index
  // CHECK: %[[C16:.*]] = arith.constant 16 : index
  // CHECK: %[[MSK0:.*]] = vector.create_mask %[[C8]], %[[C80]], %[[C32]], %[[C16]] : vector<16x8x32x16xi1>
  // CHECK: %[[READ0:.*]] = vector.mask %[[MSK0]] {{.*}} : vector<16x8x32x16xi1> -> vector<16x8x32x16xf32>
  // CHECK: %[[TRANSP0:.*]] = vector.transpose %[[READ0]], [0, 2, 1, 3] : vector<16x8x32x16xf32> to vector<16x32x8x16xf32>
  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP0]] : vector<16x32x8x16xf32> to vector<512x128xf32>
  // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32>
  // CHECK: %[[C01:.*]] = arith.constant 0 : index
  // CHECK: %[[C256:.*]] = arith.constant 256 : index
  // CHECK: %[[C128:.*]] = arith.constant 128 : index
  // CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1>
  // CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] {{.*}} : vector<512x128xi1> -> tensor<256x128xf32>
  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
  %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func @test_vectorize_unpack_no_masks
func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
  // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32>
  // CHECK: %[[C00:.*]] = arith.constant 0 : index
  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
  %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
    transform.yield
  }
}

// -----
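
// Same unpack as @test_vectorize_unpack_no_masks, but with outer_dims_perm = [1, 0],
// which changes the vector.transpose permutation ([1, 2, 0, 3] instead of [0, 2, 1, 3]).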
// CHECK-LABEL: test_vectorize_unpack_with_outer_perm
func.func @test_vectorize_unpack_with_outer_perm(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
  // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32>
  // CHECK: %[[C00:.*]] = arith.constant 0 : index
  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: test_vectorize_pack_no_vector_sizes
func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
  %pack = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
  return %pack : tensor<2x4x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]]], %[[cst]]
// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
// CHECK: return %[[write]] : tensor<2x4x16x2xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 : !transform.any_op
    transform.yield
  }
}

// -----
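
/// tensor.pack with a padding value and no user-specified vector sizes: the
/// 7 and 15 dims of the source are read out of bounds
/// (in_bounds = [true, false, false]) and padded with the constant before
/// the shape_cast/transpose into the packed 32x4x1x16x2 layout.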
// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes
func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
  %pad = arith.constant 0.000000e+00 : f32
  %pack = tensor.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
  return %pack : tensor<32x4x1x16x2xf32>
}
// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK: %[[transfer_read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func.func @test_vectorize_unpack_no_vector_sizes(
func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
  // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32>
  // CHECK: %[[C00:.*]] = arith.constant 0 : index
  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
  %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 : !transform.any_op
    transform.yield
  }
}

// -----
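
/// Unpacking the 8x4x16x16 tiles yields a full 64x128 vector, but the
/// destination is only 64x127. Without user-specified vector sizes the write
/// stays unmasked and is instead marked in_bounds = [true, false], so the
/// out-of-bounds column is not stored.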
// CHECK-LABEL: func.func @test_vectorize_unpack_no_vector_sizes_slice_output(
func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>) -> tensor<64x127xf32> {
  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x4x16x16xf32> to vector<4x16x8x16xf32>
  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<4x16x8x16xf32> to vector<64x128xf32>
  // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<64x127xf32>
  // CHECK: %[[C00:.*]] = arith.constant 0 : index
  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[EMPT]]{{\[}}%[[C00]], %[[C00]]]
  // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32>
  // CHECK: return %[[WRIT]] : tensor<64x127xf32>
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
  return %0 : tensor<64x127xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 : !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-LABEL: func.func @test_vectorize_unpack_no_vector_sizes_permute(
func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> {
  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
  return %0 : tensor<7x16xf32>
}
// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<4x7x4xf32>, vector<4x7x4xf32>
// CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 0, 2] : vector<4x7x4xf32> to vector<7x4x4xf32>
// CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<7x4x4xf32> to vector<7x16xf32>
// CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<7x16xf32>
// CHECK: %[[C00:.*]] = arith.constant 0 : index
// CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<7x16xf32>, tensor<7x16xf32>
// CHECK: return %[[WRIT]] : tensor<7x16xf32>
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 : !transform.any_op
    transform.yield
  }
}