// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s

// All create_mask operands are constants: a constant_mask with the same sizes
// is expected.
// CHECK-LABEL: create_vector_mask_to_constant_mask
func.func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) {
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  // CHECK: vector.constant_mask [3, 2] : vector<4x3xi1>
  %0 = vector.create_mask %c3, %c2 : vector<4x3xi1>
  return %0 : vector<4x3xi1>
}
// -----

// A negative constant operand on a scalable dim: an all-zero constant_mask is
// expected.
// CHECK-LABEL: create_scalable_vector_mask_to_constant_mask
func.func @create_scalable_vector_mask_to_constant_mask() -> (vector<[8]xi1>) {
  %c-1 = arith.constant -1 : index
  // CHECK: vector.constant_mask [0] : vector<[8]xi1>
  %0 = vector.create_mask %c-1 : vector<[8]xi1>
  return %0 : vector<[8]xi1>
}

// -----

// The first operand (5) exceeds the dim size (4): the expected constant_mask
// uses 4 for that dim.
// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation
func.func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) {
  %c2 = arith.constant 2 : index
  %c5 = arith.constant 5 : index
  // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1>
  %0 = vector.create_mask %c5, %c2 : vector<4x3xi1>
  return %0 : vector<4x3xi1>
}

// -----

// One operand is negative: the expected constant_mask is zero in every dim.
// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg
func.func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) {
  %cneg2 = arith.constant -2 : index
  %c5 = arith.constant 5 : index
  // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
  %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1>
  return %0 : vector<4x3xi1>
}

// -----

// One operand is zero: the expected constant_mask is zero in every dim.
// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero
func.func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) {
  %c2 = arith.constant 2 : index
  %c0 = arith.constant 0 : index
  // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
  %0 = vector.create_mask %c0, %c2 : vector<4x3xi1>
  return %0 : vector<4x3xi1>
}

// -----

// The operand for the scalable [16] dim is vscale * 16: a constant_mask
// [8, 16] is expected.
// CHECK-LABEL: create_vector_mask_to_constant_mask_scalable_all_true
func.func @create_vector_mask_to_constant_mask_scalable_all_true() -> (vector<8x[16]xi1>) {
  %c8 = arith.constant 8 : index
  %c16 = arith.constant 16 : index
  %0 = vector.vscale
  %1 = arith.muli %0, %c16 : index
  // CHECK: vector.constant_mask [8, 16] : vector<8x[16]xi1>
  %10 = vector.create_mask %c8, %1 : vector<8x[16]xi1>
  return %10 : vector<8x[16]xi1>
}

// -----

// A transpose of a create_mask is expected to become a create_mask whose
// operands are permuted; no transpose should remain.
// CHECK-LABEL: create_mask_transpose_to_transposed_create_mask
// CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index, %[[DIM2:.*]]: index
func.func @create_mask_transpose_to_transposed_create_mask(
  %dim0: index, %dim1: index, %dim2: index) -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
  // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]], %[[DIM2]] : vector<2x3x4xi1>
  // CHECK: vector.create_mask %[[DIM2]], %[[DIM0]], %[[DIM1]] : vector<4x2x3xi1>
  // CHECK-NOT: vector.transpose
  %0 = vector.create_mask %dim0, %dim1, %dim2 : vector<2x3x4xi1>
  %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
  return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
}

// -----

// Extracting index 1 from a mask whose leading constant size is 2: a
// create_mask over the remaining dims is expected and no extract should remain.
// CHECK-LABEL: extract_from_create_mask
// CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
func.func @extract_from_create_mask(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
  %c2 = arith.constant 2 : index
  %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
  // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[4]x[4]xi1>
  // CHECK-NOT: vector.extract
  %extract = vector.extract %mask[1] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
  return %extract : vector<[4]x[4]xi1>
}

// -----

// Extracting index 2 from a mask whose leading constant size is 2: an
// all-false constant is expected.
// CHECK-LABEL: extract_from_create_mask_all_false
func.func @extract_from_create_mask_all_false(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
  %c2 = arith.constant 2 : index
  %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
  // CHECK: arith.constant dense<false> : vector<[4]x[4]xi1>
  // CHECK-NOT: vector.extract
  %extract = vector.extract %mask[2] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
  return %extract : vector<[4]x[4]xi1>
}

// -----

// Same pattern with a scalable leading dim ([4]) and a constant leading size
// of 3; extracting index 1 is expected to leave a 1-D create_mask.
// CHECK-LABEL: extract_from_create_mask_leading_scalable
// CHECK-SAME: %[[DIM0:.*]]: index
func.func @extract_from_create_mask_leading_scalable(%dim0: index) -> vector<8xi1> {
  %c3 = arith.constant 3 : index
  %mask = vector.create_mask %c3, %dim0 : vector<[4]x8xi1>
  // CHECK: vector.create_mask %[[DIM0]] : vector<8xi1>
  // CHECK-NOT: vector.extract
  %extract = vector.extract %mask[1] : vector<8xi1> from vector<[4]x8xi1>
  return %extract : vector<8xi1>
}

// -----

// The dynamic index addresses a dim whose mask size (4) equals the dim size
// (4); the extract is still expected to fold to a 1-D create_mask.
// CHECK-LABEL: extract_from_create_mask_dynamic_position
// CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
func.func @extract_from_create_mask_dynamic_position(%dim0: index, %index: index) -> vector<6xi1> {
  %c4 = arith.constant 4 : index
  %c3 = arith.constant 3 : index
  %mask = vector.create_mask %c3, %c4, %dim0 : vector<4x4x6xi1>
  // CHECK: vector.create_mask %[[DIM0]] : vector<6xi1>
  // CHECK-NOT: vector.extract
  %extract = vector.extract %mask[2, %index] : vector<6xi1> from vector<4x4x6xi1>
  return %extract : vector<6xi1>
}

// -----

// A static index of -1 on a scalar extract: ub.poison is expected.
// CHECK-LABEL: @extract_scalar_poison_idx
func.func @extract_scalar_poison_idx(%a: vector<4x5xf32>) -> f32 {
  // CHECK-NOT: vector.extract
  // CHECK-NEXT: ub.poison : f32
  %0 = vector.extract %a[-1, 0] : f32 from vector<4x5xf32>
  return %0 : f32
}

// -----

// A static index of -1 on a vector extract: ub.poison of the result type is
// expected.
// CHECK-LABEL: @extract_vector_poison_idx
func.func @extract_vector_poison_idx(%a: vector<4x5xf32>) -> vector<5xf32> {
  // CHECK-NOT: vector.extract
  // CHECK-NEXT: ub.poison : vector<5xf32>
  %0 = vector.extract %a[-1] : vector<5xf32> from vector<4x5xf32>
  return %0 : vector<5xf32>
}

// -----

// CHECK-LABEL: @extract_multiple_poison_idx
// Both static indices are -1: ub.poison of the result type is expected.
func.func @extract_multiple_poison_idx(%a: vector<4x5x8xf32>)
    -> vector<8xf32> {
  // CHECK-NOT: vector.extract
  // CHECK-NEXT: ub.poison : vector<8xf32>
  %0 = vector.extract %a[-1, -1] : vector<8xf32> from vector<4x5x8xf32>
  return %0 : vector<8xf32>
}

// -----

// The dynamic index addresses a dim whose constant mask size is 0: an
// all-false constant is expected.
// CHECK-LABEL: extract_from_create_mask_dynamic_position_all_false
// CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
func.func @extract_from_create_mask_dynamic_position_all_false(%dim0: index, %index: index) -> vector<6xi1> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %mask = vector.create_mask %c1, %c0, %dim0 : vector<1x4x6xi1>
  // CHECK: arith.constant dense<false> : vector<6xi1>
  // CHECK-NOT: vector.extract
  %extract = vector.extract %mask[0, %index] : vector<6xi1> from vector<1x4x6xi1>
  return %extract : vector<6xi1>
}

// -----

// Negative test: the dynamic index addresses a dim of size 4 whose mask size
// is 2, and the IR is expected to stay unchanged.
// CHECK-LABEL: extract_from_create_mask_dynamic_position_unknown
// CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
func.func @extract_from_create_mask_dynamic_position_unknown(%dim0: index, %index: index) -> vector<6xi1> {
  %c2 = arith.constant 2 : index
  %mask = vector.create_mask %c2, %dim0 : vector<4x6xi1>
  // CHECK: %[[C2:.*]] = arith.constant 2 : index
  // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[DIM0]] : vector<4x6xi1>
  // CHECK-NEXT: vector.extract %[[MASK]][%[[INDEX]]] : vector<6xi1> from vector<4x6xi1>
  %extract = vector.extract %mask[%index] : vector<6xi1> from vector<4x6xi1>
  return %extract : vector<6xi1>
}

// -----

// Negative test with one static and one dynamic index; the dynamic index
// addresses a dim of size 4 whose mask size is 2, and the IR is expected to
// stay unchanged.
// CHECK-LABEL: extract_from_create_mask_mixed_position_unknown
// CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
func.func @extract_from_create_mask_mixed_position_unknown(%dim0: index, %index0: index) -> vector<4xi1> {
  %c2 = arith.constant 2 : index
  %mask = vector.create_mask %c2, %c2, %dim0 : vector<2x4x4xi1>
  // CHECK: %[[C2:.*]] = arith.constant 2 : index
  // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[C2]], %[[DIM0]] : vector<2x4x4xi1>
  // CHECK-NEXT: vector.extract %[[MASK]][1, %[[INDEX]]] : vector<4xi1> from vector<2x4x4xi1>
  %extract = vector.extract %mask[1, %index0] : vector<4xi1> from vector<2x4x4xi1>
  return %extract : vector<4xi1>
}

// -----

// Negative test: the leading mask size is not a constant, and the IR is
// expected to stay unchanged.
// CHECK-LABEL: extract_from_non_constant_create_mask
// CHECK-SAME: %[[DIM0:.*]]: index
func.func @extract_from_non_constant_create_mask(%dim0: index) -> vector<[2]xi1> {
  %mask = vector.create_mask %dim0, %dim0 : vector<[2]x[2]xi1>
  // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM0]] : vector<[2]x[2]xi1>
  // CHECK-NEXT: vector.extract %[[MASK]][0] : vector<[2]xi1> from vector<[2]x[2]xi1>
  %extract = vector.extract %mask[0] : vector<[2]xi1> from vector<[2]x[2]xi1>
  return %extract : vector<[2]xi1>
}

// -----

// A transpose of a constant_mask is expected to become a constant_mask with
// permuted sizes; no transpose should remain.
// CHECK-LABEL: constant_mask_transpose_to_transposed_constant_mask
func.func @constant_mask_transpose_to_transposed_constant_mask() -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
  // CHECK: vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
  // CHECK: vector.constant_mask [3, 1, 2] : vector<4x2x3xi1>
  // CHECK-NOT: vector.transpose
  %0 = vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
  %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
  return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
}

// -----

// The following tests take an extract_strided_slice of a constant_mask [2, 2]
// on vector<4x3xi1> with different offsets and sizes. Each one carries its own
// label so that its directive is matched only inside its own function.

// Offsets [0, 0], sizes [2, 2].
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x2xi1>
  // CHECK: vector.constant_mask [2, 2] : vector<2x2xi1>
  return %1 : vector<2x2xi1>
}

// -----

// Offsets [1, 0], sizes [2, 2].
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [1, 0], sizes = [2, 2], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x2xi1>
  // CHECK: vector.constant_mask [1, 2] : vector<2x2xi1>
  return %1 : vector<2x2xi1>
}

// -----

// Offsets [0, 1], sizes [2, 2].
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x2xi1>
  // CHECK: vector.constant_mask [2, 1] : vector<2x2xi1>
  return %1 : vector<2x2xi1>
}

// -----

// Offsets [2, 0], sizes [2, 2]: an all-zero mask is expected.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x2xi1>
  // CHECK: vector.constant_mask [0, 0] : vector<2x2xi1>
  return %1 : vector<2x2xi1>
}

// -----

// Offsets [0, 2], sizes [2, 1]: an all-zero mask is expected.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x1xi1>
  // CHECK: vector.constant_mask [0, 0] : vector<2x1xi1>
  return %1 : vector<2x1xi1>
}

// -----

// Offsets [0, 1], sizes [2, 1].
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x1xi1>
  // CHECK: vector.constant_mask [2, 1] : vector<2x1xi1>
  return %1 : vector<2x1xi1>
}

// -----

// Offsets [1, 1], sizes [2, 1].
// CHECK-LABEL: extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
  %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
  %1 = vector.extract_strided_slice %0
    {offsets = [1, 1], sizes = [2, 1], strides = [1, 1]}
      : vector<4x3xi1> to vector<2x1xi1>
  // CHECK: vector.constant_mask [1, 1] : vector<2x1xi1>
  return %1 : vector<2x1xi1>
}

// -----

// A slice that covers the whole source vector is expected to fold to the
// source.
// CHECK-LABEL: extract_strided_fold
// CHECK-SAME: (%[[ARG:.*]]: vector<4x3xi1>)
// CHECK-NEXT: return %[[ARG]] : vector<4x3xi1>
func.func @extract_strided_fold(%arg : vector<4x3xi1>) -> (vector<4x3xi1>) {
  %0 = vector.extract_strided_slice %arg
    {offsets = [0, 0], sizes = [4, 3], strides = [1, 1]}
      : vector<4x3xi1> to vector<4x3xi1>
  return %0 : vector<4x3xi1>
}

// -----

// The extracted slice has the same offsets and shape as the inserted vector:
// it is expected to fold to the inserted vector.
// CHECK-LABEL: extract_strided_fold_insert
// CHECK-SAME: (%[[ARG:.*]]: vector<4x4xf32>
// CHECK-NEXT: return %[[ARG]] : vector<4x4xf32>
func.func @extract_strided_fold_insert(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
  -> (vector<4x4xf32>) {
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
    : vector<4x4xf32> into vector<8x16xf32>
  %1 = vector.extract_strided_slice %0
    {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
      : vector<8x16xf32> to vector<4x4xf32>
  return %1 : vector<4x4xf32>
}

// -----

// Case where the vector inserted is a subset of the vector extracted.
// CHECK-LABEL: extract_strided_fold_insert
// CHECK-SAME: (%[[ARG0:.*]]: vector<6x4xf32>
// CHECK-NEXT: %[[EXT:.*]] = vector.extract_strided_slice %[[ARG0]]
// CHECK-SAME: {offsets = [0, 0], sizes = [4, 4], strides = [1, 1]}
// CHECK-SAME: : vector<6x4xf32> to vector<4x4xf32>
// CHECK-NEXT: return %[[EXT]] : vector<4x4xf32>
func.func @extract_strided_fold_insert(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
  -> (vector<4x4xf32>) {
  // A 6x4 vector is inserted at [2, 2] and a 4x4 slice is read back from the
  // same offsets; the slice is expected to be taken from %a at [0, 0] instead.
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
    : vector<6x4xf32> into vector<8x16xf32>
  %1 = vector.extract_strided_slice %0
    {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
      : vector<8x16xf32> to vector<4x4xf32>
  return %1 : vector<4x4xf32>
}

// -----

// Negative test where the extract is not a subset of the element inserted.
// CHECK-LABEL: extract_strided_fold_negative
// CHECK-SAME: (%[[ARG0:.*]]: vector<4x4xf32>, %[[ARG1:.*]]: vector<8x16xf32>
// CHECK: %[[INS:.*]] = vector.insert_strided_slice %[[ARG0]], %[[ARG1]]
// CHECK-SAME: {offsets = [2, 2], strides = [1, 1]}
// CHECK-SAME: : vector<4x4xf32> into vector<8x16xf32>
// CHECK: %[[EXT:.*]] = vector.extract_strided_slice %[[INS]]
// CHECK-SAME: {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
// CHECK-SAME: : vector<8x16xf32> to vector<6x4xf32>
// CHECK-NEXT: return %[[EXT]] : vector<6x4xf32>
func.func @extract_strided_fold_negative(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
  -> (vector<6x4xf32>) {
  // The 6x4 slice read back is larger than the 4x4 vector inserted, so both
  // ops are expected to remain.
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
    : vector<4x4xf32> into vector<8x16xf32>
  %1 = vector.extract_strided_slice %0
    {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
      : vector<8x16xf32> to vector<6x4xf32>
  return %1 : vector<6x4xf32>
}

// -----

// Case where we need to look through two levels of insert_strided_slice.
// CHECK-LABEL: extract_strided_fold_insert
// CHECK-SAME: (%[[ARG0:.*]]: vector<2x8xf32>, %[[ARG1:.*]]: vector<1x4xf32>,
// CHECK-NEXT: %[[EXT:.*]] = vector.extract_strided_slice %[[ARG1]]
// CHECK-SAME: {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]}
// CHECK-SAME: : vector<1x4xf32> to vector<1x1xf32>
// CHECK-NEXT: return %[[EXT]] : vector<1x1xf32>
func.func @extract_strided_fold_insert(%a: vector<2x8xf32>, %b: vector<1x4xf32>,
                                       %c : vector<1x4xf32>) -> (vector<1x1xf32>) {
  // The slice at [0, 1] lies inside the first insert (%b at [0, 1]) and
  // outside the second one (%c at [1, 0]); it is expected to be read from %b.
  %0 = vector.insert_strided_slice %b, %a {offsets = [0, 1], strides = [1, 1]}
    : vector<1x4xf32> into vector<2x8xf32>
  %1 = vector.insert_strided_slice %c, %0 {offsets = [1, 0], strides = [1, 1]}
    : vector<1x4xf32> into vector<2x8xf32>
  %2 = vector.extract_strided_slice %1
    {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]}
      : vector<2x8xf32> to vector<1x1xf32>
  return %2 : vector<1x1xf32>
}

// -----

// An identity permutation is expected to fold to the operand.
// CHECK-LABEL: transpose_1D_identity
// CHECK-SAME: ([[ARG:%.*]]: vector<4xf32>)
func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> {
  // CHECK-NOT: transpose
  %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32>
  // CHECK-NEXT: return [[ARG]]
  return %0 : vector<4xf32>
}

// -----

// An identity permutation is expected to fold to the operand.
// CHECK-LABEL: transpose_2D_identity
// CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
  // CHECK-NOT: transpose
  %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
  // CHECK-NEXT: return [[ARG]]
  return %0 : vector<4x3xf32>
}

// -----

// An identity permutation is expected to fold to the operand.
// CHECK-LABEL: transpose_3D_identity
// CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
func.func @transpose_3D_identity(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
  // CHECK-NOT: transpose
  %0 = vector.transpose %arg, [0, 1, 2] : vector<4x3x2xf32> to vector<4x3x2xf32>
  // CHECK-NEXT: return [[ARG]]
  return %0 : vector<4x3x2xf32>
}

// -----

// A chain of 2-D transposes that composes to the identity: both addf operands
// are expected to become the function argument.
// CHECK-LABEL: transpose_2D_sequence
// CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
func.func @transpose_2D_sequence(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
  // CHECK-NOT: transpose
  %0 = vector.transpose %arg, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
  %1 = vector.transpose %0, [0, 1] : vector<3x4xf32> to vector<3x4xf32>
  %2 = vector.transpose %1, [1, 0] : vector<3x4xf32> to vector<4x3xf32>
  %3 = vector.transpose %2, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
  // CHECK: [[ADD:%.*]] = arith.addf [[ARG]], [[ARG]]
  %4 = arith.addf %2, %3 : vector<4x3xf32>
  // CHECK-NEXT: return [[ADD]]
  return %4 : vector<4x3xf32>
}

// -----

// A chain of 3-D transposes: consecutive transposes are expected to be
// composed, and compositions equal to the identity are expected to disappear.
// CHECK-LABEL: transpose_3D_sequence
// CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
func.func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
  // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
  %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
  %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
  // Match the captured argument rather than a printed SSA name.
  // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
  %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
  %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
  // CHECK: [[MUL:%.*]] = arith.mulf [[T0]], [[T1]]
  %4 = arith.mulf %1, %3 : vector<2x3x4xf32>
  // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
  %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
  // CHECK-NOT: transpose
  %6 = vector.transpose %3, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
  // CHECK: [[ADD:%.*]] = arith.addf [[T5]], [[ARG]]
  %7 = arith.addf %5, %6 : vector<4x3x2xf32>
  // CHECK-NEXT: return [[ADD]]
  return %7 : vector<4x3x2xf32>
}

// -----

// Transfers through a memref.cast to a dynamic shape are expected to use the
// statically shaped source and to carry in_bounds = [true, true].
// CHECK-LABEL: cast_transfers
func.func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  %0 = memref.cast %A : memref<4x8xf32> to memref<?x?xf32>

  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<4x8xf32>, vector<4x8xf32>
  %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>

  // CHECK: vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, memref<4x8xf32>
  vector.transfer_write %1, %0[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
  return %1 : vector<4x8xf32>
}

// -----

// Same as above for a transfer_read through a tensor.cast.
// CHECK-LABEL: cast_transfers
func.func @cast_transfers(%A: tensor<4x8xf32>) -> (vector<4x8xf32>) {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  %0 = tensor.cast %A : tensor<4x8xf32> to tensor<?x?xf32>

  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x8xf32>, vector<4x8xf32>
  %1 = vector.transfer_read %0[%c0, %c0], %f0 : tensor<?x?xf32>, vector<4x8xf32>

  return %1 : vector<4x8xf32>
}

// -----

// Scalar extracts through chains of inserts and 2-D transposes are expected
// to fold to the inserted scalars.
// CHECK-LABEL: func @insert_extract_transpose_2d(
// CHECK-SAME: %[[V:[a-zA-Z0-9]*]]: vector<2x3xf32>,
// CHECK-SAME: %[[F0:[a-zA-Z0-9]*]]: f32,
// CHECK-SAME: %[[F1:[a-zA-Z0-9]*]]: f32,
// CHECK-SAME: %[[F2:[a-zA-Z0-9]*]]: f32,
// CHECK-SAME: %[[F3:[a-zA-Z0-9]*]]: f32
func.func @insert_extract_transpose_2d(
    %v: vector<2x3xf32>, %f0: f32, %f1: f32, %f2: f32, %f3: f32)
-> (f32, f32, f32)
{
  %0 = vector.insert %f0, %v[0, 0] : f32 into vector<2x3xf32>
  %1 = vector.insert %f1, %0[0, 1] : f32 into vector<2x3xf32>
  %2 = vector.insert %f2, %1[1, 0] : f32 into vector<2x3xf32>
  %3 = vector.insert %f3, %2[1, 1] : f32 into vector<2x3xf32>
  %4 = vector.transpose %3, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
  %5 = vector.insert %f3, %4[1, 0] : f32 into vector<3x2xf32>
  %6 = vector.transpose %5, [1, 0] : vector<3x2xf32> to vector<2x3xf32>

  // Expected %f2 from %2 = vector.insert %f2, %1[1, 0].
  %r1 = vector.extract %3[1, 0] : f32 from vector<2x3xf32>

  // Expected %f1 from %1 = vector.insert %f1, %0[0, 1] followed by
  // transpose [1, 0].
  %r2 = vector.extract %4[1, 0] : f32 from vector<3x2xf32>

  // Expected %f2 from %2 = vector.insert %f2, %1[1, 0] followed by double
  // transpose [1, 0].
  %r3 = vector.extract %6[1, 0] : f32 from vector<2x3xf32>

  // CHECK-NEXT: return %[[F2]], %[[F1]], %[[F2]] : f32, f32, f32
  return %r1, %r2, %r3 : f32, f32, f32
}

// -----

// Extracts from a chain of inserts; the case numbers below are the ones used
// by the comments of the original test.
// CHECK-LABEL: insert_extract_chain
// CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
// CHECK-SAME: %[[V34:[a-zA-Z0-9]*]]: vector<3x4xf32>
// CHECK-SAME: %[[V4:[a-zA-Z0-9]*]]: vector<4xf32>
func.func @insert_extract_chain(%v234: vector<2x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>)
    -> (vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>) {
  // CHECK-NEXT: %[[A34:.*]] = vector.insert
  %A34 = vector.insert %v34, %v234[0]: vector<3x4xf32> into vector<2x3x4xf32>
  // CHECK-NEXT: %[[B34:.*]] = vector.insert
  %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<2x3x4xf32>
  // CHECK-NEXT: %[[A4:.*]] = vector.insert
  %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> into vector<2x3x4xf32>
  // CHECK-NEXT: %[[B4:.*]] = vector.insert
  %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<2x3x4xf32>

  // Case 2.a. [1, 1] == insertpos ([1, 1])
  // Match %B4 insertionpos and fold to its source (i.e. %V4).
  %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<2x3x4xf32>

  // Case 3.a. insertpos ([1]) is a prefix of [1, 0].
  // Traverse %B34 to its source (i.e. %V34@[*0*]).
  // CHECK-NEXT: %[[R1:.*]] = vector.extract %[[V34]][0]
  %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<2x3x4xf32>

  // Case 4. [1] is a prefix of insertpos ([1, 1]).
  // Cannot traverse %B4.
  // CHECK-NEXT: %[[R2:.*]] = vector.extract %[[B4]][1]
  %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<2x3x4xf32>

  // Case 5. [0] is disjoint from insertpos ([1, 1]).
  // Traverse %B4 to its dest (i.e. %A4@[0]).
  // Traverse %A4 to its dest (i.e. %B34@[0]).
  // Traverse %B34 to its dest (i.e. %A34@[0]).
  // Match %A34 insertionpos and fold to its source (i.e. %V34).
  %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<2x3x4xf32>

  // CHECK: return %[[V4]], %[[R1]], %[[R2]], %[[V34]]
  return %r0, %r1, %r2, %r3:
    vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>
}

// -----

// Extracts through transpose / insert / transpose chains on 3-D vectors. Only
// the first extract is expected to bypass the chain; the others are expected
// to keep reading from the final transpose.
// CHECK-LABEL: func @insert_extract_transpose_3d(
// CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
func.func @insert_extract_transpose_3d(
  %v234: vector<2x3x4xf32>, %v43: vector<4x3xf32>, %f0: f32)
    -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>) {

  %a432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
  %b432 = vector.insert %f0, %a432[0, 0, 1] : f32 into vector<4x3x2xf32>
  %c234 = vector.transpose %b432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
  // Case 1. %c234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
  // Case 5. %b432 = insert [0,0,1] (inter([.,2,1], [.,0,1]) == 0) prop to %a432
  // Case 1. %a432 = transpose [2,1,0] posWithSentinels [-1,2,1] -> [1,2,-1]
  // can extract directly from %v234, the rest folds.
  // CHECK: %[[R0:.*]] = vector.extract %[[V234]][1, 2]
  %r0 = vector.extract %c234[1, 2] : vector<4xf32> from vector<2x3x4xf32>

  // CHECK-NEXT: vector.transpose
  // CHECK-NEXT: vector.insert
  // CHECK-NEXT: %[[F234:.*]] = vector.transpose
  %d432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
  %e432 = vector.insert %f0, %d432[0, 2, 1] : f32 into vector<4x3x2xf32>
  %f234 = vector.transpose %e432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
  // Case 1. %f234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
  // Case 4. %e432 = insert [0,2,1] (inter([.,2,1], [.,2,1]) != 0)
  // Bail, cannot do better than the current.
  // CHECK: %[[R1:.*]] = vector.extract %[[F234]]
  %r1 = vector.extract %f234[1, 2] : vector<4xf32> from vector<2x3x4xf32>

  // CHECK-NEXT: vector.transpose
  // CHECK-NEXT: vector.insert
  // CHECK-NEXT: %[[H234:.*]] = vector.transpose
  %g243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
  %h243 = vector.insert %v43, %g243[0] : vector<4x3xf32> into vector<2x4x3xf32>
  %i234 = vector.transpose %h243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
  // Case 1. %i234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
  // Case 3.b. %h243 = insert [0] is prefix of [0,.,.] but internal transpose.
  // Bail, cannot do better than the current.
  // CHECK: %[[R2:.*]] = vector.extract %[[H234]][0, 1]
  %r2 = vector.extract %i234[0, 1] : vector<4xf32> from vector<2x3x4xf32>

  // CHECK-NEXT: vector.transpose
  // CHECK-NEXT: vector.insert
  // CHECK-NEXT: %[[K234:.*]] = vector.transpose
  %j243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
  %k243 = vector.insert %v43, %j243[0] : vector<4x3xf32> into vector<2x4x3xf32>
  %l234 = vector.transpose %k243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
  // Case 1. %l234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
  // Case 2.b. %k243 = insert [0] == [0,.,.] but internal transpose.
  // Bail, cannot do better than the current.
  // CHECK: %[[R3:.*]] = vector.extract %[[K234]][0]
  %r3 = vector.extract %l234[0] : vector<3x4xf32> from vector<2x3x4xf32>

  // CHECK-NEXT: return %[[R0]], %[[R1]], %[[R2]], %[[R3]]
  return %r0, %r1, %r2, %r3: vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>
}

// -----

// A chain of extracts is expected to be merged into a single extract from the
// original vector.
// CHECK-LABEL: fold_extracts
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
func.func @fold_extracts(%a : vector<3x4x5x6xf32>) -> (f32, vector<4x5x6xf32>) {
  %b = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
  %c = vector.extract %b[1, 2] : vector<6xf32> from vector<4x5x6xf32>
  // CHECK-NEXT: vector.extract %[[A]][0, 1, 2, 3] : f32 from vector<3x4x5x6xf32>
  %d = vector.extract %c[3] : f32 from vector<6xf32>

  // CHECK-NEXT: vector.extract %[[A]][0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
  %e = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>

  // CHECK-NEXT: return
  return %d, %e : f32, vector<4x5x6xf32>
}

// -----

// Extracts from transposes: the first two are expected to read directly from
// the transpose operand at permuted positions, the third is expected to keep
// the transpose.
// CHECK-LABEL: fold_extract_transpose
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: vector<3x6x5x6xf32>
func.func @fold_extract_transpose(
    %a : vector<3x4x5x6xf32>, %b : vector<3x6x5x6xf32>) -> (
      vector<6xf32>, vector<6xf32>, vector<6xf32>) {
  // [3] is a proper most minor identity map in transpose.
  // The permutation is its own inverse, so we have:
  //   [0, 2, 1] ^ -1 o [0, 1, 2] = [0, 2, 1] o [0, 1, 2]
  //                              = [0, 2, 1]
  // CHECK-NEXT: vector.extract %[[A]][0, 2, 1] : vector<6xf32> from vector<3x4x5x6xf32>
  %0 = vector.transpose %a, [0, 2, 1, 3] : vector<3x4x5x6xf32> to vector<3x5x4x6xf32>
  %1 = vector.extract %0[0, 1, 2] : vector<6xf32> from vector<3x5x4x6xf32>

  // [3] is a proper most minor identity map in transpose.
  // The permutation is not its own inverse, so we have:
  //   [1, 2, 0] ^ -1 o [0, 1, 2] = [2, 0, 1] o [0, 1, 2]
  //                              = [2, 0, 1]
  // CHECK-NEXT: vector.extract %[[A]][2, 0, 1] : vector<6xf32> from vector<3x4x5x6xf32>
  %2 = vector.transpose %a, [1, 2, 0, 3] : vector<3x4x5x6xf32> to vector<4x5x3x6xf32>
  %3 = vector.extract %2[0, 1, 2] : vector<6xf32> from vector<4x5x3x6xf32>

  // Not a minor identity map, so the intra-vector level has been permuted.
  // CHECK-NEXT: vector.transpose %[[B]], [0, 2, 3, 1]
  // CHECK-NEXT: vector.extract %{{.*}}[0, 1, 2]
  %4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>
  %5 = vector.extract %4[0, 1, 2] : vector<6xf32> from vector<3x5x6x6xf32>

  return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>
}

// -----

// A scalar extract from a broadcast of a scalar is expected to fold to that
// scalar.
// CHECK-LABEL: fold_extract_broadcast
// CHECK-SAME: %[[A:.*]]: f32
// CHECK: return %[[A]] : f32
func.func @fold_extract_broadcast(%a : f32) -> f32 {
  %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
  return %r : f32
}

// -----

// A scalar extract from a broadcast of a 0-D vector is expected to become an
// extractelement on the 0-D vector.
// CHECK-LABEL: fold_extract_broadcast_0dvec
// CHECK-SAME: %[[A:.*]]: vector<f32>
// CHECK: %[[B:.+]] = vector.extractelement %[[A]][] : vector<f32>
// CHECK: return %[[B]] : f32
func.func @fold_extract_broadcast_0dvec(%a : vector<f32>) -> f32 {
  %b = vector.broadcast %a : vector<f32> to vector<1x2x4xf32>
  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
  return %r : f32
}

// -----

// Negative test: the broadcast and the extract are both expected to remain.
// CHECK-LABEL: fold_extract_broadcast_negative
// CHECK: vector.broadcast %{{.*}} : vector<1x1xf32> to vector<1x1x4xf32>
// CHECK: vector.extract %{{.*}}[0, 0] : vector<4xf32> from vector<1x1x4xf32>
func.func @fold_extract_broadcast_negative(%a : vector<1x1xf32>) -> vector<4xf32> {
  %b = vector.broadcast %a : vector<1x1xf32> to vector<1x1x4xf32>
  %r = vector.extract %b[0, 0] : vector<4xf32> from vector<1x1x4xf32>
  return %r : vector<4xf32>
}

// -----

// A scalar extract from a splat is expected to fold to the splatted scalar.
// CHECK-LABEL: fold_extract_splat
// CHECK-SAME: %[[A:.*]]: f32
// CHECK: return %[[A]] : f32
func.func @fold_extract_splat(%a : f32) -> f32 {
  %b = vector.splat %a : vector<1x2x4xf32>
  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
  return %r : f32
}

// -----

// The extracted sub-vector has the type of the broadcast source: it is
// expected to fold to the source.
// CHECK-LABEL: fold_extract_broadcast_vector
// CHECK-SAME: %[[A:.*]]: vector<4xf32>
// CHECK: return %[[A]] : vector<4xf32>
func.func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> {
  %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
  %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
  return %r : vector<4xf32>
}

// -----

// A scalar extract from a broadcast vector is expected to become an extract
// from the broadcast source at the trailing index.
// CHECK-LABEL: fold_extract_broadcast
// CHECK-SAME: %[[A:.*]]: vector<4xf32>
// CHECK: %[[R:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
// CHECK: return %[[R]] : f32
func.func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 {
  %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
  %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
  return %r : f32
}

// -----

// A vector extract from a broadcast of a scalar is expected to become a
// smaller broadcast of that scalar.
// CHECK-LABEL: fold_extract_broadcast
// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<4xf32>
// CHECK: return %[[B]] : vector<4xf32>
func.func @fold_extract_broadcast(%a : f32) -> vector<4xf32> {
  %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
  %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
  return %r : vector<4xf32>
}

// -----

// A vector extract from a broadcast of vector<1xf32> is expected to become a
// broadcast from the source to the extracted type.
// CHECK-LABEL: fold_extract_broadcast
// CHECK-SAME: %[[A:.*]]: vector<1xf32>
// CHECK: %[[R:.*]] = vector.broadcast %[[A]] : vector<1xf32> to vector<8xf32>
// CHECK: return %[[R]] : vector<8xf32>
func.func @fold_extract_broadcast(%a : vector<1xf32>) -> vector<8xf32> {
  %b = vector.broadcast %a : vector<1xf32> to vector<1x8xf32>
  %r = vector.extract %b[0] : vector<8xf32> from vector<1x8xf32>
  return %r : vector<8xf32>
}
// -----

// Scalar extracts from a shuffle are expected to read directly from the
// shuffle operand selected by the mask; no shuffle should remain.
// CHECK-LABEL: @fold_extract_shuffle
// CHECK-SAME: %[[A:.*]]: vector<8xf32>, %[[B:.*]]: vector<8xf32>
// CHECK-NOT: vector.shuffle
// CHECK: vector.extract %[[A]][0] : f32 from vector<8xf32>
// CHECK: vector.extract %[[B]][0] : f32 from vector<8xf32>
// CHECK: vector.extract %[[A]][7] : f32 from vector<8xf32>
// CHECK: vector.extract %[[B]][7] : f32 from vector<8xf32>
func.func @fold_extract_shuffle(%a : vector<8xf32>, %b : vector<8xf32>)
                                -> (f32, f32, f32, f32) {
  %shuffle = vector.shuffle %a, %b [0, 8, 7, 15] : vector<8xf32>, vector<8xf32>
  %e0 = vector.extract %shuffle[0] : f32 from vector<4xf32>
  %e1 = vector.extract %shuffle[1] : f32 from vector<4xf32>
  %e2 = vector.extract %shuffle[2] : f32 from vector<4xf32>
  %e3 = vector.extract %shuffle[3] : f32 from vector<4xf32>
  return %e0, %e1, %e2, %e3 : f32, f32, f32, f32
}

// -----

// CHECK-LABEL: func @fold_extract_shapecast
// CHECK-SAME: (%[[A0:.*]]: vector<5x1x3x2xf32>, %[[A1:.*]]: vector<8x4x2xf32>
// CHECK: %[[R0:.*]] = vector.extract %[[A0]][1, 0, 1, 1] : f32 from vector<5x1x3x2xf32>
// CHECK: %[[R1:.*]] = vector.extract %[[A0]][1, 0, 2] : vector<2xf32> from vector<5x1x3x2xf32>
// CHECK: %[[R2:.*]] = vector.extract %[[A1]][7] : vector<4x2xf32> from vector<8x4x2xf32>
// CHECK: return %[[R0]], %[[R1]], %[[R2]], %[[A1]] : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
func.func @fold_extract_shapecast(%arg0 :
vector<5x1x3x2xf32>, 802 %arg1 : vector<8x4x2xf32>) 803 -> (f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>) { 804 %0 = vector.shape_cast %arg0 : vector<5x1x3x2xf32> to vector<15x2xf32> 805 %1 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<4x2x4x2xf32> 806 %2 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<1x8x4x2xf32> 807 %r1 = vector.extract %0[4, 1] : f32 from vector<15x2xf32> 808 %r2 = vector.extract %0[5] : vector<2xf32> from vector<15x2xf32> 809 %r3 = vector.extract %1[3, 1] : vector<4x2xf32> from vector<4x2x4x2xf32> 810 %r4 = vector.extract %2[0] : vector<8x4x2xf32> from vector<1x8x4x2xf32> 811 return %r1, %r2, %r3, %r4 : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32> 812} 813 814// ----- 815 816// CHECK-LABEL: fold_extract_shapecast_0d_result 817// CHECK-SAME: %[[IN:.*]]: vector<1x1x1xf32> 818// CHECK: %[[R:.*]] = vector.extract %[[IN]][0, 0, 0] : f32 from vector<1x1x1xf32> 819// CHECK: return %[[R]] : f32 820func.func @fold_extract_shapecast_0d_result(%arg0 : vector<1x1x1xf32>) -> f32 { 821 %0 = vector.shape_cast %arg0 : vector<1x1x1xf32> to vector<f32> 822 %r = vector.extract %0[] : f32 from vector<f32> 823 return %r : f32 824} 825 826// ----- 827 828// CHECK-LABEL: fold_extract_shapecast_0d_source 829// CHECK-SAME: %[[IN:.*]]: vector<f32> 830// CHECK: %[[R:.*]] = vector.extract %[[IN]][] : f32 from vector<f32> 831// CHECK: return %[[R]] : f32 832func.func @fold_extract_shapecast_0d_source(%arg0 : vector<f32>) -> f32 { 833 %0 = vector.shape_cast %arg0 : vector<f32> to vector<1xf32> 834 %r = vector.extract %0[0] : f32 from vector<1xf32> 835 return %r : f32 836} 837 838// ----- 839 840// CHECK-LABEL: fold_extract_shapecast_negative 841// CHECK: %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32> 842// CHECK: %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32> 843// CHECK: return %[[R]] : vector<4x2xf32> 844func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> 
vector<4x2xf32> { 845 %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32> 846 %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32> 847 return %r : vector<4x2xf32> 848} 849 850// ----- 851 852// CHECK-LABEL: fold_extract_shapecast_to_shapecast 853// CHECK-SAME: (%[[ARG:.+]]: vector<3x4xf32>) 854// CHECK: %[[R:.+]] = vector.shape_cast %[[ARG]] : vector<3x4xf32> to vector<12xf32> 855// CHECK: return %[[R]] 856func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vector<12xf32> { 857 %0 = vector.shape_cast %arg0 : vector<3x4xf32> to vector<1x12xf32> 858 %r = vector.extract %0[0] : vector<12xf32> from vector<1x12xf32> 859 return %r : vector<12xf32> 860} 861 862// ----- 863 864// CHECK-LABEL: func @extract_no_fold_scalar_to_0d( 865// CHECK-SAME: %[[v:.*]]: vector<f32>) 866// CHECK: %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32> 867// CHECK: return %[[extract]] 868func.func @extract_no_fold_scalar_to_0d(%v: vector<f32>) -> f32 { 869 %0 = vector.extract %v[] : f32 from vector<f32> 870 return %0 : f32 871} 872 873// ----- 874 875// CHECK-LABEL: func @insert_fold_same_rank( 876// CHECK-SAME: %[[v:.*]]: vector<2x2xf32>) 877// CHECK: %[[CST:.+]] = arith.constant 878// CHECK-SAME: : vector<2x2xf32> 879// CHECK-NOT: vector.insert 880// CHECK: return %[[CST]] 881func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> { 882 %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32> 883 %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32> 884 return %0 : vector<2x2xf32> 885} 886 887// ----- 888 889// CHECK-LABEL: func @insert_no_fold_scalar_to_0d( 890// CHECK-SAME: %[[v:.*]]: vector<f32>) 891// CHECK: %[[insert:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector<f32> 892// CHECK: return %[[insert]] 893func.func @insert_no_fold_scalar_to_0d(%v: vector<f32>) -> vector<f32> { 894 %cst = arith.constant 0.000000e+00 : f32 895 %0 = vector.insert %cst, %v [] : f32 into 
vector<f32> 896 return %0 : vector<f32> 897} 898 899// ----- 900 901// CHECK-LABEL: dont_fold_expand_collapse 902// CHECK: %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32> 903// CHECK: %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32> 904// CHECK: return %[[B]] : vector<8x8xf32> 905func.func @dont_fold_expand_collapse(%arg0: vector<1x1x64xf32>) -> vector<8x8xf32> { 906 %0 = vector.shape_cast %arg0 : vector<1x1x64xf32> to vector<1x1x8x8xf32> 907 %1 = vector.shape_cast %0 : vector<1x1x8x8xf32> to vector<8x8xf32> 908 return %1 : vector<8x8xf32> 909} 910 911// ----- 912 913// CHECK-LABEL: func @fold_broadcast_shapecast 914// CHECK-SAME: (%[[V:.+]]: vector<4xf32>) 915// CHECK: return %[[V]] 916func.func @fold_broadcast_shapecast(%arg0: vector<4xf32>) -> vector<4xf32> { 917 %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x1x4xf32> 918 %1 = vector.shape_cast %0 : vector<1x1x4xf32> to vector<4xf32> 919 return %1 : vector<4xf32> 920} 921 922// ----- 923 924// CHECK-LABEL: func @canonicalize_broadcast_shapecast_scalar 925// CHECK: vector.broadcast 926// CHECK-NOT: vector.shape_cast 927func.func @canonicalize_broadcast_shapecast_scalar(%arg0: f32) -> vector<1xf32> { 928 %0 = vector.broadcast %arg0 : f32 to vector<1x1x1xf32> 929 %1 = vector.shape_cast %0 : vector<1x1x1xf32> to vector<1xf32> 930 return %1 : vector<1xf32> 931} 932 933// ----- 934 935// CHECK-LABEL: func @dont_fold_broadcast_shapecast_diff_shape 936// CHECK: vector.broadcast 937// CHECK: vector.shape_cast 938func.func @dont_fold_broadcast_shapecast_diff_shape(%arg0: vector<4xf32>) -> vector<8xf32> { 939 %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x2x4xf32> 940 %1 = vector.shape_cast %0 : vector<1x2x4xf32> to vector<8xf32> 941 return %1 : vector<8xf32> 942} 943 944// ----- 945 946// CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_broadcast 947// CHECK: vector.broadcast 948// CHECK-NOT: vector.shape_cast 949func.func 
@canonicalize_broadcast_shapecast_to_broadcast(%arg0: vector<3xf32>) -> vector<8x3xf32> { 950 %0 = vector.broadcast %arg0 : vector<3xf32> to vector<2x4x3xf32> 951 %1 = vector.shape_cast %0 : vector<2x4x3xf32> to vector<8x3xf32> 952 return %1 : vector<8x3xf32> 953} 954 955// ----- 956 957// CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_shapecast 958// CHECK-NOT: vector.broadcast 959// CHECK: vector.shape_cast {{.+}} : vector<3x4xf32> to vector<1x12xf32> 960func.func @canonicalize_broadcast_shapecast_to_shapecast(%arg0: vector<3x4xf32>) -> vector<1x12xf32> { 961 %0 = vector.broadcast %arg0 : vector<3x4xf32> to vector<1x1x3x4xf32> 962 %1 = vector.shape_cast %0 : vector<1x1x3x4xf32> to vector<1x12xf32> 963 return %1 : vector<1x12xf32> 964} 965 966// ----- 967 968// CHECK-LABEL: fold_vector_transfer_masks 969func.func @fold_vector_transfer_masks(%A: memref<?x?xf32>) -> (vector<4x8xf32>, vector<4x[4]xf32>) { 970 // CHECK: %[[C0:.+]] = arith.constant 0 : index 971 %c0 = arith.constant 0 : index 972 // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 973 %f0 = arith.constant 0.0 : f32 974 975 %mask = vector.constant_mask [8, 4] : vector<8x4xi1> 976 977 %arith_all_true_mask = arith.constant dense<true> : vector<4x[4]xi1> 978 979 // CHECK: vector.transfer_read %{{.*}}, %[[F0]] {permutation_map 980 %1 = vector.transfer_read %A[%c0, %c0], %f0, %mask 981 {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : memref<?x?xf32>, vector<4x8xf32> 982 983 // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {permutation_map 984 vector.transfer_write %1, %A[%c0, %c0], %mask 985 {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<4x8xf32>, memref<?x?xf32> 986 987 // CHECK: vector.transfer_read %{{.*}}, %[[F0]] : 988 %2 = vector.transfer_read %A[%c0, %c0], %f0, %arith_all_true_mask : memref<?x?xf32>, vector<4x[4]xf32> 989 990 // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] : 991 vector.transfer_write %2, %A[%c0, %c0], %arith_all_true_mask : 
vector<4x[4]xf32>, memref<?x?xf32> 992 993 // CHECK: return 994 return %1, %2 : vector<4x8xf32>, vector<4x[4]xf32> 995} 996 997// ----- 998 999// CHECK-LABEL: fold_vector_transfers 1000func.func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) { 1001 %c0 = arith.constant 0 : index 1002 %f0 = arith.constant 0.0 : f32 1003 1004 // CHECK: vector.transfer_read %{{.*}} {in_bounds = [false, true]} 1005 %1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32> 1006 1007 // CHECK: vector.transfer_write %{{.*}} {in_bounds = [false, true]} 1008 vector.transfer_write %1, %A[%c0, %c0] : vector<4x8xf32>, memref<?x8xf32> 1009 1010 // Both dims may be out-of-bounds, attribute is elided. 1011 // CHECK: vector.transfer_read %{{.*}} 1012 // CHECK-NOT: in_bounds 1013 %2 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x9xf32> 1014 1015 // Both dims may be out-of-bounds, attribute is elided. 1016 // CHECK: vector.transfer_write %{{.*}} 1017 // CHECK-NOT: in_bounds 1018 vector.transfer_write %2, %A[%c0, %c0] : vector<4x9xf32>, memref<?x8xf32> 1019 1020 // CHECK: return 1021 return %1, %2 : vector<4x8xf32>, vector<4x9xf32> 1022} 1023 1024// ----- 1025 1026// CHECK-LABEL: bitcast_folding 1027// CHECK-SAME: %[[A:.*]]: vector<4x8xf32> 1028// CHECK-SAME: %[[B:.*]]: vector<2xi32> 1029// CHECK: return %[[A]], %[[B]] : vector<4x8xf32>, vector<2xi32> 1030func.func @bitcast_folding(%I1: vector<4x8xf32>, %I2: vector<2xi32>) -> (vector<4x8xf32>, vector<2xi32>) { 1031 %0 = vector.bitcast %I1 : vector<4x8xf32> to vector<4x8xf32> 1032 %1 = vector.bitcast %I2 : vector<2xi32> to vector<4xi16> 1033 %2 = vector.bitcast %1 : vector<4xi16> to vector<2xi32> 1034 return %0, %2 : vector<4x8xf32>, vector<2xi32> 1035} 1036 1037// CHECK-LABEL: func @bitcast_f16_to_f32 1038// bit pattern: 0x40004000 1039// CHECK-DAG: %[[CST1:.+]] = arith.constant dense<2.00390625> : vector<4xf32> 1040// bit pattern: 0x00000000 1041// CHECK-DAG: %[[CST0:.+]] 
= arith.constant dense<0.000000e+00> : vector<4xf32> 1042// CHECK: return %[[CST0]], %[[CST1]] 1043func.func @bitcast_f16_to_f32() -> (vector<4xf32>, vector<4xf32>) { 1044 %cst0 = arith.constant dense<0.0> : vector<8xf16> // bit pattern: 0x0000 1045 %cst1 = arith.constant dense<2.0> : vector<8xf16> // bit pattern: 0x4000 1046 %cast0 = vector.bitcast %cst0: vector<8xf16> to vector<4xf32> 1047 %cast1 = vector.bitcast %cst1: vector<8xf16> to vector<4xf32> 1048 return %cast0, %cast1: vector<4xf32>, vector<4xf32> 1049} 1050 1051// CHECK-LABEL: func @bitcast_i8_to_i32 1052// bit pattern: 0xA0A0A0A0 1053// CHECK-DAG: %[[CST1:.+]] = arith.constant dense<-1600085856> : vector<4xi32> 1054// bit pattern: 0x00000000 1055// CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0> : vector<4xi32> 1056// CHECK: return %[[CST0]], %[[CST1]] 1057func.func @bitcast_i8_to_i32() -> (vector<4xi32>, vector<4xi32>) { 1058 %cst0 = arith.constant dense<0> : vector<16xi8> // bit pattern: 0x00 1059 %cst1 = arith.constant dense<160> : vector<16xi8> // bit pattern: 0xA0 1060 %cast0 = vector.bitcast %cst0: vector<16xi8> to vector<4xi32> 1061 %cast1 = vector.bitcast %cst1: vector<16xi8> to vector<4xi32> 1062 return %cast0, %cast1: vector<4xi32>, vector<4xi32> 1063} 1064 1065// ----- 1066 1067// CHECK-LABEL: broadcast_folding1 1068// CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4xi32> 1069// CHECK-NOT: vector.broadcast 1070// CHECK: return %[[CST]] 1071func.func @broadcast_folding1() -> vector<4xi32> { 1072 %0 = arith.constant 42 : i32 1073 %1 = vector.broadcast %0 : i32 to vector<4xi32> 1074 return %1 : vector<4xi32> 1075} 1076 1077// ----- 1078 1079// CHECK-LABEL: @broadcast_folding2 1080// CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4x16xi32> 1081// CHECK-NOT: vector.broadcast 1082// CHECK: return %[[CST]] 1083func.func @broadcast_folding2() -> vector<4x16xi32> { 1084 %0 = arith.constant 42 : i32 1085 %1 = vector.broadcast %0 : i32 to vector<16xi32> 1086 %2 = vector.broadcast %1 : 
vector<16xi32> to vector<4x16xi32> 1087 return %2 : vector<4x16xi32> 1088} 1089 1090// ----- 1091 1092// CHECK-LABEL: @fold_consecutive_broadcasts( 1093// CHECK-SAME: %[[ARG0:.*]]: i32 1094// CHECK: %[[RESULT:.*]] = vector.broadcast %[[ARG0]] : i32 to vector<4x16xi32> 1095// CHECK: return %[[RESULT]] 1096func.func @fold_consecutive_broadcasts(%a : i32) -> vector<4x16xi32> { 1097 %1 = vector.broadcast %a : i32 to vector<16xi32> 1098 %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32> 1099 return %2 : vector<4x16xi32> 1100} 1101 1102// ----- 1103 1104// CHECK-LABEL: shape_cast_constant 1105// CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<3x4x2xi32> 1106// CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<20x2xf32> 1107// CHECK: return %[[CST0]], %[[CST1]] : vector<20x2xf32>, vector<3x4x2xi32> 1108func.func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) { 1109 %cst = arith.constant dense<2.000000e+00> : vector<5x4x2xf32> 1110 %cst_1 = arith.constant dense<1> : vector<12x2xi32> 1111 %0 = vector.shape_cast %cst : vector<5x4x2xf32> to vector<20x2xf32> 1112 %1 = vector.shape_cast %cst_1 : vector<12x2xi32> to vector<3x4x2xi32> 1113 return %0, %1 : vector<20x2xf32>, vector<3x4x2xi32> 1114} 1115 1116// ----- 1117 1118// CHECK-LABEL: extract_strided_constant 1119// CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<2x13x3xi32> 1120// CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<12x2xf32> 1121// CHECK: return %[[CST0]], %[[CST1]] : vector<12x2xf32>, vector<2x13x3xi32> 1122func.func @extract_strided_constant() -> (vector<12x2xf32>, vector<2x13x3xi32>) { 1123 %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32> 1124 %cst_1 = arith.constant dense<1> : vector<4x37x9xi32> 1125 %0 = vector.extract_strided_slice %cst 1126 {offsets = [2, 3], sizes = [12, 2], strides = [1, 1]} 1127 : vector<29x7xf32> to vector<12x2xf32> 1128 %1 = vector.extract_strided_slice %cst_1 1129 {offsets = [1, 
2, 5], sizes = [2, 13, 3], strides = [1, 1, 1]} 1130 : vector<4x37x9xi32> to vector<2x13x3xi32> 1131 return %0, %1 : vector<12x2xf32>, vector<2x13x3xi32> 1132} 1133 1134// ----- 1135 1136// CHECK-LABEL: extract_strided_broadcast 1137// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} : vector<4xf16> to vector<2x4xf16> 1138// CHECK-NEXT: return %[[B]] : vector<2x4xf16> 1139func.func @extract_strided_broadcast(%arg0: vector<4xf16>) -> vector<2x4xf16> { 1140 %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16> 1141 %1 = vector.extract_strided_slice %0 1142 {offsets = [0, 0], sizes = [2, 4], strides = [1, 1]} : 1143 vector<16x4xf16> to vector<2x4xf16> 1144 return %1 : vector<2x4xf16> 1145} 1146 1147// ----- 1148 1149// CHECK-LABEL: extract_strided_broadcast2 1150// CHECK: %[[E:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0], sizes = [2], strides = [1]} : vector<4xf16> to vector<2xf16> 1151// CHECK-NEXT: %[[B:.*]] = vector.broadcast %[[E]] : vector<2xf16> to vector<2x2xf16> 1152// CHECK-NEXT: return %[[B]] : vector<2x2xf16> 1153func.func @extract_strided_broadcast2(%arg0: vector<4xf16>) -> vector<2x2xf16> { 1154 %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16> 1155 %1 = vector.extract_strided_slice %0 1156 {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : 1157 vector<16x4xf16> to vector<2x2xf16> 1158 return %1 : vector<2x2xf16> 1159} 1160 1161// ----- 1162 1163// CHECK-LABEL: func @extract_strided_broadcast3 1164// CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>) 1165// CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x4xf32> 1166// CHECK: return %[[V]] 1167func.func @extract_strided_broadcast3(%arg0: vector<1xf32>) -> vector<1x4xf32> { 1168 %0 = vector.broadcast %arg0 : vector<1xf32> to vector<1x8xf32> 1169 %1 = vector.extract_strided_slice %0 1170 {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]} 1171 : vector<1x8xf32> to vector<1x4xf32> 1172 return %1 : vector<1x4xf32> 1173} 1174 1175// ----- 1176 1177// 
CHECK-LABEL: func @extract_strided_broadcast4 1178// CHECK-SAME: (%[[ARG:.+]]: f32) 1179// CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x4xf32> 1180// CHECK: return %[[V]] 1181func.func @extract_strided_broadcast4(%arg0: f32) -> vector<1x4xf32> { 1182 %0 = vector.broadcast %arg0 : f32 to vector<1x8xf32> 1183 %1 = vector.extract_strided_slice %0 1184 {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]} 1185 : vector<1x8xf32> to vector<1x4xf32> 1186 return %1 : vector<1x4xf32> 1187} 1188 1189// ----- 1190 1191// CHECK-LABEL: consecutive_shape_cast 1192// CHECK: %[[C:.*]] = vector.shape_cast %{{.*}} : vector<16xf16> to vector<4x4xf16> 1193// CHECK-NEXT: return %[[C]] : vector<4x4xf16> 1194func.func @consecutive_shape_cast(%arg0: vector<16xf16>) -> vector<4x4xf16> { 1195 %0 = vector.shape_cast %arg0 : vector<16xf16> to vector<2x8xf16> 1196 %1 = vector.shape_cast %0 : vector<2x8xf16> to vector<4x4xf16> 1197 return %1 : vector<4x4xf16> 1198} 1199 1200// ----- 1201 1202// CHECK-LABEL: func @dead_transfer_op 1203// CHECK-NOT: vector.transfer_read 1204// CHECK-NOT: vector.transfer_write 1205// CHECK: return 1206func.func @dead_transfer_op(%arg0 : tensor<4x4xf32>, %arg1 : memref<4x4xf32>, 1207 %v0 : vector<1x4xf32>) { 1208 %c0 = arith.constant 0 : index 1209 %cf0 = arith.constant 0.0 : f32 1210 %r = vector.transfer_read %arg1[%c0, %c0], %cf0 : 1211 memref<4x4xf32>, vector<1x4xf32> 1212 %w = vector.transfer_write %v0, %arg0[%c0, %c0] : 1213 vector<1x4xf32>, tensor<4x4xf32> 1214 return 1215} 1216 1217// ----- 1218 1219// CHECK-LABEL: func @dead_load 1220// CHECK-NOT: vector.maskedload 1221// CHECK-NOT: vector.gather 1222// CHECK-NOT: vector.expandload 1223// CHECK: return 1224func.func @dead_load(%base: memref<?xf32>, %indices: vector<16xi32>, 1225 %mask: vector<16xi1>, %passthru: vector<16xf32>) { 1226 %c0 = arith.constant 0 : index 1227 %0 = vector.maskedload %base[%c0], %mask, %passthru : 1228 memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 
1229 %1 = vector.gather %base[%c0][%indices], %mask, %passthru : 1230 memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 1231 %2 = vector.expandload %base[%c0], %mask, %passthru : 1232 memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 1233 return 1234} 1235 1236// ----- 1237 1238#contraction_accesses0 = [ 1239 affine_map<(i, j, k) -> (i, k)>, 1240 affine_map<(i, j, k) -> (k, j)>, 1241 affine_map<(i, j, k) -> (i, j)> 1242] 1243#contraction_trait0 = { 1244 indexing_maps = #contraction_accesses0, 1245 iterator_types = ["parallel", "parallel", "reduction"] 1246} 1247 1248// CHECK-LABEL: func @contractions 1249// CHECK-SAME: %[[A:[0-9a-zA-Z]+]]: vector<2x3xf32> 1250// CHECK-SAME: %[[B:[0-9a-zA-Z]+]]: vector<3x4xf32> 1251// CHECK-SAME: %[[C:[0-9a-zA-Z]+]]: vector<2x4xf32> 1252// CHECK-SAME: %[[A_I8:[0-9a-zA-Z]+]]: vector<2x3xi8> 1253// CHECK-SAME: %[[B_I8:[0-9a-zA-Z]+]]: vector<3x4xi8> 1254// CHECK-SAME: %[[C_I8:[0-9a-zA-Z]+]]: vector<2x4xi8> 1255func.func @contractions(%a: vector<2x3xf32>, %b: vector<3x4xf32>, %c: vector<2x4xf32>, 1256 %a_i8: vector<2x3xi8>, %b_i8: vector<3x4xi8>, %c_i8: vector<2x4xi8>) 1257 -> (vector<2x4xf32>, vector<2x4xi8>) 1258{ 1259 // CHECK-NOT: arith.constant 1260 %vf_0 = arith.constant dense <0.0>: vector<2x4xf32> 1261 // CHECK-NOT: arith.addf 1262 // CHECK: %[[D:.*]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]] 1263 %0 = vector.contract #contraction_trait0 %a, %b, %vf_0: 1264 vector<2x3xf32>, vector<3x4xf32> into vector<2x4xf32> 1265 // CHECK-NOT: arith.addf 1266 %1 = arith.addf %0, %c: vector<2x4xf32> 1267 1268 // CHECK-NOT: arith.constant 1269 %vi8_0 = arith.constant dense <0>: vector<2x4xi8> 1270 // CHECK-NOT: arith.addi 1271 // CHECK: %[[D_I8:.*]] = vector.contract {{.*}} %[[A_I8]], %[[B_I8]], %[[C_I8]] 1272 %i8_0 = vector.contract #contraction_trait0 %a_i8, %b_i8, %vi8_0: 1273 vector<2x3xi8>, vector<3x4xi8> into vector<2x4xi8> 1274 // CHECK-NOT: arith.addi 1275 %i8_1 = arith.addi %i8_0, 
%c_i8: vector<2x4xi8> 1276 1277 // CHECK: return %[[D]], %[[D_I8]] 1278 return %1, %i8_1: vector<2x4xf32>, vector<2x4xi8> 1279} 1280 1281// ----- 1282 1283// CHECK-LABEL: func @transfer_folding_1 1284// CHECK-SAME: %[[T0:[0-9a-zA-Z]+]]: tensor<2x3x4xf32> 1285// CHECK-SAME: %[[T1:[0-9a-zA-Z]+]]: tensor<2x3x4xf32> 1286func.func @transfer_folding_1(%t0: tensor<2x3x4xf32>, %t1: tensor<2x3x4xf32>) 1287 -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>) 1288{ 1289 %c0 = arith.constant 0 : index 1290 %pad = arith.constant 0.0 : f32 1291 %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : 1292 tensor<2x3x4xf32>, vector<2x3x4xf32> 1293 1294 %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = [true, true, true]} : 1295 vector<2x3x4xf32>, tensor<2x3x4xf32> 1296 1297 %t2 = "test.constant"() { value = dense<6.0> : tensor<2x3x4xf32>} : () -> (tensor<2x3x4xf32>) 1298 %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = [true, true, true]} : 1299 vector<2x3x4xf32>, tensor<2x3x4xf32> 1300 1301 1302 // CHECK-NEXT: some_op_that_may_have_side_effects 1303 %t3 = "some_op_that_may_have_side_effects"() : () -> (tensor<2x3x4xf32>) 1304 %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = [true, true, true]} : 1305 vector<2x3x4xf32>, tensor<2x3x4xf32> 1306 1307 // CHECK-NEXT: return %[[T0]], %[[T0]], %[[T0]] 1308 return %r0, %r1, %r2: tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32> 1309} 1310 1311// ----- 1312 1313// CHECK-LABEL: func @store_after_load_tensor 1314// CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>) 1315// CHECK-NOT: vector.transfer_read 1316// CHECK-NOT: vector.transfer_write 1317// CHECK: return %[[ARG]] : tensor<4x4xf32> 1318func.func @store_after_load_tensor(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> { 1319 %c1 = arith.constant 1 : index 1320 %c0 = arith.constant 0 : index 1321 %cf0 = arith.constant 0.0 : f32 1322 %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 : 1323 tensor<4x4xf32>, 
vector<1x4xf32> 1324 %w0 = vector.transfer_write %0, %arg0[%c1, %c0] : 1325 vector<1x4xf32>, tensor<4x4xf32> 1326 return %w0 : tensor<4x4xf32> 1327} 1328 1329// ----- 1330 1331// CHECK-LABEL: func @store_after_load_tensor_negative 1332// CHECK: vector.transfer_read 1333// CHECK: vector.transfer_write 1334// CHECK: return 1335func.func @store_after_load_tensor_negative(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> { 1336 %c1 = arith.constant 1 : index 1337 %c0 = arith.constant 0 : index 1338 %cf0 = arith.constant 0.0 : f32 1339 %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 : 1340 tensor<4x4xf32>, vector<1x4xf32> 1341 %w0 = vector.transfer_write %0, %arg0[%c0, %c0] : 1342 vector<1x4xf32>, tensor<4x4xf32> 1343 return %w0 : tensor<4x4xf32> 1344} 1345 1346// ----- 1347 1348// CHECK-LABEL: func @store_to_load_tensor 1349// CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<1x4xf32>, %[[V1:.*]]: vector<1x4xf32>) 1350// CHECK-NOT: vector.transfer_write 1351// CHECK-NOT: vector.transfer_read 1352// CHECK: return %[[V0]] : vector<1x4xf32> 1353func.func @store_to_load_tensor(%arg0 : tensor<4x4xf32>, 1354 %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>) -> vector<1x4xf32> { 1355 %c1 = arith.constant 1 : index 1356 %c2 = arith.constant 2 : index 1357 %c0 = arith.constant 0 : index 1358 %cf0 = arith.constant 0.0 : f32 1359 %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} : 1360 vector<1x4xf32>, tensor<4x4xf32> 1361 %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = [true, true]} : 1362 vector<1x4xf32>, tensor<4x4xf32> 1363 %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} : 1364 tensor<4x4xf32>, vector<1x4xf32> 1365 return %0 : vector<1x4xf32> 1366} 1367 1368// ----- 1369 1370// CHECK-LABEL: func @store_to_load_negative_tensor 1371// CHECK: vector.transfer_write 1372// CHECK: vector.transfer_write 1373// CHECK: %[[V:.*]] = vector.transfer_read 1374// CHECK: return %[[V]] : vector<1x4xf32> 1375func.func 
@store_to_load_negative_tensor(%arg0 : tensor<4x4xf32>, 1376 %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> { 1377 %c1 = arith.constant 1 : index 1378 %c2 = arith.constant 2 : index 1379 %c0 = arith.constant 0 : index 1380 %cf0 = arith.constant 0.0 : f32 1381 %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} : 1382 vector<1x4xf32>, tensor<4x4xf32> 1383 %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = [true, true]} : 1384 vector<1x4xf32>, tensor<4x4xf32> 1385 %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} : 1386 tensor<4x4xf32>, vector<1x4xf32> 1387 return %0 : vector<1x4xf32> 1388} 1389 1390// ----- 1391 1392// CHECK-LABEL: func @store_to_load_tensor_broadcast 1393// CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<4x2xf32>) 1394// CHECK: %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x2xf32> to vector<6x4x2xf32> 1395// CHECK: %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<6x4x2xf32> to vector<4x2x6xf32> 1396// CHECK: return %[[T]] : vector<4x2x6xf32> 1397func.func @store_to_load_tensor_broadcast(%arg0 : tensor<4x4xf32>, 1398 %v0 : vector<4x2xf32>) -> vector<4x2x6xf32> { 1399 %c0 = arith.constant 0 : index 1400 %cf0 = arith.constant 0.0 : f32 1401 %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = [true, true]} : 1402 vector<4x2xf32>, tensor<4x4xf32> 1403 %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = [true, true, true], 1404 permutation_map = affine_map<(d0, d1) -> (d0, d1, 0)>} : 1405 tensor<4x4xf32>, vector<4x2x6xf32> 1406 return %0 : vector<4x2x6xf32> 1407} 1408 1409// ----- 1410 1411// CHECK-LABEL: func @store_to_load_tensor_broadcast_scalable 1412// CHECK-SAME: (%[[ARG:.*]]: tensor<?xf32>, %[[V0:.*]]: vector<[4]xf32>) 1413// CHECK: %[[B:.*]] = vector.broadcast %[[V0]] : vector<[4]xf32> to vector<6x[4]xf32> 1414// CHECK: return %[[B]] : vector<6x[4]xf32> 1415func.func @store_to_load_tensor_broadcast_scalable(%arg0 : 
tensor<?xf32>, 1416 %v0 : vector<[4]xf32>) -> vector<6x[4]xf32> { 1417 %c0 = arith.constant 0 : index 1418 %cf0 = arith.constant 0.0 : f32 1419 %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = [true]} : 1420 vector<[4]xf32>, tensor<?xf32> 1421 %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = [true, true], 1422 permutation_map = affine_map<(d0) -> (0, d0)>} : 1423 tensor<?xf32>, vector<6x[4]xf32> 1424 return %0 : vector<6x[4]xf32> 1425} 1426 1427// ----- 1428 1429// CHECK-LABEL: func @store_to_load_tensor_perm_broadcast 1430// CHECK-SAME: (%[[ARG:.*]]: tensor<4x4x4xf32>, %[[V0:.*]]: vector<4x1xf32>) 1431// CHECK: %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x1xf32> to vector<100x5x4x1xf32> 1432// CHECK: %[[T:.*]] = vector.transpose %[[B]], [3, 0, 2, 1] : vector<100x5x4x1xf32> to vector<1x100x4x5xf32> 1433// CHECK: return %[[T]] : vector<1x100x4x5xf32> 1434func.func @store_to_load_tensor_perm_broadcast(%arg0 : tensor<4x4x4xf32>, 1435 %v0 : vector<4x1xf32>) -> vector<1x100x4x5xf32> { 1436 %c0 = arith.constant 0 : index 1437 %cf0 = arith.constant 0.0 : f32 1438 %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = [true, true], 1439 permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>} : 1440 vector<4x1xf32>, tensor<4x4x4xf32> 1441 %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = [true, true, true, true], 1442 permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} : 1443 tensor<4x4x4xf32>, vector<1x100x4x5xf32> 1444 return %0 : vector<1x100x4x5xf32> 1445} 1446 1447// ----- 1448 1449 1450// CHECK-LABEL: func @dead_store_tensor 1451// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 1452// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index 1453// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index 1454// CHECK-NOT: vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]] 1455// CHECK: vector.transfer_write {{.*}}, {{.*}}[%[[C2]], %[[C0]] 1456// CHECK: %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]] 1457// 
CHECK: return %[[VTW]] : tensor<4x4xf32> 1458func.func @dead_store_tensor(%arg0 : tensor<4x4xf32>, 1459 %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> { 1460 %c1 = arith.constant 1 : index 1461 %c2 = arith.constant 2 : index 1462 %c0 = arith.constant 0 : index 1463 %cf0 = arith.constant 0.0 : f32 1464 %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} : 1465 vector<1x4xf32>, tensor<4x4xf32> 1466 %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} : 1467 vector<1x4xf32>, tensor<4x4xf32> 1468 %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = [true, true]} : 1469 vector<1x4xf32>, tensor<4x4xf32> 1470 return %w2 : tensor<4x4xf32> 1471} 1472 1473// ----- 1474 1475// CHECK-LABEL: func @dead_store_tensor_negative 1476// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 1477// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index 1478// CHECK: vector.transfer_write 1479// CHECK: vector.transfer_write 1480// CHECK: vector.transfer_read 1481// CHECK: %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]] 1482// CHECK: return %[[VTW]] : tensor<4x4xf32> 1483func.func @dead_store_tensor_negative(%arg0 : tensor<4x4xf32>, 1484 %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> { 1485 %c1 = arith.constant 1 : index 1486 %c2 = arith.constant 2 : index 1487 %c0 = arith.constant 0 : index 1488 %cf0 = arith.constant 0.0 : f32 1489 %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} : 1490 vector<1x4xf32>, tensor<4x4xf32> 1491 %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} : 1492 vector<1x4xf32>, tensor<4x4xf32> 1493 %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = [true, true]} : 1494 tensor<4x4xf32>, vector<1x4xf32> 1495 %x = arith.addf %0, %0 : vector<1x4xf32> 1496 %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = [true, true]} : 1497 vector<1x4xf32>, tensor<4x4xf32> 1498 return %w2 : 
tensor<4x4xf32> 1499} 1500 1501// ----- 1502 1503// CHECK: #[[$MAP:[0-9a-z]+]] = affine_map<(d0, d1) -> (d1, d0)> 1504 1505// CHECK-LABEL: func @swap_extract_slice_transfer_write 1506// CHECK-SAME: %[[VEC:.*]]: vector<8x4xf32> 1507// CHECK-SAME: %[[INIT_TENSOR:.*]]: tensor<4x8xf32>, 1508// CHECK-SAME: %[[ITER_ARG:.*]]: tensor<64x64xf32>, 1509// CHECK-SAME: %[[IV:.*]]: index, %[[SZ:.*]]: index) 1510func.func @swap_extract_slice_transfer_write(%arg0 : vector<8x4xf32>, 1511 %arg1 : tensor<4x8xf32>, 1512 %arg2 : tensor<64x64xf32>, 1513 %iv : index, %sz : index) -> tensor<64x64xf32> { 1514 // CHECK: %[[C0:.*]] = arith.constant 0 : index 1515 %c0 = arith.constant 0 : index 1516 1517 // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ITER_ARG]] 1518 // CHECK-SAME: [%[[IV]], 16] [%[[SZ]], 8] 1519 // CHECK: %[[T1:.*]] = vector.transfer_write %[[VEC]] 1520 // CHECK-SAME: %[[T0]][%[[C0]], %[[C0]]] 1521 // CHECK-SAME: in_bounds = [true, false] 1522 // CHECK-SAME: permutation_map = #[[$MAP]] 1523 // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1]] into %[[ITER_ARG]] 1524 // CHECK-SAME: [%[[IV]], 16] [%[[SZ]], 8] 1525 %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32> 1526 %1 = tensor.extract_slice %0[0, 0] [%sz, 8] [1, 1] : tensor<4x8xf32> to tensor<?x8xf32> 1527 %2 = tensor.insert_slice %1 into %arg2[%iv, 16] [%sz, 8] [1, 1] : tensor<?x8xf32> into tensor<64x64xf32> 1528 1529 // CHECK: return %[[T2]] 1530 func.return %2 : tensor<64x64xf32> 1531} 1532 1533// ----- 1534 1535// CHECK-LABEL: func @do_not_swap_extract_slice_transfer_write 1536// CHECK-SAME: %[[VEC:.*]]: vector<8xf32>, 1537// CHECK-SAME: %[[VEC_SMALL:.*]]: vector<4xf32>, 1538// CHECK-SAME: %[[INIT_TENSOR:.*]]: tensor<8xf32>, 1539// CHECK-SAME: %[[ITER_ARG:.*]]: tensor<64xf32>, 1540// CHECK-SAME: %[[IV:.*]]: index, %[[SZ:.*]]: index) 1541func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>, 
%arg1 : vector<4xf32>,
                                                    %arg2 : tensor<8xf32>,
                                                    %arg3 : tensor<64xf32>,
                                                    %iv : index, %sz : index) -> (tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) {
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  // Don't swap if the extracted and inserted slices do not match.
  // (The extract uses size %iv while the insert uses size %sz.)
  // CHECK: %[[T0:.*]] = vector.transfer_write %[[VEC]]
  // CHECK: %[[T1:.*]] = tensor.extract_slice %[[T0]]
  // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1]]
  %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
  %1 = tensor.extract_slice %0[0] [%iv] [1] : tensor<8xf32> to tensor<?xf32>
  %2 = tensor.insert_slice %1 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>

  // Don't swap if the TransferWriteOp takes a small vector.
  // (A vector<4xf32> is written into a tensor<8xf32>.)
  // CHECK: %[[T3:.*]] = vector.transfer_write %[[VEC_SMALL]]
  // CHECK: %[[T4:.*]] = tensor.extract_slice %[[T3]]
  // CHECK: %[[T5:.*]] = tensor.insert_slice %[[T4]]
  %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<8xf32>
  %4 = tensor.extract_slice %3[0] [%sz] [1] : tensor<8xf32> to tensor<?xf32>
  %5 = tensor.insert_slice %4 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>

  // Don't swap if one of the operations is rank-reducing.
// CHECK: %[[T6:.*]] = vector.transfer_write %[[VEC]]
// CHECK: %[[T7:.*]] = tensor.extract_slice %[[T6]]
// CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]]
  %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
  %7 = tensor.extract_slice %6[0] [1] [1] : tensor<8xf32> to tensor<f32>
  %8 = tensor.insert_slice %7 into %arg3[%iv] [1] [1] : tensor<f32> into tensor<64xf32>

  // CHECK: return %[[T2]], %[[T5]], %[[T8]]
  func.return %2, %5, %8 : tensor<64xf32>, tensor<64xf32>, tensor<64xf32>
}

// -----

// A multi_reduction with an empty list of reduction dimensions is expected to
// be replaced by its source operand.
// CHECK-LABEL: func @vector_multi_reduction_single_parallel(
// CHECK-SAME: %[[v:.*]]: vector<2xf32>,
func.func @vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>) -> vector<2xf32> {
  %0 = vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32>

// CHECK: return %[[v]] : vector<2xf32>
  return %0 : vector<2xf32>
}

// -----

// Same as above, with the multi_reduction wrapped in vector.mask.
// CHECK-LABEL: func @masked_vector_multi_reduction_single_parallel(
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %{{.*}}: vector<2xf32>,
func.func @masked_vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>, %mask: vector<2xi1>) -> vector<2xf32> {
  %0 = vector.mask %mask { vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32> } : vector<2xi1> -> vector<2xf32>
// CHECK: return %[[VAL_0]] : vector<2xf32>
  return %0 : vector<2xf32>
}

// -----

// Reducing only over unit dimensions (1 and 3) is expected to become a
// shape_cast of the source combined with the accumulator via arith.mulf.
// CHECK-LABEL: func @vector_multi_reduction_unit_dimensions(
// CHECK-SAME: %[[SOURCE:.+]]: vector<5x1x4x1x20xf32>, %[[ACC:.+]]: vector<5x4x20xf32>
func.func @vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x4x20xf32>) -> vector<5x4x20xf32> {
// CHECK: %[[CAST:.+]] = vector.shape_cast %[[SOURCE]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
// CHECK: %[[RESULT:.+]] = arith.mulf %[[ACC]], %[[CAST]] : vector<5x4x20xf32>
  %0 = vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>

// CHECK: return %[[RESULT]] : vector<5x4x20xf32>
  return %0 : vector<5x4x20xf32>
}

// -----
// Masked <add> reduction over a trailing unit dimension of a vector with a
// scalable dimension: expects shape_casts of mask and source, arith.addf with
// the accumulator, and an arith.select on the cast mask.
// CHECK-LABEL: func.func @vector_multi_reduction_scalable(
// CHECK-SAME: %[[VAL_0:.*]]: vector<1x[4]x1xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: vector<1x[4]xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: vector<1x[4]x1xi1>)
func.func @vector_multi_reduction_scalable(%source: vector<1x[4]x1xf32>,
                                           %acc: vector<1x[4]xf32>,
                                           %mask: vector<1x[4]x1xi1>) -> vector<1x[4]xf32> {
// CHECK: %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<1x[4]x1xi1> to vector<1x[4]xi1>
// CHECK: %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x[4]x1xf32> to vector<1x[4]xf32>
// CHECK: %[[VAL_5:.*]] = arith.addf %[[VAL_1]], %[[VAL_4]] : vector<1x[4]xf32>
// CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<1x[4]xi1>, vector<1x[4]xf32>
  %0 = vector.mask %mask { vector.multi_reduction <add>, %source, %acc [2] : vector<1x[4]x1xf32> to vector<1x[4]xf32> } :
          vector<1x[4]x1xi1> -> vector<1x[4]xf32>

  return %0 : vector<1x[4]xf32>
}

// -----

// Masked <mul> reduction over unit dimensions only: expects shape_casts of
// mask and source, arith.mulf with the accumulator, and an arith.select.
// CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions
// CHECK-SAME: %[[VAL_0:.*]]: vector<5x1x4x1x20xf32>, %[[VAL_1:.*]]: vector<5x4x20xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: vector<5x1x4x1x20xi1>)
func.func @masked_vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>,
                                                         %acc: vector<5x4x20xf32>,
                                                         %mask: vector<5x1x4x1x20xi1>) -> vector<5x4x20xf32> {
// CHECK: %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<5x1x4x1x20xi1> to vector<5x4x20xi1>
// CHECK: %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
// CHECK: %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : vector<5x4x20xf32>
// CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<5x4x20xi1>, vector<5x4x20xf32>
  %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32> } :
          vector<5x1x4x1x20xi1> -> vector<5x4x20xf32>
  return %0 : vector<5x4x20xf32>
}

// -----

// Negative case: reduction dimension 2 has size 4, and the multi_reduction is
// expected to remain unchanged.
// CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_fail(
// CHECK-SAME: %[[SRC:.+]]: vector<5x1x4x1x20xf32>, %[[ACCUM:.+]]: vector<5x1x20xf32>
func.func @vector_multi_reduction_unit_dimensions_fail(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x1x20xf32>) -> vector<5x1x20xf32> {
// CHECK: %[[RES:.+]] = vector.multi_reduction <mul>, %[[SRC]], %[[ACCUM]] [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
  %0 = vector.multi_reduction <mul>, %source, %acc [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>

// CHECK: return %[[RES]] : vector<5x1x20xf32>
  return %0 : vector<5x1x20xf32>
}

// -----

// All dimensions are unit and the result is a scalar: expects a
// vector.extract of the single element followed by arith.mulf.
// CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_single_elem(
// CHECK-SAME: %[[SOURCE:.+]]: vector<1x1x1xf32>, %[[ACC:.+]]: f32
func.func @vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32) -> f32 {
// CHECK: %[[CAST:.+]] = vector.extract %[[SOURCE]][0, 0, 0] : f32 from vector<1x1x1xf32>
// CHECK: %[[RESULT:.+]] = arith.mulf %[[ACC]], %[[CAST]] : f32
  %0 = vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32

// CHECK: return %[[RESULT]] : f32
  return %0 : f32
}

// -----

// Masked variant of the single-element case; the mask element is expected to
// be extracted as well and used in an arith.select.
// CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions_single_elem(
// CHECK-SAME: %[[VAL_0:.*]]: vector<1x1x1xf32>, %[[VAL_1:.*]]: f32,
// CHECK-SAME: %[[VAL_2:.*]]: vector<1x1x1xi1>)
func.func @masked_vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32, %mask:
vector<1x1x1xi1>) -> f32 {
  // CHECK: %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0, 0, 0] : i1 from vector<1x1x1xi1>
  // CHECK: %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0, 0, 0] : f32 from vector<1x1x1xf32>
  // CHECK: %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : f32
  // CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : f32
  %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32 } : vector<1x1x1xi1> -> f32
  return %0 : f32
}

// -----

// Inserting a slice that has the same shape as the destination at offset
// [0, 0] is expected to fold to the inserted source.
// CHECK-LABEL: func @insert_strided_slice_full_range
// CHECK-SAME: %[[SOURCE:.+]]: vector<16x16xf16>, %{{.+}}: vector<16x16xf16>
func.func @insert_strided_slice_full_range(%source: vector<16x16xf16>, %dest: vector<16x16xf16>) -> vector<16x16xf16> {
  %0 = vector.insert_strided_slice %source, %dest {offsets = [0, 0], strides = [1, 1]} : vector<16x16xf16> into vector<16x16xf16>
  // CHECK: return %[[SOURCE]]
  return %0: vector<16x16xf16>
}

// -----

// extract_strided_slice of a splat is expected to become a splat of the
// smaller result type.
// CHECK-LABEL: extract_strided_splat
// CHECK: %[[B:.*]] = vector.splat %{{.*}} : vector<2x4xf16>
// CHECK-NEXT: return %[[B]] : vector<2x4xf16>
func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> {
  %0 = vector.splat %arg0 : vector<16x4xf16>
  %1 = vector.extract_strided_slice %0
    {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} :
    vector<16x4xf16> to vector<2x4xf16>
  return %1 : vector<2x4xf16>
}

// -----

// Inserting a vector<4xf32> at [0, 0] of a vector<1x1x4xf32> is expected to
// become a vector.broadcast; the vector.extract is kept as is.
// CHECK-LABEL: func @insert_extract_to_broadcast
// CHECK-SAME: (%[[ARG0:.*]]: vector<1x1x4xf32>, %[[ARG1:.*]]: vector<4xf32>)
// CHECK: %[[V0:.*]] = vector.extract %[[ARG0]][0, 0] : vector<4xf32> from vector<1x1x4xf32>
// CHECK: %[[V1:.*]] = vector.broadcast %[[ARG1]] : vector<4xf32> to vector<1x1x4xf32>
// CHECK: return %[[V0]], %[[V1]] : vector<4xf32>, vector<1x1x4xf32>
func.func @insert_extract_to_broadcast(%arg0 : vector<1x1x4xf32>,
%arg1 : vector<4xf32>) -> (vector<4xf32>, vector<1x1x4xf32>) {
  %0 = vector.extract %arg0[0, 0] : vector<4xf32> from vector<1x1x4xf32>
  %1 = vector.insert %arg1, %arg0 [0, 0] : vector<4xf32> into vector<1x1x4xf32>
  return %0, %1 : vector<4xf32>, vector<1x1x4xf32>
}

// -----

// Extracting from splat constants is expected to fold to constants of the
// extracted type.
// CHECK-LABEL: func.func @extract_splat_constant
// CHECK-DAG: %[[CST1:.*]] = arith.constant 1 : i32
// CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<7xf32>
// CHECK-NEXT: return %[[CST0]], %[[CST1]] : vector<7xf32>, i32
func.func @extract_splat_constant() -> (vector<7xf32>, i32) {
  %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
  %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
  %0 = vector.extract %cst[2] : vector<7xf32> from vector<29x7xf32>
  %1 = vector.extract %cst_1[1, 4, 5] : i32 from vector<4x37x9xi32>
  return %0, %1 : vector<7xf32>, i32
}

// -----

// Extracting single elements from dense 1-D constants (i32, index, f32) is
// expected to fold to scalar constants.
// CHECK-LABEL: func.func @extract_1d_constant
// CHECK-DAG: %[[I32CST:.*]] = arith.constant 3 : i32
// CHECK-DAG: %[[IDXCST:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[F32CST:.*]] = arith.constant 2.000000e+00 : f32
// CHECK-NEXT: return %[[I32CST]], %[[IDXCST]], %[[F32CST]] : i32, index, f32
func.func @extract_1d_constant() -> (i32, index, f32) {
  %icst = arith.constant dense<[1, 2, 3, 4]> : vector<4xi32>
  %e = vector.extract %icst[2] : i32 from vector<4xi32>
  %idx_cst = arith.constant dense<[0, 1, 2]> : vector<3xindex>
  %f = vector.extract %idx_cst[1] : index from vector<3xindex>
  %fcst = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : vector<3xf32>
  %g = vector.extract %fcst[0] : f32 from vector<3xf32>
  return %e, %f, %g : i32, index, f32
}

// -----

// Scalar extracts from a dense 2-D constant are expected to fold to scalar
// constants.
// CHECK-LABEL: func.func @extract_2d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[BCST:.*]] = arith.constant 2 : i32
// CHECK-DAG: %[[CCST:.*]] = arith.constant 3 : i32
// CHECK-DAG: %[[DCST:.*]] = arith.constant 5 : i32
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
func.func @extract_2d_constant() -> (i32, i32, i32, i32) {
  %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
  %a = vector.extract %cst[0, 0] : i32 from vector<2x3xi32>
  %b = vector.extract %cst[0, 2] : i32 from vector<2x3xi32>
  %c = vector.extract %cst[1, 0] : i32 from vector<2x3xi32>
  %d = vector.extract %cst[1, 2] : i32 from vector<2x3xi32>
  return %a, %b, %c, %d : i32, i32, i32, i32
}

// -----

// Row extracts from a dense 2-D constant are expected to fold to dense 1-D
// constants.
// CHECK-LABEL: func.func @extract_vector_2d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[3, 4, 5]> : vector<3xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]] : vector<3xi32>, vector<3xi32>
func.func @extract_vector_2d_constant() -> (vector<3xi32>, vector<3xi32>) {
  %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
  %a = vector.extract %cst[0] : vector<3xi32> from vector<2x3xi32>
  %b = vector.extract %cst[1] : vector<3xi32> from vector<2x3xi32>
  return %a, %b : vector<3xi32>, vector<3xi32>
}

// -----

// Scalar extracts from a dense 3-D constant are expected to fold to scalar
// constants.
// CHECK-LABEL: func.func @extract_3d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[BCST:.*]] = arith.constant 1 : i32
// CHECK-DAG: %[[CCST:.*]] = arith.constant 9 : i32
// CHECK-DAG: %[[DCST:.*]] = arith.constant 10 : i32
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
func.func @extract_3d_constant() -> (i32, i32, i32, i32) {
  %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
  %a = vector.extract %cst[0, 0, 0] : i32 from vector<2x3x2xi32>
  %b = vector.extract %cst[0, 0, 1] : i32 from
vector<2x3x2xi32>
  %c = vector.extract %cst[1, 1, 1] : i32 from vector<2x3x2xi32>
  %d = vector.extract %cst[1, 2, 0] : i32 from vector<2x3x2xi32>
  return %a, %b, %c, %d : i32, i32, i32, i32
}

// -----

// Sub-vector extracts (2-D and 1-D) from a dense 3-D constant are expected to
// fold to dense constants.
// CHECK-LABEL: func.func @extract_vector_3d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[6, 7\], \[8, 9\], \[10, 11\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[8, 9]> : vector<2xi32>
// CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[10, 11]> : vector<2xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
func.func @extract_vector_3d_constant() -> (vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>) {
  %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
  %a = vector.extract %cst[0] : vector<3x2xi32> from vector<2x3x2xi32>
  %b = vector.extract %cst[1] : vector<3x2xi32> from vector<2x3x2xi32>
  %c = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
  %d = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
  return %a, %b, %c, %d : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
}

// -----

// Extracted rows whose elements are all equal are expected to fold to splat
// constants.
// CHECK-LABEL: func.func @extract_splat_vector_3d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<4> : vector<2xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<5> : vector<2xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
func.func @extract_splat_vector_3d_constant() -> (vector<2xi32>, vector<2xi32>, vector<2xi32>) {
  %cst = arith.constant dense<[[[0, 0], [1, 1], [2, 2]], [[3,
3], [4, 4], [5, 5]]]> : vector<2x3x2xi32>
  %a = vector.extract %cst[0, 0] : vector<2xi32> from vector<2x3x2xi32>
  %b = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
  %c = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
  return %a, %b, %c : vector<2xi32>, vector<2xi32>, vector<2xi32>
}

// -----

// extract_strided_slice of a dense 1-D constant is expected to fold to a
// constant holding the selected elements.
// CHECK-LABEL: func.func @extract_strided_slice_1d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[1, 2]> : vector<2xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<2> : vector<1xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<2xi32>, vector<1xi32>
func.func @extract_strided_slice_1d_constant() -> (vector<3xi32>, vector<2xi32>, vector<1xi32>) {
  %cst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
  %a = vector.extract_strided_slice %cst
    {offsets = [0], sizes = [3], strides = [1]} : vector<3xi32> to vector<3xi32>
  %b = vector.extract_strided_slice %cst
    {offsets = [1], sizes = [2], strides = [1]} : vector<3xi32> to vector<2xi32>
  %c = vector.extract_strided_slice %cst
    {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
  return %a, %b, %c : vector<3xi32>, vector<2xi32>, vector<1xi32>
}

// -----

// extract_strided_slice of a dense 2-D constant is expected to fold to a
// constant holding the selected sub-matrix.
// CHECK-LABEL: func.func @extract_strided_slice_2d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<1x1xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[4, 5\]\]}}> : vector<1x2xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[1, 2\], \[4, 5\]\]}}> : vector<2x2xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
func.func @extract_strided_slice_2d_constant() -> (vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>) {
  %cst = arith.constant dense<[[0, 1, 2],
[3, 4, 5]]> : vector<2x3xi32>
  %a = vector.extract_strided_slice %cst
    {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]} : vector<2x3xi32> to vector<1x1xi32>
  %b = vector.extract_strided_slice %cst
    {offsets = [1, 1], sizes = [1, 2], strides = [1, 1]} : vector<2x3xi32> to vector<1x2xi32>
  %c = vector.extract_strided_slice %cst
    {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]} : vector<2x3xi32> to vector<2x2xi32>
  return %a, %b, %c : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
}

// -----

// extract_strided_slice of a dense 3-D constant, with offsets given for one,
// two or all three dimensions, is expected to fold to a constant.
// CHECK-LABEL: func.func @extract_strided_slice_3d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[\[8, 9\], \[10, 11\]\]\]}}> : vector<1x2x2xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[\[2, 3\]\]\]}}> : vector<1x1x2xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[\[6, 7\]\], \[\[10, 11\]\]\]}}> : vector<2x1x2xi32>
// CHECK-DAG: %[[DCST:.*]] = arith.constant dense<11> : vector<1x1x1xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
func.func @extract_strided_slice_3d_constant() -> (vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>) {
  %cst = arith.constant dense<[[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]> : vector<3x2x2xi32>
  %a = vector.extract_strided_slice %cst
    {offsets = [2], sizes = [1], strides = [1]} : vector<3x2x2xi32> to vector<1x2x2xi32>
  %b = vector.extract_strided_slice %cst
    {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]} : vector<3x2x2xi32> to vector<1x1x2xi32>
  %c = vector.extract_strided_slice %cst
    {offsets = [1, 1, 0], sizes = [2, 1, 2], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<2x1x2xi32>
  %d = vector.extract_strided_slice %cst
    {offsets = [2, 1, 1], sizes = [1, 1, 1], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<1x1x1xi32>
  return %a, %b, %c, %d : vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>,
vector<1x1x1xi32>
}

// -----

// An extract at [2, 4] from a strided slice taken at offsets [7, 3] is
// expected to become a single extract at [9, 7] from the original vector.
// CHECK-LABEL: extract_extract_strided
// CHECK-SAME: %[[A:.*]]: vector<32x16x4xf16>
// CHECK: %[[V:.*]] = vector.extract %[[A]][9, 7] : vector<4xf16> from vector<32x16x4xf16>
// CHECK: return %[[V]] : vector<4xf16>
func.func @extract_extract_strided(%arg0: vector<32x16x4xf16>) -> vector<4xf16> {
  %1 = vector.extract_strided_slice %arg0
    {offsets = [7, 3], sizes = [10, 8], strides = [1, 1]} :
    vector<32x16x4xf16> to vector<10x8x4xf16>
  %2 = vector.extract %1[2, 4] : vector<4xf16> from vector<10x8x4xf16>
  return %2 : vector<4xf16>
}

// -----

// An extract at [2, 4] of a vector into which %a was inserted at offsets
// [2, 2] is expected to become an extract at [0, 2] from %a.
// CHECK-LABEL: extract_insert_strided
// CHECK-SAME: %[[A:.*]]: vector<6x4xf32>
// CHECK: %[[V:.*]] = vector.extract %[[A]][0, 2] : f32 from vector<6x4xf32>
// CHECK: return %[[V]] : f32
func.func @extract_insert_strided(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
  -> f32 {
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
    : vector<6x4xf32> into vector<8x16xf32>
  %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
  return %2 : f32
}

// -----

// Same as above with a 1-D source inserted into a 2-D destination; the
// extract is expected to read element [2] of %a.
// CHECK-LABEL: extract_insert_rank_reduce
// CHECK-SAME: %[[A:.*]]: vector<4xf32>
// CHECK: %[[V:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
// CHECK: return %[[V]] : f32
func.func @extract_insert_rank_reduce(%a: vector<4xf32>, %b: vector<8x16xf32>)
  -> f32 {
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1]}
    : vector<4xf32> into vector<8x16xf32>
  %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
  return %2 : f32
}

// -----

// Negative case: both ops are expected to remain. The inserted slice is 15
// elements wide while the extracted row is 16 elements wide.
// CHECK-LABEL: extract_insert_negative
// CHECK: vector.insert_strided_slice
// CHECK: vector.extract
func.func @extract_insert_negative(%a: vector<2x15xf32>, %b: vector<12x8x16xf32>)
  -> vector<16xf32> {
  %0 = vector.insert_strided_slice %a, %b
{offsets = [4, 2, 0], strides = [1, 1]}
    : vector<2x15xf32> into vector<12x8x16xf32>
  %2 = vector.extract %0[4, 2] : vector<16xf32> from vector<12x8x16xf32>
  return %2 : vector<16xf32>
}

// -----

// With two chained insert_strided_slice ops, the extract at [4, 2] is
// expected to read row [0] of %c, the slice inserted at offsets [4, 2, 0].
// CHECK-LABEL: extract_insert_chain
// CHECK-SAME: (%[[A:.*]]: vector<2x16xf32>, %[[B:.*]]: vector<12x8x16xf32>, %[[C:.*]]: vector<2x16xf32>)
// CHECK: %[[V:.*]] = vector.extract %[[C]][0] : vector<16xf32> from vector<2x16xf32>
// CHECK: return %[[V]] : vector<16xf32>
func.func @extract_insert_chain(%a: vector<2x16xf32>, %b: vector<12x8x16xf32>, %c: vector<2x16xf32>)
  -> vector<16xf32> {
  %0 = vector.insert_strided_slice %c, %b {offsets = [4, 2, 0], strides = [1, 1]}
    : vector<2x16xf32> into vector<12x8x16xf32>
  %1 = vector.insert_strided_slice %a, %0 {offsets = [0, 2, 0], strides = [1, 1]}
    : vector<2x16xf32> into vector<12x8x16xf32>
  %2 = vector.extract %1[4, 2] : vector<16xf32> from vector<12x8x16xf32>
  return %2 : vector<16xf32>
}

// -----

// A chain of extracts where the first one uses a dynamic index is expected to
// stay as two separate extracts.
// CHECK-LABEL: extract_from_extract_chain_should_not_fold_dynamic_extracts
// CHECK-SAME: (%[[VEC:.*]]: vector<2x4xf32>, %[[IDX:.*]]: index)
// CHECK: %[[A:.*]] = vector.extract %[[VEC]][%[[IDX]]] : vector<4xf32> from vector<2x4xf32>
// CHECK: %[[B:.*]] = vector.extract %[[A]][1] : f32 from vector<4xf32>
func.func @extract_from_extract_chain_should_not_fold_dynamic_extracts(%v: vector<2x4xf32>, %index: index) -> f32 {
  %0 = vector.extract %v[%index] : vector<4xf32> from vector<2x4xf32>
  %1 = vector.extract %0[1] : f32 from vector<4xf32>
  return %1 : f32
}

// -----

// An extract at [0] from a strided slice taken at offsets [1, 0] is expected
// to become an extract at [1] from the original vector.
// CHECK-LABEL: extract_extract_strided2
// CHECK-SAME: %[[A:.*]]: vector<2x4xf32>
// CHECK: %[[V:.*]] = vector.extract %[[A]][1] : vector<4xf32> from vector<2x4xf32>
// CHECK: return %[[V]] : vector<4xf32>
func.func @extract_extract_strided2(%A: vector<2x4xf32>)
  -> (vector<4xf32>) {
  %0 =
vector.extract_strided_slice %A {offsets = [1, 0], sizes = [1, 4], strides = [1, 1]} : vector<2x4xf32> to vector<1x4xf32>
  %1 = vector.extract %0[0] : vector<4xf32> from vector<1x4xf32>
  return %1 : vector<4xf32>
}

// -----

// A splat of a constant scalar is expected to fold to a dense splat constant.
// CHECK-LABEL: func @splat_fold
func.func @splat_fold() -> vector<4xf32> {
  %c = arith.constant 1.0 : f32
  %v = vector.splat %c : vector<4xf32>
  return %v : vector<4xf32>

  // CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32>
  // CHECK-NEXT: return [[V]] : vector<4xf32>
}

// -----

// A shuffle of two constant vectors is expected to fold to a constant.
// CHECK-LABEL: func @shuffle_1d
// CHECK: %[[V:.+]] = arith.constant dense<[3, 2, 5, 1]> : vector<4xi32>
// CHECK: return %[[V]]
func.func @shuffle_1d() -> vector<4xi32> {
  %v0 = arith.constant dense<[0, 1, 2]> : vector<3xi32>
  %v1 = arith.constant dense<[3, 4, 5]> : vector<3xi32>
  %shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xi32>, vector<3xi32>
  return %shuffle : vector<4xi32>
}

// A shuffle of 0-D vectors is expected to become a broadcast to vector<1xi32>.
// CHECK-LABEL: func @shuffle_canonicalize_0d
func.func @shuffle_canonicalize_0d(%v0 : vector<i32>, %v1 : vector<i32>) -> vector<1xi32> {
  // CHECK: vector.broadcast %{{.*}} : vector<i32> to vector<1xi32>
  %shuffle = vector.shuffle %v0, %v1 [0] : vector<i32>, vector<i32>
  return %shuffle : vector<1xi32>
}

// A mask that selects exactly the first operand in order is expected to fold
// to that operand.
// CHECK-LABEL: func @shuffle_fold1
// CHECK: %arg0 : vector<4xi32>
func.func @shuffle_fold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<4xi32> {
  %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4xi32>, vector<2xi32>
  return %shuffle : vector<4xi32>
}

// A mask that selects exactly the second operand in order is expected to fold
// to that operand.
// CHECK-LABEL: func @shuffle_fold2
// CHECK: %arg1 : vector<2xi32>
func.func @shuffle_fold2(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<2xi32> {
  %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4xi32>, vector<2xi32>
  return %shuffle : vector<2xi32>
}

// Same as shuffle_fold1 with 3-D operands.
// CHECK-LABEL: func @shuffle_fold3
// CHECK: return %arg0 : vector<4x5x6xi32>
func.func @shuffle_fold3(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<4x5x6xi32> {
  %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4x5x6xi32>, vector<2x5x6xi32>
  return %shuffle : vector<4x5x6xi32>
}

// Same as shuffle_fold2 with 3-D operands.
// CHECK-LABEL: func @shuffle_fold4
// CHECK: return %arg1 : vector<2x5x6xi32>
func.func @shuffle_fold4(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<2x5x6xi32> {
  %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4x5x6xi32>, vector<2x5x6xi32>
  return %shuffle : vector<2x5x6xi32>
}

// Negative case: the mask draws from both operands, so the shuffle is
// expected to remain.
// CHECK-LABEL: func @shuffle_nofold1
// CHECK: %[[V:.+]] = vector.shuffle %arg0, %arg1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
// CHECK: return %[[V]]
func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5xi32> {
  %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
  return %shuffle : vector<5xi32>
}

// -----

// A transpose of a broadcast from vector<1xf32> is expected to become a
// broadcast straight to the transposed type.
// CHECK-LABEL: func @transpose_scalar_broadcast1
// CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
// CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x8xf32>
// CHECK: return %[[V]] : vector<1x8xf32>
func.func @transpose_scalar_broadcast1(%value: vector<1xf32>) -> vector<1x8xf32> {
  %bcast = vector.broadcast %value : vector<1xf32> to vector<8x1xf32>
  %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
  return %t : vector<1x8xf32>
}

// -----

// Same as above with a scalar f32 broadcast source.
// CHECK-LABEL: func @transpose_scalar_broadcast2
// CHECK-SAME: (%[[ARG:.+]]: f32)
// CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x8xf32>
// CHECK: return %[[V]] : vector<1x8xf32>
func.func @transpose_scalar_broadcast2(%value: f32) -> vector<1x8xf32> {
  %bcast = vector.broadcast %value : f32 to vector<8x1xf32>
  %t = vector.transpose %bcast, [1, 0] :
vector<8x1xf32> to vector<1x8xf32>
  return %t : vector<1x8xf32>
}

// -----

// A transpose of a splat constant is expected to fold to a splat constant of
// the transposed type.
// CHECK-LABEL: func @transpose_splat_constant
// CHECK: %[[CST:.+]] = arith.constant dense<5.000000e+00> : vector<8x4xf32>
// CHECK: return %[[CST]]
func.func @transpose_splat_constant() -> vector<8x4xf32> {
  %cst = arith.constant dense<5.0> : vector<4x8xf32>
  %0 = vector.transpose %cst, [1, 0] : vector<4x8xf32> to vector<8x4xf32>
  return %0 : vector<8x4xf32>
}

// A transpose of a vector.splat is expected to become a splat of the
// transposed type.
// CHECK-LABEL: func @transpose_splat2(
// CHECK-SAME: %[[VAL_0:.*]]: f32) -> vector<3x4xf32> {
// CHECK: %[[VAL_1:.*]] = vector.splat %[[VAL_0]] : vector<3x4xf32>
// CHECK: return %[[VAL_1]] : vector<3x4xf32>
// CHECK: }
func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> {
  %splat = vector.splat %arg : vector<4x3xf32>
  %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
  return %0 : vector<3x4xf32>
}

// -----

// Inserting a constant scalar into a dense 1-D constant is expected to fold
// to a new dense constant.
// CHECK-LABEL: func.func @insert_1d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[9, 1, 2]> : vector<3xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 9, 2]> : vector<3xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[0, 1, 9]> : vector<3xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<3xi32>, vector<3xi32>
func.func @insert_1d_constant() -> (vector<3xi32>, vector<3xi32>, vector<3xi32>) {
  %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
  %icst = arith.constant 9 : i32
  %a = vector.insert %icst, %vcst[0] : i32 into vector<3xi32>
  %b = vector.insert %icst, %vcst[1] : i32 into vector<3xi32>
  %c = vector.insert %icst, %vcst[2] : i32 into vector<3xi32>
  return %a, %b, %c : vector<3xi32>, vector<3xi32>, vector<3xi32>
}

// -----

// Inserting a constant scalar or a constant row into a dense 2-D constant is
// expected to fold to a new dense constant.
// CHECK-LABEL: func.func @insert_2d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[99, 1, 2\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[3, 4, 99\]\]}}> : vector<2x3xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[90, 91, 92\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
// CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[90, 91, 92\]\]}}> : vector<2x3xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
func.func @insert_2d_constant() -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
  %vcst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
  %cst_scalar = arith.constant 99 : i32
  %cst_1d = arith.constant dense<[90, 91, 92]> : vector<3xi32>
  %a = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
  %b = vector.insert %cst_scalar, %vcst[1, 2] : i32 into vector<2x3xi32>
  %c = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
  %d = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
  return %a, %b, %c, %d : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
}

// -----

// Inserting constants into a splat 2-D constant is expected to fold to a
// constant; inserting the splat value itself leaves the splat unchanged.
// CHECK-LABEL: func.func @insert_2d_splat_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2x3xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[99, 0, 0\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[0, 99, 0\]\]}}> : vector<2x3xi32>
// CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[33, 33, 33\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
// CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[33, 33, 33\]\]}}> : vector<2x3xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
func.func @insert_2d_splat_constant()
  -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
  %vcst = arith.constant dense<0> : vector<2x3xi32>
%cst_zero = arith.constant 0 : i32
  %cst_scalar = arith.constant 99 : i32
  %cst_1d = arith.constant dense<33> : vector<3xi32>
  %a = vector.insert %cst_zero, %vcst[0, 0] : i32 into vector<2x3xi32>
  %b = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
  %c = vector.insert %cst_scalar, %vcst[1, 1] : i32 into vector<2x3xi32>
  %d = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
  %e = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
  return %a, %b, %c, %d, %e : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
}

// -----

// insertelement with constant vector, value and index is expected to fold to
// a dense constant.
// CHECK-LABEL: func @insert_element_fold
// CHECK: %[[V:.+]] = arith.constant dense<[0, 1, 7, 3]> : vector<4xi32>
// CHECK: return %[[V]]
func.func @insert_element_fold() -> vector<4xi32> {
  %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
  %s = arith.constant 7 : i32
  %i = arith.constant 2 : i32
  %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
  return %1 : vector<4xi32>
}

// -----

// Negative case: with a constant index outside the vector, the insertelement
// is expected to remain.
// CHECK-LABEL: func @insert_element_invalid_fold
func.func @insert_element_invalid_fold() -> vector<1xf32> {
  // Out-of-bound index here.
2174 %c26 = arith.constant 26 : index 2175 %cst_2 = arith.constant 1.60215309E+9 : f32 2176 %cst_20 = arith.constant dense<1.60215309E+9> : vector<1xf32> 2177// CHECK: vector.insertelement 2178 %46 = vector.insertelement %cst_2, %cst_20[%c26 : index] : vector<1xf32> 2179 return %46 : vector<1xf32> 2180} 2181 2182 2183// ----- 2184 2185// Do not crash on poison 2186// CHECK-LABEL: func @insert_poison_fold1 2187// CHECK: vector.insertelement 2188func.func @insert_poison_fold1() -> vector<4xi32> { 2189 %v = ub.poison : vector<4xi32> 2190 %s = arith.constant 7 : i32 2191 %i = arith.constant 2 : i32 2192 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> 2193 return %1 : vector<4xi32> 2194} 2195 2196// ----- 2197 2198// Do not crash on poison 2199// CHECK-LABEL: func @insert_poison_fold2 2200// CHECK: vector.insertelement 2201func.func @insert_poison_fold2() -> vector<4xi32> { 2202 %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> 2203 %s = ub.poison : i32 2204 %i = arith.constant 2 : i32 2205 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> 2206 return %1 : vector<4xi32> 2207} 2208 2209// ----- 2210 2211// Do not crash on poison 2212// CHECK-LABEL: func @insert_poison_fold3 2213// CHECK: vector.insertelement 2214func.func @insert_poison_fold3() -> vector<4xi32> { 2215 %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> 2216 %s = arith.constant 7 : i32 2217 %i = ub.poison : i32 2218 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> 2219 return %1 : vector<4xi32> 2220} 2221 2222// ----- 2223 2224// CHECK-LABEL: func @extract_element_fold 2225// CHECK: %[[C:.+]] = arith.constant 5 : i32 2226// CHECK: return %[[C]] 2227func.func @extract_element_fold() -> i32 { 2228 %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32> 2229 %i = arith.constant 2 : i32 2230 %1 = vector.extractelement %v[%i : i32] : vector<4xi32> 2231 return %1 : i32 2232} 2233 2234// CHECK-LABEL: func @extract_element_splat_fold 2235// CHECK-SAME: (%[[ARG:.+]]: i32) 
// CHECK: return %[[ARG]]
func.func @extract_element_splat_fold(%a : i32) -> i32 {
  %v = vector.splat %a : vector<4xi32>
  %i = arith.constant 2 : i32
  %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
  return %1 : i32
}

// -----

// Do not crash on poison
// (poison source vector: the extractelement must survive canonicalization).
// CHECK-LABEL: func @extract_element_poison_fold1
// CHECK: vector.extractelement
func.func @extract_element_poison_fold1() -> i32 {
  %v = ub.poison : vector<4xi32>
  %i = arith.constant 2 : i32
  %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
  return %1 : i32
}

// -----

// Do not crash on poison
// (poison position: the extractelement must survive canonicalization).
// CHECK-LABEL: func @extract_element_poison_fold2
// CHECK: vector.extractelement
func.func @extract_element_poison_fold2() -> i32 {
  %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
  %i = ub.poison : i32
  %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
  return %1 : i32
}

// -----

// A reduction over a single-element vector without accumulator becomes a
// plain extract of element 0.
// CHECK-LABEL: func @reduce_one_element_vector_extract
// CHECK-SAME: (%[[V:.+]]: vector<1xf32>)
// CHECK: %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
// CHECK: return %[[S]] : f32
func.func @reduce_one_element_vector_extract(%a : vector<1xf32>) -> f32 {
  %s = vector.reduction <add>, %a : vector<1xf32> into f32
  return %s : f32
}

// -----

// Masked variant of the test above.
// CHECK-LABEL: func @masked_reduce_one_element_vector_extract
// CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: vector<1xi1>)
func.func @masked_reduce_one_element_vector_extract(%a : vector<1xf32>, %mask : vector<1xi1>) -> f32 {
// CHECK: %[[VAL_2:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
  %s = vector.mask %mask { vector.reduction <add>, %a : vector<1xf32> into f32 }
         : vector<1xi1> -> f32
  return %s : f32
}

// -----

// With an accumulator, the single-element reduction becomes extract + addf.
// The accumulator is matched through the captured argument rather than a
// hard-coded SSA name, so the test does not depend on value numbering.
// CHECK-LABEL: func @reduce_one_element_vector_addf
// CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
// CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
// CHECK: %[[S:.+]] = arith.addf %[[A]], %[[B]] : f32
// CHECK: return %[[S]]
func.func @reduce_one_element_vector_addf(%a : vector<1xf32>, %b: f32) -> f32 {
  %s = vector.reduction <add>, %a, %b : vector<1xf32> into f32
  return %s : f32
}

// -----

// Fastmath flags on the reduction are carried over to the scalar addf.
// CHECK-LABEL: func @reduce_one_element_vector_addf_fastmath
// CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
// CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
// CHECK: %[[S:.+]] = arith.addf %[[A]], %[[B]] fastmath<nnan,ninf> : f32
// CHECK: return %[[S]]
func.func @reduce_one_element_vector_addf_fastmath(%a : vector<1xf32>, %b: f32) -> f32 {
  %s = vector.reduction <add>, %a, %b fastmath<nnan,ninf> : vector<1xf32> into f32
  return %s : f32
}

// -----

// Masked reduction with accumulator: the mask bit selects between the
// combined value and the untouched accumulator.
// CHECK-LABEL: func @masked_reduce_one_element_vector_addf
// CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: f32,
// CHECK-SAME: %[[VAL_2:.*]]: vector<1xi1>)
func.func @masked_reduce_one_element_vector_addf(%a: vector<1xf32>,
                                                 %b: f32,
                                                 %mask: vector<1xi1>) -> f32 {
// CHECK: %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0] : i1 from vector<1xi1>
// CHECK: %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
// CHECK: %[[VAL_5:.*]] = arith.addf %[[VAL_4]], %[[VAL_1]] : f32
// CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_1]] : f32
  %s = vector.mask %mask { vector.reduction <add>, %a, %b : vector<1xf32> into f32 }
         : vector<1xi1> -> f32
  return %s : f32
}

// -----

// CHECK-LABEL: func @reduce_one_element_vector_mulf
// CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
// CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
// CHECK: %[[S:.+]] = arith.mulf %[[A]], %[[B]] : f32
// CHECK: return %[[S]]
func.func @reduce_one_element_vector_mulf(%a : vector<1xf32>, %b: f32) -> f32 {
  %s = vector.reduction <mul>, %a, %b : vector<1xf32> into f32
  return %s : f32
}

// -----

// A reduction over a vector with more than one element must be left alone.
// CHECK-LABEL: func @dont_reduce_one_element_vector
// CHECK: vector.reduction
func.func @dont_reduce_one_element_vector(%a : vector<4xf32>) -> f32 {
  %s = vector.reduction <add>, %a : vector<4xf32> into f32
  return %s : f32
}

// -----

// CHECK-LABEL: func @reduce_one_element_vector_maximumf
// CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
// CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
// CHECK: %[[S:.+]] = arith.maximumf %[[A]], %[[B]] : f32
// CHECK: return %[[S]]
func.func @reduce_one_element_vector_maximumf(%a : vector<1xf32>, %b: f32) -> f32 {
  %s = vector.reduction <maximumf>, %a, %b : vector<1xf32> into f32
  return %s : f32
}

// -----

// Two chained bitcasts collapse into one. The operand of the remaining
// bitcast is matched against the captured function argument, so the test
// fails if the fold picks up any other value.
// CHECK-LABEL: func @bitcast(
// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>) -> vector<4x16xi16> {
// CHECK: vector.bitcast %[[ARG]] : vector<4x8xf32> to vector<4x16xi16>
func.func @bitcast(%a: vector<4x8xf32>) -> vector<4x16xi16> {
  %0 = vector.bitcast %a : vector<4x8xf32> to vector<4x8xi32>
  %1 = vector.bitcast %0 : vector<4x8xi32> to vector<4x16xi16>
  return %1 : vector<4x16xi16>
}

// -----

// Inserting a splat of %x into a larger splat of the same %x folds to the
// larger splat.
// CHECK-LABEL: @insert_strided_slice_splat
// CHECK-SAME: (%[[ARG:.*]]: f32)
// CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8x16xf32>
// CHECK-NEXT: return %[[SPLAT]] : vector<8x16xf32>
func.func @insert_strided_slice_splat(%x: f32) -> (vector<8x16xf32>) {
  %splat0 = vector.splat %x : vector<4x4xf32>
  %splat1 = vector.splat %x : vector<8x16xf32>
  %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]}
       : vector<4x4xf32> into vector<8x16xf32>
  return %0 : vector<8x16xf32>
}

// -----

// Re-inserting a slice at the same offsets it was extracted from folds to
// the source vector.
// CHECK-LABEL: @insert_extract_strided_slice
// CHECK-SAME: (%[[ARG:.*]]: vector<8x16xf32>)
// CHECK-NEXT: return %[[ARG]] : vector<8x16xf32>
func.func @insert_extract_strided_slice(%x: vector<8x16xf32>) -> (vector<8x16xf32>) {
  %0 = vector.extract_strided_slice %x {offsets = [0, 8], sizes = [2, 4], strides = [1, 1]}
        : vector<8x16xf32> to vector<2x4xf32>
  %1 = vector.insert_strided_slice %0, %x {offsets = [0, 8], strides = [1, 1]}
        : vector<2x4xf32> into vector<8x16xf32>
  return %1 : vector<8x16xf32>
}

// -----

// Constant folding of insert_strided_slice into a 1-D constant, for sources
// of length 1, 2 and 3 at different offsets.
// CHECK-LABEL: func.func @insert_strided_1d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[4, 1, 2]> : vector<3xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 1, 4]> : vector<3xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[5, 6, 2]> : vector<3xi32>
// CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[0, 5, 6]> : vector<3xi32>
// CHECK-DAG: %[[ECST:.*]] = arith.constant dense<[7, 8, 9]> : vector<3xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
func.func @insert_strided_1d_constant() ->
  (vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>) {
  %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
  %cst_1 = arith.constant dense<4> : vector<1xi32>
  %cst_2 = arith.constant dense<[5, 6]> : vector<2xi32>
  %cst_3 = arith.constant dense<[7, 8, 9]> : vector<3xi32>
  %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [0], strides = [1]} : vector<1xi32> into vector<3xi32>
  %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2], strides = [1]} : vector<1xi32> into vector<3xi32>
  %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0], strides = [1]} : vector<2xi32> into vector<3xi32>
  %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1], strides = [1]} : vector<2xi32> into vector<3xi32>
  %e = vector.insert_strided_slice %cst_3, %vcst {offsets = [0], strides = [1]} : vector<3xi32> into vector<3xi32>
  return %a, %b, %c, %d, %e : vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>
}

// -----

// Constant folding of insert_strided_slice into a 2-D constant, with 1-D and
// 2-D sources at different offsets.
// CHECK-LABEL: func.func @insert_strided_2d_constant
// CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[9, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 9\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[18, 19\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[18, 19\], \[4, 5\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[18, 19\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[FCST:.*]] = arith.constant dense<{{\[\[28, 29\], \[38, 39\], \[4, 5\]\]}}> : vector<3x2xi32>
// CHECK-DAG: %[[GCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[28, 29\], \[38, 39\]\]}}> : vector<3x2xi32>
// CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]], %[[FCST]], %[[GCST]]
func.func @insert_strided_2d_constant() ->
  (vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>) {
  %vcst = arith.constant dense<[[0, 1], [2, 3], [4, 5]]> : vector<3x2xi32>
  %cst_1 = arith.constant dense<9> : vector<1xi32>
  %cst_2 = arith.constant dense<[18, 19]> : vector<2xi32>
  %cst_3 = arith.constant dense<[[28, 29], [38, 39]]> : vector<2x2xi32>
  %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [1, 0], strides = [1]} : vector<1xi32> into vector<3x2xi32>
  %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2, 1], strides = [1]} : vector<1xi32> into vector<3x2xi32>
  %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
  %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
  %e = vector.insert_strided_slice %cst_2, %vcst {offsets = [2, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
  %f = vector.insert_strided_slice %cst_3, %vcst {offsets = [0, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
  %g = vector.insert_strided_slice %cst_3, %vcst {offsets = [1, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
  return %a, %b, %c, %d, %e, %f, %g :
    vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>
}

// -----

// Shuffling two splats of the same scalar folds to a single splat.
// CHECK-LABEL: func @shuffle_splat
// CHECK-SAME: (%[[ARG:.*]]: i32)
// CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<4xi32>
// CHECK-NEXT: return %[[SPLAT]] : vector<4xi32>
func.func @shuffle_splat(%x : i32) -> vector<4xi32> {
  %v0 = vector.splat %x : vector<4xi32>
  %v1 = vector.splat %x : vector<2xi32>
  %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32>
  return %shuffle : vector<4xi32>
}


// -----

// Inserting a splat into a splat of the same scalar folds to the destination
// splat.
// CHECK-LABEL: func @insert_splat
// CHECK-SAME: (%[[ARG:.*]]: i32)
// CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<2x4x3xi32>
// CHECK-NEXT: return %[[SPLAT]] : vector<2x4x3xi32>
func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> {
  %v0 = vector.splat %x : vector<4x3xi32>
  %v1 = vector.splat %x : vector<2x4x3xi32>
  %insert = vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32>
  return %insert : vector<2x4x3xi32>
}

// -----

// Both the rank-reducing extract_slice and the transfer_read are expected to
// remain after canonicalization.
// CHECK-LABEL: func.func @transfer_read_from_rank_reducing_extract_slice
// CHECK: tensor.extract_slice
// CHECK: vector.transfer_read
func.func @transfer_read_from_rank_reducing_extract_slice(%src: tensor<1x8x8x8xf32>, %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.000000e+00 : f32
  %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [1, 1, 1, 1] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
  %1 = vector.transfer_read %0[%c0, %i4, %c0], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
  return %1 : vector<4xf32>
}

// -----

// Extracting from a broadcast is rewritten to extract from the broadcast
// source. The result is matched through a capture instead of a fixed SSA
// name so the test does not depend on value numbering.
// CHECK-LABEL: func.func @extract_from_broadcast
func.func @extract_from_broadcast(%src: vector<1x1x1xf32>) -> vector<1xf32> {
  %0 = vector.broadcast %src : vector<1x1x1xf32> to vector<1x1x32x1xf32>

  // CHECK-NEXT: %[[RES:.*]] = vector.extract {{.*}}[0, 0] : vector<1xf32> from vector<1x1x1xf32>
  // CHECK-NEXT: return %[[RES]] : vector<1xf32>
  %1 = vector.extract %0[0, 0, 31] : vector<1xf32> from vector<1x1x32x1xf32>
  return %1: vector<1xf32>
}

// -----

// Same as above for a broadcast that stretches a unit dimension: position 2
// along the stretched dimension maps back to position 0 in the source.
// CHECK-LABEL: func.func @extract_from_stretch_broadcast
func.func @extract_from_stretch_broadcast(%src: vector<3x1x2xf32>) -> f32 {
  // CHECK-NEXT: %[[RES:.*]] = vector.extract {{.*}}[0, 0, 0] : f32 from vector<3x1x2xf32>
  // CHECK-NEXT: return %[[RES]] : f32
  %0 = vector.broadcast %src : vector<3x1x2xf32> to vector<3x4x2xf32>
  %1 = vector.extract %0[0, 2, 0] : f32 from vector<3x4x2xf32>
  return %1: f32
}

// -----

// CHECK-LABEL: func.func @extract_strided_slice_of_constant_mask
func.func @extract_strided_slice_of_constant_mask() -> vector<5x7xi1>{
  // CHECK-NEXT: %[[RES:.*]] = vector.constant_mask [5, 4] : vector<5x7xi1>
  // CHECK-NEXT: return %[[RES]] : vector<5x7xi1>
  %c4 = arith.constant 4 : index
  %c10 = arith.constant 10 : index
  %mask = vector.create_mask %c10, %c4 : vector<12x7xi1>
  %res = vector.extract_strided_slice %mask {offsets = [3], sizes = [5], strides = [1]} : vector<12x7xi1> to vector<5x7xi1>
  return %res : vector<5x7xi1>
}

// -----

// Extracting an element from a scalar broadcast folds to the scalar.
// CHECK-LABEL: func.func @fold_extractelement_of_broadcast(
// CHECK-SAME: %[[f:.*]]: f32
// CHECK: return %[[f]]
func.func @fold_extractelement_of_broadcast(%f: f32) -> f32 {
  %0 = vector.broadcast %f : f32 to vector<15xf32>
  %c5 = arith.constant 5 : index
  %1 = vector.extractelement %0 [%c5 : index] : vector<15xf32>
  return %1 : f32
}

// -----

// A reduction over a 0-D vector becomes an extractelement of the argument.
// CHECK-LABEL: func.func @fold_0d_vector_reduction
// CHECK-SAME: %[[ARG:.*]]: vector<f32>
func.func @fold_0d_vector_reduction(%arg0: vector<f32>) -> f32 {
  // CHECK-NEXT: %[[RES:.*]] = vector.extractelement %[[ARG]][] : vector<f32>
  // CHECK-NEXT: return %[[RES]] : f32
  %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
  return %0 : f32
}

// -----

// A vector.mask with an empty region is removed.
// CHECK-LABEL: func @empty_vector_mask
func.func @empty_vector_mask(%mask : vector<8xi1>) {
// CHECK-NOT: vector.mask
  vector.mask %mask { } : vector<8xi1>
  return
}

// -----

// A vector.mask that only yields a value is replaced by that value.
// CHECK-LABEL: func @empty_vector_mask_with_return
// CHECK-SAME: %[[IN:.*]]: vector<8xf32>
func.func @empty_vector_mask_with_return(%a : vector<8xf32>, %mask : vector<8xi1>) -> vector<8xf32> {
// CHECK-NOT: vector.mask
// CHECK: return %[[IN]] : vector<8xf32>
  %0 = vector.mask %mask { vector.yield %a : vector<8xf32> } : vector<8xi1> -> vector<8xf32>
  return %0 : vector<8xf32>
}

// -----

// An all-true mask is dropped and the masked operation is kept unmasked.
// CHECK-LABEL: func @all_true_vector_mask
// CHECK-SAME: %[[IN:.*]]: tensor<3x4xf32>
func.func @all_true_vector_mask(%ta : tensor<3x4xf32>) -> vector<3x4xf32> {
// CHECK-NOT: vector.mask
// CHECK: %[[LD:.*]] = vector.transfer_read %[[IN]]
// CHECK: return %[[LD]] : vector<3x4xf32>
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
  %0 = vector.mask %all_true { vector.transfer_read %ta[%c0, %c0], %cf0 : tensor<3x4xf32>, vector<3x4xf32> } : vector<3x4xi1> -> vector<3x4xf32>
  return %0 : vector<3x4xf32>
}

// -----

// Same as above for a masked operation without results.
// CHECK-LABEL: func @all_true_vector_mask_no_result(
func.func @all_true_vector_mask_no_result(%a : vector<3x4xf32>, %m : memref<3x4xf32>) {
// CHECK-NOT: vector.mask
// CHECK: vector.transfer_write
  %c0 = arith.constant 0 : index
  %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
  vector.mask %all_true { vector.transfer_write %a, %m[%c0, %c0] : vector<3x4xf32>, memref<3x4xf32> } : vector<3x4xi1>
  return
}

// -----

// A shape_cast that drops trailing unit dims of a create_mask folds into a
// lower-rank create_mask.
// CHECK-LABEL: func.func @fold_shape_cast_with_mask(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x4xi1> {
func.func @fold_shape_cast_with_mask(%arg0: tensor<1x?xf32>) -> vector<1x4xi1> {
// CHECK-NOT: vector.shape_cast
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
// CHECK: %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x4xi1>
// CHECK: return %[[VAL_3]] : vector<1x4xi1>
  %c1 = arith.constant 1 : index
  %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
  %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x4x1x1xi1>
  %2 = vector.shape_cast %1 : vector<1x4x1x1xi1> to vector<1x4xi1>
  return %2 : vector<1x4xi1>
}

// -----

// Same fold with a scalable dimension in the mask type.
// CHECK-LABEL: func.func @fold_shape_cast_with_mask_scalable(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[4]xi1> {
func.func @fold_shape_cast_with_mask_scalable(%arg0: tensor<1x?xf32>) -> vector<1x[4]xi1> {
// CHECK-NOT: vector.shape_cast
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
// CHECK: %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[4]xi1>
// CHECK: return %[[VAL_3]] : vector<1x[4]xi1>
  %c1 = arith.constant 1 : index
  %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
  %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x[4]x1x1xi1>
  %2 = vector.shape_cast %1 : vector<1x[4]x1x1xi1> to vector<1x[4]xi1>
  return %2 : vector<1x[4]xi1>
}

// -----

// Check that scalable "1" (i.e. [1]) is not folded
// CHECK-LABEL: func.func @fold_shape_cast_with_mask_scalable_one(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[1]xi1> {
func.func @fold_shape_cast_with_mask_scalable_one(%arg0: tensor<1x?xf32>) -> vector<1x[1]xi1>{
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
// CHECK: %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[1]xi1>
// CHECK: return %[[VAL_3]] : vector<1x[1]xi1>
  %c1 = arith.constant 1 : index
  %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
  %1 = vector.create_mask %c1, %dim, %c1 : vector<1x[1]x1xi1>
  %2 = vector.shape_cast %1 : vector<1x[1]x1xi1> to vector<1x[1]xi1>
  return %2 : vector<1x[1]xi1>
}

// -----

// The same shape_cast fold applied to a constant_mask.
// CHECK-LABEL: func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1> {
func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1>{
// CHECK-NOT: vector.shape_cast
// CHECK: %[[VAL_0:.*]] = vector.constant_mask [1] : vector<4xi1>
// CHECK: return %[[VAL_0]] : vector<4xi1>
  %1 = vector.constant_mask [1, 1, 1] : vector<4x1x1xi1>
  %2 = vector.shape_cast %1 : vector<4x1x1xi1> to vector<4xi1>
  return %2 : vector<4xi1>
}

// -----

// TODO: This IR could be canonicalized but the canonicalization pattern is not
// smart enough. For now, just make sure that we do not crash.
2663 2664// CHECK-LABEL: func.func @load_store_forwarding_rank_mismatch( 2665// CHECK: vector.transfer_write 2666// CHECK: vector.transfer_read 2667func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: tensor<4x4x4xf32>) -> (vector<1x100x4x5xf32>) { 2668 %c0 = arith.constant 0 : index 2669 %cf0 = arith.constant 0.0 : f32 2670 // d0 is explicitly written. 2671 %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] 2672 {in_bounds = [true, true, true], 2673 permutation_map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>} : 2674 vector<4x1x1xf32>, tensor<4x4x4xf32> 2675 // d0 is implicitly read (rank-reduction of unit dim). 2676 %r = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 2677 {in_bounds = [true, true, true, true], 2678 permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} : 2679 tensor<4x4x4xf32>, vector<1x100x4x5xf32> 2680 return %r : vector<1x100x4x5xf32> 2681} 2682 2683// ----- 2684 2685// CHECK-LABEL: func.func @rank_0_shuffle_to_interleave( 2686// CHECK-SAME: %[[LHS:.*]]: vector<f64>, %[[RHS:.*]]: vector<f64>) 2687func.func @rank_0_shuffle_to_interleave(%arg0: vector<f64>, %arg1: vector<f64>) -> vector<2xf64> { 2688 // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<f64> -> vector<2xf64> 2689 // CHECK: return %[[ZIP]] 2690 %0 = vector.shuffle %arg0, %arg1 [0, 1] : vector<f64>, vector<f64> 2691 return %0 : vector<2xf64> 2692} 2693 2694// ----- 2695 2696// CHECK-LABEL: func.func @rank_1_shuffle_to_interleave( 2697// CHECK-SAME: %[[LHS:.*]]: vector<6xi32>, %[[RHS:.*]]: vector<6xi32>) 2698func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi32>) -> vector<12xi32> { 2699 // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<6xi32> -> vector<12xi32> 2700 // CHECK: return %[[ZIP]] 2701 %0 = vector.shuffle %arg0, %arg1 [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11] : vector<6xi32>, vector<6xi32> 2702 return %0 : vector<12xi32> 2703} 2704 2705// ----- 2706 2707// CHECK-LABEL: func 
@extract_from_0d_splat_broadcast_regression( 2708// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>) 2709func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { 2710 // Splat scalar to 0D and extract scalar. 2711 %0 = vector.splat %a : vector<f32> 2712 %1 = vector.extract %0[] : f32 from vector<f32> 2713 2714 // Broadcast scalar to 0D and extract scalar. 2715 %2 = vector.broadcast %a : f32 to vector<f32> 2716 %3 = vector.extract %2[] : f32 from vector<f32> 2717 2718 // Broadcast 0D to 3D and extract scalar. 2719 // CHECK: %[[extract1:.*]] = vector.extractelement %[[b]][] : vector<f32> 2720 %4 = vector.broadcast %b : vector<f32> to vector<1x2x4xf32> 2721 %5 = vector.extract %4[0, 0, 1] : f32 from vector<1x2x4xf32> 2722 2723 // Splat scalar to 2D and extract scalar. 2724 %6 = vector.splat %a : vector<2x3xf32> 2725 %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32> 2726 2727 // Broadcast scalar to 3D and extract scalar. 2728 %8 = vector.broadcast %a : f32 to vector<5x6x7xf32> 2729 %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32> 2730 2731 // Extract 2D from 3D that was broadcasted from a scalar. 2732 // CHECK: %[[extract2:.*]] = vector.broadcast %[[a]] : f32 to vector<6x7xf32> 2733 %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32> 2734 2735 // Extract 1D from 2D that was splat'ed from a scalar. 
// CHECK: %[[extract3:.*]] = vector.broadcast %[[a]] : f32 to vector<3xf32>
  %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32>

  // CHECK: return %[[a]], %[[a]], %[[extract1]], %[[a]], %[[a]], %[[extract2]], %[[extract3]]
  return %1, %3, %5, %7, %9, %10, %11 : f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>
}

// -----

// Scalars extracted from a `vector.from_elements` (0-D, 1-D and 2-D sources)
// are expected to be replaced by the corresponding scalar operand, so only
// the function arguments are returned.
// CHECK-LABEL: func @extract_scalar_from_from_elements(
// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @extract_scalar_from_from_elements(%a: f32, %b: f32) -> (f32, f32, f32, f32, f32, f32, f32) {
  // 0-D source.
  %vec0d = vector.from_elements %a : vector<f32>
  %elt0d = vector.extract %vec0d[] : f32 from vector<f32>

  // 1-D sources: a single-element vector, then the last element of five.
  %vec1 = vector.from_elements %a : vector<1xf32>
  %elt1 = vector.extract %vec1[0] : f32 from vector<1xf32>
  %vec5 = vector.from_elements %a, %b, %a, %a, %b : vector<5xf32>
  %elt5 = vector.extract %vec5[4] : f32 from vector<5xf32>

  // 2-D source: row 0 holds only %a, row 1 holds only %b.
  %vec2x3 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
  %e00 = vector.extract %vec2x3[0, 0] : f32 from vector<2x3xf32>
  %e01 = vector.extract %vec2x3[0, 1] : f32 from vector<2x3xf32>
  %e11 = vector.extract %vec2x3[1, 1] : f32 from vector<2x3xf32>
  %e12 = vector.extract %vec2x3[1, 2] : f32 from vector<2x3xf32>

  // CHECK: return %[[a]], %[[a]], %[[b]], %[[a]], %[[a]], %[[b]], %[[b]]
  return %elt0d, %elt1, %elt5, %e00, %e01, %e11, %e12 : f32, f32, f32, f32, f32, f32, f32
}

// -----

// Extracting a 1-D row whose elements are all the same scalar is expected to
// become a `vector.splat` of that scalar.
// CHECK-LABEL: func @extract_1d_from_from_elements(
// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @extract_1d_from_from_elements(%a: f32, %b: f32) -> (vector<3xf32>, vector<3xf32>) {
  %src = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
  // CHECK: %[[splat1:.*]] = vector.splat %[[a]] : vector<3xf32>
  %row0 = vector.extract %src[0] : vector<3xf32> from vector<2x3xf32>
  // CHECK: %[[splat2:.*]] = vector.splat %[[b]] : vector<3xf32>
  %row1 = vector.extract %src[1] : vector<3xf32> from vector<2x3xf32>
  // CHECK: return %[[splat1]], %[[splat2]]
  return %row0, %row1 : vector<3xf32>, vector<3xf32>
}

// -----

// Extracting a 2-D slice with mixed elements is expected to become a smaller
// `vector.from_elements` built from the operands that make up that slice.
// CHECK-LABEL: func @extract_2d_from_from_elements(
// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @extract_2d_from_from_elements(%a: f32, %b: f32) -> (vector<2x2xf32>, vector<2x2xf32>) {
  %src = vector.from_elements %a, %a, %a, %b, %b, %b, %b, %a, %b, %a, %a, %b : vector<3x2x2xf32>
  // CHECK: %[[splat1:.*]] = vector.from_elements %[[a]], %[[a]], %[[a]], %[[b]] : vector<2x2xf32>
  %slice0 = vector.extract %src[0] : vector<2x2xf32> from vector<3x2x2xf32>
  // CHECK: %[[splat2:.*]] = vector.from_elements %[[b]], %[[b]], %[[b]], %[[a]] : vector<2x2xf32>
  %slice1 = vector.extract %src[1] : vector<2x2xf32> from vector<3x2x2xf32>
  // CHECK: return %[[splat1]], %[[splat2]]
  return %slice0, %slice1 : vector<2x2xf32>, vector<2x2xf32>
}

// -----

// A `vector.from_elements` whose operands are all the same value is expected
// to become `vector.splat` (including the 0-D case); one with differing
// operands is expected to stay a `vector.from_elements`.
// CHECK-LABEL: func @from_elements_to_splat(
// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
func.func @from_elements_to_splat(%a: f32, %b: f32) -> (vector<2x3xf32>, vector<2x3xf32>, vector<f32>) {
  // CHECK: %[[splat:.*]] = vector.splat %[[a]] : vector<2x3xf32>
  %uniform = vector.from_elements %a, %a, %a, %a, %a, %a : vector<2x3xf32>
  // CHECK: %[[from_el:.*]] = vector.from_elements {{.*}} : vector<2x3xf32>
  %mixed = vector.from_elements %a, %a, %a, %a, %b, %a : vector<2x3xf32>
  // CHECK: %[[splat2:.*]] = vector.splat %[[a]] : vector<f32>
  %rank0 = vector.from_elements %a : vector<f32>
  // CHECK: return %[[splat]], %[[from_el]], %[[splat2]]
  return %uniform, %mixed, %rank0 : vector<2x3xf32>, vector<2x3xf32>, vector<f32>
}

// -----

// Regression test: an insert into an `llvm.mlir.undef` vector is expected to
// leave both the undef and the insert in the output.
// CHECK-LABEL: func @vector_insert_const_regression(
// CHECK: llvm.mlir.undef
// CHECK: vector.insert
func.func @vector_insert_const_regression(%arg0: i8) -> vector<4xi8> {
  %undef = llvm.mlir.undef : vector<4xi8>
  %inserted = vector.insert %arg0, %undef [0] : i8 into vector<4xi8>
  return %inserted : vector<4xi8>
}

// -----

// A scalar insert whose position list contains -1 is expected to be replaced
// by `ub.poison` of the destination type.
// CHECK-LABEL: @insert_scalar_poison_idx
func.func @insert_scalar_poison_idx(%a: vector<4x5xf32>, %b: f32)
    -> vector<4x5xf32> {
  // CHECK-NOT: vector.insert
  // CHECK-NEXT: ub.poison : vector<4x5xf32>
  %res = vector.insert %b, %a[-1, 0] : f32 into vector<4x5xf32>
  return %res : vector<4x5xf32>
}

// -----

// Same expectation as the scalar case, inserting a 1-D vector at position -1.
// CHECK-LABEL: @insert_vector_poison_idx
func.func @insert_vector_poison_idx(%a: vector<4x5xf32>, %b: vector<5xf32>)
    -> vector<4x5xf32> {
  // CHECK-NOT: vector.insert
  // CHECK-NEXT: ub.poison : vector<4x5xf32>
  %res = vector.insert %b, %a[-1] : vector<5xf32> into vector<4x5xf32>
  return %res : vector<4x5xf32>
}

// -----

// Same expectation when every listed position is -1.
// CHECK-LABEL: @insert_multiple_poison_idx
func.func @insert_multiple_poison_idx(%a: vector<4x5x8xf32>, %b: vector<8xf32>)
    -> vector<4x5x8xf32> {
  // CHECK-NOT: vector.insert
  // CHECK-NEXT: ub.poison : vector<4x5x8xf32>
  %res = vector.insert %b, %a[-1, -1] : vector<8xf32> into vector<4x5x8xf32>
  return %res : vector<4x5x8xf32>
}

// -----

// A unit-stride slice that takes the full innermost dimension and one element
// of every other dimension, followed by a shape_cast to 1-D, is expected to
// collapse into a single `vector.extract`.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract
// CHECK: %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0, 0] : vector<4xi32> from vector<8x1x2x1x1x4xi32>
// CHECK-NEXT: return %[[EXTRACT]] : vector<4xi32>
func.func @contiguous_extract_strided_slices_to_extract(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<4xi32> {
  %slice = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
  %flat = vector.shape_cast %slice : vector<1x1x1x1x1x4xi32> to vector<4xi32>
  return %flat : vector<4xi32>
}

// -----

// Variant where offsets/sizes/strides list only the five leading dimensions
// and the shape_cast keeps a unit dimension; the expected extract uses four
// indices and yields a 2-D vector.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_shorter_size_list
// CHECK: %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0] : vector<1x4xi32> from vector<8x1x2x1x1x4xi32>
// CHECK-NEXT: return %[[EXTRACT]] : vector<1x4xi32>
func.func @contiguous_extract_strided_slices_to_extract_shorter_size_list(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x4xi32> {
  %slice = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1], strides = [1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
  %reshaped = vector.shape_cast %slice : vector<1x1x1x1x1x4xi32> to vector<1x4xi32>
  return %reshaped : vector<1x4xi32>
}

// -----

// Negative test: the slice keeps all 8 elements of the outermost dimension,
// and the slice op is expected to remain.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size
// CHECK-NEXT: vector.extract_strided_slice
func.func @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<8x1x1x1x1x4xi32> {
  %slice = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<8x1x1x1x1x4xi32>
  return %slice : vector<8x1x1x1x1x4xi32>
}

// -----

// Negative test: the slice takes only 2 of the 4 innermost elements, and the
// slice op is expected to remain.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_size
// CHECK-NEXT: vector.extract_strided_slice
func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x1x1x1x2xi32> {
  %slice = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 2], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x2xi32>
  return %slice : vector<1x1x1x1x1x2xi32>
}

// -----

// Negative test: the slice keeps both elements of dimension 2 but only 1 of
// the 4 innermost elements, and the slice op is expected to remain.
// CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size
// CHECK-NEXT: vector.extract_strided_slice
func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x2x1x1x1xi32> {
  %slice = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 2, 1, 1, 1], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x2xi32>
  return %slice : vector<1x1x2x1x1x1xi32>
}

// -----

// A 1-D gather whose constant indices are 0, 1, ..., 15 is expected to become
// a `vector.maskedload` at the same base index.
// CHECK-LABEL: @contiguous_gather
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[PASSTHRU:.*]]: vector<16xf32>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[R:.*]] = vector.maskedload %[[BASE]][%[[C0]]], %[[MASK]], %[[PASSTHRU]] : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK: return %[[R]]
func.func @contiguous_gather(
    %base: memref<?xf32>, %mask: vector<16xi1>,
    %passthru: vector<16xf32>) -> vector<16xf32> {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  %res = vector.gather %base[%zero][%offsets], %mask, %passthru
      : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %res : vector<16xf32>
}

// -----

// Consecutive indices that start at 1 instead of 0: the gather is expected to
// remain.
// CHECK-LABEL: @contiguous_gather_non_zero_start(
// TODO: Non-zero start is not supported yet.
// CHECK: %[[R:.*]] = vector.gather
// CHECK: return %[[R]]
func.func @contiguous_gather_non_zero_start(
    %base: memref<?xf32>, %mask: vector<16xi1>,
    %passthru: vector<16xf32>) -> vector<16xf32> {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : vector<16xi32>
  %res = vector.gather %base[%zero][%offsets], %mask, %passthru
      : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %res : vector<16xf32>
}

// -----

// 2-D index vector: the gather is expected to remain.
// CHECK-LABEL: @contiguous_gather_2d(
// TODO: Only 1D vectors are supported.
// CHECK: %[[R:.*]] = vector.gather
// CHECK: return %[[R]]
func.func @contiguous_gather_2d(
    %base: memref<?x?xf32>, %mask: vector<4x4xi1>,
    %passthru: vector<4x4xf32>) -> vector<4x4xf32> {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : vector<4x4xi32>
  %res = vector.gather %base[%zero, %zero][%offsets], %mask, %passthru
      : memref<?x?xf32>, vector<4x4xi32>, vector<4x4xi1>, vector<4x4xf32> into vector<4x4xf32>
  return %res : vector<4x4xf32>
}

// -----

// Contiguous indices combined with a constant all-true mask: the gather is
// expected to become a plain `vector.load`.
// CHECK-LABEL: @contiguous_gather_const_mask
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[PASSTHRU:.*]]: vector<16xf32>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[R:.*]] = vector.load %[[BASE]][%[[C0]]] : memref<?xf32>, vector<16xf32>
// CHECK: return %[[R]]
func.func @contiguous_gather_const_mask(
    %base: memref<?xf32>, %passthru: vector<16xf32>) -> vector<16xf32> {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  %all_true = arith.constant dense<true> : vector<16xi1>
  %res = vector.gather %base[%zero][%offsets], %all_true, %passthru
      : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %res : vector<16xf32>
}

// -----

// Indices produced by `vector.step` instead of a dense constant: the gather is
// expected to become a `vector.maskedload`.
// CHECK-LABEL: @contiguous_gather_step
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[PASSTHRU:.*]]: vector<16xf32>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[R:.*]] = vector.maskedload %[[BASE]][%[[C0]]], %[[MASK]], %[[PASSTHRU]] : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK: return %[[R]]
func.func @contiguous_gather_step(
    %base: memref<?xf32>, %mask: vector<16xi1>,
    %passthru: vector<16xf32>) -> vector<16xf32> {
  %zero = arith.constant 0 : index
  %offsets = vector.step : vector<16xindex>
  %res = vector.gather %base[%zero][%offsets], %mask, %passthru
      : memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %res : vector<16xf32>
}

// -----

// All indices are 0 (every lane reads the same element): the gather is
// expected to remain.
// CHECK-LABEL: @gather_broadcast(
// TODO: Broadcast is not supported yet
// CHECK: %[[R:.*]] = vector.gather
// CHECK: return %[[R]]
func.func @gather_broadcast(
    %base: memref<?xf32>, %mask: vector<16xi1>,
    %passthru: vector<16xf32>) -> vector<16xf32> {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<0> : vector<16xi32>
  %res = vector.gather %base[%zero][%offsets], %mask, %passthru
      : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return %res : vector<16xf32>
}

// -----

// A 1-D scatter whose constant indices are 0, 1, ..., 15 is expected to become
// a `vector.maskedstore` at the same base index.
// CHECK-LABEL: @contiguous_scatter
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: vector.maskedstore %[[BASE]][%[[C0]]], %[[MASK]], %[[VALUE]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
func.func @contiguous_scatter(
    %base: memref<?xf32>, %mask: vector<16xi1>, %value: vector<16xf32>) {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  vector.scatter %base[%zero][%offsets], %mask, %value
      : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
  return
}

// -----

// Contiguous indices combined with a `vector.constant_mask` covering all 16
// lanes: the scatter is expected to become a plain `vector.store`.
// CHECK-LABEL: @contiguous_scatter_const_mask
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[VALUE:.*]]: vector<16xf32>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: vector.store %[[VALUE]], %[[BASE]][%[[C0]]] : memref<?xf32>, vector<16xf32>
func.func @contiguous_scatter_const_mask(
    %base: memref<?xf32>, %value: vector<16xf32>) {
  %zero = arith.constant 0 : index
  %offsets = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
  %all_lanes = vector.constant_mask [16] : vector<16xi1>
  vector.scatter %base[%zero][%offsets], %all_lanes, %value
      : memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
  return
}

// -----

// Indices produced by `vector.step` instead of a dense constant: the scatter
// is expected to become a `vector.maskedstore`.
// CHECK-LABEL: @contiguous_scatter_step
// CHECK-SAME: (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>)
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: vector.maskedstore %[[BASE]][%[[C0]]], %[[MASK]], %[[VALUE]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
func.func @contiguous_scatter_step(
    %base: memref<?xf32>, %mask: vector<16xi1>, %value: vector<16xf32>) {
  %zero = arith.constant 0 : index
  %offsets = vector.step : vector<16xindex>
  vector.scatter %base[%zero][%offsets], %mask, %value
      : memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32>
  return
}