// RUN: mlir-opt -split-input-file -verify-diagnostics %s

// Negative tests for the GPU dialect. Each snippet between `// -----`
// separators is intentionally invalid MLIR and must emit exactly the
// diagnostic named in the check directive directly above the offending op.

func.func @not_enough_sizes(%sz : index) {
  // expected-error@+1 {{expected 6 or more operands, but found 5}}
  "gpu.launch"(%sz, %sz, %sz, %sz, %sz) ({
    gpu.return
  }) {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0>} : (index, index, index, index, index) -> ()
  return
}

// -----

func.func @no_region_attrs(%sz : index) {
  // expected-error@+1 {{unexpected number of region arguments}}
  "gpu.launch"(%sz, %sz, %sz, %sz, %sz, %sz) ({
  ^bb1(%bx: index, %by: index, %bz: index,
       %tx: index, %ty: index, %tz: index):
    gpu.terminator
  }) {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
  return
}

// -----

func.func @launch_requires_gpu_return(%sz : index) {
  // @expected-note@+1 {{in 'gpu.launch' body region}}
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %sz, %sby = %sz, %sbz = %sz)
             threads(%tx, %ty, %tz) in (%stx = %sz, %sty = %sz, %stz = %sz) {
    // @expected-error@+2 {{expected 'gpu.terminator' or a terminator with successors}}
    %one = arith.constant 1 : i32
    "gpu.yield"(%one) : (i32) -> ()
  }
  return
}

// -----

func.func @launch_func_too_few_operands(%sz : index) {
  // expected-error@+1 {{expected 6 or more operands}}
  "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
      {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 0, 0>}
      : (index, index, index, index, index) -> ()
  return
}

// -----

func.func @launch_func_missing_parent_module_attribute(%sz : index) {
  // expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
  gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
  return
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_missing_callee_attribute(%sz : index) {
    // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
        {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0>}
        : (index, index, index, index, index, index) -> ()
    return
  }
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_no_function_attribute(%sz : index) {
    // expected-error@+1 {{custom op 'gpu.launch_func' invalid kind of attribute specified}}
    gpu.launch_func "foo" blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_undefined_module(%sz : index) {
    // expected-error@+1 {{kernel container 'kernels' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  module @kernels {
    // expected-error@+1 {{'gpu.func' op expects parent op 'gpu.module'}}
    gpu.func @kernel_1(%arg1 : !llvm.ptr) {
      gpu.return
    }
  }
}

// -----

module attributes {gpu.container_module} {
  module @kernels {
  }

  func.func @launch_func_missing_module_attribute(%sz : index) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels { }

  func.func @launch_func_undefined_function(%sz : index) {
    // expected-error@+1 {{kernel function '@kernels::@kernel_1' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    // expected-note@+1 {{see the kernel definition here}}
    memref.global "private" @kernel_1 : memref<4xi32>
  }

  func.func @launch_func_undefined_function(%sz : index) {
    // expected-error@+1 {{referenced kernel '@kernels::@kernel_1' is not a function}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  module @kernels {
    gpu.func @kernel_1(%arg1 : !llvm.ptr) kernel {
      gpu.return
    }
  }

  func.func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel_1(%arg1 : !llvm.ptr) {
      gpu.return
    }
  }

  func.func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr) {
    // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel_1(%arg1 : !llvm.ptr) kernel {
      gpu.return
    }
  }

  func.func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr) {
    // expected-error@+1 {{got 2 kernel operands but expected 1}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr, %arg : !llvm.ptr)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel_1(%arg1 : f32) kernel {
      gpu.return
    }
  }

  func.func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
    // expected-err@+1 {{type of function argument 0 does not match}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : f32)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_kernel_operand_attr(%sz : index) {
    // expected-error@+1 {{expected ')' in argument list}}
    gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%sz : index {foo})
    return
  }
}

// -----

func.func @reduce_bad_type(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{'gpu.all_reduce' op operand #0 must be Integer or Float}}
  %res = gpu.all_reduce add %arg0 {} : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @reduce_no_op_no_body(%arg0 : f32) {
  // expected-error@+1 {{expected either an op attribute or a non-empty body}}
  %res = "gpu.all_reduce"(%arg0) ({}) : (f32) -> (f32)
  return
}

// -----

func.func @reduce_op_and_body(%arg0 : f32) {
  // expected-error@+1 {{expected either an op attribute or a non-empty body}}
  %res = "gpu.all_reduce"(%arg0) ({
  ^bb(%lhs : f32, %rhs : f32):
    "gpu.yield"(%lhs) : (f32) -> ()
  }) {op = #gpu<all_reduce_op add>} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op(%arg0 : f32) {
  // expected-error@+1 {{invalid op kind}}
  %res = gpu.all_reduce foo %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_minsi(%arg0 : f32) {
  // expected-error@+1 {{`minsi` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce minsi %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_minui(%arg0 : f32) {
  // expected-error@+1 {{`minui` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce minui %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_maxsi(%arg0 : f32) {
  // expected-error@+1 {{`maxsi` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce maxsi %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_maxui(%arg0 : f32) {
  // expected-error@+1 {{`maxui` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce maxui %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_and(%arg0 : f32) {
  // expected-error@+1 {{`and` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce and %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_or(%arg0 : f32) {
  // expected-error@+1 {{`or` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce or %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_xor(%arg0 : f32) {
  // expected-error@+1 {{`xor` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce xor %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_minnumf(%arg0 : i32) {
  // expected-error@+1 {{`minnumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce minnumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @reduce_invalid_op_type_maxnumf(%arg0 : i32) {
  // expected-error@+1 {{`maxnumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce maxnumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @reduce_invalid_op_type_minimumf(%arg0 : i32) {
  // expected-error@+1 {{`minimumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce minimumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @reduce_invalid_op_type_maximumf(%arg0 : i32) {
  // expected-error@+1 {{`maximumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce maximumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @subgroup_reduce_zero_cluster_size(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster size 0 is not a power of two}}
  %res = gpu.subgroup_reduce add %arg0 cluster(size = 0) : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @subgroup_reduce_npot_cluster_size(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster size 3 is not a power of two}}
  %res = gpu.subgroup_reduce add %arg0 cluster(size = 3) : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @subgroup_reduce_zero_cluster_stride(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster stride 0 is not a power of two}}
  %res = gpu.subgroup_reduce add %arg0 cluster(size = 4, stride = 0) : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @subgroup_reduce_cluster_stride_without_size(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster stride can only be specified if cluster size is specified}}
  %res = gpu.subgroup_reduce add %arg0 { cluster_stride = 2 : i32 } : (vector<4xf32>) -> vector<4xf32>
  return
}


// -----

func.func @subgroup_reduce_bad_type(%arg0 : vector<2x2xf32>) {
  // expected-error@+1 {{'gpu.subgroup_reduce' op operand #0 must be Integer or Float or vector of}}
  %res = gpu.subgroup_reduce add %arg0 : (vector<2x2xf32>) -> vector<2x2xf32>
  return
}

// -----

func.func @subgroup_reduce_bad_type_scalable(%arg0 : vector<[2]xf32>) {
  // expected-error@+1 {{is not compatible with scalable vector types}}
  %res = gpu.subgroup_reduce add %arg0 : (vector<[2]xf32>) -> vector<[2]xf32>
  return
}

// -----

func.func @subgroup_reduce_invalid_op_type_and(%arg0 : f32) {
  // expected-error@+1 {{`and` reduction operation is not compatible with type 'f32'}}
  %res = gpu.subgroup_reduce and %arg0 : (f32) -> (f32)
  return
}

// -----

func.func @subgroup_reduce_invalid_op_type_maxnumf(%arg0 : i32) {
  // expected-error@+1 {{`maxnumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.subgroup_reduce maxnumf %arg0 : (i32) -> (i32)
  return
}

// -----

func.func @reduce_incorrect_region_arguments(%arg0 : f32) {
  // expected-error@+1 {{expected two region arguments}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32):
    "gpu.yield"(%lhs) : (f32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_region_arguments(%arg0 : f32) {
  // expected-error@+1 {{incorrect region argument type}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : i32):
    "gpu.yield"(%lhs) : (f32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{expected one gpu.yield operand}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : f32):
    "gpu.yield"(%lhs, %rhs) : (f32, f32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{incorrect gpu.yield type}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : f32):
    %one = arith.constant 1 : i32
    "gpu.yield"(%one) : (i32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{expected gpu.yield op in region}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : f32):
    "test.finish" () : () -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
  // expected-error@+1 {{op failed to verify that all of {value, shuffleResult} have same type}}
  %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = #gpu<shuffle_mode xor> } : (f32, i32, i32) -> (i32, i1)
  return
}

// -----

func.func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
  // expected-error@+1 {{op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1, but got 'index'}}
  %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index
  return
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{custom op 'gpu.func' gpu.func requires named arguments}}
    gpu.func @kernel_1(f32, f32) {
    ^bb0(%arg0: f32):
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{attribute 'function_type' failed to satisfy constraint: type attribute of function type}}
    "gpu.func"() ({
      gpu.return
    }) {sym_name="kernel_1", function_type=f32} : () -> ()
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @below {{'gpu.func' op expected memref type in attribution}}
    gpu.func @kernel() workgroup(%0: i32) {
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @below {{'gpu.func' op expected memory space workgroup in attribution}}
    gpu.func @kernel() workgroup(%0: memref<4xf32, #gpu.address_space<private>>) {
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @below {{'gpu.func' op expected memory space private in attribution}}
    gpu.func @kernel() private(%0: memref<4xf32, #gpu.address_space<workgroup>>) {
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-note @+1 {{return type declared here}}
    gpu.func @kernel() {
      %0 = arith.constant 0 : index
      // expected-error @+1 {{'gpu.return' op expected 0 result operands}}
      gpu.return %0 : index
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{'gpu.func' op expected void return type for kernel function}}
    gpu.func @kernel() -> index kernel {
      %0 = arith.constant 0 : index
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{'gpu.func' op expected at least 5 arguments to body region}}
    "gpu.func"() ({
    ^bb0(%arg0: f32, %arg1: memref<?xf32>, %arg2: memref<5xf32, 3>, %arg3: memref<5xf32, 5>):
      "gpu.return"() : () -> ()
    } ) {function_type = (f32, memref<?xf32>) -> (), gpu.kernel, sym_name = "kernel_1", workgroup_attributions = 3: i64} : () -> ()
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{expected body with at least one block}}
    "gpu.func"() ({}) {function_type = () -> (), gpu.kernel, sym_name = "kernel"} : () -> ()
  }
}

// -----

func.func @sync_wait_with_result() {
  // expected-error @+1 {{cannot name an operation with no results}}
  %t = gpu.wait
}

// -----

func.func @async_wait_without_result() {
  // expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}}
  gpu.wait async
}

// -----

func.func @memcpy_incompatible_type(%dst : memref<?xf32>, %src : memref<?xi32>) {
  // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible element type}}
  gpu.memcpy %dst, %src : memref<?xf32>, memref<?xi32>
}

// -----

func.func @memcpy_incompatible_shape(%dst : memref<7xf32>, %src : memref<9xf32>) {
  // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible shape}}
  gpu.memcpy %dst, %src : memref<7xf32>, memref<9xf32>
}

// -----

func.func @memset_incompatible_shape(%dst : memref<?xf32>, %value : i32) {
  // expected-error @+1 {{'gpu.memset' op failed to verify that all of {dst, value} have same element type}}
  gpu.memset %dst, %value : memref<?xf32>, i32
}

// -----

func.func @mmamatrix_invalid_shape(){
  %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
  %i = arith.constant 16 : index
  // expected-error @+1 {{MMAMatrixType must have exactly two dimensions}}
  %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16x16xf16, "AOp">
  return
}

// -----

func.func @mmamatrix_operand_type(){
  %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
  %i = arith.constant 16 : index
  // expected-error @+1 {{operand expected to be one of AOp, BOp or COp}}
  %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "EOp">
  return
}

// -----

func.func @mmamatrix_invalid_element_type(){
  %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
  %i = arith.constant 16 : index
  // expected-error @+1 {{MMAMatrixType elements must be SI8, UI8, I32, F16, or F32}}
  %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xbf16, "AOp">
  return
}

// -----

#layout_map_col_major = affine_map<(i, j) -> (j, i)>

func.func @mmaLoadOp_identity_layout(){
  %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, #layout_map_col_major, 3>
  %i = arith.constant 16 : index
  // expected-error @+1 {{expected source memref most minor dim must have unit stride}}
  %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, #layout_map_col_major, 3> -> !gpu.mma_matrix<16x16xf16, "AOp">
  return
}

// -----

func.func @mma_invalid_memref_type(%src: memref<32x4xvector<4x8xf32>>, %i: index) {
  // expected-error @+1 {{operand #0 must be memref of 8-bit signless integer or 32-bit signless integer or 16-bit float or 32-bit float or vector of 8-bit signless integer or 32-bit signless integer or 16-bit float or 32-bit float values of ranks 1 values}}
  %0 = gpu.subgroup_mma_load_matrix %src[%i, %i] {leadDimension = 4 : index} : memref<32x4xvector<4x8xf32>> -> !gpu.mma_matrix<16x16xf16, "AOp">
  return
}

// -----

#layout_map_col_major = affine_map<(i, j) -> (j, i)>

func.func @wmmaStoreOp_invalid_map(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
  %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, #layout_map_col_major, 3>
  %i = arith.constant 16 : index
  %j = arith.constant 16 : index
  // expected-error @+1 {{expected destination memref most minor dim must have unit stride}}
  gpu.subgroup_mma_store_matrix %arg0, %sg[%i,%j] {leadDimension= 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x32xf16,#layout_map_col_major, 3>
  return
}

// -----

func.func @wmmaStoreOp_invalid_store_operand(%arg0 : !gpu.mma_matrix<16x16xf16, "AOp">) -> () {
  %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
  %i = arith.constant 16 : index
  %j = arith.constant 16 : index
  // expected-error @+1 {{expected the operand matrix being stored to have 'COp' operand type}}
  gpu.subgroup_mma_store_matrix %arg0, %sg[%i,%j] {leadDimension= 32 : index} : !gpu.mma_matrix<16x16xf16, "AOp">, memref<32x32xf16, 3>
  return
}

// -----

func.func @wmmaMmaOp_invalid_operand_order(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
  // expected-error @+1 {{operands must be in the order AOp, BOp, COp}}
  %D = gpu.subgroup_mma_compute %B, %A, %C : !gpu.mma_matrix<16x16xf16, "BOp">, !gpu.mma_matrix<16x16xf16, "AOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
  return
}

// -----

func.func @wmmaMmaOp_invalid_operand_shapes(%A : !gpu.mma_matrix<16x32xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
  // expected-error @+1 {{operand shapes do not satisfy matmul constraints}}
  %D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x32xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
  return
}

// -----

// Number of symbol operand count less than memref symbol count.
func.func @alloc() {
  // expected-error@+1 {{symbol operand count does not equal memref symbol count}}
  %1 = gpu.alloc() : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
  return
}

// -----

// Number of symbol operand count greater than memref symbol count.
func.func @alloc() {
  %0 = arith.constant 7 : index
  // expected-error@+1 {{symbol operand count does not equal memref symbol count}}
  %1 = gpu.alloc()[%0] : memref<2x4xf32, 1>
  return
}

// -----

// Number of dynamic dimension operand count greater than memref dynamic dimension count.
func.func @alloc() {
  %0 = arith.constant 7 : index
  // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}}
  %1 = gpu.alloc(%0, %0) : memref<2x?xf32, 1>
  return
}

// -----

// Number of dynamic dimension operand count less than memref dynamic dimension count.
func.func @alloc() {
  %0 = arith.constant 7 : index
  // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}}
  %1 = gpu.alloc(%0) : memref<2x?x?xf32, 1>
  return
}

// -----

module attributes {gpu.container_module} {
  // expected-error@+1 {{'func.func' op gpu.known_block_size must be a dense i32 array}}
  func.func @kernel() attributes {gpu.known_block_size = 32 : i32} {
    func.return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernel {
    // expected-error@+1 {{'gpu.func' op attribute 'known_block_size' failed to satisfy constraint: i32 dense array attribute with 3 elements (if present)}}
    gpu.func @kernel() kernel attributes {known_block_size = array<i32: 2, 1>} {
      gpu.return
    }
  }
}

// -----

module {
  // expected-error@+1 {{'func.func' op gpu.known_block_size must contain exactly 3 elements}}
  func.func @kernel() attributes {gpu.known_block_size = array<i32: 2, 1>} {
    func.return
  }
}

// -----

module {
  // expected-error @+1 {{'gpu.module' op attribute 'targets' failed to satisfy constraint: array of GPU target attributes with at least 1 elements}}
  gpu.module @gpu_funcs [] {
  }
}

// -----

module {
  // expected-error @+1 {{'gpu.module' op attribute 'targets' failed to satisfy constraint: array of GPU target attributes with at least 1 elements}}
  gpu.module @gpu_funcs [1] {
  }
}

// -----

module {
  // expected-error @+1 {{'gpu.binary' op attribute 'objects' failed to satisfy constraint: an array of GPU object attributes with at least 1 elements}}
  gpu.binary @binary []
}

// -----

module {
  // expected-error @+1 {{'gpu.binary' op attribute 'offloadingHandler' failed to satisfy constraint: any attribute with the `OffloadingTranslationAttrTrait` trait.}}
  gpu.binary @binary <1> [#gpu.object<#nvvm.target, "">]
}

// -----

func.func @main() {
  %shmemSize = arith.constant 10000 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
    threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
    dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op address space must be address_space<workgroup>}}
    %0 = gpu.dynamic_shared_memory : memref<?xi8>
    gpu.terminator
  }
  return
}


// -----

func.func @main() {
  %shmemSize = arith.constant 8192 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
    threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
    dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op result memref type must be memref<?xi8, #gpu.address_space<workgroup>>}}
    %0 = gpu.dynamic_shared_memory : memref<1xi8, #gpu.address_space<workgroup>>
    gpu.terminator
  }
  return
}

// -----

func.func @main(%arg0 : index) {
  %shmemSize = arith.constant 8192 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
    threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
    dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op address space must be address_space<workgroup>}}
    %0 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<private>>
    gpu.terminator
  }
  return
}

// -----

func.func @main(%arg0 : index) {
  %shmemSize = arith.constant 8192 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
    threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
    dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op result #0 must be 1D memref of 8-bit signless integer values, but got 'memref<?xf32, #gpu.address_space<workgroup>}}
    %0 = gpu.dynamic_shared_memory : memref<?xf32, #gpu.address_space<workgroup>>
    gpu.terminator
  }
  return
}

// -----

module attributes {gpu.container_module} {
  // expected-error@+1 {{expected attribute value}}
  gpu.module @kernel <> {
  }
}

// -----

gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">,
    // expected-error@+1{{expected all kernels to be uniquely named}}
    kernels = #gpu.kernel_table<[
      #gpu.kernel_metadata<"kernel", (i32) -> ()>,
      #gpu.kernel_metadata<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}>
    // expected-error@below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}}
    ]>,
    bin = "BLOB">
  ]

// -----

func.func @warp_wrong_num_outputs(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected same number of yield operands and return values.}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[64] -> (vector<4xi32>) {
  }
  return
}

// -----

func.func @warp_wrong_num_inputs(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected same number op arguments and block arguments.}}
  gpu.warp_execute_on_lane_0(%laneid)[64] {
  ^bb0(%arg0 : vector<128xi32>) :
  }
  return
}

// -----

func.func @warp_wrong_return_distribution(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op incompatible distribution dimensions from 'vector<128xi32>' to 'vector<4xi32>'}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[64] -> (vector<4xi32>) {
    %0 = arith.constant dense<2>: vector<128xi32>
    gpu.yield %0 : vector<128xi32>
  }
  return
}


// -----

func.func @warp_wrong_arg_distribution(%laneid: index, %v0 : vector<4xi32>) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op incompatible distribution dimensions from 'vector<128xi32>' to 'vector<4xi32>'}}
  gpu.warp_execute_on_lane_0(%laneid)[64]
  args(%v0 : vector<4xi32>) {
  ^bb0(%arg0 : vector<128xi32>) :
  }
  return
}

// -----

func.func @warp_2_distributed_dims(%laneid: index) {
  // expected-error@+1 {{incompatible distribution dimensions from 'vector<128x128xi32>' to 'vector<4x4xi32>' with warp size = 32}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<4x4xi32>) {
    %0 = arith.constant dense<2>: vector<128x128xi32>
    gpu.yield %0 : vector<128x128xi32>
  }
  return
}

// -----

func.func @warp_2_distributed_dims(%laneid: index) {
  // expected-error@+1 {{expected expanded vector dimension #1 (8) to be a multipler of the distributed vector dimension (3)}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1x3xi32>) {
    %0 = arith.constant dense<2>: vector<4x8xi32>
    gpu.yield %0 : vector<4x8xi32>
  }
  return
}

// -----

func.func @warp_mismatch_rank(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected distributed vectors to have same rank and element type.}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<4x4xi32>) {
    %0 = arith.constant dense<2>: vector<128xi32>
    gpu.yield %0 : vector<128xi32>
  }
  return
}

// -----

func.func @warp_mismatch_rank(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected vector type for distributed operands.}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (i32) {
    %0 = arith.constant dense<2>: vector<128xi32>
    gpu.yield %0 : vector<128xi32>
  }
  return
}