// xref: /llvm-project/mlir/test/Dialect/GPU/invalid.mlir (revision ecaf2c335cd612646086ec53315cb1018a5b9d91)
// RUN: mlir-opt -split-input-file -verify-diagnostics %s

func.func @not_enough_sizes(%sz : index) {
  // expected-error@+1 {{expected 6 or more operands, but found 5}}
  "gpu.launch"(%sz, %sz, %sz, %sz, %sz) ({
    gpu.return
  }) {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0>} : (index, index, index, index, index) -> ()
  return
}

// -----

func.func @no_region_attrs(%sz : index) {
  // expected-error@+1 {{unexpected number of region arguments}}
  "gpu.launch"(%sz, %sz, %sz, %sz, %sz, %sz) ({
  ^bb1(%bx: index, %by: index, %bz: index,
       %tx: index, %ty: index, %tz: index):
    gpu.terminator
  }) {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
  return
}

// -----

func.func @launch_requires_gpu_return(%sz : index) {
  // @expected-note@+1 {{in 'gpu.launch' body region}}
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %sz, %sby = %sz, %sbz = %sz)
             threads(%tx, %ty, %tz) in (%stx = %sz, %sty = %sz, %stz = %sz) {
    // @expected-error@+2 {{expected 'gpu.terminator' or a terminator with successors}}
    %one = arith.constant 1 : i32
    "gpu.yield"(%one) : (i32) -> ()
  }
  return
}

// -----

func.func @launch_func_too_few_operands(%sz : index) {
  // expected-error@+1 {{expected 6 or more operands}}
  "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
      {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 0, 0>}
      : (index, index, index, index, index) -> ()
  return
}

// -----

func.func @launch_func_missing_parent_module_attribute(%sz : index) {
  // expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
  gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
  return
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_missing_callee_attribute(%sz : index) {
    // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
        {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0>}
        : (index, index, index, index, index, index) -> ()
    return
  }
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_no_function_attribute(%sz : index) {
    // expected-error@+1 {{custom op 'gpu.launch_func' invalid kind of attribute specified}}
    gpu.launch_func "foo" blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_undefined_module(%sz : index) {
    // expected-error@+1 {{kernel container 'kernels' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  module @kernels {
    // expected-error@+1 {{'gpu.func' op expects parent op 'gpu.module'}}
    gpu.func @kernel_1(%arg1 : !llvm.ptr) {
      gpu.return
    }
  }
}

// -----

module attributes {gpu.container_module} {
  module @kernels {
  }

  func.func @launch_func_missing_module_attribute(%sz : index) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels { }

  func.func @launch_func_undefined_function(%sz : index) {
    // expected-error@+1 {{kernel function '@kernels::@kernel_1' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    // expected-note@+1 {{see the kernel definition here}}
    memref.global "private" @kernel_1 : memref<4xi32>
  }

  func.func @launch_func_undefined_function(%sz : index) {
    // expected-error@+1 {{referenced kernel '@kernels::@kernel_1' is not a function}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  module @kernels {
    gpu.func @kernel_1(%arg1 : !llvm.ptr) kernel {
      gpu.return
    }
  }

  func.func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr) {
    // expected-error@+1 {{kernel module 'kernels' is undefined}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel_1(%arg1 : !llvm.ptr) {
      gpu.return
    }
  }

  func.func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr) {
    // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel_1(%arg1 : !llvm.ptr) kernel {
      gpu.return
    }
  }

  func.func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr) {
    // expected-error@+1 {{got 2 kernel operands but expected 1}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr, %arg : !llvm.ptr)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel_1(%arg1 : f32) kernel {
      gpu.return
    }
  }

  func.func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
    // NOTE(review): the directive below is misspelled ("expected-err"), so the
    // diagnostic verifier ignores it and this case passes vacuously. Also note
    // the launched argument type (f32) matches the kernel's parameter, so no
    // mismatch would be reported even with the spelling fixed -- TODO confirm
    // the current verifier behavior before repairing this case.
    // expected-err@+1 {{type of function argument 0 does not match}}
    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : f32)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  func.func @launch_func_kernel_operand_attr(%sz : index) {
    // expected-error@+1 {{expected ')' in argument list}}
    gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%sz : index {foo})
    return
  }
}

// -----

func.func @reduce_bad_type(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{'gpu.all_reduce' op operand #0 must be Integer or Float}}
  %res = gpu.all_reduce add %arg0 {} : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @reduce_no_op_no_body(%arg0 : f32) {
  // expected-error@+1 {{expected either an op attribute or a non-empty body}}
  %res = "gpu.all_reduce"(%arg0) ({}) : (f32) -> (f32)
  return
}

// -----

func.func @reduce_op_and_body(%arg0 : f32) {
  // expected-error@+1 {{expected either an op attribute or a non-empty body}}
  %res = "gpu.all_reduce"(%arg0) ({
  ^bb(%lhs : f32, %rhs : f32):
    "gpu.yield"(%lhs) : (f32) -> ()
  }) {op = #gpu<all_reduce_op add>} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op(%arg0 : f32) {
  // expected-error@+1 {{invalid op kind}}
  %res = gpu.all_reduce foo %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_minsi(%arg0 : f32) {
  // expected-error@+1 {{`minsi` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce minsi %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_minui(%arg0 : f32) {
  // expected-error@+1 {{`minui` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce minui %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_maxsi(%arg0 : f32) {
  // expected-error@+1 {{`maxsi` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce maxsi %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_maxui(%arg0 : f32) {
  // expected-error@+1 {{`maxui` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce maxui %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_and(%arg0 : f32) {
  // expected-error@+1 {{`and` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce and %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_or(%arg0 : f32) {
  // expected-error@+1 {{`or` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce or %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_xor(%arg0 : f32) {
  // expected-error@+1 {{`xor` reduction operation is not compatible with type 'f32'}}
  %res = gpu.all_reduce xor %arg0 {} : (f32) -> (f32)
  return
}

// -----

func.func @reduce_invalid_op_type_minnumf(%arg0 : i32) {
  // expected-error@+1 {{`minnumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce minnumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @reduce_invalid_op_type_maxnumf(%arg0 : i32) {
  // expected-error@+1 {{`maxnumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce maxnumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @reduce_invalid_op_type_minimumf(%arg0 : i32) {
  // expected-error@+1 {{`minimumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce minimumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @reduce_invalid_op_type_maximumf(%arg0 : i32) {
  // expected-error@+1 {{`maximumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.all_reduce maximumf %arg0 {} : (i32) -> (i32)
  return
}

// -----

func.func @subgroup_reduce_zero_cluster_size(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster size 0 is not a power of two}}
  %res = gpu.subgroup_reduce add %arg0 cluster(size = 0) : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @subgroup_reduce_npot_cluster_size(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster size 3 is not a power of two}}
  %res = gpu.subgroup_reduce add %arg0 cluster(size = 3) : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @subgroup_reduce_zero_cluster_stride(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster stride 0 is not a power of two}}
  %res = gpu.subgroup_reduce add %arg0 cluster(size = 4, stride = 0) : (vector<4xf32>) -> vector<4xf32>
  return
}

// -----

func.func @subgroup_reduce_cluster_stride_without_size(%arg0 : vector<4xf32>) {
  // expected-error@+1 {{cluster stride can only be specified if cluster size is specified}}
  %res = gpu.subgroup_reduce add %arg0 { cluster_stride = 2 : i32 } : (vector<4xf32>) -> vector<4xf32>
  return
}


// -----

func.func @subgroup_reduce_bad_type(%arg0 : vector<2x2xf32>) {
  // expected-error@+1 {{'gpu.subgroup_reduce' op operand #0 must be Integer or Float or vector of}}
  %res = gpu.subgroup_reduce add %arg0 : (vector<2x2xf32>) -> vector<2x2xf32>
  return
}

// -----

func.func @subgroup_reduce_bad_type_scalable(%arg0 : vector<[2]xf32>) {
  // expected-error@+1 {{is not compatible with scalable vector types}}
  %res = gpu.subgroup_reduce add %arg0 : (vector<[2]xf32>) -> vector<[2]xf32>
  return
}

// -----

func.func @subgroup_reduce_invalid_op_type_and(%arg0 : f32) {
  // expected-error@+1 {{`and` reduction operation is not compatible with type 'f32'}}
  %res = gpu.subgroup_reduce and %arg0 : (f32) -> (f32)
  return
}

// -----

func.func @subgroup_reduce_invalid_op_type_maxnumf(%arg0 : i32) {
  // expected-error@+1 {{`maxnumf` reduction operation is not compatible with type 'i32'}}
  %res = gpu.subgroup_reduce maxnumf %arg0 : (i32) -> (i32)
  return
}

// -----

func.func @reduce_incorrect_region_arguments(%arg0 : f32) {
  // expected-error@+1 {{expected two region arguments}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32):
    "gpu.yield"(%lhs) : (f32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_region_arguments(%arg0 : f32) {
  // expected-error@+1 {{incorrect region argument type}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : i32):
    "gpu.yield"(%lhs) : (f32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{expected one gpu.yield operand}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : f32):
    "gpu.yield"(%lhs, %rhs) : (f32, f32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{incorrect gpu.yield type}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : f32):
    %one = arith.constant 1 : i32
    "gpu.yield"(%one) : (i32) -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @reduce_incorrect_yield(%arg0 : f32) {
  // expected-error@+1 {{expected gpu.yield op in region}}
  %res = gpu.all_reduce %arg0 {
  ^bb(%lhs : f32, %rhs : f32):
    "test.finish" () : () -> ()
  } : (f32) -> (f32)
  return
}

// -----

func.func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) {
  // expected-error@+1 {{op failed to verify that all of {value, shuffleResult} have same type}}
  %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = #gpu<shuffle_mode xor> } : (f32, i32, i32) -> (i32, i1)
  return
}

// -----

func.func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) {
  // expected-error@+1 {{op operand #0 must be Integer or Float or vector of Integer or Float values of ranks 1, but got 'index'}}
  %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index
  return
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{custom op 'gpu.func' gpu.func requires named arguments}}
    gpu.func @kernel_1(f32, f32) {
    ^bb0(%arg0: f32):
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{attribute 'function_type' failed to satisfy constraint: type attribute of function type}}
    "gpu.func"() ({
      gpu.return
    }) {sym_name="kernel_1", function_type=f32} : () -> ()
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @below {{'gpu.func' op expected memref type in attribution}}
    gpu.func @kernel() workgroup(%0: i32) {
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @below {{'gpu.func' op expected memory space workgroup in attribution}}
    gpu.func @kernel() workgroup(%0: memref<4xf32, #gpu.address_space<private>>) {
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @below {{'gpu.func' op expected memory space private in attribution}}
    gpu.func @kernel() private(%0: memref<4xf32, #gpu.address_space<workgroup>>) {
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-note @+1 {{return type declared here}}
    gpu.func @kernel() {
      %0 = arith.constant 0 : index
      // expected-error @+1 {{'gpu.return' op expected 0 result operands}}
      gpu.return %0 : index
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{'gpu.func' op expected void return type for kernel function}}
    gpu.func @kernel() -> index kernel {
      %0 = arith.constant 0 : index
      gpu.return
    }
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{'gpu.func' op expected at least 5 arguments to body region}}
    "gpu.func"() ({
    ^bb0(%arg0: f32, %arg1: memref<?xf32>, %arg2: memref<5xf32, 3>, %arg3: memref<5xf32, 5>):
      "gpu.return"() : () -> ()
    } ) {function_type = (f32, memref<?xf32>) -> (), gpu.kernel, sym_name = "kernel_1", workgroup_attributions = 3: i64} : () -> ()
  }
}

// -----

module {
  gpu.module @gpu_funcs {
    // expected-error @+1 {{expected body with at least one block}}
    "gpu.func"() ({}) {function_type = () -> (), gpu.kernel, sym_name = "kernel"} : () -> ()
  }
}

// -----

func.func @sync_wait_with_result() {
  // expected-error @+1 {{cannot name an operation with no results}}
  %t = gpu.wait
}

// -----

func.func @async_wait_without_result() {
  // expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}}
  gpu.wait async
}

// -----

func.func @memcpy_incompatible_type(%dst : memref<?xf32>, %src : memref<?xi32>) {
  // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible element type}}
  gpu.memcpy %dst, %src  : memref<?xf32>, memref<?xi32>
}

// -----

func.func @memcpy_incompatible_shape(%dst : memref<7xf32>, %src : memref<9xf32>) {
  // expected-error @+1 {{'gpu.memcpy' op arguments have incompatible shape}}
  gpu.memcpy %dst, %src  : memref<7xf32>, memref<9xf32>
}

// -----

func.func @memset_incompatible_shape(%dst : memref<?xf32>, %value : i32) {
  // expected-error @+1 {{'gpu.memset' op failed to verify that all of {dst, value} have same element type}}
  gpu.memset %dst, %value  : memref<?xf32>, i32
}

// -----

func.func @mmamatrix_invalid_shape(){
    %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
    %i = arith.constant 16 : index
    // expected-error @+1 {{MMAMatrixType must have exactly two dimensions}}
    %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16x16xf16, "AOp">
    return
}

// -----

func.func @mmamatrix_operand_type(){
    %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
    %i = arith.constant 16 : index
    // expected-error @+1 {{operand expected to be one of AOp, BOp or COp}}
    %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "EOp">
    return
}

// -----

func.func @mmamatrix_invalid_element_type(){
    %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
    %i = arith.constant 16 : index
    // expected-error @+1 {{MMAMatrixType elements must be SI8, UI8, I32, F16, or F32}}
    %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xbf16, "AOp">
    return
}

// -----

#layout_map_col_major = affine_map<(i, j) -> (j, i)>

func.func @mmaLoadOp_identity_layout(){
    %wg = memref.alloca() {alignment = 32} : memref<32x32xf16, #layout_map_col_major, 3>
    %i = arith.constant 16 : index
    // expected-error @+1 {{expected source memref most minor dim must have unit stride}}
    %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, #layout_map_col_major, 3> -> !gpu.mma_matrix<16x16xf16, "AOp">
    return
}

// -----

func.func @mma_invalid_memref_type(%src: memref<32x4xvector<4x8xf32>>, %i: index) {
    // expected-error @+1 {{operand #0 must be memref of 8-bit signless integer or 32-bit signless integer or 16-bit float or 32-bit float or vector of 8-bit signless integer or 32-bit signless integer or 16-bit float or 32-bit float values of ranks 1 values}}
    %0 = gpu.subgroup_mma_load_matrix %src[%i, %i] {leadDimension = 4 : index} : memref<32x4xvector<4x8xf32>> -> !gpu.mma_matrix<16x16xf16, "AOp">
    return
}

// -----

#layout_map_col_major = affine_map<(i, j) -> (j, i)>

func.func @wmmaStoreOp_invalid_map(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
    %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, #layout_map_col_major, 3>
    %i = arith.constant 16 : index
    %j = arith.constant 16 : index
    // expected-error @+1 {{expected destination memref most minor dim must have unit stride}}
    gpu.subgroup_mma_store_matrix %arg0, %sg[%i,%j] {leadDimension= 32 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x32xf16,#layout_map_col_major, 3>
    return
}

// -----

func.func @wmmaStoreOp_invalid_store_operand(%arg0 : !gpu.mma_matrix<16x16xf16, "AOp">) -> () {
    %sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
    %i = arith.constant 16 : index
    %j = arith.constant 16 : index
    // expected-error @+1 {{expected the operand matrix being stored to have 'COp' operand type}}
    gpu.subgroup_mma_store_matrix %arg0, %sg[%i,%j] {leadDimension= 32 : index} : !gpu.mma_matrix<16x16xf16, "AOp">, memref<32x32xf16, 3>
    return
}

// -----

func.func @wmmaMmaOp_invalid_operand_order(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
    // expected-error @+1 {{operands must be in the order AOp, BOp, COp}}
    %D = gpu.subgroup_mma_compute %B, %A, %C : !gpu.mma_matrix<16x16xf16, "BOp">, !gpu.mma_matrix<16x16xf16, "AOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
    return
}

// -----

func.func @wmmaMmaOp_invalid_operand_shapes(%A : !gpu.mma_matrix<16x32xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
    // expected-error @+1 {{operand shapes do not satisfy matmul constraints}}
    %D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x32xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
    return
}

// -----

// Number of symbol operand count less than memref symbol count.
func.func @alloc() {
   // expected-error@+1 {{symbol operand count does not equal memref symbol count}}
   %1 = gpu.alloc() : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
   return
}

// -----

// Number of symbol operand count greater than memref symbol count.
func.func @alloc() {
   %0 = arith.constant 7 : index
   // expected-error@+1 {{symbol operand count does not equal memref symbol count}}
   %1 = gpu.alloc()[%0] : memref<2x4xf32, 1>
   return
}

// -----

// Number of dynamic dimension operand count greater than memref dynamic dimension count.
func.func @alloc() {
   %0 = arith.constant 7 : index
   // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}}
   %1 = gpu.alloc(%0, %0) : memref<2x?xf32, 1>
   return
}

// -----

// Number of dynamic dimension operand count less than memref dynamic dimension count.
func.func @alloc() {
   %0 = arith.constant 7 : index
   // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}}
   %1 = gpu.alloc(%0) : memref<2x?x?xf32, 1>
   return
}

// -----

module attributes {gpu.container_module} {
  // expected-error@+1 {{'func.func' op gpu.known_block_size must be a dense i32 array}}
  func.func @kernel() attributes {gpu.known_block_size = 32 : i32} {
    func.return
  }
}

// -----

module attributes {gpu.container_module} {
  gpu.module @kernel {
    // expected-error@+1 {{'gpu.func' op attribute 'known_block_size' failed to satisfy constraint: i32 dense array attribute with 3 elements (if present)}}
    gpu.func @kernel() kernel attributes {known_block_size = array<i32: 2, 1>} {
      gpu.return
    }
  }
}

// -----

module {
  // expected-error@+1 {{'func.func' op gpu.known_block_size must contain exactly 3 elements}}
  func.func @kernel() attributes {gpu.known_block_size = array<i32: 2, 1>} {
    func.return
  }
}

// -----

module {
  // expected-error @+1 {{'gpu.module' op attribute 'targets' failed to satisfy constraint: array of GPU target attributes with at least 1 elements}}
  gpu.module @gpu_funcs [] {
  }
}

// -----

module {
  // expected-error @+1 {{'gpu.module' op attribute 'targets' failed to satisfy constraint: array of GPU target attributes with at least 1 elements}}
  gpu.module @gpu_funcs [1] {
  }
}

// -----

module {
  // expected-error @+1 {{'gpu.binary' op attribute 'objects' failed to satisfy constraint: an array of GPU object attributes with at least 1 elements}}
  gpu.binary @binary []
}

// -----

module {
  // expected-error @+1 {{'gpu.binary' op attribute 'offloadingHandler' failed to satisfy constraint: any attribute with the `OffloadingTranslationAttrTrait` trait.}}
  gpu.binary @binary <1> [#gpu.object<#nvvm.target, "">]
}

// -----

func.func @main() {
  %shmemSize = arith.constant 10000 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
             threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
             dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op address space must be address_space<workgroup>}}
    %0 = gpu.dynamic_shared_memory : memref<?xi8>
    gpu.terminator
  }
  return
}


// -----

func.func @main() {
  %shmemSize = arith.constant 8192 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
             threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
             dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op result memref type must be memref<?xi8, #gpu.address_space<workgroup>>}}
    %0 = gpu.dynamic_shared_memory : memref<1xi8, #gpu.address_space<workgroup>>
    gpu.terminator
  }
  return
}

// -----

func.func @main(%arg0 : index) {
  %shmemSize = arith.constant 8192 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
             threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
             dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op address space must be address_space<workgroup>}}
    %0 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<private>>
    gpu.terminator
  }
  return
}

// -----

func.func @main(%arg0 : index) {
  %shmemSize = arith.constant 8192 : i32
  %c1 = arith.constant 1 : index
  gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
             threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
             dynamic_shared_memory_size %shmemSize
  {
    // expected-error @below {{'gpu.dynamic_shared_memory' op result #0 must be 1D memref of 8-bit signless integer values, but got 'memref<?xf32, #gpu.address_space<workgroup>}}
    %0 = gpu.dynamic_shared_memory : memref<?xf32, #gpu.address_space<workgroup>>
    gpu.terminator
  }
  return
}

// -----

module attributes {gpu.container_module} {
  // expected-error@+1 {{expected attribute value}}
  gpu.module @kernel <> {
  }
}

// -----

gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">,
  // expected-error@+1{{expected all kernels to be uniquely named}}
    kernels = #gpu.kernel_table<[
      #gpu.kernel_metadata<"kernel", (i32) -> ()>,
      #gpu.kernel_metadata<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}>
  // expected-error@below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}}
    ]>,
    bin = "BLOB">
  ]

// -----

func.func @warp_wrong_num_outputs(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected same number of yield operands and return values.}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[64] -> (vector<4xi32>) {
  }
  return
}

// -----

func.func @warp_wrong_num_inputs(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected same number op arguments and block arguments.}}
  gpu.warp_execute_on_lane_0(%laneid)[64] {
  ^bb0(%arg0 : vector<128xi32>) :
  }
  return
}

// -----

func.func @warp_wrong_return_distribution(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op incompatible distribution dimensions from 'vector<128xi32>' to 'vector<4xi32>'}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[64] -> (vector<4xi32>) {
    %0 = arith.constant dense<2>: vector<128xi32>
    gpu.yield %0 : vector<128xi32>
  }
  return
}


// -----

func.func @warp_wrong_arg_distribution(%laneid: index, %v0 : vector<4xi32>) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op incompatible distribution dimensions from 'vector<128xi32>' to 'vector<4xi32>'}}
  gpu.warp_execute_on_lane_0(%laneid)[64]
  args(%v0 : vector<4xi32>) {
   ^bb0(%arg0 : vector<128xi32>) :
  }
  return
}

// -----

func.func @warp_2_distributed_dims(%laneid: index) {
  // expected-error@+1 {{incompatible distribution dimensions from 'vector<128x128xi32>' to 'vector<4x4xi32>' with warp size = 32}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<4x4xi32>) {
    %0 = arith.constant dense<2>: vector<128x128xi32>
    gpu.yield %0 : vector<128x128xi32>
  }
  return
}

// -----

func.func @warp_2_distributed_dims(%laneid: index) {
  // expected-error@+1 {{expected expanded vector dimension #1 (8) to be a multipler of the distributed vector dimension (3)}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1x3xi32>) {
    %0 = arith.constant dense<2>: vector<4x8xi32>
    gpu.yield %0 : vector<4x8xi32>
  }
  return
}

// -----

func.func @warp_mismatch_rank(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected distributed vectors to have same rank and element type.}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<4x4xi32>) {
    %0 = arith.constant dense<2>: vector<128xi32>
    gpu.yield %0 : vector<128xi32>
  }
  return
}

// -----

func.func @warp_mismatch_rank(%laneid: index) {
  // expected-error@+1 {{'gpu.warp_execute_on_lane_0' op expected vector type for distributed operands.}}
  %2 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (i32) {
    %0 = arith.constant dense<2>: vector<128xi32>
    gpu.yield %0 : vector<128xi32>
  }
  return
}
966