xref: /llvm-project/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1// RUN: mlir-opt %s \
2// RUN:  -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_90a cubin-features=+ptx80 opt-level=3" \
3// RUN:  | mlir-runner \
4// RUN:   --shared-libs=%mlir_cuda_runtime \
5// RUN:   --shared-libs=%mlir_runner_utils \
6// RUN:   --shared-libs=%mlir_c_runner_utils \
7// RUN:   --entry-point-result=void \
8// RUN:  | FileCheck %s
9
10// CHECK: clusterIdx: (1, 1, 0) in Cluster Dimension: (2, 2, 1) blockIdx: (3, 3, 0)
11
12module attributes {gpu.container_module} {
13  func.func @main() {
14    %c1 = arith.constant 1 : index
15    %c2 = arith.constant 2 : index
16    %c4 = arith.constant 4 : index
17    gpu.launch_func  @gpumodule::@kernel_cluster clusters in(%c2,%c2,%c1)  blocks in (%c4, %c4, %c1) threads in (%c1, %c1, %c1)
18    return
19  }
20  gpu.module @gpumodule {
21    gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 4, 1>} {
22      %cidX = gpu.cluster_id  x
23      %cidY = gpu.cluster_id  y
24      %cidZ = gpu.cluster_id  z
25      %cdimX = gpu.cluster_dim_blocks  x
26      %cdimY = gpu.cluster_dim_blocks  y
27      %cdimZ = gpu.cluster_dim_blocks  z
28      %bidX = gpu.block_id  x
29      %bidY = gpu.block_id  y
30      %bidZ = gpu.block_id  z
31      %cidX_i32 = index.casts %cidX : index to i32
32      %cidY_i32 = index.casts %cidY : index to i32
33      %cidZ_i32 = index.casts %cidZ : index to i32
34      %cdimX_i32 = index.casts %cdimX : index to i32
35      %cdimY_i32 = index.casts %cdimY : index to i32
36      %cdimZ_i32 = index.casts %cdimZ : index to i32
37      %bidX_i32 = index.casts %bidX : index to i32
38      %bidY_i32 = index.casts %bidY : index to i32
39      %bidZ_i32 = index.casts %bidZ : index to i32
40
41      %c3 = arith.constant 3 : index
42      %cnd1 =  arith.cmpi eq, %bidX, %c3 : index
43      %cnd2 =  arith.cmpi eq, %bidY, %c3 : index
44      scf.if %cnd1 {
45        scf.if %cnd2 {
46          gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
47            %cidX_i32,
48            %cidY_i32,
49            %cidZ_i32,
50            %cdimX_i32,
51            %cdimY_i32,
52            %cdimZ_i32,
53            %bidX_i32,
54            %bidY_i32,
55            %bidZ_i32
56            :
57            i32, i32, i32, i32, i32, i32, i32, i32, i32
58        }
59      }
60
61      gpu.return
62    }
63  }
64}
65
66