xref: /llvm-project/mlir/test/Dialect/GPU/dynamic-shared-memory.mlir (revision 4bce270157f9a81bd7e38dc589a2970a445d1e96)
1// RUN: mlir-opt %s -convert-gpu-to-nvvm -cse -canonicalize | FileCheck %s
2
3gpu.module @modules {
4  // CHECK: llvm.mlir.global internal @__dynamic_shmem__3() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
5  llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
6  llvm.mlir.global internal @__dynamic_shmem__1() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
7  llvm.mlir.global internal @__dynamic_shmem__2() {alignment = 16 : i64} : !llvm.array<0 x i8>
8  // CHECK-LABEL: llvm.func @dynamic_shared_memory_kernel(
9  // CHECK-SAME: %[[arg0:.+]]: i64)
10  gpu.func @dynamic_shared_memory_kernel(%d : index) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 1, 1, 1>} {
11    %c1 = arith.constant 1 : index
12    %c8192 = arith.constant 8192 : index
13    %c16384 = arith.constant 16384 : index
14    %shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
15    %shmem2 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
16
17    %0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
18    "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
19
20    %1 = memref.view %shmem[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
21    "test.use.shared.memory"(%1) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
22
23// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
24// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
25// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
26// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
27// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
28// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
29//     CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
30//     CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
31//     CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
32//     CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
33//     CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
34//     CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
35//     CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
36//     CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
37//     CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
38//     CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
39//     CHECK: %[[S15:.+]] = llvm.getelementptr %[[S5]][16384] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
40//     CHECK: %[[S16:.+]] = llvm.insertvalue %[[S15]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
41//     CHECK: %[[S17:.+]] = llvm.insertvalue %[[S4]], %[[S16]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
42//     CHECK: %[[S18:.+]] = llvm.insertvalue %[[S1]], %[[S17]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
43//     CHECK: %[[S19:.+]] = llvm.insertvalue %[[S3]], %[[S18]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
44//     CHECK: %[[S20:.+]] = llvm.insertvalue %[[S0]], %[[S19]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
45//     CHECK: %[[S21:.+]] = llvm.insertvalue %[[S1]], %[[S20]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
46//     CHECK: %[[S22:.+]] = builtin.unrealized_conversion_cast %[[S21]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
47//     CHECK: "test.use.shared.memory"(%[[S22]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
48    gpu.return
49  }
50
51// CHECK-LABEL: llvm.func @gpu_device_function
52  gpu.func @gpu_device_function()  {
53    %c8192 = arith.constant 8192 : index
54    %shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
55    %0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
56    "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
57// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
58// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
59// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
60// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
61// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
62// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
63//     CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
64//     CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
65//     CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
66//     CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
67//     CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
68//     CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
69//     CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
70//     CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
71//     CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
72//     CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
73
74    gpu.return
75  }
76
77// CHECK-LABEL: llvm.func @func_device_function
78  func.func @func_device_function()  {
79    %c8192 = arith.constant 8192 : index
80    %shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
81    %0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
82    "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
83// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
84// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
85// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
86// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
87// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
88// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
89//     CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
90//     CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
91//     CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
92//     CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
93//     CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
94//     CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
95//     CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
96//     CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
97//     CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %13 : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
98//     CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
99
100    func.return
101  }
102}
103