// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=CHECK,ROCDL
// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=CHECK,NVVM

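// Private attributions: the ROCDL path lowers thread-private memory to
// pointers in address space 5, while the NVVM path keeps it in the default
// (generic) address space.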
gpu.module @kernel {
  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, #gpu.address_space<private>>) {
    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<private>>
    gpu.return
  }
}

// CHECK-LABEL:  llvm.func @private
//       CHECK:  llvm.store
//  ROCDL-SAME:   : f32, !llvm.ptr<5>
//   NVVM-SAME:   : f32, !llvm.ptr

// -----

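// Workgroup attributions lower to shared-memory pointers (address space 3)
// on both the ROCDL and NVVM paths.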
gpu.module @kernel {
  gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, #gpu.address_space<workgroup>>) {
    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<workgroup>>
    gpu.return
  }
}

// CHECK-LABEL:  llvm.func @workgroup
//       CHECK:  llvm.store
//  CHECK-SAME:   : f32, !llvm.ptr<3>

// -----

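// Memory space attributes propagate through nested memref types: both the
// load of the inner memref descriptor and the load of the f32 element go
// through global pointers (address space 1).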
gpu.module @kernel {
  gpu.func @nested_memref(%arg0: memref<4xmemref<4xf32, #gpu.address_space<global>>, #gpu.address_space<global>>) -> f32 {
    %c0 = arith.constant 0 : index
    %inner = memref.load %arg0[%c0] : memref<4xmemref<4xf32, #gpu.address_space<global>>, #gpu.address_space<global>>
    %value = memref.load %inner[%c0] : memref<4xf32, #gpu.address_space<global>>
    gpu.return %value : f32
  }
}

// CHECK-LABEL:  llvm.func @nested_memref
//       CHECK:  llvm.load
//  CHECK-SAME:   : !llvm.ptr<1>
//       CHECK: [[value:%.+]] = llvm.load
//  CHECK-SAME:   : !llvm.ptr<1> -> f32
//       CHECK: llvm.return [[value]]

// -----

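// gpu.dynamic_shared_memory lowers to a zero-sized global in the workgroup
// address space (NVVM additionally attaches a 16-byte alignment), and a
// vector.load through a view of it stays in address space 3.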
gpu.module @kernel {
  gpu.func @dynamic_shmem_with_vector(%arg1: memref<1xf32>) {
    %0 = arith.constant 0 : index
    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<1xf32, #gpu.address_space<workgroup>>
    %3 = vector.load %2[%0] : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
    vector.store %3, %arg1[%0] : memref<1xf32>, vector<1xf32>
    gpu.return
  }
}

// ROCDL: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32} : !llvm.array<0 x i8>
// NVVM: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
// CHECK-LABEL:  llvm.func @dynamic_shmem_with_vector
// CHECK: llvm.mlir.addressof @__dynamic_shmem__0 : !llvm.ptr<3>
// CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<3> -> vector<1xf32>
// CHECK: llvm.store

// -----

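// A memref.view of dynamic shared memory keeps the workgroup address space,
// so the store lowers to an access through !llvm.ptr<3>.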
gpu.module @kernel {
  gpu.func @dynamic_shmem(%arg0: f32) {
    %0 = arith.constant 0 : index
    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<4xf32, #gpu.address_space<workgroup>>
    memref.store %arg0, %2[%0] : memref<4xf32, #gpu.address_space<workgroup>>
    gpu.return
  }
}

// CHECK-LABEL:  llvm.func @dynamic_shmem
//       CHECK:  llvm.store
//  CHECK-SAME:   : f32, !llvm.ptr<3>