// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=CHECK,ROCDL
// RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=CHECK,NVVM

// Private attributions lower to the target's private address space: 5 on
// ROCDL, the default address space on NVVM.
gpu.module @kernel {
  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, #gpu.address_space<private>>) {
    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<private>>
    gpu.return
  }
}

// CHECK-LABEL: llvm.func @private
// CHECK: llvm.store
// ROCDL-SAME: : f32, !llvm.ptr<5>
// NVVM-SAME: : f32, !llvm.ptr

// -----

// Workgroup attributions lower to shared memory (address space 3) on both
// targets.
gpu.module @kernel {
  gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, #gpu.address_space<workgroup>>) {
    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<workgroup>>
    gpu.return
  }
}

// CHECK-LABEL: llvm.func @workgroup
// CHECK: llvm.store
// CHECK-SAME: : f32, !llvm.ptr<3>

// -----

// Global-space memrefs lower to pointers in address space 1, including when
// one memref is nested inside another.
gpu.module @kernel {
  gpu.func @nested_memref(%arg0: memref<4xmemref<4xf32, #gpu.address_space<global>>, #gpu.address_space<global>>) -> f32 {
    %c0 = arith.constant 0 : index
    %inner = memref.load %arg0[%c0] : memref<4xmemref<4xf32, #gpu.address_space<global>>, #gpu.address_space<global>>
    %value = memref.load %inner[%c0] : memref<4xf32, #gpu.address_space<global>>
    gpu.return %value : f32
  }
}

// CHECK-LABEL: llvm.func @nested_memref
// CHECK: llvm.load
// CHECK-SAME: : !llvm.ptr<1>
// CHECK: [[value:%.+]] = llvm.load
// CHECK-SAME: : !llvm.ptr<1> -> f32
// CHECK: llvm.return [[value]]

// -----

// gpu.dynamic_shared_memory lowers to a zero-sized global in address space 3;
// on NVVM the global additionally carries a 16-byte alignment. A vector load
// through the shared-memory view keeps the 4-byte element alignment.
gpu.module @kernel {
  gpu.func @dynamic_shmem_with_vector(%arg1: memref<1xf32>) {
    %0 = arith.constant 0 : index
    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<1xf32, #gpu.address_space<workgroup>>
    %3 = vector.load %2[%0] : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
    vector.store %3, %arg1[%0] : memref<1xf32>, vector<1xf32>
    gpu.return
  }
}

// ROCDL: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32} : !llvm.array<0 x i8>
// NVVM: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
// CHECK-LABEL: llvm.func @dynamic_shmem_with_vector
// CHECK: llvm.mlir.addressof @__dynamic_shmem__0 : !llvm.ptr<3>
// CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<3> -> vector<1xf32>
// CHECK: llvm.store

// -----

// Scalar stores through a dynamic shared memory view also target address
// space 3.
gpu.module @kernel {
  gpu.func @dynamic_shmem(%arg0: f32) {
    %0 = arith.constant 0 : index
    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<4xf32, #gpu.address_space<workgroup>>
    memref.store %arg0, %2[%0] : memref<4xf32, #gpu.address_space<workgroup>>
    gpu.return
  }
}

// CHECK-LABEL: llvm.func @dynamic_shmem
// CHECK: llvm.store
// CHECK-SAME: : f32, !llvm.ptr<3>