// RUN: mlir-opt %s -convert-gpu-to-nvvm -cse -canonicalize | FileCheck %s

// Tests lowering of gpu.dynamic_shared_memory to NVVM: every use resolves to
// the address of a single zero-sized i8 global in shared memory (addr_space 3),
// and each memref.view becomes a byte-offset llvm.getelementptr off that base.
gpu.module @modules {
  // The lowering is expected to emit one 16-byte-aligned shared global that all
  // dynamic-shared-memory users reference (checked as @__dynamic_shmem__3 below,
  // numbered after the three pre-existing globals).
  // CHECK: llvm.mlir.global internal @__dynamic_shmem__3() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
  llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
  llvm.mlir.global internal @__dynamic_shmem__1() {addr_space = 3 : i32, alignment = 4 : i64} : !llvm.array<0 x i8>
  llvm.mlir.global internal @__dynamic_shmem__2() {alignment = 16 : i64} : !llvm.array<0 x i8>
  // CHECK-LABEL: llvm.func @dynamic_shared_memory_kernel(
  // CHECK-SAME: %[[arg0:.+]]: i64)
  gpu.func @dynamic_shared_memory_kernel(%d : index) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 1, 1, 1>} {
    %c1 = arith.constant 1 : index
    %c8192 = arith.constant 8192 : index
    %c16384 = arith.constant 16384 : index
    %shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %shmem2 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>

    %0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
    "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()

    %1 = memref.view %shmem[%c16384][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
    "test.use.shared.memory"(%1) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()

// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
// CHECK: %[[S15:.+]] = llvm.getelementptr %[[S5]][16384] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
// CHECK: %[[S16:.+]] = llvm.insertvalue %[[S15]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S17:.+]] = llvm.insertvalue %[[S4]], %[[S16]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S18:.+]] = llvm.insertvalue %[[S1]], %[[S17]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S19:.+]] = llvm.insertvalue %[[S3]], %[[S18]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S20:.+]] = llvm.insertvalue %[[S0]], %[[S19]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S21:.+]] = llvm.insertvalue %[[S1]], %[[S20]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S22:.+]] = builtin.unrealized_conversion_cast %[[S21]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
// CHECK: "test.use.shared.memory"(%[[S22]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
    gpu.return
  }

  // Same lowering checked inside a non-kernel gpu.func.
// CHECK-LABEL: llvm.func @gpu_device_function
  gpu.func @gpu_device_function() {
    %c8192 = arith.constant 8192 : index
    %shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
    "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// Use the capture %[[S13]] (not a literal SSA name) so the test does not depend
// on the printer's value numbering.
// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()

    gpu.return
  }

  // Same lowering checked inside a func.func nested in the gpu.module.
// CHECK-LABEL: llvm.func @func_device_function
  func.func @func_device_function() {
    %c8192 = arith.constant 8192 : index
    %shmem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    %0 = memref.view %shmem[%c8192][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<32x64xf32, #gpu.address_space<workgroup>>
    "test.use.shared.memory"(%0) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()
// CHECK-DAG: %[[S0:.+]] = llvm.mlir.constant(32 : index) : i64
// CHECK-DAG: %[[S1:.+]] = llvm.mlir.constant(64 : index) : i64
// CHECK-DAG: %[[S2:.+]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK-DAG: %[[S3:.+]] = llvm.mlir.constant(1 : index) : i64
// CHECK-DAG: %[[S4:.+]] = llvm.mlir.constant(0 : index) : i64
// CHECK-DAG: %[[S5:.+]] = llvm.mlir.addressof @__dynamic_shmem__3 : !llvm.ptr<3>
// CHECK: %[[S6:.+]] = llvm.insertvalue %[[S5]], %[[S2]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S7:.+]] = llvm.getelementptr %[[S5]][8192] : (!llvm.ptr<3>) -> !llvm.ptr<3>, i8
// CHECK: %[[S8:.+]] = llvm.insertvalue %[[S7]], %[[S6]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S9:.+]] = llvm.insertvalue %[[S4]], %[[S8]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S10:.+]] = llvm.insertvalue %[[S1]], %[[S9]][3, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S11:.+]] = llvm.insertvalue %[[S3]], %[[S10]][4, 1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S12:.+]] = llvm.insertvalue %[[S0]], %[[S11]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[S13:.+]] = llvm.insertvalue %[[S1]], %[[S12]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
// Use the capture %[[S13]] (not a literal SSA name) so the test does not depend
// on the printer's value numbering.
// CHECK: %[[S14:.+]] = builtin.unrealized_conversion_cast %[[S13]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> to memref<32x64xf32, #gpu.address_space<workgroup>>
// CHECK: "test.use.shared.memory"(%[[S14]]) : (memref<32x64xf32, #gpu.address_space<workgroup>>) -> ()

    func.return
  }
}