// RUN: mlir-opt %s --gpu-to-llvm -split-input-file | FileCheck %s

// Exercises the --gpu-to-llvm pass on `gpu.launch_func` ops. Each section
// between the split markers is processed as an independent input. In every
// case the directives expect the launch op to remain a `gpu.launch_func`
// whose operands have been rewritten to LLVM dialect values.

// Case 1: launch without clusters, targeting a `gpu.module` carrying an
// `#nvvm.target` attribute.
module attributes {gpu.container_module} {
  // CHECK: gpu.module
  gpu.module @kernel_module [#nvvm.target] {
    // Kernel signature mirrors the expected post-conversion argument list:
    // one i32 followed by two pointers and three i64 values.
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  func.func @foo(%buffer: memref<?xf32>) {
    // The arith constants are expected to become llvm.mlir.constant ops; the
    // index-typed one is expected to be materialized as i64.
    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32

    // The single memref argument is expected to show up as two !llvm.ptr and
    // three i64 operands (presumably the unpacked memref descriptor fields;
    // confirm against the conversion pattern).
    // CHECK: gpu.launch_func @kernel_module::@kernel
    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}


// -----

// Case 2: same as case 1, but the launch additionally specifies a
// `clusters in (...)` clause that is expected to be preserved with converted
// operands.
module attributes {gpu.container_module} {
  // CHECK: gpu.module
  gpu.module @kernel_module [#nvvm.target] {
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  func.func @foo(%buffer: memref<?xf32>) {
    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32
    %c2 = arith.constant 2 : index

    // CHECK: gpu.launch_func @kernel_module::@kernel
    // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        clusters in (%c2, %c2, %c2)
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}

// -----

// Case 3: the launch refers to a `gpu.binary` (holding a `#rocdl.target`
// object with placeholder contents) instead of a `gpu.module`; the same
// operand rewriting as in case 2 is expected.
module attributes {gpu.container_module} {
  // CHECK: gpu.binary
  gpu.binary @kernel_module [#gpu.object<#rocdl.target, "blob">]

  func.func @foo(%buffer: memref<?xf32>) {
    // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
    // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
    // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32
    %c2 = arith.constant 2 : index

    // CHECK: gpu.launch_func @kernel_module::@kernel
    // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
    // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
    // CHECK: dynamic_shared_memory_size [[C256]]
    // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        clusters in (%c2, %c2, %c2)
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}