xref: /llvm-project/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir (revision 8e12f31be5a98a66700dd3571e4e12465f05ad61)
1// RUN: mlir-opt %s --gpu-to-llvm -split-input-file | FileCheck %s
2
// Test case: kernel launch without a cluster clause.
// The container module holds an NVVM-targeted gpu.module with a stub kernel
// and a host function @foo that launches it. The FileCheck directives expect
// the gpu.module and the gpu.launch_func op to still be present after the
// --gpu-to-llvm pass from the file's run line, with the constants printed as
// llvm.mlir.constant and the launch operands printed with LLVM-dialect types.
3module attributes {gpu.container_module} {
4  // CHECK: gpu.module
5  gpu.module @kernel_module [#nvvm.target] {
    // Kernel stub: takes (i32, ptr, ptr, i64, i64, i64) and returns at once.
6    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
7        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
8        %arg5: i64) attributes {gpu.kernel} {
9      llvm.return
10    }
11  }
12
  // Host function: its only argument is a dynamically sized 1-D f32 memref.
13  func.func @foo(%buffer: memref<?xf32>) {
  // Each arith.constant below is expected to be printed as an
  // llvm.mlir.constant; the index-typed constant is expected with type i64.
14  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
15  // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
16  // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
17    %c8 = arith.constant 8 : index
18    %c32 = arith.constant 32 : i32
19    %c256 = arith.constant 256 : i32
20
  // The launch uses %c8 for every grid and block dimension, %c256 as the
  // dynamic shared memory size, and passes %c32 plus the memref. The expected
  // operand list has five values in place of the single memref (two !llvm.ptr
  // and three i64) -- presumably the expanded memref descriptor; confirm
  // against the pass implementation.
21  // CHECK: gpu.launch_func @kernel_module::@kernel
22  // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
23  // CHECK: dynamic_shared_memory_size [[C256]]
24  // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
25    gpu.launch_func @kernel_module::@kernel
26        blocks in (%c8, %c8, %c8)
27        threads in (%c8, %c8, %c8)
28        dynamic_shared_memory_size %c256
29        args(%c32 : i32, %buffer : memref<?xf32>)
30    return
31  }
32}
33
34
35// -----
36
// Test case: kernel launch that also carries a cluster clause.
// The container module holds an NVVM-targeted gpu.module with a stub kernel
// and a host function @foo that launches it with `clusters in (...)`. The
// FileCheck directives expect the gpu.module and the gpu.launch_func op,
// including its clusters clause, to be present after the --gpu-to-llvm pass
// from the file's run line, with operands printed with LLVM-dialect types.
37module attributes {gpu.container_module} {
38  // CHECK: gpu.module
39  gpu.module @kernel_module [#nvvm.target] {
    // Kernel stub: takes (i32, ptr, ptr, i64, i64, i64) and returns at once.
40    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
41        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
42        %arg5: i64) attributes {gpu.kernel} {
43      llvm.return
44    }
45  }
46
  // Host function: its only argument is a dynamically sized 1-D f32 memref.
47  func.func @foo(%buffer: memref<?xf32>) {
  // Each arith.constant below is expected to be printed as an
  // llvm.mlir.constant; the index-typed constants are expected with type i64.
  // The constant 2 is the value used for all three cluster dimensions.
48  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
49  // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
50  // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
51  // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
52    %c8 = arith.constant 8 : index
53    %c32 = arith.constant 32 : i32
54    %c256 = arith.constant 256 : i32
55    %c2 = arith.constant 2 : index
56
  // The launch uses %c2 for every cluster dimension, %c8 for every grid and
  // block dimension, %c256 as the dynamic shared memory size, and passes %c32
  // plus the memref. The expected operand list has five values in place of the
  // single memref (two !llvm.ptr and three i64) -- presumably the expanded
  // memref descriptor; confirm against the pass implementation.
57  // CHECK: gpu.launch_func @kernel_module::@kernel
58  // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
59  // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
60  // CHECK: dynamic_shared_memory_size [[C256]]
61  // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
62    gpu.launch_func @kernel_module::@kernel
63        clusters in (%c2, %c2, %c2)
64        blocks in (%c8, %c8, %c8)
65        threads in (%c8, %c8, %c8)
66        dynamic_shared_memory_size %c256
67        args(%c32 : i32, %buffer : memref<?xf32>)
68    return
69  }
70}
71
72// -----
73
// Test case: kernel launch that targets a gpu.binary instead of a gpu.module.
// The container module holds a gpu.binary named @kernel_module whose single
// object is tagged #rocdl.target with the payload string "blob", plus a host
// function @foo that launches @kernel_module::@kernel with a cluster clause.
// The FileCheck directives expect the gpu.binary and the gpu.launch_func op to
// be present after the --gpu-to-llvm pass from the file's run line.
// NOTE(review): no @kernel symbol is defined in this module's visible IR; the
// launch refers to it by name only.
74module attributes {gpu.container_module} {
75  // CHECK: gpu.binary
76  gpu.binary @kernel_module [#gpu.object<#rocdl.target, "blob">]
77
  // Host function: its only argument is a dynamically sized 1-D f32 memref.
78  func.func @foo(%buffer: memref<?xf32>) {
  // Each arith.constant below is expected to be printed as an
  // llvm.mlir.constant; the index-typed constants are expected with type i64.
79  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
80  // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
81  // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
82  // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
83    %c8 = arith.constant 8 : index
84    %c32 = arith.constant 32 : i32
85    %c256 = arith.constant 256 : i32
86    %c2 = arith.constant 2 : index
87
  // The launch uses %c2 for every cluster dimension, %c8 for every grid and
  // block dimension, %c256 as the dynamic shared memory size, and passes %c32
  // plus the memref. The expected operand list has five values in place of the
  // single memref (two !llvm.ptr and three i64) -- presumably the expanded
  // memref descriptor; confirm against the pass implementation.
88  // CHECK: gpu.launch_func @kernel_module::@kernel
89  // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
90  // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
91  // CHECK: dynamic_shared_memory_size [[C256]]
92  // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
93    gpu.launch_func @kernel_module::@kernel
94        clusters in (%c2, %c2, %c2)
95        blocks in (%c8, %c8, %c8)
96        threads in (%c8, %c8, %c8)
97        dynamic_shared_memory_size %c256
98        args(%c32 : i32, %buffer : memref<?xf32>)
99    return
100  }
101}
102