xref: /llvm-project/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir (revision dbd4a0dd38eb03df4f7d55c780b3dd6cb15a270d)
1// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
2
3module attributes {gpu.container_module} {
4
5  // CHECK-LABEL: func @matvec
6  // CHECK: llvm.call @mgpuStreamCreate
7  // CHECK: llvm.call @mgpuMemAlloc
8  // CHECK: llvm.call @mgpuMemAlloc
9  // CHECK: llvm.call @mgpuCreateCoo
10  // CHECK: llvm.call @mgpuCreateDnVec
11  // CHECK: llvm.call @mgpuSpMVBufferSize
12  // CHECK: llvm.call @mgpuSpMV
13  // CHECK: llvm.call @mgpuDestroySpMat
14  // CHECK: llvm.call @mgpuDestroyDnVec
15  // CHECK: llvm.call @mgpuStreamSynchronize
16  // CHECK: llvm.call @mgpuStreamDestroy
17  func.func @matvec(%arg0: index) {
    // Sparse matrix-vector sequence. Each op below takes the async token
    // produced by the op directly before it, so the body is a single linear
    // dependency chain from the first gpu.wait to the last one.
    //
    // Start of the chain: an async wait with no dependencies.
18    %token0 = gpu.wait async
    // Two dynamically sized allocations of %arg0 elements each: one with
    // index elements (%mem1) and one with f64 elements (%mem2).
19    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
20    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    // COO sparse-matrix handle. %arg0 is reused for all three scalar operands,
    // %mem1 for both index memrefs and %mem2 for the f64 memref.
21    %spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    // Dense-tensor handle over %mem2 with a single size operand; the directives
    // for this function expect mgpuCreateDnVec at this position.
22    %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
    // Buffer-size query; %dnvec is passed for both dense operands. The
    // %bufferSz result is not used by any later op in this function.
23    %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec  into f64
    // The SpMV itself, with %mem2 reused as the trailing memref operand.
24    %token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
    // Release both handles, sparse matrix first.
25    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
26    %token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
    // Non-async wait on the last token of the chain.
27    gpu.wait [%token9]
28    return
29  }
30
31  // CHECK-LABEL: func @matmul
32  // CHECK: llvm.call @mgpuStreamCreate
33  // CHECK: llvm.call @mgpuMemAlloc
34  // CHECK: llvm.call @mgpuMemAlloc
35  // CHECK: llvm.call @mgpuCreateCsr
36  // CHECK: llvm.call @mgpuCreateDnMat
37  // CHECK: llvm.call @mgpuSpMMBufferSize
38  // CHECK: llvm.call @mgpuSpMM
39  // CHECK: llvm.call @mgpuDestroySpMat
40  // CHECK: llvm.call @mgpuDestroyDnMat
41  // CHECK: llvm.call @mgpuStreamSynchronize
42  // CHECK: llvm.call @mgpuStreamDestroy
43  func.func @matmul(%arg0: index) {
    // Sparse-matrix times dense-matrix sequence. Each op below takes the async
    // token produced by the op directly before it, forming one linear chain.
    //
    // Start of the chain: an async wait with no dependencies.
44    %token0 = gpu.wait async
    // Two dynamically sized allocations of %arg0 elements each: one with
    // index elements (%mem1) and one with f64 elements (%mem2).
45    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
46    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    // CSR sparse-matrix handle. %arg0 is reused for all three scalar operands,
    // %mem1 for both index memrefs and %mem2 for the f64 memref.
47    %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    // Dense-tensor handle over %mem2 with two size operands; the directives
    // for this function expect mgpuCreateDnMat at this position.
48    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
    // Buffer-size query; %dnmat is passed for both dense operands. The
    // %bufferSz result is not used by any later op in this function.
49    %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %spmat, %dnmat, %dnmat : index into f64
    // The SpMM itself, with %mem2 reused as the trailing memref operand.
50    %token7 = gpu.spmm async [%token6] %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
    // Release both handles, sparse matrix first.
51    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
52    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    // Non-async wait on the last token of the chain.
53    gpu.wait [%token9]
54    return
55  }
56
57  // CHECK-LABEL: func @spgemm
58  // CHECK: llvm.call @mgpuStreamCreate
59  // CHECK: llvm.call @mgpuMemAlloc
60  // CHECK: llvm.call @mgpuMemAlloc
61  // CHECK: llvm.call @mgpuCreateCsr
62  // CHECK: llvm.call @mgpuCreateCsr
63  // CHECK: llvm.call @mgpuCreateCsr
64  // CHECK: llvm.call @mgpuSpGEMMCreateDescr
65  // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
66  // CHECK: llvm.call @mgpuSpGEMMCompute
67  // CHECK: llvm.call @mgpuSpMatGetSize
68  // CHECK: llvm.call @mgpuSetCsrPointers
69  // CHECK: llvm.call @mgpuSpGEMMCopy
70  // CHECK: llvm.call @mgpuSpGEMMDestroyDescr
71  // CHECK: llvm.call @mgpuDestroySpMat
72  // CHECK: llvm.call @mgpuDestroySpMat
73  // CHECK: llvm.call @mgpuDestroySpMat
74  // CHECK: llvm.call @mgpuStreamSynchronize
75  // CHECK: llvm.call @mgpuStreamDestroy
76  func.func @spgemm(%arg0: index) {
    // Sparse-sparse matrix multiplication sequence on three CSR handles
    // (A, B, C). Each op below takes the async token produced by the op
    // directly before it, forming one linear dependency chain.
    //
    // Start of the chain: an async wait with no dependencies.
77    %token0 = gpu.wait async
    // Two dynamically sized allocations of %arg0 elements each: one with
    // index elements (%mem1) and one with f32 elements (%mem2).
78    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
79    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
    // Three CSR handles that all share the same scalar operands and memrefs.
80    %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
81    %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
82    %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    // Descriptor shared by the work-estimation, compute and copy steps.
83    %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
    // Zero-element i8 buffer and a zero size, passed to both
    // spgemm_work_estimation_or_compute ops below.
84    %alloc = memref.alloc() : memref<0xi8>  // nullptr
85    %c0 = arith.constant 0 : index
    // Same op used twice, once per kind attribute: WORK_ESTIMATION, then
    // COMPUTE. Neither %bufferSz1 nor %bufferSz2 is used afterwards.
86    %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
87                            [%token6]{WORK_ESTIMATION}
88                            %spmatA, %spmatB, %spmatC,
89                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
90    %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
91                               [%token7]{COMPUTE}
92                               %spmatA, %spmatB, %spmatC,
93                               %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
    // Size query on C; the three index results are not used afterwards.
94    %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
    // Depends on %token9 (the size query) so that the token chain stays linear
    // and no async token is left unconsumed.
95    %token10 = gpu.set_csr_pointers async [%token9] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
96    %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
    // Tear-down: descriptor first, then the three sparse-matrix handles.
97    %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
98    %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
99    %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
100    %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
    // Non-async wait on the last token of the chain.
101    gpu.wait [%token15]
102    return
103  }
104
105  // CHECK-LABEL: func @sddmm
106  // CHECK: llvm.call @mgpuStreamCreate
107  // CHECK: llvm.call @mgpuMemAlloc
108  // CHECK: llvm.call @mgpuMemAlloc
109  // CHECK: llvm.call @mgpuCreateCsr
110  // CHECK: llvm.call @mgpuCreateDnMat
111  // CHECK: llvm.call @mgpuSDDMMBufferSize
112  // CHECK: llvm.call @mgpuSDDMM
113  // CHECK: llvm.call @mgpuDestroySpMat
114  // CHECK: llvm.call @mgpuDestroyDnMat
115  // CHECK: llvm.call @mgpuStreamSynchronize
116  // CHECK: llvm.call @mgpuStreamDestroy
117  func.func @sddmm(%arg0: index) {
    // SDDMM sequence: two dense operands and one sparse operand. Each op below
    // takes the async token produced by the op directly before it, forming one
    // linear dependency chain.
    //
    // Start of the chain: an async wait with no dependencies.
118    %token0 = gpu.wait async
    // Two dynamically sized allocations of %arg0 elements each: one with
    // index elements (%mem1) and one with f64 elements (%mem2).
119    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
120    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    // CSR sparse-matrix handle. %arg0 is reused for all three scalar operands,
    // %mem1 for both index memrefs and %mem2 for the f64 memref.
121    %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    // Dense-tensor handle over %mem2 with two size operands; the directives
    // for this function expect mgpuCreateDnMat at this position.
122    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
    // Buffer-size query. Operand order here is dense, dense, sparse, and
    // %dnmat is passed for both dense operands. %bufferSz is not used later.
123    %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %dnmat, %dnmat, %spmat into f64
    // The SDDMM itself, with %mem2 reused as the trailing memref operand.
124    %token7 = gpu.sddmm async [%token6]  %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
    // Release both handles, sparse matrix first.
125    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
126    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    // Non-async wait on the last token of the chain.
127    gpu.wait [%token9]
128    return
129  }
130
131  // CHECK-LABEL: func @csc_and_bsr
132  // CHECK: llvm.call @mgpuStreamCreate
133  // CHECK: llvm.call @mgpuMemAlloc
134  // CHECK: llvm.call @mgpuMemAlloc
135  // CHECK: llvm.call @mgpuCreateCsc
136  // CHECK: llvm.call @mgpuCreateBsr
137  // CHECK: llvm.call @mgpuDestroySpMat
138  // CHECK: llvm.call @mgpuDestroySpMat
139  // CHECK: llvm.call @mgpuStreamSynchronize
140  // CHECK: llvm.call @mgpuStreamDestroy
141  func.func @csc_and_bsr(%arg0: index) {
    // Creation and destruction of a CSC handle and a BSR handle; no compute op
    // is issued on either. Each op below takes the async token produced by the
    // op directly before it, forming one linear dependency chain.
    //
    // Start of the chain: an async wait with no dependencies.
142    %token0 = gpu.wait async
    // Two dynamically sized allocations of %arg0 elements each: one with
    // index elements (%mem1) and one with f64 elements (%mem2).
143    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
144    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    // CSC handle: three scalar operands (all %arg0) followed by three memrefs.
145    %csc, %token3 = gpu.create_csc async [%token2]
146      %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
147      : memref<?xindex>, memref<?xindex>, memref<?xf64>
    // BSR handle: five scalar operands (all %arg0) followed by the same three
    // memrefs. NOTE(review): the two extra scalars are presumably the block
    // dimensions -- confirm against the gpu.create_bsr op definition.
148    %bsr, %token4 = gpu.create_bsr async [%token3]
149      %arg0, %arg0, %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
150      : memref<?xindex>, memref<?xindex>, memref<?xf64>
    // Both handles are released through the same gpu.destroy_sp_mat op.
151    %token5 = gpu.destroy_sp_mat async [%token4] %csc
152    %token6 = gpu.destroy_sp_mat async [%token5] %bsr
    // Non-async wait on the last token of the chain.
153    gpu.wait [%token6]
154    return
155  }
156}
157