// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s

// Verifies that the gpu-to-llvm pass lowers the async GPU sparse-matmul op
// chain (2:4 structured-sparsity SpMM) to the corresponding cuSPARSELt
// runtime wrapper calls, in order: stream creation, buffer allocation,
// sparse/dense matrix handle creation, buffer-size query, SpMM execution,
// handle destruction, and stream synchronization/teardown.
module attributes {gpu.container_module} {

  // CHECK-LABEL: func @matmul
  // CHECK: llvm.call @mgpuStreamCreate
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuMemAlloc
  // CHECK: llvm.call @mgpuCusparseLtCreate2To4SpMat
  // CHECK: llvm.call @mgpuCreateCuSparseLtDnMat
  // CHECK: llvm.call @mgpuCuSparseLtSpMMBufferSize
  // CHECK: llvm.call @mgpuCuSparseLtSpMM
  // CHECK: llvm.call @mgpuDestroyCuSparseLtSpMat
  // CHECK: llvm.call @mgpuDestroyCuSparseLtDnMat
  // CHECK: llvm.call @mgpuStreamSynchronize
  // CHECK: llvm.call @mgpuStreamDestroy
  func.func @matmul(%arg0: index) {
    // Each op consumes the previous op's async token, forming one
    // dependency chain on a single GPU stream.
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xf16>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf16>
    // PRUNE_AND_CHECK: prune %mem1 to the 2:4 sparsity pattern and verify it.
    %spmat, %token4 = gpu.create_2to4_spmat async [%token2] {PRUNE_AND_CHECK} %arg0, %arg0, %mem1: memref<?xf16>
    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf16>
    %bufferSz0, %bufferSz1, %bufferSz2, %token6 = gpu.spmm_buffer_size async [%token5] %spmat, %dnmat, %dnmat : index,index,index into f16
    %token7 = gpu.spmm async [%token6] %spmat, %dnmat, %dnmat, %mem2, %mem2, %mem2 : memref<?xf16>,memref<?xf16>,memref<?xf16> into f16
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    gpu.wait [%token9]
    return
  }

}