xref: /llvm-project/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1// NOTE: this test requires gpu-sm80
2//
3// DEFINE: %{compile} = mlir-opt %s \
4// DEFINE:   --sparsifier="enable-gpu-libgen gpu-triple=nvptx64-nvidia-cuda gpu-chip=sm_80 gpu-features=+ptx71 gpu-format=%gpu_compilation_format
5// DEFINE: %{run} = mlir-runner \
6// DEFINE:   --shared-libs=%mlir_cuda_runtime \
7// DEFINE:   --shared-libs=%mlir_c_runner_utils \
8// DEFINE:   --e main --entry-point-result=void \
9// DEFINE: | FileCheck %s
10//
11// with RT lib:
12//
13// RUN: %{compile} enable-runtime-library=true"  | %{run}
14//
15// without RT lib:
16//
17// RUN: %{compile} enable-runtime-library=false" | %{run}
18
// Sparse encoding shared by every sparse tensor in this test: the first
// dimension is stored as a dense level and the second as a compressed level,
// with 32-bit positions and 32-bit coordinates.
#CSR = #sparse_tensor.encoding<{
  map = (i, j) -> (i : dense, j : compressed),
  posWidth = 32,
  crdWidth = 32
}>
24
module {
  // External hooks declared without a body. @main calls the first one before
  // any sparse work and the second one right before returning.
  // NOTE(review): presumably resolved from the CUDA runtime wrapper library
  // that the test runner loads as a shared library -- confirm.
  llvm.func @mgpuCreateSparseEnv()
  llvm.func @mgpuDestroySparseEnv()

  // Computes C = A x B with A,B,C sparse CSR.
  //
  //   %A, %B : 8x8 f32 operands, both in #CSR format.
  //   result : the 8x8 f32 product, also in #CSR format.
  //
  // The output operand of the matmul is a freshly created empty tensor, so the
  // result holds the plain product.
  func.func @matmulCSR(%A: tensor<8x8xf32, #CSR>,
                       %B: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> {
    %init = tensor.empty() : tensor<8x8xf32, #CSR>
    %C = linalg.matmul
      ins(%A, %B: tensor<8x8xf32, #CSR>,
                  tensor<8x8xf32, #CSR>)
      outs(%init: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>
    return %C: tensor<8x8xf32, #CSR>
  }

  //
  // Main driver.
  //
  // Builds a small dense 8x8 matrix, converts it to #CSR, multiplies it by
  // itself through @matmulCSR, and prints the sparse result so that the
  // FileCheck directives below can verify it.
  //
  func.func @main() {
    llvm.call @mgpuCreateSparseEnv(): () -> ()

    // Dense input; row 1 is entirely zero, so row 1 of the product is empty.
    %t = arith.constant dense<[
       [ 1.0,  0.0,  2.0,  0.0,  0.0,  0.0,  0.0,  3.0],
       [ 0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0],
       [ 0.0,  0.0,  4.0,  0.0,  0.0,  0.0,  0.0,  0.0],
       [ 0.0,  0.0,  0.0,  5.0,  0.0,  0.0,  0.0,  0.0],
       [ 0.0,  0.0,  0.0,  0.0,  6.0,  0.0,  0.0,  0.0],
       [ 0.0,  7.0,  8.0,  0.0,  0.0,  0.0,  0.0,  9.0],
       [ 0.0,  0.0, 10.0,  0.0,  0.0,  0.0, 11.0, 12.0],
       [ 0.0, 13.0, 14.0,  0.0,  0.0,  0.0, 15.0, 16.0]
    ]> : tensor<8x8xf32>
    %Acsr = sparse_tensor.convert %t : tensor<8x8xf32> to tensor<8x8xf32, #CSR>

    // The same sparse matrix is passed as both operands, i.e. this computes
    // the square of the input.
    %Ccsr = call @matmulCSR(%Acsr, %Acsr) : (tensor<8x8xf32, #CSR>,
                                             tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>

    //
    // Verify computed result.
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 20
    // CHECK-NEXT: dim = ( 8, 8 )
    // CHECK-NEXT: lvl = ( 8, 8 )
    // CHECK-NEXT: pos[1] : ( 0, 5, 5, 6, 7, 8, 12, 16, 20 )
    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 6, 7, 2, 3, 4, 1, 2, 6, 7, 1, 2, 6, 7, 1, 2, 6, 7 )
    // CHECK-NEXT: values : ( 1, 39, 52, 45, 51, 16, 25, 36, 117, 158, 135, 144, 156, 318, 301, 324, 208, 430, 405, 436 )
    // CHECK-NEXT: ----
    sparse_tensor.print %Ccsr : tensor<8x8xf32, #CSR>

    // NOTE(review): no operation in this function releases %Acsr or %Ccsr;
    // confirm that leaving them to process teardown is intentional.
    llvm.call @mgpuDestroySparseEnv(): () -> ()
    return
  }
}
81