// NOTE: this test requires gpu-sm80
//
// The --sparsifier option string opened in %{compile} is deliberately left
// unterminated: each run command below appends its own
// enable-runtime-library setting and supplies the closing quote.
//
// DEFINE: %{compile} = mlir-opt %s \
// DEFINE:   --sparsifier="enable-gpu-libgen gpu-triple=nvptx64-nvidia-cuda gpu-chip=sm_80 gpu-features=+ptx71 gpu-format=%gpu_compilation_format
// DEFINE: %{run} = mlir-runner \
// DEFINE:   --shared-libs=%mlir_cuda_runtime \
// DEFINE:   --shared-libs=%mlir_c_runner_utils \
// DEFINE:   --e main --entry-point-result=void \
// DEFINE: | FileCheck %s
//
// with RT lib:
//
// RUN: %{compile} enable-runtime-library=true" | %{run}
//
// without RT lib:
//
// RUN: %{compile} enable-runtime-library=false" | %{run}

// CSR storage: the row level (d0) is dense, the column level (d1) is
// compressed; positions and coordinates are both stored as 32-bit integers.
#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed),
  posWidth = 32,
  crdWidth = 32
}>

module {
  // Externally defined hooks called at the start and end of @main.
  // NOTE(review): presumably provided by the library passed through
  // %mlir_cuda_runtime -- confirm.
  llvm.func @mgpuCreateSparseEnv()
  llvm.func @mgpuDestroySparseEnv()

  // Computes C = A x B with A,B,C sparse CSR.
  //
  // Arguments:
  //   %A, %B: 8x8 f32 operands in #CSR format.
  // Returns:
  //   The 8x8 f32 product in #CSR format. The accumulator handed to
  //   linalg.matmul is a fresh tensor.empty, so the result holds only the
  //   product terms.
  func.func @matmulCSR(%A: tensor<8x8xf32, #CSR>,
                       %B: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> {
    %init = tensor.empty() : tensor<8x8xf32, #CSR>
    %C = linalg.matmul
      ins(%A, %B: tensor<8x8xf32, #CSR>,
                  tensor<8x8xf32, #CSR>)
      outs(%init: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>
    return %C: tensor<8x8xf32, #CSR>
  }

  //
  // Main driver.
  //
  // Converts a dense 8x8 constant to #CSR, multiplies it by itself with
  // @matmulCSR, and prints the sparse result for FileCheck to verify. The
  // whole computation is bracketed by the sparse-environment create/destroy
  // calls.
  //
  func.func @main() {
    llvm.call @mgpuCreateSparseEnv(): () -> ()

    // Dense input matrix; row 1 is entirely zero, so it contributes an empty
    // row to the CSR result.
    %t = arith.constant dense<[
       [ 1.0,  0.0,  2.0,  0.0,  0.0,  0.0,  0.0,  3.0],
       [ 0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0],
       [ 0.0,  0.0,  4.0,  0.0,  0.0,  0.0,  0.0,  0.0],
       [ 0.0,  0.0,  0.0,  5.0,  0.0,  0.0,  0.0,  0.0],
       [ 0.0,  0.0,  0.0,  0.0,  6.0,  0.0,  0.0,  0.0],
       [ 0.0,  7.0,  8.0,  0.0,  0.0,  0.0,  0.0,  9.0],
       [ 0.0,  0.0, 10.0,  0.0,  0.0,  0.0, 11.0, 12.0],
       [ 0.0, 13.0, 14.0,  0.0,  0.0,  0.0, 15.0, 16.0]
    ]> : tensor<8x8xf32>
    %Acsr = sparse_tensor.convert %t : tensor<8x8xf32> to tensor<8x8xf32, #CSR>

    // The same tensor is passed as both operands, i.e. the product is A x A.
    %Ccsr = call @matmulCSR(%Acsr, %Acsr) : (tensor<8x8xf32, #CSR>,
                                             tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>

    //
    // Verify computed result.
    //
    // The expected product has 20 stored entries; the repeated 5 in the
    // positions array corresponds to the empty row 1.
    //
    // CHECK:      ---- Sparse Tensor ----
    // CHECK-NEXT: nse = 20
    // CHECK-NEXT: dim = ( 8, 8 )
    // CHECK-NEXT: lvl = ( 8, 8 )
    // CHECK-NEXT: pos[1] : ( 0, 5, 5, 6, 7, 8, 12, 16, 20 )
    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 6, 7, 2, 3, 4, 1, 2, 6, 7, 1, 2, 6, 7, 1, 2, 6, 7 )
    // CHECK-NEXT: values : ( 1, 39, 52, 45, 51, 16, 25, 36, 117, 158, 135, 144, 156, 318, 301, 324, 208, 430, 405, 436 )
    // CHECK-NEXT: ----
    sparse_tensor.print %Ccsr : tensor<8x8xf32, #CSR>

    llvm.call @mgpuDestroySparseEnv(): () -> ()
    return
  }
}