// Source: llvm-project/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir (revision 06a65ce500a632048db1058de9ca61072004a640)
// RUN: mlir-opt %s --linalg-generalize-named-ops \
// RUN:             --pre-sparsification-rewrite \
// RUN:             --sparse-reinterpret-map \
// RUN:             --sparsification="parallelization-strategy=dense-outer-loop" \
// RUN:             --sparse-gpu-codegen | FileCheck %s
6
// Sparse encoding used by the sparse operands below: for a 2-D tensor,
// dimension d0 is stored at a dense level and d1 at a compressed level.
#CSR = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed)
}>
8
//
// CHECK-LABEL: gpu.module @sparse_kernels
// CHECK:       gpu.func @kernel1
// CHECK:       gpu.func @kernel0
//
// CHECK-LABEL: func.func @matmuls
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       %[[T1:.*]] = gpu.launch_func async @sparse_kernels::@kernel1 blocks
// CHECK:       gpu.memcpy async [%[[T1]]]
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.wait
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       gpu.alloc async
// CHECK:       gpu.memcpy async
// CHECK:       %[[T0:.*]] = gpu.launch_func async @sparse_kernels::@kernel0 blocks
// CHECK:       gpu.memcpy async [%[[T0]]]
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.dealloc async
// CHECK:       gpu.wait
//
// Two chained matrix multiplications, each with a dense left operand and a
// #CSR-encoded right operand. The result of the first product is the left
// operand of the second; both use the same all-zero constant as their `outs`
// tensor.
func.func @matmuls(%A: tensor<1024x8xf64>,
                   %B: tensor<8x1024xf64, #CSR>,
                   %C: tensor<1024x1024xf64, #CSR>) -> tensor<1024x1024xf64> {
  // Shared zero-filled init tensor for both products.
  %zero = arith.constant dense<0.0> : tensor<1024x1024xf64>

  // First product: %A (1024x8, dense) times %B (8x1024, #CSR).
  %AB = linalg.matmul
      ins(%A, %B: tensor<1024x8xf64>, tensor<8x1024xf64, #CSR>)
      outs(%zero: tensor<1024x1024xf64>) -> tensor<1024x1024xf64>

  // Second product: the dense intermediate times %C (1024x1024, #CSR).
  %ABC = linalg.matmul
      ins(%AB, %C: tensor<1024x1024xf64>, tensor<1024x1024xf64, #CSR>)
      outs(%zero: tensor<1024x1024xf64>) -> tensor<1024x1024xf64>

  return %ABC : tensor<1024x1024xf64>
}
64