//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files, which is why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------
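//
// For reference, with the defaults above the first RUN line below expands to
// roughly the following pipeline (the remaining %... tokens are further lit
// substitutions); the REDEFINE lines then vary %{sparsifier_opts}:
//
//   mlir-opt %s --sparsifier="enable-runtime-library=true" | \
//   mlir-runner -e main -entry-point-result=void \
//     -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils | \
//   FileCheck %s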

// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and VLA vectorization.
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}

// Reductions in this file _are_ supported by the AArch64 SVE backend.

#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
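// As a rough mental model, a 1-D compressed level like #SV stores only the
// nonzero entries rather than all 32 values, e.g. the dense vector
// [0, 2, 0, 9] is kept as coordinates [1, 3] with values [2, 9], so the
// reduction kernels below only ever visit the stored entries.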

#trait_reduction = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> ()>    // x (scalar out)
  ],
  iterator_types = ["reduction"],
  doc = "x += OPER_i a(i)"
}
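
// The zero-dimensional output map above makes %x the reduction accumulator;
// each kernel below instantiates OPER with a different arith op
// (addi, addf, ori, xori) in its linalg.generic region.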

// An example of vector reductions.
module {

  func.func @sum_reduction_i32(%arga: tensor<32xi32, #SV>,
                          %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #SV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.addi %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func.func @sum_reduction_f32(%arga: tensor<32xf32, #SV>,
                          %argx: tensor<f32>) -> tensor<f32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xf32, #SV>)
      outs(%argx: tensor<f32>) {
        ^bb(%a: f32, %x: f32):
          %0 = arith.addf %x, %a : f32
          linalg.yield %0 : f32
    } -> tensor<f32>
    return %0 : tensor<f32>
  }

  func.func @or_reduction_i32(%arga: tensor<32xi32, #SV>,
                         %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #SV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.ori %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func.func @xor_reduction_i32(%arga: tensor<32xi32, #SV>,
                          %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #SV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.xori %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func.func @dump_i32(%arg0 : tensor<i32>) {
    %v = tensor.extract %arg0[] : tensor<i32>
    vector.print %v : i32
    return
  }

  func.func @dump_f32(%arg0 : tensor<f32>) {
    %v = tensor.extract %arg0[] : tensor<f32>
    vector.print %v : f32
    return
  }

  func.func @main() {
    %ri = arith.constant dense< 7   > : tensor<i32>
    %rf = arith.constant dense< 2.0 > : tensor<f32>

    %c_0_i32 = arith.constant dense<[
      0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0,
      0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0
    ]> : tensor<32xi32>

    %c_0_f32 = arith.constant dense<[
      0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
      0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0,
      0.0, 0.0, 0.0, 0.0, 2.5, 0.0, 0.0, 0.0,
      2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 9.0
    ]> : tensor<32xf32>

    // Convert constants to annotated tensors.
    %sparse_input_i32 = sparse_tensor.convert %c_0_i32
      : tensor<32xi32> to tensor<32xi32, #SV>
    %sparse_input_f32 = sparse_tensor.convert %c_0_f32
      : tensor<32xf32> to tensor<32xf32, #SV>

    // Call the kernels.
    %0 = call @sum_reduction_i32(%sparse_input_i32, %ri)
       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
    %1 = call @sum_reduction_f32(%sparse_input_f32, %rf)
       : (tensor<32xf32, #SV>, tensor<f32>) -> tensor<f32>
    %2 = call @or_reduction_i32(%sparse_input_i32, %ri)
       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
    %3 = call @xor_reduction_i32(%sparse_input_i32, %ri)
       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>

    // Verify results.
    //
    // CHECK: 26
    // CHECK: 27.5
    // CHECK: 15
    // CHECK: 10
    //
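    // The expected values follow from the stored entries plus the initial
    // accumulators: 7+2+1+4+3+9 = 26 (i32 sum),
    // 2.0+1.0+4.0+3.0+2.5+2.0+4.0+9.0 = 27.5 (f32 sum),
    // 7|2|1|4|3|9 = 15 (or), and 7^2^1^4^3^9 = 10 (xor).
    //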
    call @dump_i32(%0) : (tensor<i32>) -> ()
    call @dump_f32(%1) : (tensor<f32>) -> ()
    call @dump_i32(%2) : (tensor<i32>) -> ()
    call @dump_i32(%3) : (tensor<i32>) -> ()

    // Release the resources.
    bufferization.dealloc_tensor %sparse_input_i32 : tensor<32xi32, #SV>
    bufferization.dealloc_tensor %sparse_input_f32 : tensor<32xf32, #SV>
    bufferization.dealloc_tensor %0 : tensor<i32>
    bufferization.dealloc_tensor %1 : tensor<f32>
    bufferization.dealloc_tensor %2 : tensor<i32>
    bufferization.dealloc_tensor %3 : tensor<i32>

    return
  }
}