xref: /llvm-project/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1//--------------------------------------------------------------------------------------------------
2// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
3//
4// Set-up that's shared across all tests in this directory. In principle, this
5// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. This is why the config is kept inline.
7//
8// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
9// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
10// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
11// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
12// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
13// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
14// DEFINE: %{run_opts} = -e main -entry-point-result=void
15// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
16// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
17//
18// DEFINE: %{env} =
19//--------------------------------------------------------------------------------------------------
20
21// RUN: %{compile} | %{run} | FileCheck %s
22//
23// Do the same run, but now with direct IR generation.
24// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
25// RUN: %{compile} | %{run} | FileCheck %s
26//
27// Do the same run, but now with direct IR generation and vectorization.
28// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
29// RUN: %{compile} | %{run} | FileCheck %s
30//
31// Do the same run, but now with direct IR generation and VLA vectorization.
32// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
33
34#DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
35#CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
36#CDR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : dense)}>
37#CSC = #sparse_tensor.encoding<{
38  map = (d0, d1) -> (d1 : dense, d0 : compressed)
39}>
40
41#map = affine_map<(d0, d1, d2, d3) -> (d0 + d1, d3 + d2)>
42#map1 = affine_map<(d0, d1, d2, d3) -> (d1, d2)>
43#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d3)>
44
45// An example of a 2D convolution with a sparse filter.
46module {
47
48  func.func @conv2d(%input:  tensor<8x8xi32>,
49                    %filter: tensor<3x3xi32>,
50                    %output: tensor<6x6xi32>) -> tensor<6x6xi32> {
51    %0 = linalg.conv_2d
52      ins  (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32>)
53      outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
54    return %0 : tensor<6x6xi32>
55  }
56
57  func.func @conv2d_CSR_dense_rotated(%arg0: tensor<8x8xi32, #CSR>,
58                                      %arg1: tensor<3x3xi32>) -> tensor<6x6xi32> {
59    %s = arith.constant dense<0> : tensor<6x6xi32>
60    %0 = linalg.generic {indexing_maps = [#map, #map1, #map2],
61      iterator_types = ["parallel", "reduction", "reduction", "parallel"]}
62      ins(%arg0, %arg1 : tensor<8x8xi32, #CSR>, tensor<3x3xi32>)
63      outs(%s : tensor<6x6xi32>) attrs =  {sorted = true} {
64      ^bb0(%in: i32, %in_0: i32, %out: i32):
65        %1 = arith.muli %in, %in_0 : i32
66        %2 = arith.addi %out, %1 : i32
67        linalg.yield %2 : i32
68    } -> tensor<6x6xi32>
69    return %0 : tensor<6x6xi32>
70  }
71
72  func.func @conv2d_sparse_out(%input:  tensor<8x8xi32>,
73                               %filter: tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR> {
74    %s = tensor.empty() : tensor<6x6xi32, #DCSR>
75    %0 = linalg.conv_2d
76      ins  (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32>)
77      outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
78    return %0 : tensor<6x6xi32, #DCSR>
79  }
80
81  func.func @conv2d_all_sparse_DCSR(%input:  tensor<8x8xi32, #DCSR>,
82                                    %filter: tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR> {
83    %s = tensor.empty() : tensor<6x6xi32, #DCSR>
84    %0 = linalg.conv_2d
85      ins  (%input, %filter: tensor<8x8xi32, #DCSR>, tensor<3x3xi32>)
86      outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
87    return %0 : tensor<6x6xi32, #DCSR>
88  }
89
90  func.func @conv2d_all_sparse_CSR(%input:  tensor<8x8xi32, #CSR>,
91                                   %filter: tensor<3x3xi32>) -> tensor<6x6xi32, #CSR> {
92    %s = tensor.empty() : tensor<6x6xi32, #CSR>
93    %0 = linalg.conv_2d
94      ins  (%input, %filter: tensor<8x8xi32, #CSR>, tensor<3x3xi32>)
95      outs (%s: tensor<6x6xi32, #CSR>) -> tensor<6x6xi32, #CSR>
96    return %0 : tensor<6x6xi32, #CSR>
97  }
98
99  func.func @conv2d_all_sparse_CD(%input:  tensor<8x8xi32, #CDR>,
100                                  %filter: tensor<3x3xi32>) -> tensor<6x6xi32, #CDR> {
101    %s = tensor.empty() : tensor<6x6xi32, #CDR>
102    %0 = linalg.conv_2d
103      ins  (%input, %filter: tensor<8x8xi32, #CDR>, tensor<3x3xi32>)
104      outs (%s: tensor<6x6xi32, #CDR>) -> tensor<6x6xi32, #CDR>
105    return %0 : tensor<6x6xi32, #CDR>
106  }
107
108  func.func @conv2d_all_sparse_CSC(%input:  tensor<8x8xi32, #CSC>,
109                                   %filter: tensor<3x3xi32>) -> tensor<6x6xi32, #CSC> {
110    %s = tensor.empty() : tensor<6x6xi32, #CSC>
111    %0 = linalg.conv_2d
112      ins  (%input, %filter: tensor<8x8xi32, #CSC>, tensor<3x3xi32>)
113      outs (%s: tensor<6x6xi32, #CSC>) -> tensor<6x6xi32, #CSC>
114    return %0 : tensor<6x6xi32, #CSC>
115  }
116
117  func.func @main() {
118    %c0 = arith.constant 0 : index
119    %i0 = arith.constant 0 : i32
120
121    // A typical edge detection filter.
122    %filter = arith.constant dense<[
123      [  1,  0, -1 ],
124      [  0,  0,  0 ],
125      [ -1,  0,  1 ]
126    ]> : tensor<3x3xi32>
127
128    %input = arith.constant dense<[
129      [  1,  2,  3,  4,  0,  6,  7,  8 ],
130      [  2,  2,  4,  4,  0,  0,  6,  8 ],
131      [  2,  2,  4,  4,  0,  0,  6,  8 ],
132      [  2,  2,  3,  4,  0,  0,  7,  8 ],
133      [  1,  3,  3,  4,  0,  0,  6,  8 ],
134      [  3,  2,  3,  4,  0,  0,  7,  8 ],
135      [  1,  3,  3,  4,  3,  6,  6,  8 ],
136      [  1,  3,  3,  4,  3,  0,  7,  8 ]
137    ]> : tensor<8x8xi32>
138    %sparse_input_DCSR = sparse_tensor.convert %input
139      : tensor<8x8xi32> to tensor<8x8xi32, #DCSR>
140    %sparse_input_CSR = sparse_tensor.convert %input
141      : tensor<8x8xi32> to tensor<8x8xi32, #CSR>
142    %sparse_input_CD = sparse_tensor.convert %input
143      : tensor<8x8xi32> to tensor<8x8xi32, #CDR>
144    %sparse_input_CSC = sparse_tensor.convert %input
145      : tensor<8x8xi32> to tensor<8x8xi32, #CSC>
146
147    // Call the kernel.
148    %output = arith.constant dense<0> : tensor<6x6xi32>
149    %0 = call @conv2d(%input, %filter, %output)
150       : (tensor<8x8xi32>,
151          tensor<3x3xi32>, tensor<6x6xi32>) -> tensor<6x6xi32>
152    %1 = call @conv2d_sparse_out(%input, %filter)
153       : (tensor<8x8xi32>,
154          tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR>
155    %2 = call @conv2d_all_sparse_DCSR(%sparse_input_DCSR, %filter)
156       : (tensor<8x8xi32, #DCSR>,
157          tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR>
158    %3 = call @conv2d_all_sparse_CSR(%sparse_input_CSR, %filter)
159       : (tensor<8x8xi32, #CSR>,
160          tensor<3x3xi32>) -> tensor<6x6xi32, #CSR>
161    %4 = call @conv2d_all_sparse_CD(%sparse_input_CD, %filter)
162       : (tensor<8x8xi32, #CDR>,
163          tensor<3x3xi32>) -> tensor<6x6xi32, #CDR>
164    %5 = call @conv2d_all_sparse_CSC(%sparse_input_CSC, %filter)
165       : (tensor<8x8xi32, #CSC>,
166          tensor<3x3xi32>) -> tensor<6x6xi32, #CSC>
167    %6 = call @conv2d_CSR_dense_rotated(%sparse_input_CSR, %filter)
168       : (tensor<8x8xi32, #CSR>,
169          tensor<3x3xi32>) -> tensor<6x6xi32>
170
171    // Verify the output.
172    //
173    // CHECK:    ( ( 0, 0, -1, -6, -1, 6 ),
174    // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
175    // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
176    // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
177    // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
178    // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
179    //
180    %v = vector.transfer_read %0[%c0, %c0], %i0
181      : tensor<6x6xi32>, vector<6x6xi32>
182    vector.print %v : vector<6x6xi32>
183
184    //
185    // Should be the same as dense output.
186    //
187    // CHECK:      ---- Sparse Tensor ----
188    // CHECK-NEXT: nse = 36
189    // CHECK-NEXT: dim = ( 6, 6 )
190    // CHECK-NEXT: lvl = ( 6, 6 )
191    // CHECK-NEXT: pos[0] : ( 0, 6 )
192    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5 )
193    // CHECK-NEXT: pos[1] : ( 0, 6, 12, 18, 24, 30, 36 )
194    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5 )
195    // CHECK-NEXT: values : ( 0, 0, -1, -6, -1, 6, -1, 0, 1, 0, 1, 0, 0, -1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 3, 6, -3, -6, 2, -1, 3, 0, -3, 0 )
196    // CHECK-NEXT: ----
197    //
198    sparse_tensor.print %1 : tensor<6x6xi32, #DCSR>
199
200    //
201    // Should be the same as dense output.
202    //
203    // CHECK:      ---- Sparse Tensor ----
204    // CHECK-NEXT: nse = 36
205    // CHECK-NEXT: dim = ( 6, 6 )
206    // CHECK-NEXT: lvl = ( 6, 6 )
207    // CHECK-NEXT: pos[0] : ( 0, 6 )
208    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5 )
209    // CHECK-NEXT: pos[1] : ( 0, 6, 12, 18, 24, 30, 36 )
210    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5 )
211    // CHECK-NEXT: values : ( 0, 0, -1, -6, -1, 6, -1, 0, 1, 0, 1, 0, 0, -1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 3, 6, -3, -6, 2, -1, 3, 0, -3, 0 )
212    // CHECK-NEXT: ----
213    //
214    sparse_tensor.print %2 : tensor<6x6xi32, #DCSR>
215
216    //
217    // Should be the same as dense output.
218    //
219    // CHECK:      ---- Sparse Tensor ----
220    // CHECK-NEXT: nse = 36
221    // CHECK-NEXT: dim = ( 6, 6 )
222    // CHECK-NEXT: lvl = ( 6, 6 )
223    // CHECK-NEXT: pos[1] : ( 0, 6, 12, 18, 24, 30, 36 )
224    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5 )
225    // CHECK-NEXT: values : ( 0, 0, -1, -6, -1, 6, -1, 0, 1, 0, 1, 0, 0, -1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 3, 6, -3, -6, 2, -1, 3, 0, -3, 0 )
226    // CHECK-NEXT: ----
227    //
228    sparse_tensor.print %3 : tensor<6x6xi32, #CSR>
229
230    //
231    // Should be the same as dense output.
232    //
233    // CHECK:      ---- Sparse Tensor ----
234    // CHECK-NEXT: nse = 36
235    // CHECK-NEXT: dim = ( 6, 6 )
236    // CHECK-NEXT: lvl = ( 6, 6 )
237    // CHECK-NEXT: pos[0] : ( 0, 6 )
238    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5 )
239    // CHECK-NEXT: values : ( 0, 0, -1, -6, -1, 6, -1, 0, 1, 0, 1, 0, 0, -1, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 3, 6, -3, -6, 2, -1, 3, 0, -3, 0 )
240    // CHECK-NEXT: ----
241    //
242    sparse_tensor.print %4 : tensor<6x6xi32, #CDR>
243
244    //
245    // Should be the same as dense output.
246    //
247    // CHECK:      ---- Sparse Tensor ----
248    // CHECK-NEXT: nse = 36
249    // CHECK-NEXT: dim = ( 6, 6 )
250    // CHECK-NEXT: lvl = ( 6, 6 )
251    // CHECK-NEXT: pos[1] : ( 0, 6, 12, 18, 24, 30, 36 )
252    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5 )
253    // CHECK-NEXT: values : ( 0, -1, 0, -1, 0, 2, 0, 0, -1, 0, 0, -1, -1, 1, 1, 0, 3, 3, -6, 0, 0, 0, 6, 0, -1, 1, 0, 0, -3, -3, 6, 0, 0, 0, -6, 0 )
254    // CHECK-NEXT: ----
255    //
256    sparse_tensor.print %5 : tensor<6x6xi32, #CSC>
257
258    //
259    // Should be the same as dense output.
260    // CHECK:    ( ( 0, 0, -1, -6, -1, 6 ),
261    // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
262    // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
263    // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
264    // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
265    // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
266    //
267    %v6 = vector.transfer_read %6[%c0, %c0], %i0
268      : tensor<6x6xi32>, vector<6x6xi32>
269    vector.print %v : vector<6x6xi32>
270
271    // Release the resources.
272    bufferization.dealloc_tensor %sparse_input_DCSR : tensor<8x8xi32, #DCSR>
273    bufferization.dealloc_tensor %sparse_input_CSR : tensor<8x8xi32, #CSR>
274    bufferization.dealloc_tensor %sparse_input_CSC : tensor<8x8xi32, #CSC>
275    bufferization.dealloc_tensor %sparse_input_CD : tensor<8x8xi32, #CDR>
276
277    bufferization.dealloc_tensor %0 : tensor<6x6xi32>
278    bufferization.dealloc_tensor %1 : tensor<6x6xi32, #DCSR>
279    bufferization.dealloc_tensor %2 : tensor<6x6xi32, #DCSR>
280    bufferization.dealloc_tensor %3 : tensor<6x6xi32, #CSR>
281    bufferization.dealloc_tensor %4 : tensor<6x6xi32, #CDR>
282    bufferization.dealloc_tensor %5 : tensor<6x6xi32, #CSC>
283    bufferization.dealloc_tensor %6 : tensor<6x6xi32>
284
285    return
286  }
287}
288