xref: /llvm-project/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nchw_fchw.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1//--------------------------------------------------------------------------------------------------
2// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
3//
4// Set-up that's shared across all tests in this directory. In principle, this
5// config could be moved to lit.local.cfg. However, there are downstream users that
6//  do not use these LIT config files. Hence why this is kept inline.
7//
8// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
9// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
10// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
11// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
12// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
13// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
14// DEFINE: %{run_opts} = -e main -entry-point-result=void
15// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
16// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
17//
18// DEFINE: %{env} =
19//--------------------------------------------------------------------------------------------------
20
21// RUN: %{compile} | %{run} | FileCheck %s
22//
23// Do the same run, but now with direct IR generation.
24// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true
25// RUN: %{compile} | %{run} | FileCheck %s
26//
27// Do the same run, but now with direct IR generation and vectorization.
28// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
29// RUN: %{compile} | %{run} | FileCheck %s
30//
31// Do the same run, but now with direct IR generation and VLA vectorization.
32// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
33
34
35// TODO: we can only support dense output for nchw input because 'c' is a reduction loop
36
37
// 4-D sparse encoding whose levels alternate between compressed and dense:
// d0 and d2 are stored compressed, d1 and d3 are stored dense.
#CDCD = #sparse_tensor.encoding<{
  map = (d0, d1, d2, d3) -> (
    d0 : compressed,
    d1 : dense,
    d2 : compressed,
    d3 : dense
  )
}>
41
42
// 4-D sparse encoding in which every level (d0 through d3) is compressed.
#CCCC = #sparse_tensor.encoding<{
  map = (d0, d1, d2, d3) -> (
    d0 : compressed,
    d1 : compressed,
    d2 : compressed,
    d3 : compressed
  )
}>
46
// Builds a 4-D f32 tensor with dynamic extents (%s1, %s2, %s3, %s4) and
// returns it with every element set to %f.
func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index,
                               %s3 : index, %s4 : index,
                               %f : f32) -> tensor<?x?x?x?xf32> {
  // Create a tensor of the requested shape; its contents are set by the fill.
  %empty = tensor.empty(%s1, %s2, %s3, %s4) : tensor<?x?x?x?xf32>
  // Splat the fill value over the whole tensor.
  %filled = linalg.fill ins(%f : f32)
                        outs(%empty : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %filled : tensor<?x?x?x?xf32>
}
53
// Dense reference kernel: 2-D convolution of an NCHW input with an FCHW
// filter, unit strides and unit dilations, using %init as the output operand.
func.func @conv_2d_nchw_fchw(%input: tensor<?x?x?x?xf32>,
                             %filter: tensor<?x?x?x?xf32>,
                             %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %conv = linalg.conv_2d_nchw_fchw {
            dilations = dense<1> : tensor<2xi64>,
            strides = dense<1> : tensor<2xi64>
          }
          ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
          outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %conv : tensor<?x?x?x?xf32>
}
61
// Same convolution as the dense kernel, but the input is stored with the
// #CDCD sparse encoding; the filter and the output stay dense.
func.func @conv_2d_nchw_fchw_CDCD(%input: tensor<?x?x?x?xf32, #CDCD>,
                                  %filter: tensor<?x?x?x?xf32>,
                                  %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %conv = linalg.conv_2d_nchw_fchw {
            dilations = dense<1> : tensor<2xi64>,
            strides = dense<1> : tensor<2xi64>
          }
          ins(%input, %filter : tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32>)
          outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %conv : tensor<?x?x?x?xf32>
}
69
// Same convolution as the dense kernel, but the input is stored with the
// #CCCC sparse encoding; the filter and the output stay dense.
func.func @conv_2d_nchw_fchw_CCCC(%input: tensor<?x?x?x?xf32, #CCCC>,
                                  %filter: tensor<?x?x?x?xf32>,
                                  %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %conv = linalg.conv_2d_nchw_fchw {
            dilations = dense<1> : tensor<2xi64>,
            strides = dense<1> : tensor<2xi64>
          }
          ins(%input, %filter : tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>)
          outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %conv : tensor<?x?x?x?xf32>
}
77
// Same convolution as the dense kernel, but both the input and the filter are
// stored with the #CCCC sparse encoding; only the output stays dense.
func.func @conv_2d_nchw_fchw_CCCC_CCCC(%input: tensor<?x?x?x?xf32, #CCCC>,
                                       %filter: tensor<?x?x?x?xf32, #CCCC>,
                                       %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %conv = linalg.conv_2d_nchw_fchw {
            dilations = dense<1> : tensor<2xi64>,
            strides = dense<1> : tensor<2xi64>
          }
          ins(%input, %filter : tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>)
          outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %conv : tensor<?x?x?x?xf32>
}
85
// Test driver: runs the same NCHW/FCHW convolution through the dense kernel
// and through the three sparse variants, then prints every result so that
// FileCheck can compare them against the expected values.
func.func @main() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c3 = arith.constant 3 : index
  %c6 = arith.constant 6 : index
  %c8 = arith.constant 8 : index
  %f10 = arith.constant 10.00000e+00 : f32
  %val = arith.constant 2.00000e+00 : f32
  %zero = arith.constant 0.00000e+00 : f32

  // The filter is 1x3x3x3 (FCHW) and the input is 3x3x8x8 (NCHW), both filled
  // with 2.0; one input element at [0, 0, 0, 3] is then overwritten with 10.0.
  // Each kernel gets a zero-filled 3x1x6x6 tensor as its output operand.
  %filter2D_fchw = call @alloc_4d_filled_f32(%c1, %c3, %c3, %c3, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %in2D_tmp = call @alloc_4d_filled_f32(%c3, %c3, %c8, %c8, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %in2D_nchw = tensor.insert %f10 into %in2D_tmp[%c0, %c0, %c0, %c3] : tensor<?x?x?x?xf32>
  %out2D = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %out2D_CDCD = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %out2D_CCCC = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)

  // Sparse copies of the input (one per encoding) and of the filter.
  %in2D_nchw_CDCD = sparse_tensor.convert %in2D_nchw
    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCD>
  %in2D_nchw_CCCC = sparse_tensor.convert %in2D_nchw
    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>

  %filter2D_fchw_CCCC = sparse_tensor.convert %filter2D_fchw
    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>

  // Each result is named after the kernel that produced it. The dual-sparse
  // call reuses %out2D, the zero-filled tensor already passed to the dense
  // call, as its output operand.
  %dense_ret = call @conv_2d_nchw_fchw(%in2D_nchw, %filter2D_fchw, %out2D) : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
  %CDCD_ret = call @conv_2d_nchw_fchw_CDCD(%in2D_nchw_CDCD, %filter2D_fchw, %out2D_CDCD) : (tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
  %CCCC_ret = call @conv_2d_nchw_fchw_CCCC(%in2D_nchw_CCCC, %filter2D_fchw, %out2D_CCCC) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
  %dual_CCCC_ret = call @conv_2d_nchw_fchw_CCCC_CCCC(%in2D_nchw_CCCC, %filter2D_fchw_CCCC, %out2D) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)

  // Expected values: every window sums 3*3*3 products of 2.0 * 2.0, i.e. 108.
  // The three windows of image 0, row 0 that cover the 10.0 element give
  // 108 - 4 + 20 = 124 instead.

  // Dense reference result.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %dense_v : vector<3x1x6x6xf32>

  // Result with a #CDCD-encoded input.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %v1 = vector.transfer_read %CDCD_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %v1 : vector<3x1x6x6xf32>

  // Result with a #CCCC-encoded input.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %v2 = vector.transfer_read %CCCC_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %v2 : vector<3x1x6x6xf32>

  // Result with both input and filter #CCCC-encoded.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %v3 = vector.transfer_read %dual_CCCC_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %v3 : vector<3x1x6x6xf32>

  // Free the resources.
  // NOTE(review): of the four convolution results only %dense_ret is released
  // explicitly. Presumably the other three bufferize in place onto their
  // output operands, which are released below - confirm before changing this
  // list.
  bufferization.dealloc_tensor %in2D_nchw : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %filter2D_fchw : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %out2D : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %out2D_CDCD : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %out2D_CCCC : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %dense_ret : tensor<?x?x?x?xf32>

  bufferization.dealloc_tensor %in2D_nchw_CCCC : tensor<?x?x?x?xf32, #CCCC>
  bufferization.dealloc_tensor %in2D_nchw_CDCD : tensor<?x?x?x?xf32, #CDCD>
  bufferization.dealloc_tensor %filter2D_fchw_CCCC : tensor<?x?x?x?xf32, #CCCC>
  return
}
218