//--------------------------------------------------------------------------------------------------
// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
//
// Set-up that's shared across all tests in this directory. In principle, this
// config could be moved to lit.local.cfg. However, there are downstream users that
// do not use these LIT config files. Hence why this is kept inline.
//
// DEFINE: %{sparsifier_opts} = enable-runtime-library=true
// DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts}
// DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
// DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
// DEFINE: %{run_libs_sve} = -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils
// DEFINE: %{run_opts} = -e main -entry-point-result=void
// DEFINE: %{run} = mlir-runner %{run_opts} %{run_libs}
// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs_sve}
//
// DEFINE: %{env} =
//--------------------------------------------------------------------------------------------------

// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and vectorization.
// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true
// RUN: %{compile} | %{run} | FileCheck %s
//
// Do the same run, but now with direct IR generation and VLA vectorization.
// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}


// TODO: we can only support dense output for nchw input because 'c' is a reduction loop


// 4-D encoding: levels 0 and 2 are compressed, levels 1 and 3 are dense.
#CDCD = #sparse_tensor.encoding<{
  map = (d0, d1, d2, d3) -> (d0 : compressed, d1 : dense, d2 : compressed, d3 : dense)
}>


// 4-D encoding: all four levels are compressed.
#CCCC = #sparse_tensor.encoding<{
  map = (d0, d1, d2, d3) -> (d0 : compressed, d1 : compressed, d2 : compressed, d3 : compressed)
}>

// Creates and returns a 4-D tensor of size (%s1, %s2, %s3, %s4) in which every
// element is set to the value %f.
func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor<?x?x?x?xf32> {
  %buf = tensor.empty(%s1, %s2, %s3, %s4) : tensor<?x?x?x?xf32>
  %ret = linalg.fill ins(%f : f32) outs(%buf : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %ret : tensor<?x?x?x?xf32>
}

// Reference kernel: unit-stride, unit-dilation 2-D convolution on all-dense
// operands. %input is the image, %filter the kernel, and %init the tensor that
// the convolution accumulates into; the accumulated tensor is returned.
func.func @conv_2d_nchw_fchw(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?xf32>, %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %ret = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
                                     strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
    outs (%init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %ret : tensor<?x?x?x?xf32>
}

// Same convolution as @conv_2d_nchw_fchw, but the input image is stored with
// the #CDCD encoding. Filter and output stay dense.
func.func @conv_2d_nchw_fchw_CDCD(%input: tensor<?x?x?x?xf32, #CDCD>, %filter: tensor<?x?x?x?xf32>, %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %ret = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
                                     strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32>)
    outs (%init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %ret : tensor<?x?x?x?xf32>
}

// Same convolution as @conv_2d_nchw_fchw, but the input image is stored with
// the #CCCC encoding. Filter and output stay dense.
func.func @conv_2d_nchw_fchw_CCCC(%input: tensor<?x?x?x?xf32, #CCCC>, %filter: tensor<?x?x?x?xf32>, %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %ret = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
                                     strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>)
    outs (%init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %ret : tensor<?x?x?x?xf32>
}

// Same convolution as @conv_2d_nchw_fchw, but both the input image and the
// filter are stored with the #CCCC encoding. The output stays dense.
func.func @conv_2d_nchw_fchw_CCCC_CCCC(%input: tensor<?x?x?x?xf32, #CCCC>, %filter: tensor<?x?x?x?xf32, #CCCC>, %init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %ret = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
                                     strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>)
    outs (%init: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %ret : tensor<?x?x?x?xf32>
}

// Test driver: runs the same convolution through the dense reference kernel and
// the three sparse variants, and prints each 3x1x6x6 result.
//
// Expected values: the input (3x3x8x8) and the filter (1x3x3x3) are both filled
// with 2.0, so every output element sums 3 * 3 * 3 = 27 products of 2.0 * 2.0,
// i.e. 108. The single 10.0 stored at input [0, 0, 0, 3] is covered by the
// windows of output row 0, columns 1..3 of the first image, and adds
// (10 - 2) * 2 = 16 to each of them, giving 124.
func.func @main() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c3 = arith.constant 3 : index
  %c6 = arith.constant 6 : index
  %c8 = arith.constant 8 : index
  %f10 = arith.constant 10.00000e+00 : f32
  %val = arith.constant 2.00000e+00 : f32
  %zero = arith.constant 0.00000e+00 : f32

  // Dense operands: 1x3x3x3 filter, 3x3x8x8 input with one perturbed element,
  // and zero-initialized 3x1x6x6 outputs.
  %filter_fchw = call @alloc_4d_filled_f32(%c1, %c3, %c3, %c3, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %input_filled = call @alloc_4d_filled_f32(%c3, %c3, %c8, %c8, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %input_nchw = tensor.insert %f10 into %input_filled[%c0, %c0, %c0, %c3] : tensor<?x?x?x?xf32>
  %out_dense = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %out_CDCD = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
  %out_CCCC = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)

  // Sparse copies of the input and of the filter.
  %input_CDCD = sparse_tensor.convert %input_nchw
    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCD>
  %input_CCCC = sparse_tensor.convert %input_nchw
    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>

  %filter_CCCC = sparse_tensor.convert %filter_fchw
    : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>

  // NOTE(review): the dual-sparse call below accumulates into %out_dense, the
  // same tensor value already passed to the dense reference call, rather than
  // into an output of its own. This looks intentional given the deallocation
  // list at the end of this function - confirm before changing it.
  %dense_ret = call @conv_2d_nchw_fchw(%input_nchw, %filter_fchw, %out_dense) : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
  %CDCD_ret = call @conv_2d_nchw_fchw_CDCD(%input_CDCD, %filter_fchw, %out_CDCD) : (tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
  %CCCC_ret = call @conv_2d_nchw_fchw_CCCC(%input_CCCC, %filter_fchw, %out_CCCC) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
  %dual_CCCC_ret = call @conv_2d_nchw_fchw_CCCC_CCCC(%input_CCCC, %filter_CCCC, %out_dense) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)


  // Dense reference result.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %dense_v : vector<3x1x6x6xf32>

  // Result with the #CDCD-encoded input.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %v1 = vector.transfer_read %CDCD_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %v1 : vector<3x1x6x6xf32>

  // Result with the #CCCC-encoded input.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %v2 = vector.transfer_read %CCCC_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %v2 : vector<3x1x6x6xf32>

  // Result with both the input and the filter #CCCC-encoded.
  // CHECK:     ( ( ( ( 108, 124, 124, 124, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ),
  // CHECK-SAME:  ( ( ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ),
  // CHECK-SAME:      ( 108, 108, 108, 108, 108, 108 ) ) ) )
  %v3 = vector.transfer_read %dual_CCCC_ret[%c0, %c0, %c0, %c0], %zero
      : tensor<?x?x?x?xf32>, vector<3x1x6x6xf32>
  vector.print %v3 : vector<3x1x6x6xf32>

  // Free the resources
  // NOTE(review): of the four convolution results only %dense_ret is released
  // here; presumably the other results share storage with their output
  // operands after bufferization - confirm before adding or removing entries.
  bufferization.dealloc_tensor %input_nchw : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %filter_fchw : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %out_dense : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %out_CDCD : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %out_CCCC : tensor<?x?x?x?xf32>
  bufferization.dealloc_tensor %dense_ret : tensor<?x?x?x?xf32>

  bufferization.dealloc_tensor %input_CCCC : tensor<?x?x?x?xf32, #CCCC>
  bufferization.dealloc_tensor %input_CDCD : tensor<?x?x?x?xf32, #CDCD>
  bufferization.dealloc_tensor %filter_CCCC : tensor<?x?x?x?xf32, #CCCC>
  return
}