// RUN: mlir-opt %s -test-lower-to-llvm | \
// RUN: mlir-runner -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_c_runner_utils | \
// RUN: FileCheck %s

// Illustrates an 8x8 Sparse Matrix x Vector implemented with only operations
// of the vector dialect (and some std/scf). Essentially, this example performs
// the following multiplication:
//
//     0  1  2  3  4  5  6  7
//   +------------------------+
// 0 | 1  0  2  0  0  1  0  1 |   | 1 |   | 21 |
// 1 | 1  8  0  0  3  0  1  0 |   | 2 |   | 39 |
// 2 | 0  0  1  0  0  2  6  2 |   | 3 |   | 73 |
// 3 | 0  3  0  1  0  1  0  1 | x | 4 | = | 24 |
// 4 | 5  0  0  1  1  1  0  0 |   | 5 |   | 20 |
// 5 | 0  3  0  0  2  1  2  0 |   | 6 |   | 36 |
// 6 | 4  0  7  0  1  0  1  0 |   | 7 |   | 37 |
// 7 | 0  3  0  2  0  0  1  1 |   | 8 |   | 29 |
//   +------------------------+
//
// The sparse storage scheme used is an extended column scheme (also referred
// to as jagged diagonal), which is essentially a vector-friendly variant of
// the general sparse row-wise scheme (also called compressed row storage),
// using fixed-length vectors and no explicit pointer indexing into the
// value array to find the rows.
//
// The extended column storage for the matrix shown above is as follows.
//
//      VALUE             INDEX
//   +---------+       +---------+
// 0 | 1 2 1 1 |       | 0 2 5 7 |
// 1 | 1 8 3 1 |       | 0 1 4 6 |
// 2 | 1 2 6 2 |       | 2 5 6 7 |
// 3 | 3 1 1 1 |       | 1 3 5 7 |
// 4 | 5 1 1 1 |       | 0 3 4 5 |
// 5 | 3 2 1 2 |       | 1 4 5 6 |
// 6 | 4 7 1 1 |       | 0 2 4 6 |
// 7 | 3 2 1 1 |       | 1 3 6 7 |
//   +---------+       +---------+
//
// This example illustrates a DOT version for the operation. Another example
// in this directory illustrates an effective SAXPY version that operates on the
// transposed jagged diagonal storage to obtain higher vector lengths.
45 46#contraction_accesses = [ 47 affine_map<(i) -> (i)>, 48 affine_map<(i) -> (i)>, 49 affine_map<(i) -> ()> 50] 51#dot_trait = { 52 indexing_maps = #contraction_accesses, 53 iterator_types = ["reduction"] 54} 55 56func.func @spmv8x8(%AVAL: memref<8xvector<4xf32>>, 57 %AIDX: memref<8xvector<4xi32>>, %X: memref<?xf32>, %B: memref<?xf32>) { 58 %c0 = arith.constant 0 : index 59 %c1 = arith.constant 1 : index 60 %cn = arith.constant 8 : index 61 %f0 = arith.constant 0.0 : f32 62 %mask = vector.constant_mask [4] : vector<4xi1> 63 %pass = vector.broadcast %f0 : f32 to vector<4xf32> 64 scf.for %i = %c0 to %cn step %c1 { 65 %aval = memref.load %AVAL[%i] : memref<8xvector<4xf32>> 66 %aidx = memref.load %AIDX[%i] : memref<8xvector<4xi32>> 67 %0 = vector.gather %X[%c0][%aidx], %mask, %pass 68 : memref<?xf32>, vector<4xi32>, vector<4xi1>, vector<4xf32> into vector<4xf32> 69 %1 = vector.contract #dot_trait %aval, %0, %f0 : vector<4xf32>, vector<4xf32> into f32 70 memref.store %1, %B[%i] : memref<?xf32> 71 } 72 return 73} 74 75func.func @entry() { 76 %c0 = arith.constant 0 : index 77 %c1 = arith.constant 1 : index 78 %c2 = arith.constant 2 : index 79 %c3 = arith.constant 3 : index 80 %c4 = arith.constant 4 : index 81 %c5 = arith.constant 5 : index 82 %c6 = arith.constant 6 : index 83 %c7 = arith.constant 7 : index 84 %c8 = arith.constant 8 : index 85 86 %f0 = arith.constant 0.0 : f32 87 %f1 = arith.constant 1.0 : f32 88 %f2 = arith.constant 2.0 : f32 89 %f3 = arith.constant 3.0 : f32 90 %f4 = arith.constant 4.0 : f32 91 %f5 = arith.constant 5.0 : f32 92 %f6 = arith.constant 6.0 : f32 93 %f7 = arith.constant 7.0 : f32 94 %f8 = arith.constant 8.0 : f32 95 96 %i0 = arith.constant 0 : i32 97 %i1 = arith.constant 1 : i32 98 %i2 = arith.constant 2 : i32 99 %i3 = arith.constant 3 : i32 100 %i4 = arith.constant 4 : i32 101 %i5 = arith.constant 5 : i32 102 %i6 = arith.constant 6 : i32 103 %i7 = arith.constant 7 : i32 104 105 // 106 // Allocate. 
107 // 108 109 %AVAL = memref.alloc() {alignment = 64} : memref<8xvector<4xf32>> 110 %AIDX = memref.alloc() {alignment = 64} : memref<8xvector<4xi32>> 111 %X = memref.alloc(%c8) {alignment = 64} : memref<?xf32> 112 %B = memref.alloc(%c8) {alignment = 64} : memref<?xf32> 113 114 // 115 // Initialize. 116 // 117 118 %vf1 = vector.broadcast %f1 : f32 to vector<4xf32> 119 120 %0 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32> 121 memref.store %0, %AVAL[%c0] : memref<8xvector<4xf32>> 122 123 %1 = vector.insert %f8, %vf1[1] : f32 into vector<4xf32> 124 %2 = vector.insert %f3, %1[2] : f32 into vector<4xf32> 125 memref.store %2, %AVAL[%c1] : memref<8xvector<4xf32>> 126 127 %3 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32> 128 %4 = vector.insert %f6, %3[2] : f32 into vector<4xf32> 129 %5 = vector.insert %f2, %4[3] : f32 into vector<4xf32> 130 memref.store %5, %AVAL[%c2] : memref<8xvector<4xf32>> 131 132 %6 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> 133 memref.store %6, %AVAL[%c3] : memref<8xvector<4xf32>> 134 135 %7 = vector.insert %f5, %vf1[0] : f32 into vector<4xf32> 136 memref.store %7, %AVAL[%c4] : memref<8xvector<4xf32>> 137 138 %8 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> 139 %9 = vector.insert %f2, %8[1] : f32 into vector<4xf32> 140 %10 = vector.insert %f2, %9[3] : f32 into vector<4xf32> 141 memref.store %10, %AVAL[%c5] : memref<8xvector<4xf32>> 142 143 %11 = vector.insert %f4, %vf1[0] : f32 into vector<4xf32> 144 %12 = vector.insert %f7, %11[1] : f32 into vector<4xf32> 145 memref.store %12, %AVAL[%c6] : memref<8xvector<4xf32>> 146 147 %13 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> 148 %14 = vector.insert %f2, %13[1] : f32 into vector<4xf32> 149 memref.store %14, %AVAL[%c7] : memref<8xvector<4xf32>> 150 151 %vi0 = vector.broadcast %i0 : i32 to vector<4xi32> 152 153 %20 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32> 154 %21 = vector.insert %i5, %20[2] : i32 into vector<4xi32> 155 %22 = vector.insert %i7, %21[3] : 
i32 into vector<4xi32> 156 memref.store %22, %AIDX[%c0] : memref<8xvector<4xi32>> 157 158 %23 = vector.insert %i1, %vi0[1] : i32 into vector<4xi32> 159 %24 = vector.insert %i4, %23[2] : i32 into vector<4xi32> 160 %25 = vector.insert %i6, %24[3] : i32 into vector<4xi32> 161 memref.store %25, %AIDX[%c1] : memref<8xvector<4xi32>> 162 163 %26 = vector.insert %i2, %vi0[0] : i32 into vector<4xi32> 164 %27 = vector.insert %i5, %26[1] : i32 into vector<4xi32> 165 %28 = vector.insert %i6, %27[2] : i32 into vector<4xi32> 166 %29 = vector.insert %i7, %28[3] : i32 into vector<4xi32> 167 memref.store %29, %AIDX[%c2] : memref<8xvector<4xi32>> 168 169 %30 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> 170 %31 = vector.insert %i3, %30[1] : i32 into vector<4xi32> 171 %32 = vector.insert %i5, %31[2] : i32 into vector<4xi32> 172 %33 = vector.insert %i7, %32[3] : i32 into vector<4xi32> 173 memref.store %33, %AIDX[%c3] : memref<8xvector<4xi32>> 174 175 %34 = vector.insert %i3, %vi0[1] : i32 into vector<4xi32> 176 %35 = vector.insert %i4, %34[2] : i32 into vector<4xi32> 177 %36 = vector.insert %i5, %35[3] : i32 into vector<4xi32> 178 memref.store %36, %AIDX[%c4] : memref<8xvector<4xi32>> 179 180 %37 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> 181 %38 = vector.insert %i4, %37[1] : i32 into vector<4xi32> 182 %39 = vector.insert %i5, %38[2] : i32 into vector<4xi32> 183 %40 = vector.insert %i6, %39[3] : i32 into vector<4xi32> 184 memref.store %40, %AIDX[%c5] : memref<8xvector<4xi32>> 185 186 %41 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32> 187 %42 = vector.insert %i4, %41[2] : i32 into vector<4xi32> 188 %43 = vector.insert %i6, %42[3] : i32 into vector<4xi32> 189 memref.store %43, %AIDX[%c6] : memref<8xvector<4xi32>> 190 191 %44 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> 192 %45 = vector.insert %i3, %44[1] : i32 into vector<4xi32> 193 %46 = vector.insert %i6, %45[2] : i32 into vector<4xi32> 194 %47 = vector.insert %i7, %46[3] : i32 into vector<4xi32> 195 
memref.store %47, %AIDX[%c7] : memref<8xvector<4xi32>> 196 197 scf.for %i = %c0 to %c8 step %c1 { 198 %ix = arith.addi %i, %c1 : index 199 %kx = arith.index_cast %ix : index to i32 200 %fx = arith.sitofp %kx : i32 to f32 201 memref.store %fx, %X[%i] : memref<?xf32> 202 memref.store %f0, %B[%i] : memref<?xf32> 203 } 204 205 // 206 // Multiply. 207 // 208 209 call @spmv8x8(%AVAL, %AIDX, %X, %B) : (memref<8xvector<4xf32>>, 210 memref<8xvector<4xi32>>, 211 memref<?xf32>, memref<?xf32>) -> () 212 213 // 214 // Print and verify. 215 // 216 217 scf.for %i = %c0 to %c8 step %c1 { 218 %aval = memref.load %AVAL[%i] : memref<8xvector<4xf32>> 219 vector.print %aval : vector<4xf32> 220 } 221 222 scf.for %i = %c0 to %c8 step %c1 { 223 %aidx = memref.load %AIDX[%i] : memref<8xvector<4xi32>> 224 vector.print %aidx : vector<4xi32> 225 } 226 227 scf.for %i = %c0 to %c8 step %c1 { 228 %ldb = memref.load %B[%i] : memref<?xf32> 229 vector.print %ldb : f32 230 } 231 232 // 233 // CHECK: ( 1, 2, 1, 1 ) 234 // CHECK-NEXT: ( 1, 8, 3, 1 ) 235 // CHECK-NEXT: ( 1, 2, 6, 2 ) 236 // CHECK-NEXT: ( 3, 1, 1, 1 ) 237 // CHECK-NEXT: ( 5, 1, 1, 1 ) 238 // CHECK-NEXT: ( 3, 2, 1, 2 ) 239 // CHECK-NEXT: ( 4, 7, 1, 1 ) 240 // CHECK-NEXT: ( 3, 2, 1, 1 ) 241 // 242 // CHECK-NEXT: ( 0, 2, 5, 7 ) 243 // CHECK-NEXT: ( 0, 1, 4, 6 ) 244 // CHECK-NEXT: ( 2, 5, 6, 7 ) 245 // CHECK-NEXT: ( 1, 3, 5, 7 ) 246 // CHECK-NEXT: ( 0, 3, 4, 5 ) 247 // CHECK-NEXT: ( 1, 4, 5, 6 ) 248 // CHECK-NEXT: ( 0, 2, 4, 6 ) 249 // CHECK-NEXT: ( 1, 3, 6, 7 ) 250 // 251 // CHECK-NEXT: 21 252 // CHECK-NEXT: 39 253 // CHECK-NEXT: 73 254 // CHECK-NEXT: 24 255 // CHECK-NEXT: 20 256 // CHECK-NEXT: 36 257 // CHECK-NEXT: 37 258 // CHECK-NEXT: 29 259 // 260 261 // 262 // Free. 263 // 264 265 memref.dealloc %AVAL : memref<8xvector<4xf32>> 266 memref.dealloc %AIDX : memref<8xvector<4xi32>> 267 memref.dealloc %X : memref<?xf32> 268 memref.dealloc %B : memref<?xf32> 269 270 return 271} 272