xref: /llvm-project/mlir/test/Integration/Dialect/Vector/CPU/sparse-dot-matvec.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1// RUN: mlir-opt %s -test-lower-to-llvm  | \
2// RUN: mlir-runner -e entry -entry-point-result=void \
3// RUN:   -shared-libs=%mlir_c_runner_utils | \
4// RUN: FileCheck %s
5
6// Illustrates an 8x8 sparse matrix x vector product implemented with only
7// operations of the vector dialect (plus some arith/memref/scf). Essentially,
8// this example performs the following multiplication:
9//
10//     0  1  2  3  4  5  6  7
11//   +------------------------+
12// 0 | 1  0  2  0  0  1  0  1 |   | 1 |   | 21 |
13// 1 | 1  8  0  0  3  0  1  0 |   | 2 |   | 39 |
14// 2 | 0  0  1  0  0  2  6  2 |   | 3 |   | 73 |
15// 3 | 0  3  0  1  0  1  0  1 | x | 4 | = | 24 |
16// 4 | 5  0  0  1  1  1  0  0 |   | 5 |   | 20 |
17// 5 | 0  3  0  0  2  1  2  0 |   | 6 |   | 36 |
18// 6 | 4  0  7  0  1  0  1  0 |   | 7 |   | 37 |
19// 7 | 0  3  0  2  0  0  1  1 |   | 8 |   | 29 |
20//   +------------------------+
21//
22// The sparse storage scheme used is an extended column scheme (also referred
23// to as jagged diagonal), which is essentially a vector-friendly variant of
24// the general sparse row-wise scheme (also called compressed row storage),
25// using fixed-length vectors and no explicit pointer indexing into the
26// value array to find the rows.
27//
28// The extended column storage for the matrix shown above is as follows.
29//
30//      VALUE           INDEX
31//   +---------+     +---------+
32// 0 | 1 2 1 1 |     | 0 2 5 7 |
33// 1 | 1 8 3 1 |     | 0 1 4 6 |
34// 2 | 1 2 6 2 |     | 2 5 6 7 |
35// 3 | 3 1 1 1 |     | 1 3 5 7 |
36// 4 | 5 1 1 1 |     | 0 3 4 5 |
37// 5 | 3 2 1 2 |     | 1 4 5 6 |
38// 6 | 4 7 1 1 |     | 0 2 4 6 |
39// 7 | 3 2 1 1 |     | 1 3 6 7 |
40//   +---------+     +---------+
41//
42// This example illustrates a DOT version for the operation. Another example
43// in this directory illustrates an effective SAXPY version that operates on the
44// transposed jagged diagonal storage to obtain higher vector lengths.
45
// Indexing maps for a 1-D dot product: both operands are indexed by the
// single iteration dimension, and the accumulator/result is a scalar.
#contraction_accesses = [
  affine_map<(k) -> (k)>,
  affine_map<(k) -> (k)>,
  affine_map<(k) -> ()>
]

// Trait handed to vector.contract in @spmv8x8; the only iteration dimension
// is a reduction.
#dot_trait = {
  indexing_maps = #contraction_accesses,
  iterator_types = ["reduction"]
}
55
// Sparse matrix x vector product over 8 rows, one DOT per row.
//
//   %AVAL : per row, a vector of 4 stored matrix values.
//   %AIDX : per row, a vector of 4 i32 indices into %X, one per stored value.
//   %X    : dense input vector.
//   %B    : output; element i receives the dot product computed for row i.
func.func @spmv8x8(%AVAL: memref<8xvector<4xf32>>,
                   %AIDX: memref<8xvector<4xi32>>,
                   %X: memref<?xf32>, %B: memref<?xf32>) {
  %first = arith.constant 0 : index
  %stride = arith.constant 1 : index
  %nrows = arith.constant 8 : index
  %zero = arith.constant 0.0 : f32
  // Every one of the 4 lanes is enabled, so the zero pass-through vector is
  // only there to satisfy the gather signature.
  %all_lanes = vector.constant_mask [4] : vector<4xi1>
  %fill = vector.broadcast %zero : f32 to vector<4xf32>
  scf.for %row = %first to %nrows step %stride {
    %cols = memref.load %AIDX[%row] : memref<8xvector<4xi32>>
    %vals = memref.load %AVAL[%row] : memref<8xvector<4xf32>>
    // Fetch the elements of X selected by this row's indices (base offset 0).
    %xs = vector.gather %X[%first][%cols], %all_lanes, %fill
        : memref<?xf32>, vector<4xi32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
    // Reduce values * gathered X elements to a scalar, starting from 0.
    %dot = vector.contract #dot_trait %vals, %xs, %zero
        : vector<4xf32>, vector<4xf32> into f32
    memref.store %dot, %B[%row] : memref<?xf32>
  }
  return
}
74
// Test driver: fills the value/index buffers row by row, sets X[i] = i + 1
// and B[i] = 0, calls @spmv8x8, then prints the value rows, the index rows
// and the result so the output can be verified.
func.func @entry() {
  // Index constants: row positions 0..7, loop bounds/step, and the size 8.
  %idx0 = arith.constant 0 : index
  %idx1 = arith.constant 1 : index
  %idx2 = arith.constant 2 : index
  %idx3 = arith.constant 3 : index
  %idx4 = arith.constant 4 : index
  %idx5 = arith.constant 5 : index
  %idx6 = arith.constant 6 : index
  %idx7 = arith.constant 7 : index
  %idx8 = arith.constant 8 : index

  // f32 constants used as matrix values (and 0.0 to clear B).
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32
  %v3 = arith.constant 3.0 : f32
  %v4 = arith.constant 4.0 : f32
  %v5 = arith.constant 5.0 : f32
  %v6 = arith.constant 6.0 : f32
  %v7 = arith.constant 7.0 : f32
  %v8 = arith.constant 8.0 : f32

  // i32 constants used as gather indices.
  %col0 = arith.constant 0 : i32
  %col1 = arith.constant 1 : i32
  %col2 = arith.constant 2 : i32
  %col3 = arith.constant 3 : i32
  %col4 = arith.constant 4 : i32
  %col5 = arith.constant 5 : i32
  %col6 = arith.constant 6 : i32
  %col7 = arith.constant 7 : i32

  //
  // Allocate.
  //

  %vals = memref.alloc()      {alignment = 64} : memref<8xvector<4xf32>>
  %cols = memref.alloc()      {alignment = 64} : memref<8xvector<4xi32>>
  %x    = memref.alloc(%idx8) {alignment = 64} : memref<?xf32>
  %b    = memref.alloc(%idx8) {alignment = 64} : memref<?xf32>

  //
  // Initialize the value rows. Each row starts as all ones and only the
  // lanes that differ from 1 are overwritten.
  //

  %ones = vector.broadcast %v1 : f32 to vector<4xf32>

  // Row 0: ( 1, 2, 1, 1 )
  %a0 = vector.insert %v2, %ones[1] : f32 into vector<4xf32>
  memref.store %a0, %vals[%idx0] : memref<8xvector<4xf32>>

  // Row 1: ( 1, 8, 3, 1 )
  %a1_l1 = vector.insert %v8, %ones[1] : f32 into vector<4xf32>
  %a1 = vector.insert %v3, %a1_l1[2] : f32 into vector<4xf32>
  memref.store %a1, %vals[%idx1] : memref<8xvector<4xf32>>

  // Row 2: ( 1, 2, 6, 2 )
  %a2_l1 = vector.insert %v2, %ones[1] : f32 into vector<4xf32>
  %a2_l2 = vector.insert %v6, %a2_l1[2] : f32 into vector<4xf32>
  %a2 = vector.insert %v2, %a2_l2[3] : f32 into vector<4xf32>
  memref.store %a2, %vals[%idx2] : memref<8xvector<4xf32>>

  // Row 3: ( 3, 1, 1, 1 )
  %a3 = vector.insert %v3, %ones[0] : f32 into vector<4xf32>
  memref.store %a3, %vals[%idx3] : memref<8xvector<4xf32>>

  // Row 4: ( 5, 1, 1, 1 )
  %a4 = vector.insert %v5, %ones[0] : f32 into vector<4xf32>
  memref.store %a4, %vals[%idx4] : memref<8xvector<4xf32>>

  // Row 5: ( 3, 2, 1, 2 )
  %a5_l0 = vector.insert %v3, %ones[0] : f32 into vector<4xf32>
  %a5_l1 = vector.insert %v2, %a5_l0[1] : f32 into vector<4xf32>
  %a5 = vector.insert %v2, %a5_l1[3] : f32 into vector<4xf32>
  memref.store %a5, %vals[%idx5] : memref<8xvector<4xf32>>

  // Row 6: ( 4, 7, 1, 1 )
  %a6_l0 = vector.insert %v4, %ones[0] : f32 into vector<4xf32>
  %a6 = vector.insert %v7, %a6_l0[1] : f32 into vector<4xf32>
  memref.store %a6, %vals[%idx6] : memref<8xvector<4xf32>>

  // Row 7: ( 3, 2, 1, 1 )
  %a7_l0 = vector.insert %v3, %ones[0] : f32 into vector<4xf32>
  %a7 = vector.insert %v2, %a7_l0[1] : f32 into vector<4xf32>
  memref.store %a7, %vals[%idx7] : memref<8xvector<4xf32>>

  //
  // Initialize the index rows. Each row starts as all zeros and only the
  // lanes whose index is not already in place are overwritten.
  //

  %zeros = vector.broadcast %col0 : i32 to vector<4xi32>

  // Row 0: ( 0, 2, 5, 7 )
  %j0_l1 = vector.insert %col2, %zeros[1] : i32 into vector<4xi32>
  %j0_l2 = vector.insert %col5, %j0_l1[2] : i32 into vector<4xi32>
  %j0 = vector.insert %col7, %j0_l2[3] : i32 into vector<4xi32>
  memref.store %j0, %cols[%idx0] : memref<8xvector<4xi32>>

  // Row 1: ( 0, 1, 4, 6 )
  %j1_l1 = vector.insert %col1, %zeros[1] : i32 into vector<4xi32>
  %j1_l2 = vector.insert %col4, %j1_l1[2] : i32 into vector<4xi32>
  %j1 = vector.insert %col6, %j1_l2[3] : i32 into vector<4xi32>
  memref.store %j1, %cols[%idx1] : memref<8xvector<4xi32>>

  // Row 2: ( 2, 5, 6, 7 )
  %j2_l0 = vector.insert %col2, %zeros[0] : i32 into vector<4xi32>
  %j2_l1 = vector.insert %col5, %j2_l0[1] : i32 into vector<4xi32>
  %j2_l2 = vector.insert %col6, %j2_l1[2] : i32 into vector<4xi32>
  %j2 = vector.insert %col7, %j2_l2[3] : i32 into vector<4xi32>
  memref.store %j2, %cols[%idx2] : memref<8xvector<4xi32>>

  // Row 3: ( 1, 3, 5, 7 )
  %j3_l0 = vector.insert %col1, %zeros[0] : i32 into vector<4xi32>
  %j3_l1 = vector.insert %col3, %j3_l0[1] : i32 into vector<4xi32>
  %j3_l2 = vector.insert %col5, %j3_l1[2] : i32 into vector<4xi32>
  %j3 = vector.insert %col7, %j3_l2[3] : i32 into vector<4xi32>
  memref.store %j3, %cols[%idx3] : memref<8xvector<4xi32>>

  // Row 4: ( 0, 3, 4, 5 )
  %j4_l1 = vector.insert %col3, %zeros[1] : i32 into vector<4xi32>
  %j4_l2 = vector.insert %col4, %j4_l1[2] : i32 into vector<4xi32>
  %j4 = vector.insert %col5, %j4_l2[3] : i32 into vector<4xi32>
  memref.store %j4, %cols[%idx4] : memref<8xvector<4xi32>>

  // Row 5: ( 1, 4, 5, 6 )
  %j5_l0 = vector.insert %col1, %zeros[0] : i32 into vector<4xi32>
  %j5_l1 = vector.insert %col4, %j5_l0[1] : i32 into vector<4xi32>
  %j5_l2 = vector.insert %col5, %j5_l1[2] : i32 into vector<4xi32>
  %j5 = vector.insert %col6, %j5_l2[3] : i32 into vector<4xi32>
  memref.store %j5, %cols[%idx5] : memref<8xvector<4xi32>>

  // Row 6: ( 0, 2, 4, 6 )
  %j6_l1 = vector.insert %col2, %zeros[1] : i32 into vector<4xi32>
  %j6_l2 = vector.insert %col4, %j6_l1[2] : i32 into vector<4xi32>
  %j6 = vector.insert %col6, %j6_l2[3] : i32 into vector<4xi32>
  memref.store %j6, %cols[%idx6] : memref<8xvector<4xi32>>

  // Row 7: ( 1, 3, 6, 7 )
  %j7_l0 = vector.insert %col1, %zeros[0] : i32 into vector<4xi32>
  %j7_l1 = vector.insert %col3, %j7_l0[1] : i32 into vector<4xi32>
  %j7_l2 = vector.insert %col6, %j7_l1[2] : i32 into vector<4xi32>
  %j7 = vector.insert %col7, %j7_l2[3] : i32 into vector<4xi32>
  memref.store %j7, %cols[%idx7] : memref<8xvector<4xi32>>

  // X[i] = i + 1 (as f32) and B[i] = 0 for i in 0..7.
  scf.for %p = %idx0 to %idx8 step %idx1 {
    %next = arith.addi %p, %idx1 : index
    %next_i32 = arith.index_cast %next : index to i32
    %next_f32 = arith.sitofp %next_i32 : i32 to f32
    memref.store %next_f32, %x[%p] : memref<?xf32>
    memref.store %v0, %b[%p] : memref<?xf32>
  }

  //
  // Multiply.
  //

  call @spmv8x8(%vals, %cols, %x, %b) : (memref<8xvector<4xf32>>,
                                         memref<8xvector<4xi32>>,
                                         memref<?xf32>, memref<?xf32>) -> ()

  //
  // Print and verify. The three loops stay separate so that all value rows
  // are printed first, then all index rows, then the results.
  //

  scf.for %r = %idx0 to %idx8 step %idx1 {
    %row_vals = memref.load %vals[%r] : memref<8xvector<4xf32>>
    vector.print %row_vals : vector<4xf32>
  }

  scf.for %r = %idx0 to %idx8 step %idx1 {
    %row_cols = memref.load %cols[%r] : memref<8xvector<4xi32>>
    vector.print %row_cols : vector<4xi32>
  }

  scf.for %r = %idx0 to %idx8 step %idx1 {
    %res = memref.load %b[%r] : memref<?xf32>
    vector.print %res : f32
  }

  //
  // CHECK:      ( 1, 2, 1, 1 )
  // CHECK-NEXT: ( 1, 8, 3, 1 )
  // CHECK-NEXT: ( 1, 2, 6, 2 )
  // CHECK-NEXT: ( 3, 1, 1, 1 )
  // CHECK-NEXT: ( 5, 1, 1, 1 )
  // CHECK-NEXT: ( 3, 2, 1, 2 )
  // CHECK-NEXT: ( 4, 7, 1, 1 )
  // CHECK-NEXT: ( 3, 2, 1, 1 )
  //
  // CHECK-NEXT: ( 0, 2, 5, 7 )
  // CHECK-NEXT: ( 0, 1, 4, 6 )
  // CHECK-NEXT: ( 2, 5, 6, 7 )
  // CHECK-NEXT: ( 1, 3, 5, 7 )
  // CHECK-NEXT: ( 0, 3, 4, 5 )
  // CHECK-NEXT: ( 1, 4, 5, 6 )
  // CHECK-NEXT: ( 0, 2, 4, 6 )
  // CHECK-NEXT: ( 1, 3, 6, 7 )
  //
  // CHECK-NEXT: 21
  // CHECK-NEXT: 39
  // CHECK-NEXT: 73
  // CHECK-NEXT: 24
  // CHECK-NEXT: 20
  // CHECK-NEXT: 36
  // CHECK-NEXT: 37
  // CHECK-NEXT: 29
  //

  //
  // Free.
  //

  memref.dealloc %vals : memref<8xvector<4xf32>>
  memref.dealloc %cols : memref<8xvector<4xi32>>
  memref.dealloc %x    : memref<?xf32>
  memref.dealloc %b    : memref<?xf32>

  return
}
272