xref: /llvm-project/mlir/test/Integration/Dialect/Vector/CPU/transfer-to-loops.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,convert-arith-to-llvm,convert-cf-to-llvm,reconcile-unrealized-casts)" | \
2// RUN: mlir-runner -e main -entry-point-result=void  \
3// RUN:   -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
4// RUN: FileCheck %s
5
6// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,convert-arith-to-llvm,convert-cf-to-llvm,reconcile-unrealized-casts)" | \
7// RUN: mlir-runner -e main -entry-point-result=void  \
8// RUN:   -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
9// RUN: FileCheck %s
10
// Permutation maps referenced by the transfer ops in @main.
// #map0 swaps the two indices, i.e. a 2-D transpose.
#map0 = affine_map<(i, j) -> (j, i)>
// #map1 keeps only the second index, producing a 1-D result.
#map1 = affine_map<(i, j) -> (j)>

// Body-less declaration of the printing helper that @main calls on an unranked
// f32 memref; presumably resolved from the runner-utils libraries passed via
// -shared-libs in the test command lines above.
func.func private @printMemrefF32(memref<*xf32>)
15
// Allocates a dynamically shaped %arg0 x %arg1 f32 buffer and fills element
// [i, j] with the value i + 100 * j. The value is computed in index
// arithmetic and then converted index -> i32 -> f32.
// The buffer is returned to the caller; this function never deallocates it.
func.func @alloc_2d_filled_f32(%arg0: index, %arg1: index) -> memref<?x?xf32> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c100 = arith.constant 100 : index
  %buf = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  scf.for %i = %c0 to %arg0 step %c1 {
    scf.for %j = %c0 to %arg1 step %c1 {
      // Scale the second index by 100 so that both coordinates can be read
      // back from the stored value.
      %j_scaled = arith.muli %j, %c100 : index
      %linear = arith.addi %i, %j_scaled : index
      %linear_i32 = arith.index_cast %linear : index to i32
      %value = arith.sitofp %linear_i32 : i32 to f32
      memref.store %value, %buf[%i, %j] : memref<?x?xf32>
    }
  }
  return %buf : memref<?x?xf32>
}
33
// Test driver: builds a 6x6 buffer with @alloc_2d_filled_f32, then exercises
// vector.transfer_read / vector.transfer_write with identity, transposed and
// rank-reducing permutation maps, printing every result so that FileCheck can
// compare it against the expectations written next to each op.
func.func @main() {
  // Index constants used as buffer extents and transfer offsets.
  %i0 = arith.constant 0 : index
  %i1 = arith.constant 1 : index
  %i2 = arith.constant 2 : index
  %i3 = arith.constant 3 : index
  %i6 = arith.constant 6 : index
  // Padding value supplied to every transfer_read below.
  %pad = arith.constant -42.0 : f32

  // Dump the freshly filled 6x6 buffer through the unranked-memref printer.
  %buf = call @alloc_2d_filled_f32(%i6, %i6) : (index, index) -> memref<?x?xf32>
  %unranked = memref.cast %buf : memref<?x?xf32> to memref<*xf32>
  call @printMemrefF32(%unranked): (memref<*xf32>) -> ()
  // CHECK:      Unranked{{.*}}data =
  // CHECK:      [
  // CHECK-SAME:  [0,   100,   200,   300,   400,   500],
  // CHECK-NEXT:  [1,   101,   201,   301,   401,   501],
  // CHECK-NEXT:  [2,   102,   202,   302,   402,   502],
  // CHECK-NEXT:  [3,   103,   203,   303,   403,   503],
  // CHECK-NEXT:  [4,   104,   204,   304,   404,   504],
  // CHECK-NEXT:  [5,   105,   205,   305,   405,   505]]

  // Plain read of the 5x5 block whose origin is element {1, 1}.
  %blk11 = vector.transfer_read %buf[%i1, %i1], %pad : memref<?x?xf32>, vector<5x5xf32>
  vector.print %blk11 : vector<5x5xf32>
  // CHECK-NEXT:  ( ( 101, 201, 301, 401, 501 ),
  // CHECK-SAME:    ( 102, 202, 302, 402, 502 ),
  // CHECK-SAME:    ( 103, 203, 303, 403, 503 ),
  // CHECK-SAME:    ( 104, 204, 304, 404, 504 ),
  // CHECK-SAME:    ( 105, 205, 305, 405, 505 ) )

  // Same origin {1, 1}, but read through the transposing map.
  %blk11_t = vector.transfer_read %buf[%i1, %i1], %pad {permutation_map = #map0} : memref<?x?xf32>, vector<5x5xf32>
  vector.print %blk11_t : vector<5x5xf32>
  // CHECK-NEXT:  ( ( 101, 102, 103, 104, 105 ),
  // CHECK-SAME:    ( 201, 202, 203, 204, 205 ),
  // CHECK-SAME:    ( 301, 302, 303, 304, 305 ),
  // CHECK-SAME:    ( 401, 402, 403, 404, 405 ),
  // CHECK-SAME:    ( 501, 502, 503, 504, 505 ) )

  // Store the transposed vector back at origin {0, 0}, again through the
  // transposing map.
  vector.transfer_write %blk11_t, %buf[%i0, %i0] {permutation_map = #map0} : vector<5x5xf32>, memref<?x?xf32>

  // Re-read the 5x5 block at {1, 1} after the write.
  // The last vector row (105 .. 505) and last vector column (501 .. 505)
  // still hold the original buffer contents, while the leading 4x4 corner of
  // the vector (202 .. 505) reflects the data written above.
  %blk11_new = vector.transfer_read %buf[%i1, %i1], %pad : memref<?x?xf32>, vector<5x5xf32>
  vector.print %blk11_new : vector<5x5xf32>
  // CHECK-NEXT:  ( ( 202, 302, 402, 502, 501 ),
  // CHECK-SAME:    ( 203, 303, 403, 503, 502 ),
  // CHECK-SAME:    ( 204, 304, 404, 504, 503 ),
  // CHECK-SAME:    ( 205, 305, 405, 505, 504 ),
  // CHECK-SAME:    ( 105, 205, 305, 405, 505 ) )

  // 5x5 block at {2, 3}: it runs past the 6x6 buffer, so the lanes that fall
  // outside are expected to carry the padding value.
  %blk23 = vector.transfer_read %buf[%i2, %i3], %pad : memref<?x?xf32>, vector<5x5xf32>
  vector.print %blk23 : vector<5x5xf32>
  // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ),
  // CHECK-SAME:   ( 404, 504, 503, -42, -42 ),
  // CHECK-SAME:   ( 405, 505, 504, -42, -42 ),
  // CHECK-SAME:   ( 305, 405, 505, -42, -42 ),
  // CHECK-SAME:   ( -42, -42, -42, -42, -42 ) )

  // Transposed variant of the partially out-of-bounds block at {2, 3}.
  %blk23_t = vector.transfer_read %buf[%i2, %i3], %pad {permutation_map = #map0} : memref<?x?xf32>, vector<5x5xf32>
  vector.print %blk23_t : vector<5x5xf32>
  // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ),
  // CHECK-SAME:   ( 503, 504, 505, 405, -42 ),
  // CHECK-SAME:   ( 502, 503, 504, 505, -42 ),
  // CHECK-SAME:   ( -42, -42, -42, -42, -42 ),
  // CHECK-SAME:   ( -42, -42, -42, -42, -42 ) )

  // 1-D read of five elements starting at {2, 3}, using the map that keeps
  // only the second index.
  %row = vector.transfer_read %buf[%i2, %i3], %pad {permutation_map = #map1} : memref<?x?xf32>, vector<5xf32>
  vector.print %row : vector<5xf32>
  // CHECK-NEXT: ( 403, 503, 502, -42, -42 )

  // Release the buffer obtained from @alloc_2d_filled_f32.
  memref.dealloc %buf : memref<?x?xf32>
  return
}
111