// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,convert-arith-to-llvm,convert-cf-to-llvm,reconcile-unrealized-casts)" | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
// RUN: FileCheck %s

// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,convert-arith-to-llvm,convert-cf-to-llvm,reconcile-unrealized-casts)" | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
// RUN: FileCheck %s

// End-to-end test of 2-D vector transfers on a dynamically shaped memref.
// The file is lowered and executed twice: once with the default
// convert-vector-to-scf lowering and once with full-unroll=true. Both runs
// are verified against the same expected output below.

// Swaps the two memref dimensions (transposed transfer).
#transposed = affine_map<(d0, d1) -> (d1, d0)>
// Keeps only the last memref dimension (1-D transfer out of a 2-D memref).
#last_dim = affine_map<(d0, d1) -> (d1)>

// Printing helper; declared here only, with no body in this file.
func.func private @printMemrefF32(memref<*xf32>)

// Allocates a %rows x %cols f32 buffer and stores the value i + 100 * j at
// position [i, j]. The buffer is returned to the caller; @main deallocates it.
func.func @alloc_2d_filled_f32(%rows: index, %cols: index) -> memref<?x?xf32> {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  // Not referenced anywhere in this function.
  %c10 = arith.constant 10 : index
  %hundred = arith.constant 100 : index
  %buf = memref.alloc(%rows, %cols) : memref<?x?xf32>
  scf.for %i = %zero to %rows step %one {
    scf.for %j = %zero to %cols step %one {
      // value = i + 100 * j, converted index -> i32 -> f32 before the store.
      %j_scaled = arith.muli %j, %hundred : index
      %linear = arith.addi %i, %j_scaled : index
      %as_i32 = arith.index_cast %linear : index to i32
      %as_f32 = arith.sitofp %as_i32 : i32 to f32
      memref.store %as_f32, %buf[%i, %j] : memref<?x?xf32>
    }
  }
  func.return %buf : memref<?x?xf32>
}

// Entry point: fills a 6x6 buffer, then prints the results of several
// transfer reads (plain, transposed, partially out of bounds, and 1-D) with a
// transposed transfer write in between.
func.func @main() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  %c6 = arith.constant 6 : index
  // Padding value that transfer_read substitutes for out-of-bounds elements.
  %pad = arith.constant -4.2e+01 : f32

  %buf = func.call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref<?x?xf32>
  %unranked = memref.cast %buf : memref<?x?xf32> to memref<*xf32>
  func.call @printMemrefF32(%unranked) : (memref<*xf32>) -> ()
  // Initial contents: element [i, j] holds i + 100 * j.
  // CHECK: Unranked{{.*}}data =
  // CHECK: [
  // CHECK-SAME: [0, 100, 200, 300, 400, 500],
  // CHECK-NEXT: [1, 101, 201, 301, 401, 501],
  // CHECK-NEXT: [2, 102, 202, 302, 402, 502],
  // CHECK-NEXT: [3, 103, 203, 303, 403, 503],
  // CHECK-NEXT: [4, 104, 204, 304, 404, 504],
  // CHECK-NEXT: [5, 105, 205, 305, 405, 505]]

  // Plain read of the 5x5 block rooted @{1, 1}; it lies fully in bounds.
  %block = vector.transfer_read %buf[%c1, %c1], %pad : memref<?x?xf32>, vector<5x5xf32>
  vector.print %block : vector<5x5xf32>
  // CHECK-NEXT: ( ( 101, 201, 301, 401, 501 ),
  // CHECK-SAME: ( 102, 202, 302, 402, 502 ),
  // CHECK-SAME: ( 103, 203, 303, 403, 503 ),
  // CHECK-SAME: ( 104, 204, 304, 404, 504 ),
  // CHECK-SAME: ( 105, 205, 305, 405, 505 ) )

  // Same 5x5 block rooted @{1, 1}, read through the transposing map.
  %block_t = vector.transfer_read %buf[%c1, %c1], %pad {permutation_map = #transposed} : memref<?x?xf32>, vector<5x5xf32>
  vector.print %block_t : vector<5x5xf32>
  // CHECK-NEXT: ( ( 101, 102, 103, 104, 105 ),
  // CHECK-SAME: ( 201, 202, 203, 204, 205 ),
  // CHECK-SAME: ( 301, 302, 303, 304, 305 ),
  // CHECK-SAME: ( 401, 402, 403, 404, 405 ),
  // CHECK-SAME: ( 501, 502, 503, 504, 505 ) )

  // Write the transposed vector back through the transposing map, rooted
  // @{0, 0} in memory. Afterwards the 5x5 region at {0, 0} holds what the
  // block at {1, 1} held before the write.
  vector.transfer_write %block_t, %buf[%c0, %c0] {permutation_map = #transposed} : vector<5x5xf32>, memref<?x?xf32>

  // Re-read the 5x5 block rooted @{1, 1}.
  // The last row (105 .. 505) and last column (501 .. 505) were not touched
  // by the write and still carry the original data; the remaining 4x4
  // sub-block (202 .. 505), rooted @{0, 0} in the vector, carries the
  // shifted data.
  %after_write = vector.transfer_read %buf[%c1, %c1], %pad : memref<?x?xf32>, vector<5x5xf32>
  vector.print %after_write : vector<5x5xf32>
  // CHECK-NEXT: ( ( 202, 302, 402, 502, 501 ),
  // CHECK-SAME: ( 203, 303, 403, 503, 502 ),
  // CHECK-SAME: ( 204, 304, 404, 504, 503 ),
  // CHECK-SAME: ( 205, 305, 405, 505, 504 ),
  // CHECK-SAME: ( 105, 205, 305, 405, 505 ) )

  // 5x5 block rooted @{2, 3}: it extends past the 6x6 buffer, so the
  // out-of-bounds positions come back as the padding value -42.
  %oob = vector.transfer_read %buf[%c2, %c3], %pad : memref<?x?xf32>, vector<5x5xf32>
  vector.print %oob : vector<5x5xf32>
  // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ),
  // CHECK-SAME: ( 404, 504, 503, -42, -42 ),
  // CHECK-SAME: ( 405, 505, 504, -42, -42 ),
  // CHECK-SAME: ( 305, 405, 505, -42, -42 ),
  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )

  // Transposed view of the same partially out-of-bounds block @{2, 3}.
  %oob_t = vector.transfer_read %buf[%c2, %c3], %pad {permutation_map = #transposed} : memref<?x?xf32>, vector<5x5xf32>
  vector.print %oob_t : vector<5x5xf32>
  // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ),
  // CHECK-SAME: ( 503, 504, 505, 405, -42 ),
  // CHECK-SAME: ( 502, 503, 504, 505, -42 ),
  // CHECK-SAME: ( -42, -42, -42, -42, -42 ),
  // CHECK-SAME: ( -42, -42, -42, -42, -42 ) )

  // 1-D read of five elements along the last dimension, starting @{2, 3}.
  %oob_row = vector.transfer_read %buf[%c2, %c3], %pad {permutation_map = #last_dim} : memref<?x?xf32>, vector<5xf32>
  vector.print %oob_row : vector<5xf32>
  // CHECK-NEXT: ( 403, 503, 502, -42, -42 )

  memref.dealloc %buf : memref<?x?xf32>
  func.return
}