// RUN: mlir-opt %s --transform-interpreter -canonicalize --split-input-file | FileCheck %s

// 0-D transfers on a memref are expected to become 0-D vector.load/vector.store;
// the pre-existing vector.store of the 1x1x1 vector is expected to be kept.
// CHECK-LABEL: func @vector_transfer_ops_0d_memref(
// CHECK-SAME: %[[MEM:.*]]: memref<f32>
// CHECK-SAME: %[[VEC:.*]]: vector<1x1x1xf32>
func.func @vector_transfer_ops_0d_memref(%mem: memref<f32>, %vec: vector<1x1x1xf32>) {
  %f0 = arith.constant 0.0 : f32

// CHECK-NEXT: %[[S:.*]] = vector.load %[[MEM]][] : memref<f32>, vector<f32>
  %0 = vector.transfer_read %mem[], %f0 : memref<f32>, vector<f32>

// CHECK-NEXT: vector.store %[[S]], %[[MEM]][] : memref<f32>, vector<f32>
  vector.transfer_write %0, %mem[] : vector<f32>, memref<f32>

// CHECK-NEXT: vector.store %[[VEC]], %[[MEM]][] : memref<f32>, vector<1x1x1xf32>
  vector.store %vec, %mem[] : memref<f32>, vector<1x1x1xf32>

  return
}

// On a 0-D tensor source the read is expected to remain a vector.transfer_read;
// the broadcasting permutation map becomes an explicit vector.broadcast.
// CHECK-LABEL: func @vector_transfer_ops_0d_tensor(
// CHECK-SAME: %[[SRC:.*]]: tensor<f32>
func.func @vector_transfer_ops_0d_tensor(%src: tensor<f32>) -> vector<1xf32> {
  %f0 = arith.constant 0.0 : f32

// CHECK: %[[S:.*]] = vector.transfer_read %[[SRC]][]
// CHECK: %[[V:.*]] = vector.broadcast %[[S]] : vector<f32> to vector<1xf32>
  %res = vector.transfer_read %src[], %f0 {in_bounds = [true], permutation_map = affine_map<()->(0)>} :
    tensor<f32>, vector<1xf32>

// CHECK-NEXT: return %[[V]]
  return %res: vector<1xf32>
}

// transfer_read/write are lowered to vector.load/store
// CHECK-LABEL: func @transfer_to_load(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<4xf32> {
// CHECK-NEXT: %[[RES:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<4xf32>
// CHECK-NEXT: vector.store %[[RES]], %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<4xf32>
// CHECK-NEXT: return %[[RES]] : vector<4xf32>
// CHECK-NEXT: }

func.func @transfer_to_load(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> {
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xf32>, vector<4xf32>
  vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true]} : vector<4xf32>, memref<8x8xf32>
  return %res : vector<4xf32>
}

// Masked transfer_read/write nested inside vector.mask are NOT lowered to
// vector.load/store
// CHECK-LABEL: func @masked_transfer_to_load(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[IDX:.*]]: index,
// CHECK-SAME: %[[MASK:.*]]: vector<4xi1>) -> memref<8x8xf32>
// CHECK-NOT: vector.load
// CHECK-NOT: vector.store
// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]]{{.*}} : memref<8x8xf32>, vector<4xf32> } : vector<4xi1> -> vector<4xf32>
// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[MEM]][%[[IDX]], %[[IDX]]]{{.*}} : vector<4xf32>, memref<8x8xf32> } : vector<4xi1>

func.func @masked_transfer_to_load(%mem : memref<8x8xf32>, %idx : index, %mask : vector<4xi1>) -> memref<8x8xf32> {
  %cf0 = arith.constant 0.0 : f32
  %read = vector.mask %mask {vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xf32>, vector<4xf32>} : vector<4xi1> -> vector<4xf32>
  vector.mask %mask {vector.transfer_write %read, %mem[%idx, %idx] {in_bounds = [true]} : vector<4xf32>, memref<8x8xf32> } : vector<4xi1>
  return %mem : memref<8x8xf32>
}

// n-D results are also supported.
// CHECK-LABEL: func @transfer_2D(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<2x4xf32> {
// CHECK-NEXT: %[[RES:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<2x4xf32>
// CHECK-NEXT: vector.store %[[RES]], %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<2x4xf32>
// CHECK-NEXT: return %[[RES]] : vector<2x4xf32>
// CHECK-NEXT: }

func.func @transfer_2D(%mem : memref<8x8xf32>, %idx : index) -> vector<2x4xf32> {
  // Both vector dimensions are marked in-bounds; a 2-D vector.load/vector.store
  // pair is expected in the output.
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true, true]} : memref<8x8xf32>, vector<2x4xf32>
  vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true, true]} : vector<2x4xf32>, memref<8x8xf32>
  return %res : vector<2x4xf32>
}

// Vector element types are supported when the result has the same type.
// CHECK-LABEL: func @transfer_vector_element(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xvector<2x4xf32>>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<2x4xf32> {
// CHECK-NEXT: %[[RES:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xvector<2x4xf32>>, vector<2x4xf32>
// CHECK-NEXT: vector.store %[[RES]], %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xvector<2x4xf32>>, vector<2x4xf32>
// CHECK-NEXT: return %[[RES]] : vector<2x4xf32>
// CHECK-NEXT: }

func.func @transfer_vector_element(%mem : memref<8x8xvector<2x4xf32>>, %idx : index) -> vector<2x4xf32> {
  // The padding value is a vector because the memref element type is a vector.
  %cf0 = arith.constant dense<0.0> : vector<2x4xf32>
  %res = vector.transfer_read %mem[%idx, %idx], %cf0 : memref<8x8xvector<2x4xf32>>, vector<2x4xf32>
  vector.transfer_write %res, %mem[%idx, %idx] : vector<2x4xf32>, memref<8x8xvector<2x4xf32>>
  return %res : vector<2x4xf32>
}

// TODO: Vector element types are not supported yet when the result has a
// different type.
// CHECK-LABEL: func @transfer_vector_element_different_types(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xvector<2x4xf32>>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<1x2x4xf32> {
// CHECK-NEXT: %[[CF0:.*]] = arith.constant dense<0.000000e+00> : vector<2x4xf32>
// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = [true]} : memref<8x8xvector<2x4xf32>>, vector<1x2x4xf32>
// CHECK-NEXT: vector.transfer_write %[[RES]], %[[MEM]][%[[IDX]], %[[IDX]]] {in_bounds = [true]} : vector<1x2x4xf32>, memref<8x8xvector<2x4xf32>>
// CHECK-NEXT: return %[[RES]] : vector<1x2x4xf32>
// CHECK-NEXT: }

func.func @transfer_vector_element_different_types(%mem : memref<8x8xvector<2x4xf32>>, %idx : index) -> vector<1x2x4xf32> {
  // The transferred vector type (1x2x4) differs from the memref element type
  // (2x4); both transfer ops are expected to be left untouched.
  %cf0 = arith.constant dense<0.0> : vector<2x4xf32>
  %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<8x8xvector<2x4xf32>>, vector<1x2x4xf32>
  vector.transfer_write %res, %mem[%idx, %idx] {in_bounds = [true]} : vector<1x2x4xf32>, memref<8x8xvector<2x4xf32>>
  return %res : vector<1x2x4xf32>
}

// TODO: transfer_read/write cannot be lowered because there is a dimension
// that is not guaranteed to be in-bounds.
// CHECK-LABEL: func @transfer_2D_not_inbounds(
// CHECK-SAME: %[[SRC:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[POS:.*]]: index) -> vector<2x4xf32> {
// CHECK-NEXT: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %[[VAL:.*]] = vector.transfer_read %[[SRC]][%[[POS]], %[[POS]]], %[[PAD]] {in_bounds = [true, false]} : memref<8x8xf32>, vector<2x4xf32>
// CHECK-NEXT: vector.transfer_write %[[VAL]], %[[SRC]][%[[POS]], %[[POS]]] {in_bounds = [false, true]} : vector<2x4xf32>, memref<8x8xf32>
// CHECK-NEXT: return %[[VAL]] : vector<2x4xf32>
// CHECK-NEXT: }

func.func @transfer_2D_not_inbounds(%mem : memref<8x8xf32>, %idx : index) -> vector<2x4xf32> {
  // The read leaves dim 1 and the write leaves dim 0 without an in-bounds
  // guarantee; both ops are expected to survive unchanged.
  %pad = arith.constant 0.0 : f32
  %val = vector.transfer_read %mem[%idx, %idx], %pad {in_bounds = [true, false]} : memref<8x8xf32>, vector<2x4xf32>
  vector.transfer_write %val, %mem[%idx, %idx] {in_bounds = [false, true]} : vector<2x4xf32>, memref<8x8xf32>
  return %val : vector<2x4xf32>
}

// TODO: transfer_read/write cannot be lowered because they are not guaranteed
// to be in-bounds.
// CHECK-LABEL: func @transfer_not_inbounds(
// CHECK-SAME: %[[SRC:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[POS:.*]]: index) -> vector<4xf32> {
// CHECK-NEXT: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %[[VAL:.*]] = vector.transfer_read %[[SRC]][%[[POS]], %[[POS]]], %[[PAD]] : memref<8x8xf32>, vector<4xf32>
// CHECK-NEXT: vector.transfer_write %[[VAL]], %[[SRC]][%[[POS]], %[[POS]]] : vector<4xf32>, memref<8x8xf32>
// CHECK-NEXT: return %[[VAL]] : vector<4xf32>
// CHECK-NEXT: }

func.func @transfer_not_inbounds(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> {
  // No in_bounds attribute is given on either op; both are expected to be
  // printed back unchanged.
  %pad = arith.constant 0.0 : f32
  %val = vector.transfer_read %mem[%idx, %idx], %pad : memref<8x8xf32>, vector<4xf32>
  vector.transfer_write %val, %mem[%idx, %idx] : vector<4xf32>, memref<8x8xf32>
  return %val : vector<4xf32>
}

// A memref with an explicit (non-identity) affine layout is still expected to
// lower to vector.load/vector.store.
// CHECK-LABEL: func @transfer_nondefault_layout(
// CHECK-SAME: %[[SRC:.*]]: memref<8x8xf32, #{{.*}}>,
// CHECK-SAME: %[[POS:.*]]: index) -> vector<4xf32> {
// CHECK-NEXT: %[[VAL:.*]] = vector.load %[[SRC]][%[[POS]], %[[POS]]] : memref<8x8xf32, #{{.*}}>, vector<4xf32>
// CHECK-NEXT: vector.store %[[VAL]], %[[SRC]][%[[POS]], %[[POS]]] : memref<8x8xf32, #{{.*}}>, vector<4xf32>
// CHECK-NEXT: return %[[VAL]] : vector<4xf32>
// CHECK-NEXT: }

#layout = affine_map<(d0, d1) -> (d0*16 + d1)>
func.func @transfer_nondefault_layout(%mem : memref<8x8xf32, #layout>, %idx : index) -> vector<4xf32> {
  %pad = arith.constant 0.0 : f32
  %val = vector.transfer_read %mem[%idx, %idx], %pad {in_bounds = [true]} : memref<8x8xf32, #layout>, vector<4xf32>
  vector.transfer_write %val, %mem[%idx, %idx] {in_bounds = [true]} : vector<4xf32>, memref<8x8xf32, #layout>
  return %val : vector<4xf32>
}

// TODO: transfer_read/write cannot be lowered to vector.load/store yet when the
// permutation map is not the minor identity map (up to broadcasting).
// CHECK-LABEL: func @transfer_perm_map(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<4xf32> {
// CHECK-NEXT: %[[CF0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %[[RES:.*]] = vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[CF0]] {in_bounds = [true], permutation_map = #{{.*}}} : memref<8x8xf32>, vector<4xf32>
// CHECK-NEXT: return %[[RES]] : vector<4xf32>
// CHECK-NEXT: }

func.func @transfer_perm_map(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> {
  // The map selects d0 (the major dimension) of the 2-D memref.
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true], permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<8x8xf32>, vector<4xf32>
  return %res : vector<4xf32>
}

// Lowering of transfer_read with broadcasting is supported: a `vector.load` of
// a unit-sized vector is generated, followed by a `vector.broadcast` to the
// result type.
// CHECK-LABEL: func @transfer_broadcasting(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<4xf32> {
// CHECK-NEXT: %[[LOAD:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<1xf32>
// CHECK-NEXT: %[[RES:.*]] = vector.broadcast %[[LOAD]] : vector<1xf32> to vector<4xf32>
// CHECK-NEXT: return %[[RES]] : vector<4xf32>
// CHECK-NEXT: }

#broadcast_1d = affine_map<(d0, d1) -> (0)>
func.func @transfer_broadcasting(%mem : memref<8x8xf32>, %idx : index) -> vector<4xf32> {
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%idx, %idx], %cf0
    {in_bounds = [true], permutation_map = #broadcast_1d}
      : memref<8x8xf32>, vector<4xf32>
  return %res : vector<4xf32>
}

// A unit-sized in-bounds read from a dynamically shaped memref is expected to
// become a vector.load of vector<1xf32>.
// CHECK-LABEL: func @transfer_scalar(
// CHECK-SAME: %[[MEM:.*]]: memref<?x?xf32>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<1xf32> {
// CHECK-NEXT: %[[RES:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<?x?xf32>, vector<1xf32>
// CHECK-NEXT: return %[[RES]] : vector<1xf32>
// CHECK-NEXT: }
func.func @transfer_scalar(%mem : memref<?x?xf32>, %idx : index) -> vector<1xf32> {
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%idx, %idx], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<1xf32>
  return %res : vector<1xf32>
}

// An example with two broadcasted dimensions.
// CHECK-LABEL: func @transfer_broadcasting_2D(
// CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>,
// CHECK-SAME: %[[IDX:.*]]: index) -> vector<4x4xf32> {
// CHECK-NEXT: %[[LOAD:.*]] = vector.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32>, vector<1x1xf32>
// CHECK-NEXT: %[[RES:.*]] = vector.broadcast %[[LOAD]] : vector<1x1xf32> to vector<4x4xf32>
// CHECK-NEXT: return %[[RES]] : vector<4x4xf32>
// CHECK-NEXT: }

#broadcast_2d = affine_map<(d0, d1) -> (0, 0)>
func.func @transfer_broadcasting_2D(%mem : memref<8x8xf32>, %idx : index) -> vector<4x4xf32> {
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%idx, %idx], %cf0
    {in_bounds = [true, true], permutation_map = #broadcast_2d}
      : memref<8x8xf32>, vector<4x4xf32>
  return %res : vector<4x4xf32>
}

// More complex broadcasting case (here a `vector.load` is generated).
// CHECK-LABEL: func @transfer_broadcasting_complex(
// CHECK-SAME: %[[SRC:.*]]: memref<10x20x30x8x8xf32>,
// CHECK-SAME: %[[POS:.*]]: index) -> vector<3x2x4x5xf32> {
// CHECK-NEXT: %[[NARROW:.*]] = vector.load %[[SRC]][%[[POS]], %[[POS]], %[[POS]], %[[POS]], %[[POS]]] : memref<10x20x30x8x8xf32>, vector<3x1x1x5xf32>
// CHECK-NEXT: %[[WIDE:.*]] = vector.broadcast %[[NARROW]] : vector<3x1x1x5xf32> to vector<3x2x4x5xf32>
// CHECK-NEXT: return %[[WIDE]] : vector<3x2x4x5xf32>
// CHECK-NEXT: }

#broadcast_2d_in_4d = affine_map<(d0, d1, d2, d3, d4) -> (d1, 0, 0, d4)>
func.func @transfer_broadcasting_complex(%mem : memref<10x20x30x8x8xf32>, %idx : index) -> vector<3x2x4x5xf32> {
  // The two middle result dimensions are broadcast (constant 0 in the map);
  // they are expected to show up as unit dims in the loaded vector.
  %pad = arith.constant 0.0 : f32
  %wide = vector.transfer_read %mem[%idx, %idx, %idx, %idx, %idx], %pad
    {in_bounds = [true, true, true, true], permutation_map = #broadcast_2d_in_4d}
      : memref<10x20x30x8x8xf32>, vector<3x2x4x5xf32>
  return %wide : vector<3x2x4x5xf32>
}


// Transform script for the functions above: matches every func.func under the
// root and applies the transfer lowering patterns (max_transfer_rank = 99)
// together with the transfer permutation patterns.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
    %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
    transform.apply_patterns to %func_op {
      transform.apply_patterns.vector.lower_transfer max_transfer_rank = 99
      transform.apply_patterns.vector.transfer_permutation_patterns
    } : !transform.op<"func.func">
    transform.yield
  }
}

// -----

// Permutation maps used by the transfer_read ops of @transfer_read_permutations.
#map0 = affine_map<(d0, d1, d2, d3) -> (d1, d0, 0, 0)>
#map1 = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d0)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d3, d1, 0, 0)>
#map3 = affine_map<(d0, d1) -> (d1, d0, 0, 0)>
#map4 = affine_map<(d0, d1) -> (0, d1, 0, d0)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d2, d1, d3, d0)>
#map6 = affine_map<(d0, d1) -> (0)>

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, 0, 0)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, 0, d3)>

// Each transfer_read below uses a permutation map that is not a minor identity;
// the expected output is a transfer_read with a canonical map (or none),
// followed by vector.broadcast and/or vector.transpose.
// CHECK-LABEL: func @transfer_read_permutations
func.func @transfer_read_permutations(%mem_0 : memref<?x?xf32>, %mem_1 : memref<?x?x?x?xf32>, %m: i1)
    -> (vector<7x14x8x16xf32>, vector<7x14x8x16xf32>, vector<7x14x8x16xf32>,
        vector<7x14x8x16xf32>, vector<7x14x8x16xf32>, vector<7x14x8x16xf32>, vector<8xf32>) {
// CHECK-DAG: %[[CF0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %c0 = arith.constant 0 : index

// CHECK: %[[MASK0:.*]] = vector.splat %{{.*}} : vector<14x7xi1>
  %mask0 = vector.splat %m : vector<14x7xi1>
  %0 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask0 {in_bounds = [true, false, true, true], permutation_map = #map0} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32>
// CHECK: vector.transfer_read {{.*}} %[[MASK0]] {in_bounds = [false, true, true, true], permutation_map = #[[$MAP0]]} : memref<?x?x?x?xf32>, vector<14x7x8x16xf32>
// CHECK: vector.transpose %{{.*}}, [1, 0, 2, 3] : vector<14x7x8x16xf32> to vector<7x14x8x16xf32>

// CHECK: %[[MASK1:.*]] = vector.splat %{{.*}} : vector<16x14xi1>
  %mask1 = vector.splat %m : vector<16x14xi1>
  %1 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask1 {in_bounds = [true, false, true, false], permutation_map = #map1} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32>
// CHECK: vector.transfer_read {{.*}} %[[MASK1]] {in_bounds = [false, false, true, true], permutation_map = #[[$MAP0]]} : memref<?x?x?x?xf32>, vector<16x14x7x8xf32>
// CHECK: vector.transpose %{{.*}}, [2, 1, 3, 0] : vector<16x14x7x8xf32> to vector<7x14x8x16xf32>

// CHECK: %[[MASK2:.*]] = vector.splat %{{.*}} : vector<14x7xi1>
  %mask2 = vector.splat %m : vector<14x7xi1>
  %2 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask2 {in_bounds = [true, false, true, true], permutation_map = #map2} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32>
// CHECK: vector.transfer_read {{.*}} %[[MASK2]] {in_bounds = [false, true, true], permutation_map = #[[$MAP1]]} : memref<?x?x?x?xf32>, vector<14x16x7xf32>
// CHECK: vector.broadcast %{{.*}} : vector<14x16x7xf32> to vector<8x14x16x7xf32>
// CHECK: vector.transpose %{{.*}}, [3, 1, 0, 2] : vector<8x14x16x7xf32> to vector<7x14x8x16xf32>

  %3 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [false, false, true, true], permutation_map = #map3} : memref<?x?xf32>, vector<7x14x8x16xf32>
// CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF0]] : memref<?x?xf32>, vector<14x7xf32>
// CHECK: vector.broadcast %{{.*}} : vector<14x7xf32> to vector<8x16x14x7xf32>
// CHECK: vector.transpose %{{.*}}, [3, 2, 0, 1] : vector<8x16x14x7xf32> to vector<7x14x8x16xf32>

  %4 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [true, false, true, false], permutation_map = #map4} : memref<?x?xf32>, vector<7x14x8x16xf32>
// CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CF0]] : memref<?x?xf32>, vector<16x14xf32>
// CHECK: vector.broadcast %{{.*}} : vector<16x14xf32> to vector<7x8x16x14xf32>
// CHECK: vector.transpose %{{.*}}, [0, 3, 1, 2] : vector<7x8x16x14xf32> to vector<7x14x8x16xf32>

  %5 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst {permutation_map = #map5} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32>
// CHECK: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[CF0]] : memref<?x?x?x?xf32>, vector<16x14x7x8xf32>
// CHECK: vector.transpose %{{.*}}, [2, 1, 3, 0] : vector<16x14x7x8xf32> to vector<7x14x8x16xf32>

  %6 = vector.transfer_read %mem_0[%c0, %c0], %cst {in_bounds = [true], permutation_map = #map6} : memref<?x?xf32>, vector<8xf32>
// CHECK: vector.load %{{.*}}[%[[C0]], %[[C0]]] : memref<?x?xf32>, vector<1xf32>
// CHECK: vector.broadcast %{{.*}} : vector<1xf32> to vector<8xf32>

  return %0, %1, %2, %3, %4, %5, %6 : vector<7x14x8x16xf32>, vector<7x14x8x16xf32>,
         vector<7x14x8x16xf32>, vector<7x14x8x16xf32>, vector<7x14x8x16xf32>,
         vector<7x14x8x16xf32>, vector<8xf32>
}

// A masked, permuted transfer_write on a tensor: the vector is expected to be
// transposed first and then written without a permutation map, keeping the mask.
// CHECK-LABEL: func @transfer_write_permutations_tensor_masked
// CHECK-SAME: %[[DST:.*]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[VEC:.*]]: vector<7x14x8x16xf32>
// CHECK-SAME: %[[M:.*]]: i1
func.func @transfer_write_permutations_tensor_masked(
    %dst : tensor<?x?x?x?xf32>,
    %vec : vector<7x14x8x16xf32>, %m: i1) -> tensor<?x?x?x?xf32> {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  // CHECK: %[[MASK:.*]] = vector.splat %[[M]] : vector<16x14x7x8xi1>
  %mask0 = vector.splat %m : vector<16x14x7x8xi1>
  %res = vector.transfer_write %vec, %dst[%c0, %c0, %c0, %c0], %mask0 {in_bounds = [true, false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d1, d3, d0)>} : vector<7x14x8x16xf32>, tensor<?x?x?x?xf32>
  // CHECK: %[[NEW_VEC0:.*]] = vector.transpose %{{.*}}, [3, 1, 0, 2] : vector<7x14x8x16xf32> to vector<16x14x7x8xf32>
  // CHECK: %[[NEW_RES0:.*]] = vector.transfer_write %[[NEW_VEC0]], %[[DST]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[MASK]] {in_bounds = [true, false, true, false]} : vector<16x14x7x8xf32>, tensor<?x?x?x?xf32>

  return %res : tensor<?x?x?x?xf32>
}

// Unmasked, permuted transfer_write on a memref: expected to become a
// transpose followed by a transfer_write without a permutation map.
// CHECK-LABEL: func @transfer_write_permutations_memref
// CHECK-SAME: %[[MEM:.*]]: memref<?x?x?x?xf32>
// CHECK-SAME: %[[VEC:.*]]: vector<8x16xf32>
func.func @transfer_write_permutations_memref(
    %mem : memref<?x?x?x?xf32>, %vec : vector<8x16xf32>) {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)>} : vector<8x16xf32>, memref<?x?x?x?xf32>
  // CHECK: %[[NEW_VEC0:.*]] = vector.transpose %{{.*}}, [1, 0] : vector<8x16xf32> to vector<16x8xf32>
  // CHECK: vector.transfer_write %[[NEW_VEC0]], %[[MEM]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] : vector<16x8xf32>, memref<?x?x?x?xf32>

  return
}

// The permutation map skips a destination dimension (d2); the vector is
// expected to gain a unit dimension via broadcast + transpose before the write.
// CHECK-LABEL: func @transfer_write_broadcast_unit_dim_tensor
// CHECK-SAME: %[[DST0:.*]]: tensor<?x?x?x?xf32>
// CHECK-SAME: %[[VEC0:.*]]: vector<14x8x16xf32>
func.func @transfer_write_broadcast_unit_dim_tensor(
    %dst_0 : tensor<?x?x?x?xf32>, %vec_0 : vector<14x8x16xf32>) -> tensor<?x?x?x?xf32> {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  %res = vector.transfer_write %vec_0, %dst_0[%c0, %c0, %c0, %c0] {in_bounds = [false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>} : vector<14x8x16xf32>, tensor<?x?x?x?xf32>
  // CHECK: %[[NEW_VEC0:.*]] = vector.broadcast %{{.*}} : vector<14x8x16xf32> to vector<1x14x8x16xf32>
  // CHECK: %[[NEW_VEC1:.*]] = vector.transpose %[[NEW_VEC0]], [1, 2, 0, 3] : vector<1x14x8x16xf32> to vector<14x8x1x16xf32>
  // CHECK: %[[NEW_RES0:.*]] = vector.transfer_write %[[NEW_VEC1]], %[[DST0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [false, false, true, true]} : vector<14x8x1x16xf32>, tensor<?x?x?x?xf32>

  return %res : tensor<?x?x?x?xf32>
}

// Memref variant: the map (d1, d2) leaves out the minor dimension d3, for
// which a unit dimension is expected to be added to the written vector.
// CHECK-LABEL: func @transfer_write_broadcast_unit_dim_memref
// CHECK-SAME: %[[MEM0:.*]]: memref<?x?x?x?xf32>
// CHECK-SAME: %[[VEC0:.*]]: vector<8x16xf32>
func.func @transfer_write_broadcast_unit_dim_memref(
    %mem_0 : memref<?x?x?x?xf32>, %vec_0 : vector<8x16xf32>) {
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  vector.transfer_write %vec_0, %mem_0[%c0, %c0, %c0, %c0] {permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)>} : vector<8x16xf32>, memref<?x?x?x?xf32>
  // CHECK: %[[NEW_VEC0:.*]] = vector.broadcast %{{.*}} : vector<8x16xf32> to vector<1x8x16xf32>
  // CHECK: %[[NEW_VEC1:.*]] = vector.transpose %[[NEW_VEC0]], [1, 2, 0] : vector<1x8x16xf32> to vector<8x16x1xf32>
  // CHECK: vector.transfer_write %[[NEW_VEC1]], %[[MEM0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [false, false, true]} : vector<8x16x1xf32>, memref<?x?x?x?xf32>

  return
}

// Transform script for this split: transfer lowering (max_transfer_rank = 99)
// plus transfer permutation patterns, applied to every func.func.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
    %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
    transform.apply_patterns to %func_op {
      transform.apply_patterns.vector.lower_transfer max_transfer_rank = 99
      transform.apply_patterns.vector.transfer_permutation_patterns
    } : !transform.op<"func.func">
    transform.yield
  }
}

// -----

/// Verify vector.maskedload and vector.maskedstore ops that operate on 1-D
/// vectors aren't generated for rank > 1 transfer ops.

// CHECK-LABEL: @transfer_2D_masked
// CHECK-NOT: vector.maskedload
// CHECK-NOT: vector.maskedstore
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
func.func @transfer_2D_masked(%mem : memref<?x?xf32>, %mask : vector<2x4xi1>) -> vector<2x4xf32> {
  %c0 = arith.constant 0 : index
  %pad = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%c0, %c0], %pad, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x4xf32>
  vector.transfer_write %res, %mem[%c0, %c0], %mask {in_bounds = [true, true]} : vector<2x4xf32>, memref<?x?xf32>
  return %res : vector<2x4xf32>
}

// Transform script for this split: only the transfer lowering patterns are
// applied, with max_transfer_rank = 2.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
    %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func">
    transform.apply_patterns to %func_op {
      transform.apply_patterns.vector.lower_transfer max_transfer_rank = 2
    } : !transform.op<"func.func">
    transform.yield
  }
}