// RUN: mlir-opt %s -test-vector-transferop-opt | FileCheck %s

// CHECK-LABEL: func @forward_dead_store
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_store(%arg0: i1, %arg1 : memref<4x4xf32>,
    %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
    memref<4x4xf32>, vector<1x4xf32>
  %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0)
    -> (vector<1x4xf32>) {
    %1 = arith.addf %acc, %acc : vector<1x4xf32>
    scf.yield %1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

// CHECK-LABEL: func @forward_nested
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: scf.if
// CHECK-NOT: vector.transfer_read
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_nested(%arg0: i1, %arg1 : memref<4x4xf32>, %v0 : vector<1x4xf32>,
    %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  } else {
    scf.yield %v1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c0, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

// Negative test: the transfer_write in the scf.if region blocks store-to-load
// forwarding because we don't recursively look into the region to realize
// that the transfer_write cannot reach the transfer_read.
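// (The write sits in the `else` branch while the read sits in the `then`
// branch, so the two can never execute together, but the analysis does not
// exploit this.)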
// CHECK-LABEL: func @forward_nested_negative
// CHECK: vector.transfer_write
// CHECK: scf.if
// CHECK: vector.transfer_read
// CHECK: } else {
// CHECK: vector.transfer_write
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_nested_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
    %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  } else {
    vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
    scf.yield %v1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c0, %i] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

// CHECK-LABEL: func @dead_store_region
// CHECK: vector.transfer_write
// CHECK: scf.if
// CHECK: } else {
// CHECK: vector.transfer_read
// CHECK: }
// CHECK: scf.if
// CHECK-NOT: vector.transfer_write
// CHECK: }
// CHECK: vector.transfer_write
// CHECK-NOT: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: return
func.func @dead_store_region(%arg0: i1, %arg1 : memref<4x4xf32>,
    %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index)
    -> (vector<1x4xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    scf.yield %v1 : vector<1x4xf32>
  } else {
    %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  }
  scf.if %arg0 {
    vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
  }
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %1 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} :
    memref<4x4xf32>, vector<1x4xf32>
  return %1 : vector<1x4xf32>
}

// CHECK-LABEL: func @dead_store_negative
// CHECK: scf.if
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: } else {
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
    %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
    %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  } else {
    scf.yield %v1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

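// The write of %v1 inside the inner scf.if is dead: the write of %v0 to the
// same indices follows unconditionally in the outer region, with no
// intervening read.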
// CHECK-LABEL: func @dead_store_nested_region
// CHECK: scf.if
// CHECK: vector.transfer_read
// CHECK: scf.if
// CHECK-NOT: vector.transfer_write
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: }
// CHECK: return
func.func @dead_store_nested_region(%arg0: i1, %arg1: i1, %arg2 : memref<4x4xf32>,
    %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  scf.if %arg0 {
    %0 = vector.transfer_read %arg2[%i, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.if %arg1 {
      vector.transfer_write %v1, %arg2[%c1, %c0] {in_bounds = [true, true]} :
        vector<1x4xf32>, memref<4x4xf32>
    }
    vector.transfer_write %v0, %arg2[%c1, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
  }
  return
}

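// Negative test: %alias is a subview of %arg1, so writes through %alias are
// treated as potentially clobbering the memory accessed through %arg1; none
// of the stores can be forwarded or eliminated.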
// CHECK-LABEL: func @forward_dead_store_negative
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
    %v0 : vector<1x4xf32>, %v1 : vector<1x1xf32>, %v2 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
  %alias = memref.subview %arg1[0, 0] [2, 2] [1, 1] :
    memref<4x4xf32> to memref<2x2xf32, strided<[4, 1]>>
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  // blocking write.
  vector.transfer_write %v1, %alias[%c0, %c0] {in_bounds = [true, true]} :
    vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>>
  vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  // blocking write.
  vector.transfer_write %v1, %alias[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>>
  %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
    memref<4x4xf32>, vector<1x4xf32>
  vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return %0 : vector<1x4xf32>
}


// Regression test - the following _potential forwarding_ of %1 to the final
// `vector.transfer_write` would not be safe:
//      %1 = vector.transfer_read %subview
//      vector.transfer_write %1, %alloca
//      vector.transfer_write %vec, %collapse_shape
//      %2 = vector.transfer_read %alloca
//      vector.transfer_write %1, %subview
// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the
// final `vector.transfer_write` should be preserved as:
//      vector.transfer_write %2, %subview

// CHECK-LABEL: func.func @collapse_shape_and_read_from_source
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write

func.func @collapse_shape_and_read_from_source(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
  %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
    // $alloca and $collapse_shape alias
    vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
    vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
  }
  return
}

// The same regression test for expand_shape.

// CHECK-LABEL: func.func @expand_shape_and_read_from_source
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write

func.func @expand_shape_and_read_from_source(%in_0: memref<20xi32>, %vec: vector<1x4x1xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<4xi32>
  %expand_shape = memref.expand_shape %alloca [[0, 1, 2]] output_shape [1, 4, 1] : memref<4xi32> into memref<1x4x1xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[%arg0] [4] [1] : memref<20xi32> to memref<4xi32, strided<[1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32, strided<[1], offset: ?>>, vector<4xi32>
    // $alloca and $expand_shape alias
    vector.transfer_write %1, %alloca[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    vector.transfer_write %vec, %expand_shape[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    %2 = vector.transfer_read %alloca[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32>, vector<4xi32>
    vector.transfer_write %2, %subview[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32, strided<[1], offset: ?>>
  }
  return
}

// The same regression test, but the initial write is to the collapsed memref,
// and the subsequent unforwardable read is from the collapse shape.
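// Forwarding %1 from the write to %collapse_shape into the later read would
// miss the intervening write to %alloca, which aliases %collapse_shape.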

// CHECK-LABEL: func.func @collapse_shape_and_read_from_collapse
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write

func.func @collapse_shape_and_read_from_collapse(%in_0: memref<20xi32>, %vec: vector<1x4x1xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
  %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[%arg0] [4] [1] : memref<20xi32> to memref<4xi32, strided<[1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32, strided<[1], offset: ?>>, vector<4xi32>
    vector.transfer_write %1, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    // $alloca and $collapse_shape alias
    vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    %2 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32>, vector<4xi32>
    vector.transfer_write %2, %subview[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32, strided<[1], offset: ?>>
  }
  return
}

// The same test, except the initial write is to the expanded memref (the
// expand_shape analogue of the previous collapse test).

// CHECK-LABEL: func.func @expand_shape_and_read_from_expand
// CHECK: scf.for {{.*}} {
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: vector.transfer_write

func.func @expand_shape_and_read_from_expand(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<4xi32>
  %expand_shape = memref.expand_shape %alloca [[0, 1, 2]] output_shape [1, 4, 1] : memref<4xi32> into memref<1x4x1xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
    vector.transfer_write %1, %expand_shape[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    // $alloca and $expand_shape alias
    vector.transfer_write %vec, %alloca[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    %2 = vector.transfer_read %expand_shape[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
    vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
  }
  return
}

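// The store and the load below use identical dynamic indices (%i, %i), so
// the pass can prove they access the same elements and forward the stored
// value.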
// CHECK-LABEL: func @forward_dead_store_dynamic_same_index
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_store_dynamic_same_index(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op reads/writes to the same address so that we can forward.
  %0 = vector.transfer_read %buffer[%i, %i], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

// CHECK-LABEL: func @dont_forward_dead_store_dynamic_overlap
// CHECK-COUNT-2: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @dont_forward_dead_store_dynamic_overlap(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %i1 = affine.apply affine_map<(d0) -> (d0 + 3)>(%i0)
  vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op writes to an overlapping range, so we cannot forward.
  vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

// CHECK-LABEL: func @forward_dead_store_dynamic_non_overlap_leading_dim
// CHECK: vector.transfer_write
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_store_dynamic_non_overlap_leading_dim(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %i1 = affine.apply affine_map<(d0) -> (d0 + 1)>(%i0)
  vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op writes to a non-overlapping range, so we can forward.
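  // (%i1 = %i0 + 1 offsets the leading dimension, so the two writes cover
  // disjoint rows.)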
  vector.transfer_write %v0, %buffer[%i1, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

// CHECK-LABEL: func @forward_dead_store_dynamic_non_overlap_trailing_dim
// CHECK: vector.transfer_write
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_store_dynamic_non_overlap_trailing_dim(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %i1 = affine.apply affine_map<(d0) -> (d0 + 4)>(%i0)
  vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op writes to a non-overlapping range, so we can forward.
  vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking
// CHECK: %[[SPLAT:.*]] = arith.constant dense<0.000000e+00> : vector<[8]x[8]xf32>
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK-SAME: iter_args(%{{.*}} = %[[SPLAT]])
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_constant_splat_store_with_masking(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Here the read can be eliminated but not the write (due to mismatched masks).
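// The unmasked write stores the splat to every element, and the read's
// padding value (0.0) matches the splat, so the read always produces
// %zero_splat. The write must stay because the final masked write does not
// overwrite all of it.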
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_unmasked_write
// CHECK: %[[SPLAT:.*]] = arith.constant dense<0.000000e+00> : vector<[8]x[8]xf32>
// CHECK: vector.transfer_write %[[SPLAT]]
// CHECK: scf.for
// CHECK-SAME: iter_args(%{{.*}} = %[[SPLAT]])
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_constant_splat_store_with_masking_unmasked_write(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Negative test: the padding does not match the constant splat, so we can't
// forward the store.
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_negative_0
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_constant_splat_store_with_masking_negative_0(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 1.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Negative test: the masks don't match between the read and the write, so we
// can't forward the store.
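// (Lanes enabled in the read mask but not in the write mask would observe
// whatever was previously in memory, not the splat.)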
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_negative_1
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_constant_splat_store_with_masking_negative_1(%buffer : memref<?x?xf32>, %mask_a: vector<[8]x[8]xi1>, %mask_b: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask_a {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask_b {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask_a {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Negative test: the write is masked but the read is unmasked, so we can't
// forward the store (the write may store fewer elements than the read loads).
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_negative_3
// CHECK: vector.transfer_write
// CHECK: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_dead_constant_splat_store_with_masking_negative_3(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Here each read/write is to a different subview, but they all point to the
// exact same bit of memory (just through casts and subviews with unit strides
// and zero offsets).
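// The pass looks through these trivial aliases, so the initial store is
// still forwarded to the read and the dead stores are eliminated, as the
// CHECK lines below verify.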
// CHECK-LABEL: func @forward_and_eliminate_stores_through_trivial_aliases
// CHECK-NOT: vector.transfer_write
// CHECK-NOT: vector.transfer_read
// CHECK: scf.for
// CHECK: }
// CHECK: vector.transfer_write
// CHECK: return
func.func @forward_and_eliminate_stores_through_trivial_aliases(
    %buffer : memref<?x?xf32>, %vec: vector<[8]x[8]xf32>, %size: index, %a_size: index, %another_size: index
) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c32 = arith.constant 32 : index
  %cst = arith.constant 0.0 : f32
  vector.transfer_write %vec, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %direct_subview = memref.subview %buffer[0, 0] [%a_size, %a_size] [1, 1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1]>>
  %cast = memref.cast %direct_subview : memref<?x?xf32, strided<[?, 1]>> to memref<?x?xf32>
  %subview_of_cast = memref.subview %cast[0, 0] [%another_size, %another_size] [1, 1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1]>>
  %21 = vector.transfer_read %direct_subview[%c0, %c0], %cst {in_bounds = [true, true]} : memref<?x?xf32, strided<[?, 1]>>, vector<[8]x[8]xf32>
  %23 = scf.for %arg2 = %c0 to %c32 step %c1 iter_args(%arg3 = %21) -> (vector<[8]x[8]xf32>) {
    %24 = arith.addf %arg3, %arg3 : vector<[8]x[8]xf32>
    scf.yield %24 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %23, %subview_of_cast[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32, strided<[?, 1]>>
  return
}