// RUN: mlir-opt %s -test-vector-transferop-opt | FileCheck %s

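// Store-to-load forwarding: the transfer_read reads the value just written at
// the same indices, so the read is replaced by %v0; the initial
// transfer_write is then dead (the final write overwrites it) and is removed.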
// CHECK-LABEL: func @forward_dead_store
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_store(%arg0: i1, %arg1 : memref<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
    memref<4x4xf32>, vector<1x4xf32>
  %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0)
    -> (vector<1x4xf32>) {
    %1 = arith.addf %acc, %acc : vector<1x4xf32>
    scf.yield %1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

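// The write of %v0 at [%c1, %c0] is forwarded to the transfer_read nested
// inside the scf.if region, which is removed. None of the writes can be
// proven dead, so all three are kept.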
// CHECK-LABEL: func @forward_nested
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_write
//       CHECK:   scf.if
//   CHECK-NOT:     vector.transfer_read
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_nested(%arg0: i1, %arg1 : memref<4x4xf32>, %v0 : vector<1x4xf32>,
  %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  } else {
    scf.yield %v1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c0, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

// Negative test: the transfer_write in the scf.if region blocks the
// store-to-load forwarding because we don't recursively look into the region
// to realize that the transfer_write cannot reach the transfer_read.
// CHECK-LABEL: func @forward_nested_negative
//       CHECK:   vector.transfer_write
//       CHECK:   scf.if
//       CHECK:     vector.transfer_read
//       CHECK:   } else {
//       CHECK:     vector.transfer_write
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_nested_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  } else {
    vector.transfer_write %v1, %arg1[%i, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
    scf.yield %v1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c0, %i] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

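// Dead store elimination: the write of %v0 inside the second scf.if and one
// of the two identical writes of %x are shadowed by the last write of %x at
// the same indices and are removed. The initial write stays because the
// transfer_read in the first scf.if may read from it.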
// CHECK-LABEL: func @dead_store_region
//       CHECK:   vector.transfer_write
//       CHECK:   scf.if
//       CHECK:   } else {
//       CHECK:     vector.transfer_read
//       CHECK:   }
//       CHECK:   scf.if
//   CHECK-NOT:     vector.transfer_write
//       CHECK:   }
//       CHECK:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_write
//       CHECK:   vector.transfer_read
//       CHECK:   return
func.func @dead_store_region(%arg0: i1, %arg1 : memref<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index)
  -> (vector<1x4xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    scf.yield %v1 : vector<1x4xf32>
  } else {
    %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  }
  scf.if %arg0 {
    vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
  }
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  %1 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} :
    memref<4x4xf32>, vector<1x4xf32>
  return %1 : vector<1x4xf32>
}

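// Negative test: the write of %v0 inside the scf.if is followed by a read at
// the dynamic index %i that may overlap it, so neither the write nor the read
// can be removed despite the later write of %x to the same indices.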
// CHECK-LABEL: func @dead_store_negative
//       CHECK:   scf.if
//       CHECK:     vector.transfer_write
//       CHECK:     vector.transfer_read
//       CHECK:   } else {
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  %x = scf.if %arg0 -> (vector<1x4xf32>) {
    vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
    %0 = vector.transfer_read %arg1[%i, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.yield %0 : vector<1x4xf32>
  } else {
    scf.yield %v1 : vector<1x4xf32>
  }
  vector.transfer_write %x, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return
}

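// The write of %v1 inside the innermost scf.if is overwritten by the write of
// %v0 at the same indices before any read can observe it, so it is removed as
// a dead store.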
// CHECK-LABEL: func @dead_store_nested_region
//       CHECK:   scf.if
//       CHECK:     vector.transfer_read
//       CHECK:     scf.if
//   CHECK-NOT:       vector.transfer_write
//       CHECK:     }
//       CHECK:     vector.transfer_write
//       CHECK:   }
//       CHECK:   return
func.func @dead_store_nested_region(%arg0: i1, %arg1: i1, %arg2 : memref<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf0 = arith.constant 0.0 : f32
  scf.if %arg0 {
    %0 = vector.transfer_read %arg2[%i, %c0], %cf0 {in_bounds = [true, true]} :
      memref<4x4xf32>, vector<1x4xf32>
    scf.if %arg1 {
      vector.transfer_write %v1, %arg2[%c1, %c0] {in_bounds = [true, true]} :
        vector<1x4xf32>, memref<4x4xf32>
    }
    vector.transfer_write %v0, %arg2[%c1, %c0] {in_bounds = [true, true]} :
      vector<1x4xf32>, memref<4x4xf32>
  }
  return
}

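// Negative test: %alias is a subview of %arg1, so the writes through %alias
// may overlap the transfers on %arg1. These blocking writes prevent both
// store-to-load forwarding and dead store elimination, and all transfer ops
// are kept.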
// CHECK-LABEL: func @forward_dead_store_negative
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_read
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x1xf32>, %v2 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
  %alias = memref.subview %arg1[0, 0] [2, 2] [1, 1] :
    memref<4x4xf32> to memref<2x2xf32, strided<[4, 1]>>
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  // blocking write.
  vector.transfer_write %v1, %alias[%c0, %c0] {in_bounds = [true, true]} :
    vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>>
  vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  // blocking write.
  vector.transfer_write %v1, %alias[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>>
  %0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
    memref<4x4xf32>, vector<1x4xf32>
  vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<4x4xf32>
  return %0 : vector<1x4xf32>
}


// Regression test - the following _potential forwarding_ of %1 to the final
// `vector.transfer_write` would not be safe:
//         %1 = vector.transfer_read %subview
//         vector.transfer_write %1, %alloca
//         vector.transfer_write %vec, %collapse_shape
//         %2 = vector.transfer_read %alloca
//         vector.transfer_write %1, %subview
// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the
// final `vector.transfer_write` should be preserved as:
//         vector.transfer_write %2, %subview

// CHECK-LABEL:  func.func @collapse_shape_and_read_from_source
//       CHECK:    scf.for {{.*}} {
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write

func.func @collapse_shape_and_read_from_source(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
  %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
    // %alloca and %collapse_shape alias
    vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
    vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
  }
  return
}

// The same regression test for expand_shape.

// CHECK-LABEL:  func.func @expand_shape_and_read_from_source
//       CHECK:    scf.for {{.*}} {
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write

func.func @expand_shape_and_read_from_source(%in_0: memref<20xi32>, %vec: vector<1x4x1xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<4xi32>
  %expand_shape = memref.expand_shape %alloca [[0, 1, 2]] output_shape [1, 4, 1] : memref<4xi32> into memref<1x4x1xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[%arg0] [4] [1] : memref<20xi32> to memref<4xi32, strided<[1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32, strided<[1], offset: ?>>, vector<4xi32>
    // %alloca and %expand_shape alias
    vector.transfer_write %1, %alloca[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    vector.transfer_write %vec, %expand_shape[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    %2 = vector.transfer_read %alloca[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32>, vector<4xi32>
    vector.transfer_write %2, %subview[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32, strided<[1], offset: ?>>
  }
  return
}

// The same regression test, but the initial write is to the collapsed memref,
// and the subsequent unforwardable read is from the collapse_shape.

// CHECK-LABEL:  func.func @collapse_shape_and_read_from_collapse
//       CHECK:    scf.for {{.*}} {
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write

func.func @collapse_shape_and_read_from_collapse(%in_0: memref<20xi32>, %vec: vector<1x4x1xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
  %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[%arg0] [4] [1] : memref<20xi32> to memref<4xi32, strided<[1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32, strided<[1], offset: ?>>, vector<4xi32>
    vector.transfer_write %1, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    // %alloca and %collapse_shape alias
    vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    %2 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<4xi32>, vector<4xi32>
    vector.transfer_write %2, %subview[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32, strided<[1], offset: ?>>
  }
  return
}

// The same test except writing to the expanded source first (same as the
// previous collapse test but for expand).

// CHECK-LABEL:  func.func @expand_shape_and_read_from_expand
//       CHECK:    scf.for {{.*}} {
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_write
//       CHECK:      vector.transfer_read
//       CHECK:      vector.transfer_write

func.func @expand_shape_and_read_from_expand(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c20 = arith.constant 20 : index

  %alloca = memref.alloca() {alignment = 64 : i64} : memref<4xi32>
  %expand_shape = memref.expand_shape %alloca [[0, 1, 2]] output_shape [1, 4, 1] : memref<4xi32> into memref<1x4x1xi32>
  scf.for %arg0 = %c0 to %c20 step %c4 {
    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
    %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
    vector.transfer_write %1, %expand_shape[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
    // %alloca and %expand_shape alias
    vector.transfer_write %vec, %alloca[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
    %2 = vector.transfer_read %expand_shape[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
    vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
  }
  return
}

// CHECK-LABEL: func @forward_dead_store_dynamic_same_index
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_store_dynamic_same_index(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  vector.transfer_write %v0, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer_read reads from the same address as the write above, so we can forward.
  %0 = vector.transfer_read %buffer[%i, %i], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i, %i] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

//   CHECK-LABEL: func @dont_forward_dead_store_dynamic_overlap
// CHECK-COUNT-2:   vector.transfer_write
//         CHECK:   vector.transfer_read
//         CHECK:   scf.for
//         CHECK:   }
//         CHECK:   vector.transfer_write
//         CHECK:   return
func.func @dont_forward_dead_store_dynamic_overlap(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %i1 = affine.apply affine_map<(d0) -> (d0 + 3)>(%i0)
  vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op writes to an overlapping range, so we cannot forward.
  vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

// CHECK-LABEL: func @forward_dead_store_dynamic_non_overlap_leading_dim
//       CHECK:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_store_dynamic_non_overlap_leading_dim(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %i1 = affine.apply affine_map<(d0) -> (d0 + 1)>(%i0)
  vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op writes to a non-overlapping range, so we can forward.
  vector.transfer_write %v0, %buffer[%i1, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

// CHECK-LABEL: func @forward_dead_store_dynamic_non_overlap_trailing_dim
//       CHECK:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_store_dynamic_non_overlap_trailing_dim(
    %buffer : memref<?x?xf32>, %v0 : vector<4xf32>, %v1 : vector<4xf32>, %i0 : index) {
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %i1 = affine.apply affine_map<(d0) -> (d0 + 4)>(%i0)
  vector.transfer_write %v0, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  // The following transfer op writes to a non-overlapping range, so we can forward.
  vector.transfer_write %v0, %buffer[%i0, %i1] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%i0, %i0], %cf0 {in_bounds = [true]} : memref<?x?xf32>, vector<4xf32>
  %x = scf.for %iv = %c0 to %c4 step %c1 iter_args(%acc = %0) -> (vector<4xf32>) {
    %1 = arith.addf %acc, %acc : vector<4xf32>
    scf.yield %1 : vector<4xf32>
  }
  vector.transfer_write %x, %buffer[%i0, %i0] {in_bounds = [true]} : vector<4xf32>, memref<?x?xf32>
  return
}

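// A masked store of a constant splat forwards to a masked read that uses the
// same mask and whose padding equals the splat value: masked-off lanes yield
// the padding, which matches the stored splat anyway. Both the store and the
// read are removed, and the loop starts directly from the splat constant.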
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking
//       CHECK:   %[[SPLAT:.*]] = arith.constant dense<0.000000e+00> : vector<[8]x[8]xf32>
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   scf.for
//  CHECK-SAME:     iter_args(%{{.*}} = %[[SPLAT]])
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_constant_splat_store_with_masking(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Here the read can be eliminated but not the write (due to mismatched masks).
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_unmasked_write
//       CHECK:   %[[SPLAT:.*]] = arith.constant dense<0.000000e+00> : vector<[8]x[8]xf32>
//       CHECK:   vector.transfer_write %[[SPLAT]]
//       CHECK:   scf.for
//  CHECK-SAME:     iter_args(%{{.*}} = %[[SPLAT]])
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_constant_splat_store_with_masking_unmasked_write(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Negative test, the padding does not match the constant splat, so we can't
// forward the store.
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_negative_0
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_constant_splat_store_with_masking_negative_0(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 1.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Negative test, the masks don't match between the read and write, so we can't
// forward the store.
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_negative_1
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_constant_splat_store_with_masking_negative_1(%buffer : memref<?x?xf32>, %mask_a: vector<[8]x[8]xi1>, %mask_b: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask_a {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding, %mask_b {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask_a {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Negative test, here the write is masked but the read is unmasked. We can't
// forward the store (as the write could store fewer elements than the read).
// CHECK-LABEL: func @forward_dead_constant_splat_store_with_masking_negative_3
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_dead_constant_splat_store_with_masking_negative_3(%buffer : memref<?x?xf32>, %mask: vector<[8]x[8]xi1>) {
  %zero_splat = arith.constant dense<0.0> : vector<[8]x[8]xf32>
  %read_padding = arith.constant 0.0 : f32
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %c512 = arith.constant 512 : index
  vector.transfer_write %zero_splat, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %0 = vector.transfer_read %buffer[%c0, %c0], %read_padding {in_bounds = [true, true]} : memref<?x?xf32>, vector<[8]x[8]xf32>
  %x = scf.for %arg2 = %c0 to %c512 step %c1 iter_args(%acc = %0) -> (vector<[8]x[8]xf32>) {
    %1 = arith.addf %acc, %acc : vector<[8]x[8]xf32>
    scf.yield %1 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  return
}

// Here each read/write is to a different subview, but they all point to
// exactly the same memory (just through casts and subviews with unit strides
// and zero offsets).
// CHECK-LABEL: func @forward_and_eliminate_stores_through_trivial_aliases
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   scf.for
//       CHECK:   }
//       CHECK:   vector.transfer_write
//       CHECK:   return
func.func @forward_and_eliminate_stores_through_trivial_aliases(
  %buffer : memref<?x?xf32>, %vec: vector<[8]x[8]xf32>, %size: index, %a_size: index, %another_size: index
) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c32 = arith.constant 32 : index
  %cst = arith.constant 0.0 : f32
  vector.transfer_write %vec, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
  %direct_subview = memref.subview %buffer[0, 0] [%a_size, %a_size] [1, 1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1]>>
  %cast = memref.cast %direct_subview : memref<?x?xf32, strided<[?, 1]>> to memref<?x?xf32>
  %subview_of_cast = memref.subview %cast[0, 0] [%another_size, %another_size] [1, 1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1]>>
  %21 = vector.transfer_read %direct_subview[%c0, %c0], %cst {in_bounds = [true, true]} : memref<?x?xf32, strided<[?, 1]>>, vector<[8]x[8]xf32>
  %23 = scf.for %arg2 = %c0 to %c32 step %c1 iter_args(%arg3 = %21) -> (vector<[8]x[8]xf32>) {
    %24 = arith.addf %arg3, %arg3 : vector<[8]x[8]xf32>
    scf.yield %24 : vector<[8]x[8]xf32>
  }
  vector.transfer_write %23, %subview_of_cast[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32, strided<[?, 1]>>
  return
}