// Source: llvm-project mlir/test/Dialect/MemRef/transform-ops.mlir (revision 2798b72ae7e5caad793169b77cbac47fe2362d0f)
// RUN: mlir-opt %s -transform-interpreter -verify-diagnostics -allow-unregistered-dialect -split-input-file | FileCheck %s

// Test: transform.memref.alloca_to_global promotes function-local
// memref.alloca ops to private memref.global definitions plus
// memref.get_global at the original use sites.

// CHECK-DAG: memref.global "private" @[[ALLOC0:alloc.*]] : memref<2x32xf32>
// CHECK-DAG: memref.global "private" @[[ALLOC1:alloc.*]] : memref<2x32xf32>

// CHECK-DAG: func.func @func(%[[LB:.*]]: index, %[[UB:.*]]: index)
func.func @func(%lb: index, %ub: index) {
  // CHECK-DAG: scf.forall (%[[ARG0:.*]], %[[ARG1:.*]]) in (%[[LB]], %[[UB]])
  scf.forall (%arg0, %arg1) in (%lb, %ub) {
    // CHECK-DAG: %[[MR0:.*]] = memref.get_global @[[ALLOC0]] : memref<2x32xf32>
    // CHECK-DAG: %[[MR1:.*]] = memref.get_global @[[ALLOC1]] : memref<2x32xf32>
    // CHECK-DAG: memref.store %{{.*}}, %[[MR0]][%{{.*}}, %{{.*}}] : memref<2x32xf32>
    // CHECK-DAG: memref.store %{{.*}}, %[[MR1]][%{{.*}}, %{{.*}}] : memref<2x32xf32>
    %cst = arith.constant 0.0 : f32
    %mr0 = memref.alloca() : memref<2x32xf32>
    %mr1 = memref.alloca() : memref<2x32xf32>
    memref.store %cst, %mr0[%arg0, %arg1] : memref<2x32xf32>
    memref.store %cst, %mr1[%arg0, %arg1] : memref<2x32xf32>
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %alloca = transform.structured.match ops{["memref.alloca"]} in %arg0
        : (!transform.any_op) -> !transform.op<"memref.alloca">
    %get_global, %global = transform.memref.alloca_to_global %alloca
          : (!transform.op<"memref.alloca">)
            -> (!transform.any_op, !transform.any_op)
    transform.yield
  }
}

// -----

// Test: transform.memref.multibuffer with factor 2 on an alloc used inside
// an scf.for loop; the memref<4xf32> buffer becomes memref<2x4xf32> indexed
// by (iv floordiv step) mod factor.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK-LABEL: func @multi_buffer
func.func @multi_buffer(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>
    memref.copy %1, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Test: multibuffer also handles allocs whose uses live inside an
// affine.for loop (not just scf.for).

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK-LABEL: func @multi_buffer_on_affine_loop
func.func @multi_buffer_on_affine_loop(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  // CHECK: affine.for %[[IV:.*]] = 0
  affine.for %i0 = 0 to 16 step 4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>
    memref.copy %1, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Trying to use multibuffer on allocs that are used in different loops
// with none dominating the other is going to fail.
// Check that we emit a proper error for that.
func.func @multi_buffer_uses_with_no_loop_dominator(%in: memref<16xf32>, %cond: i1) {
  // expected-error @below {{op failed to multibuffer}}
  %tmp = memref.alloc() : memref<4xf32>

  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index
  scf.if %cond {
    scf.for %i0 = %c0 to %c16 step %c4 {
      %var = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
      memref.copy %var, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

      "some_use"(%tmp) : (memref<4xf32>) ->()
    }
  }

  scf.for %i0 = %c0 to %c16 step %c4 {
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    memref.copy %1, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    transform.yield
  }
}

// -----

// Make sure the multibuffer operation is typed so that it only supports
// memref.alloc.
// Check that we emit an error if we try to match something else.
func.func @multi_buffer_reject_alloca(%in: memref<16xf32>, %cond: i1) {
  %tmp = memref.alloca() : memref<4xf32>

  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index
  scf.if %cond {
    scf.for %i0 = %c0 to %c16 step %c4 {
      %var = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
      memref.copy %var, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

      "some_use"(%tmp) : (memref<4xf32>) ->()
    }
  }

  scf.for %i0 = %c0 to %c16 step %c4 {
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    memref.copy %1, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloca"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloca">
    // expected-error @below {{'transform.memref.multibuffer' op operand #0 must be Transform IR handle to memref.alloc operations, but got '!transform.op<"memref.alloca">'}}
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloca">) -> !transform.any_op
    transform.yield
  }
}

// -----

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK-LABEL: func @multi_buffer_one_alloc_with_use_outside_of_loop
// Make sure we manage to apply multi_buffer to the memref that is used in
// the loop (%tmp) and don't error out for the one that is not (%tmp2).
func.func @multi_buffer_one_alloc_with_use_outside_of_loop(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>
  %tmp2 = memref.alloc() : memref<4xf32>

  "some_use_outside_of_loop"(%tmp2) : (memref<4xf32>) -> ()

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
    // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
    // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>
    memref.copy %1, %tmp :  memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref<4xf32>

    "some_use"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----


// Test: with skip_analysis the loop-use analysis is bypassed and the alloc
// is multibuffered even though its use is an opaque op.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>

// CHECK-LABEL: func @multi_buffer
func.func @multi_buffer_no_analysis(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
  // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
  // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    "some_write_read"(%tmp) : (memref<4xf32>) ->()
  }
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64, skip_analysis} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Test: a matching memref.dealloc of the original buffer is rewritten to
// deallocate the new multibuffered allocation.

// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>

// CHECK-LABEL: func @multi_buffer_dealloc
func.func @multi_buffer_dealloc(%in: memref<16xf32>) {
  // CHECK: %[[A:.*]] = memref.alloc() : memref<2x4xf32>
  // expected-remark @below {{transformed}}
  %tmp = memref.alloc() : memref<4xf32>

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[C4:.*]] = arith.constant 4 : index
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index

  // CHECK: scf.for %[[IV:.*]] = %[[C0]]
  scf.for %i0 = %c0 to %c16 step %c4 {
  // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
  // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
    "some_write_read"(%tmp) : (memref<4xf32>) ->()
  }

  // CHECK-NOT: memref.dealloc {{.*}} : memref<4xf32>
  // CHECK: memref.dealloc %[[A]] : memref<2x4xf32>
  memref.dealloc %tmp : memref<4xf32>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["memref.alloc"]} in %arg1 : (!transform.any_op) -> !transform.op<"memref.alloc">
    %1 = transform.memref.multibuffer %0 {factor = 2 : i64, skip_analysis} : (!transform.op<"memref.alloc">) -> !transform.any_op
    // Verify that the returned handle is usable.
    transform.debug.emit_remark_at %1, "transformed" : !transform.any_op
    transform.yield
  }
}

// -----

// Test: transform.memref.erase_dead_alloc_and_stores removes an alloc whose
// only uses are writes (the subview and transfer_write become dead too).

// CHECK-LABEL: func.func @dead_alloc
func.func @dead_alloc() {
  // CHECK-NOT: %{{.+}} = memref.alloc
  %0 = memref.alloc() : memref<8x64xf32, 3>
  %1 = memref.subview %0[0, 0] [8, 4] [1, 1] : memref<8x64xf32, 3> to
    memref<8x4xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>, 3>
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant dense<0.000000e+00> : vector<1x4xf32>
  vector.transfer_write %cst_0, %1[%c0, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 * 64 + d1)>, 3>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.memref.erase_dead_alloc_and_stores %0 : (!transform.any_op) -> ()
    transform.yield
  }
}

// -----

// Test: store-to-load forwarding — the transfer_read is replaced by the
// vector that was written, and the now-dead alloc and transfer_write are
// erased.

// CHECK-LABEL: @store_to_load
//  CHECK-SAME:   (%[[ARG:.+]]: vector<4xf32>)
//   CHECK-NOT:   memref.alloc()
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   return %[[ARG]] : vector<4xf32>
func.func @store_to_load(%arg: vector<4xf32>) -> vector<4xf32> {
  %c0 = arith.constant 0 : index
  %cst_1 = arith.constant 0.000000e+00 : f32
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<64xf32>
  vector.transfer_write %arg, %alloc[%c0] {in_bounds = [true]} : vector<4xf32>, memref<64xf32>
  %r = vector.transfer_read %alloc[%c0], %cst_1 {in_bounds = [true]} : memref<64xf32>, vector<4xf32>
  return %r : vector<4xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.memref.erase_dead_alloc_and_stores %0 : (!transform.any_op) -> ()
    transform.yield
  }
}

// -----

// Test: convert memref ops to the LLVM dialect via typed conversion
// patterns with the memref-to-llvm type converter.

// CHECK-LABEL: func @lower_to_llvm
//   CHECK-NOT:   memref.alloc
//       CHECK:   llvm.call @malloc
func.func @lower_to_llvm() {
  %0 = memref.alloc() : memref<2048xi8>
  return
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    transform.apply_conversion_patterns to %0 {
      transform.apply_conversion_patterns.dialect_to_llvm "memref"
    } with type_converter {
      transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
    } {legal_dialects = ["func", "llvm"]} : !transform.any_op
    transform.yield
  }
}

376