// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation \
// RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null

// RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file --verify-diagnostics > /dev/null

// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// BufferDeallocation expected behavior: The AllocOp for the BufferBasedOp
// should remain inside the region of the RegionBufferBasedOp, and the missing
// DeallocOp for it should be inserted in that same region. The DeallocOp for
// the buffer consumed by the CopyOp should be inserted after the CopyOp.

func.func @nested_regions_and_cond_branch(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloc() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @nested_regions_and_cond_branch
//  CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
//       CHECK: ^bb1:
//   CHECK-NOT:   bufferization.clone
//   CHECK-NOT:   bufferization.dealloc
//       CHECK:   cf.br ^bb3([[ARG1]], %false
//       CHECK: ^bb2:
//       CHECK:   [[ALLOC0:%.+]] = memref.alloc()
//       CHECK:   test.region_buffer_based
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:     test.buffer_based
//       CHECK:     bufferization.dealloc ([[ALLOC1]] : memref<2xf32>) if (%true
//  CHECK-NEXT:     test.region_yield
//   CHECK-NOT:   bufferization.clone
//   CHECK-NOT:   bufferization.dealloc
//       CHECK:   cf.br ^bb3([[ALLOC0]], %true
//       CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1):
//       CHECK:   test.copy
//  CHECK-NEXT:   [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
//  CHECK-NEXT:   bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
//       CHECK:   return
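
// For reference, a hedged sketch (not FileCheck-verified, illustrative names
// only) of the ownership ABI materialized above: every memref forwarded to a
// block is paired with an i1 ownership flag, and the deallocation at the end
// of the block is guarded by that flag:
//
//   cf.br ^bb3(%alloc, %true : memref<2xf32>, i1)
// ^bb3(%buf: memref<2xf32>, %own: i1):
//   test.copy(%buf, %out) : (memref<2xf32>, memref<2xf32>)
//   bufferization.dealloc (%buf : memref<2xf32>) if (%own)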

// -----

// Test Case: nested region control flow
// The alloc %1 flows through both if branches until it is finally returned.
// Hence, it does not require a specific dealloc operation. However, %3
// requires a dealloc.

func.func @nested_region_control_flow(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    "test.read_buffer"(%3) : (memref<?x?xf32>) -> ()
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nested_region_control_flow
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   [[V0:%.+]]:2 = scf.if
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:     bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
//   CHECK-NOT: retain
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:     scf.yield [[V0]]#0
//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:     scf.yield [[CLONE]]
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK:   return [[V1]]
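
// The trailing scf.if in the CHECK lines above is the usual materialization
// for returned buffers: if the function does not own the buffer it is about
// to return, a clone is created so the caller always receives an owned
// allocation. A hedged sketch of the pattern with illustrative names:
//
//   %ret = scf.if %own -> (memref<?x?xf32>) {
//     scf.yield %buf : memref<?x?xf32>
//   } else {
//     %clone = bufferization.clone %buf : memref<?x?xf32> to memref<?x?xf32>
//     scf.yield %clone : memref<?x?xf32>
//   }
//   return %ret : memref<?x?xf32>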

// -----

// Test Case: nested region control flow with a nested buffer allocation in a
// divergent branch.
// Buffer deallocation places a copy for both %1 and %3, since they are
// returned in the end.

func.func @nested_region_control_flow_div(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %3 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nested_region_control_flow_div
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   [[V0:%.+]]:2 = scf.if
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:     scf.yield [[ALLOC1]], %true
//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:     scf.yield [[V0]]#0
//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:     scf.yield [[CLONE]]
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK:   return [[V1]]

// -----

// Test Case: nested region control flow within a region interface.
// No copies are required in this case since the allocation finally escapes
// the function.

func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
  %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
    ^bb0(%arg1 : memref<?x?xf32>):
      test.region_if_yield %arg1 : memref<?x?xf32>
  } else {
    ^bb0(%arg1 : memref<?x?xf32>):
      test.region_if_yield %arg1 : memref<?x?xf32>
  } join {
    ^bb0(%arg1 : memref<?x?xf32>):
      test.region_if_yield %arg1 : memref<?x?xf32>
  }
  return %1 : memref<?x?xf32>
}

// CHECK-LABEL: func.func @inner_region_control_flow
//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
//       CHECK:   [[V0:%.+]]:2 = test.region_if [[ALLOC]], %false
//       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
//       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
//       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
//       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
//       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
//       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:     scf.yield [[V0]]#0
//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:     scf.yield [[CLONE]]
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK:   return [[V1]]
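
// Note on the `retain` clause used above: bufferization.dealloc frees a
// memref only if its condition is true and it does not alias a retained
// value, and it returns one updated ownership i1 per retained value. A
// hedged, standalone example of the form (illustrative names):
//
//   %new_own = bufferization.dealloc (%a, %b : memref<2xf32>, memref<2xf32>)
//                if (%own_a, %own_b) retain (%r : memref<2xf32>)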

// -----

func.func @nestedRegionsAndCondBranchAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloca() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
//  CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
//       CHECK: ^bb1:
//       CHECK:   cf.br ^bb3([[ARG1]], %false
//       CHECK: ^bb2:
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   test.region_buffer_based
//       CHECK:     memref.alloca()
//       CHECK:     test.buffer_based
//   CHECK-NOT:     bufferization.dealloc
//   CHECK-NOT:     bufferization.clone
//       CHECK:     test.region_yield
//       CHECK:   }
//       CHECK:   cf.br ^bb3([[ALLOC]], %true
//       CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND:%.+]]: i1):
//       CHECK:   test.copy
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[A0]]
//       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND]])

// -----

func.func @nestedRegionControlFlowAlloca(
  %arg0 : index, %arg1 : index, %arg2: f32) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloca(%arg0, %arg1) : memref<?x?xf32>
    %c0 = arith.constant 0 : index
    memref.store %arg2, %3[%c0, %c0] : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nestedRegionControlFlowAlloca
//       CHECK: [[ALLOC:%.+]] = memref.alloc(
//       CHECK: [[V0:%.+]]:2 = scf.if
//       CHECK:   scf.yield [[ALLOC]], %false
//       CHECK:   memref.alloca(
//       CHECK:   scf.yield [[ALLOC]], %false
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK: return [[V1]]
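
// memref.alloca provides automatic (stack) storage, so the pass does not
// track ownership for it and never emits a dealloc for it. A hedged contrast:
//
//   %heap  = memref.alloc()  : memref<2xf32>  // tracked; a dealloc is inserted
//   %stack = memref.alloca() : memref<2xf32>  // untracked; freed on scope exit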

// -----

// Test Case: structured control-flow loop using a nested alloc.
// The iteration argument %iterBuf has to be freed before yielding %3 to avoid
// memory leaks.

func.func @loop_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  "test.read_buffer"(%0) : (memref<2xf32>) -> ()
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = memref.alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_alloc
//  CHECK-SAME: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: index, [[ARG2:%.+]]: index, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
//       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]])
//   CHECK-NOT:       retain
//       CHECK:   scf.yield [[ALLOC1]], %true
//       CHECK: test.copy
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
//   CHECK-NOT: retain
//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)
//   CHECK-NOT: retain
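
// The pattern above extends loop-carried memrefs with an i1 ownership
// iter_arg; the buffer carried into an iteration is conditionally freed
// before the newly allocated one is yielded. A simplified, hedged sketch
// (the real output first extracts the base memref via
// memref.extract_strided_metadata):
//
//   %r:2 = scf.for %i = %lb to %ub step %step
//       iter_args(%b = %init, %own = %false) -> (memref<2xf32>, i1) {
//     %new = memref.alloc() : memref<2xf32>
//     bufferization.dealloc (%b : memref<2xf32>) if (%own)
//     scf.yield %new, %true : memref<2xf32>, i1
//   }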

// -----

// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop, and the
// backedges only use the iteration arguments (or one of their aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges.

func.func @loop_nested_if_no_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      scf.yield %0 : memref<2xf32>
    } else {
      scf.yield %iterBuf : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_nested_if_no_alloc
//  CHECK-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
//       CHECK:   [[V1:%.+]]:2 = scf.if
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:     scf.yield [[ARG6]], %false
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]]) retain ([[V1]]#0 :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
//       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
//       CHECK: test.copy
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1)

// TODO: we know statically that the inner dealloc will never deallocate
//       anything, i.e., we can optimize it away
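
// The arith.ori above aggregates ownership: the value yielded back to the
// loop is owned if either the guarded dealloc transferred ownership to it
// (the retain result) or the scf.if branch itself produced an owned buffer.
// Hedged sketch of the aggregation step (illustrative names):
//
//   %retained = bufferization.dealloc (%base : memref<f32>) if (%own)
//                 retain (%v : memref<2xf32>)
//   %own_agg = arith.ori %retained, %v_own : i1
//   scf.yield %v, %own_agg : memref<2xf32>, i1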

// -----

// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.

func.func @loop_nested_if_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>) -> memref<2xf32> {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      %4 = memref.alloc() : memref<2xf32>
      scf.yield %4 : memref<2xf32>
    } else {
      scf.yield %0 : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  return %1 : memref<2xf32>
}

// CHECK-LABEL: func @loop_nested_if_alloc
//  CHECK-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG5:%.+]] = [[ARG3]], [[ARG6:%.+]] = %false
//       CHECK:   [[V1:%.+]]:2 = scf.if
//       CHECK:     [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:     scf.yield [[ALLOC1]], %true
//       CHECK:     scf.yield [[ALLOC]], %false
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG5]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG6]]) retain ([[V1]]#0 :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
//       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
//       CHECK: }
//       CHECK: [[V2:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
//       CHECK: return [[V2]]

// -----

// Test Case: several nested structured control-flow loops with a deeply nested
// buffer allocation inside an if operation.

func.func @loop_nested_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  "test.read_buffer"(%0) : (memref<2xf32>) -> ()
  %1 = scf.for %i = %lb to %ub step %step
    iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = scf.for %i2 = %lb to %ub step %step
      iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
      %3 = scf.for %i3 = %lb to %ub step %step
        iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
        %4 = memref.alloc() : memref<2xf32>
        "test.read_buffer"(%4) : (memref<2xf32>) -> ()
        %5 = arith.cmpi eq, %i, %ub : index
        %6 = scf.if %5 -> (memref<2xf32>) {
          %7 = memref.alloc() : memref<2xf32>
          scf.yield %7 : memref<2xf32>
        } else {
          scf.yield %iterBuf3 : memref<2xf32>
        }
        scf.yield %6 : memref<2xf32>
      }
      scf.yield %3 : memref<2xf32>
    }
    scf.yield %2 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_nested_alloc
//       CHECK: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>, {{.*}}: memref<2xf32>)
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
//       CHECK:   [[V1:%.+]]:2 = scf.for {{.*}} iter_args([[ARG9:%.+]] = [[ARG6]], [[ARG10:%.+]] = %false
//       CHECK:     [[V2:%.+]]:2 = scf.for {{.*}} iter_args([[ARG12:%.+]] = [[ARG9]], [[ARG13:%.+]] = %false
//       CHECK:       [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:       [[V3:%.+]]:2 = scf.if
//       CHECK:         [[ALLOC2:%.+]] = memref.alloc()
//       CHECK:         scf.yield [[ALLOC2]], %true
//       CHECK:       } else {
//       CHECK:         scf.yield [[ARG12]], %false
//       CHECK:       }
//       CHECK:       [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG12]]
//       CHECK:       [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG13]]) retain ([[V3]]#0 :
//       CHECK:       bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
//   CHECK-NOT: retain
//       CHECK:       [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V3]]#1
//       CHECK:       scf.yield [[V3]]#0, [[OWN_AGG]]
//       CHECK:     }
//       CHECK:     [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG9]]
//       CHECK:     [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG10]]) retain ([[V2]]#0 :
//       CHECK:     [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V2]]#1
//       CHECK:     scf.yield [[V2]]#0, [[OWN_AGG]]
//       CHECK:   }
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]]) retain ([[V1]]#0 :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
//       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
//       CHECK: }
//       CHECK: test.copy
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)

// TODO: all the retain operands could be removed by a more thorough analysis

// -----

func.func @affine_loop() -> f32 {
  %buffer = memref.alloc() : memref<1024xf32>
  %sum_init_0 = arith.constant 0.0 : f32
  %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = arith.addf %sum_iter, %t : f32
    affine.yield %sum_next : f32
  }
  return %res : f32
}

// CHECK-LABEL: func @affine_loop
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: affine.for {{.*}} iter_args(%arg1 = %cst)
//       CHECK:   affine.yield
//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
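
// Because the allocation neither escapes the function nor flows through the
// loop's iter_args (only f32 values are loop-carried), a single unconditional
// dealloc after the loop suffices. Hedged sketch:
//
//   %buffer = memref.alloc() : memref<1024xf32>
//   affine.for ... { ... }
//   bufferization.dealloc (%buffer : memref<1024xf32>) if (%true)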

// -----

func.func @assumingOp(
  %arg0: !shape.witness,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  // Confirm the alloc will be dealloc'ed in the block.
  %1 = shape.assuming %arg0 -> memref<2xf32> {
    %0 = memref.alloc() : memref<2xf32>
    "test.read_buffer"(%0) : (memref<2xf32>) -> ()
    shape.assuming_yield %arg2 : memref<2xf32>
  }
  // Confirm the alloc will be returned and dealloc'ed after its use.
  %3 = shape.assuming %arg0 -> memref<2xf32> {
    %2 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %2 : memref<2xf32>
  }
  test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @assumingOp
//       CHECK: ({{.*}}, [[ARG1:%.+]]: memref<2xf32>, {{.*}}: memref<2xf32>)
//       CHECK: [[V0:%.+]]:2 = shape.assuming
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}})
//   CHECK-NOT: retain
//       CHECK:   shape.assuming_yield [[ARG1]], %false
//       CHECK: }
//       CHECK: [[V1:%.+]]:2 = shape.assuming
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   shape.assuming_yield [[ALLOC]], %true
//       CHECK: }
//       CHECK: test.copy
//       CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V1]]#0
//       CHECK: bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[V1]]#1)
//   CHECK-NOT: retain
//       CHECK: bufferization.dealloc ([[BASE0]] :{{.*}}) if ([[V0]]#1)
//   CHECK-NOT: retain
//       CHECK: return

// -----

// Test Case: The op "test.one_region_with_recursive_memory_effects" does not
// implement the RegionBranchOpInterface. This is allowed during buffer
// deallocation because the operation's region does not deal with any MemRef
// values.

func.func @noRegionBranchOpInterface() {
  %0 = "test.one_region_with_recursive_memory_effects"() ({
    %1 = "test.one_region_with_recursive_memory_effects"() ({
      %2 = memref.alloc() : memref<2xi32>
      "test.read_buffer"(%2) : (memref<2xi32>) -> ()
      "test.return"() : () -> ()
    }) : () -> (i32)
    "test.return"() : () -> ()
  }) : () -> (i32)
  "test.return"() : () -> ()
}

// -----

// Test Case: The second op "test.one_region_with_recursive_memory_effects" does
// not implement the RegionBranchOpInterface but has buffer semantics. This is
// not allowed during buffer deallocation.

func.func @noRegionBranchOpInterface() {
  %0 = "test.one_region_with_recursive_memory_effects"() ({
    // expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
    %1 = "test.one_region_with_recursive_memory_effects"() ({
      %2 = memref.alloc() : memref<2xi32>
      "test.read_buffer"(%2) : (memref<2xi32>) -> ()
      "test.return"(%2) : (memref<2xi32>) -> ()
    }) : () -> (memref<2xi32>)
    "test.return"() : () -> ()
  }) : () -> (i32)
  "test.return"() : () -> ()
}

// -----

func.func @while_two_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
  scf.while (%arg1 = %a, %arg2 = %a) : (memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
    // This op has a side effect, but it's not an allocate/free side effect.
    %0 = "test.side_effect_op"() {effects = [{effect="read"}]} : () -> i1
    scf.condition(%0) %arg1, %arg2 : memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %arg1, %b : memref<?xf32>, memref<?xf32>
  }
  return
}

// CHECK-LABEL: func @while_two_arg
//       CHECK: [[ALLOC:%.+]] = memref.alloc(
//       CHECK: [[V0:%.+]]:4 = scf.while ({{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false{{[0-9_]*}})
//       CHECK:   scf.condition
//       CHECK: ^bb0([[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: i1, [[ARG4:%.+]]: i1):
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG2]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG4]]) retain ([[ARG1]] :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[ARG3]]
//       CHECK:   scf.yield [[ARG1]], [[ALLOC1]], [[OWN_AGG]], %true
//       CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#2, [[V0]]#3)
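
// For scf.while, ownership flags are threaded through both the "before" and
// "after" regions, so the number of loop-carried values doubles (two memrefs
// plus two i1 flags here). Hedged sketch of the resulting loop header, with
// the regions elided:
//
//   %r:4 = scf.while (%b0 = %a, %b1 = %a, %o0 = %false, %o1 = %false)
//       : (memref<?xf32>, memref<?xf32>, i1, i1)
//       -> (memref<?xf32>, memref<?xf32>, i1, i1)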

// -----

func.func @while_three_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
  scf.while (%arg1 = %a, %arg2 = %a, %arg3 = %a) : (memref<?xf32>, memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>, memref<?xf32>) {
    // This op has a side effect, but it's not an allocate/free side effect.
    %0 = "test.side_effect_op"() {effects = [{effect="read"}]} : () -> i1
    scf.condition(%0) %arg1, %arg2, %arg3 : memref<?xf32>, memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>, %arg3: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    %q = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %q, %b, %arg2: memref<?xf32>, memref<?xf32>, memref<?xf32>
  }
  return
}

// CHECK-LABEL: func @while_three_arg
//       CHECK: [[ALLOC:%.+]] = memref.alloc(
//       CHECK: [[V0:%.+]]:6 = scf.while ({{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false
//       CHECK:   scf.condition
//       CHECK: ^bb0([[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: memref<?xf32>, [[ARG4:%.+]]: i1, [[ARG5:%.+]]: i1, [[ARG6:%.+]]: i1):
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
//       CHECK:   [[ALLOC2:%.+]] = memref.alloc(
//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG1]]
//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG3]]
//       CHECK:   [[OWN:%.+]] = bufferization.dealloc ([[BASE0]], [[BASE2]] :{{.*}}) if ([[ARG4]], [[ARG6]]) retain ([[ARG2]] :
//       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[ARG5]]
//       CHECK:   scf.yield [[ALLOC2]], [[ALLOC1]], [[ARG2]], %true{{[0-9_]*}}, %true{{[0-9_]*}}, [[OWN_AGG]] :
//       CHECK: }
//       CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
//       CHECK: [[BASE2:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#2
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]], [[BASE2]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#3, [[V0]]#4, [[V0]]#5)

// TODO: better alias analysis could simplify the dealloc inside the body further

// -----

// Memref allocated in `then` region and passed back to the parent if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_1(%arg0: memref<10xf32>) -> memref<10xf32> {
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %arg0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_1
//  CHECK-SAME: ([[ARG0:%.*]]: memref<10xf32>)
//       CHECK: [[V0:%.+]]:2 = affine.if
//       CHECK:   [[ALLOC:%.+]] = memref.alloc()
//       CHECK:   affine.yield [[ALLOC]], %true
//       CHECK:   affine.yield [[ARG0]], %false
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
//       CHECK: return [[V1]]

// TODO: the dealloc could be optimized away since the memref to be deallocated
//       either aliases with V1 or the condition is false

// -----

// Memref allocated before parent IfOp and used in `then` region.
// Expected result: deallocation should happen after affine.if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_2() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    affine.yield %alloc0 : memref<10xf32>
  } else {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  }
  return %0 : memref<10xf32>
}
// CHECK-LABEL: func @test_affine_if_2
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = affine.if
//       CHECK:   affine.yield [[ALLOC]], %false
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:   affine.yield [[ALLOC1]], %true
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
//       CHECK: return [[V1]]

// -----

// Memref allocated before parent IfOp and used in `else` region.
// Expected result: deallocation should happen after affine.if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_3() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %alloc0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_3
//       CHECK: [[ALLOC:%.+]] = memref.alloc()
//       CHECK: [[V0:%.+]]:2 = affine.if
//       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
//       CHECK:   affine.yield [[ALLOC1]], %true
//       CHECK:   affine.yield [[ALLOC]], %false
//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
//       CHECK:   scf.yield [[V0]]#0
//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
//       CHECK:   scf.yield [[CLONE]]
//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
//       CHECK: return [[V1]]