// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation \
// RUN:   --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null

// RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file --verify-diagnostics > /dev/null

// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// BufferDeallocation expected behavior: The AllocOp for the BufferBasedOp
// should remain inside the region of the RegionBufferBasedOp, and the pass
// should insert the missing DeallocOp in the same region, after the CopyOp.

func.func @nested_regions_and_cond_branch(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloc() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @nested_regions_and_cond_branch
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: ^bb1:
// CHECK-NOT: bufferization.clone
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.br ^bb3([[ARG1]], %false
// CHECK: ^bb2:
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK: test.region_buffer_based
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK: test.buffer_based
// CHECK: bufferization.dealloc ([[ALLOC1]] : memref<2xf32>) if (%true
// CHECK-NEXT: test.region_yield
// CHECK-NOT: bufferization.clone
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.br ^bb3([[ALLOC0]], %true
// CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
// CHECK: return

// -----

// Test Case: nested region control flow
// The alloc %1 flows through both if branches until it is finally returned.
// Hence, it does not require a specific dealloc operation. However, %3
// requires a dealloc.
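//
// For illustration, a rough sketch of how the pass rewrites the scf.if in
// the test below (assuming the extra i1 ownership-result convention; SSA
// names are illustrative, not the exact output matched by the patterns
// further down):
//
//   %v:2 = scf.if %0 -> (memref<?x?xf32>, i1) {
//     scf.yield %1, %false : memref<?x?xf32>, i1  // caller-owned, not ours
//   } else {
//     // %3 never leaves this branch, so it is freed right here.
//     bufferization.dealloc (%3 : memref<?x?xf32>) if (%true)
//     scf.yield %1, %false : memref<?x?xf32>, i1
//   }
//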
func.func @nested_region_control_flow(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    "test.read_buffer"(%3) : (memref<?x?xf32>) -> ()
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nested_region_control_flow
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: [[V0:%.+]]:2 = scf.if
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: [[ALLOC1:%.+]] = memref.alloc(
// CHECK: bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
// CHECK-NOT: retain
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
// CHECK: return [[V1]]

// -----

// Test Case: nested region control flow with a nested buffer allocation in a
// divergent branch.
// Buffer deallocation places a copy for both %1 and %3, since they are
// returned in the end.

func.func @nested_region_control_flow_div(
  %arg0 : index,
  %arg1 : index) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
    scf.yield %3 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nested_region_control_flow_div
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: [[V0:%.+]]:2 = scf.if
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: [[ALLOC1:%.+]] = memref.alloc(
// CHECK: scf.yield [[ALLOC1]], %true
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
// CHECK: return [[V1]]

// -----

// Test Case: nested region control flow within a region interface.
// No copies are required in this case since the allocation finally escapes
// the function.
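//
// For returned buffers with conditional ownership, the pass materializes
// guaranteed ownership before the return. A hedged sketch of that pattern
// (names are illustrative):
//
//   %r = scf.if %owned -> (memref<?x?xf32>) {
//     scf.yield %buf : memref<?x?xf32>  // already owned; forward as-is
//   } else {
//     %c = bufferization.clone %buf : memref<?x?xf32> to memref<?x?xf32>
//     scf.yield %c : memref<?x?xf32>    // clone to acquire ownership
//   }
//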
func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
  %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  } else {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  } join {
  ^bb0(%arg1 : memref<?x?xf32>):
    test.region_if_yield %arg1 : memref<?x?xf32>
  }
  return %1 : memref<?x?xf32>
}

// CHECK-LABEL: func.func @inner_region_control_flow
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: [[V0:%.+]]:2 = test.region_if [[ALLOC]], %false
// CHECK: ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
// CHECK: test.region_if_yield [[ARG1]], [[ARG2]]
// CHECK: ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
// CHECK: test.region_if_yield [[ARG1]], [[ARG2]]
// CHECK: ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
// CHECK: test.region_if_yield [[ARG1]], [[ARG2]]
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
// CHECK: return [[V1]]

// -----

func.func @nestedRegionsAndCondBranchAlloca(
  %arg0: i1,
  %arg1: memref<2xf32>,
  %arg2: memref<2xf32>) {
  cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
  cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
  %0 = memref.alloc() : memref<2xf32>
  test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
    %1 = memref.alloca() : memref<2xf32>
    test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
    %tmp1 = math.exp %gen1_arg0 : f32
    test.region_yield %tmp1 : f32
  }
  cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: ^bb1:
// CHECK: cf.br ^bb3([[ARG1]], %false
// CHECK: ^bb2:
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: test.region_buffer_based
// CHECK: memref.alloca()
// CHECK: test.buffer_based
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: test.region_yield
// CHECK: }
// CHECK: cf.br ^bb3([[ALLOC]], %true
// CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND:%.+]]: i1):
// CHECK: test.copy
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND]])

// -----

func.func @nestedRegionControlFlowAlloca(
  %arg0 : index, %arg1 : index, %arg2: f32) -> memref<?x?xf32> {
  %0 = arith.cmpi eq, %arg0, %arg1 : index
  %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
  %2 = scf.if %0 -> (memref<?x?xf32>) {
    scf.yield %1 : memref<?x?xf32>
  } else {
    %3 = memref.alloca(%arg0, %arg1) : memref<?x?xf32>
    %c0 = arith.constant 0 : index
    memref.store %arg2, %3[%c0, %c0] : memref<?x?xf32>
    scf.yield %1 : memref<?x?xf32>
  }
  return %2 : memref<?x?xf32>
}

// CHECK-LABEL: func @nestedRegionControlFlowAlloca
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: [[V0:%.+]]:2 = scf.if
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: memref.alloca(
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
// CHECK: return [[V1]]

// -----

// Test Case: structured control-flow loop using a nested alloc.
// The iteration argument %iterBuf has to be freed before yielding %3 to avoid
// memory leaks.

func.func @loop_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  "test.read_buffer"(%0) : (memref<2xf32>) -> ()
  %1 = scf.for %i = %lb to %ub step %step
      iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = memref.alloc() : memref<2xf32>
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_alloc
// CHECK-SAME: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: index, [[ARG2:%.+]]: index, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
// CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]])
// CHECK-NOT: retain
// CHECK: scf.yield [[ALLOC1]], %true
// CHECK: test.copy
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
// CHECK-NOT: retain
// CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)
// CHECK-NOT: retain

// -----

// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop and the
// backedges only use the iteration arguments (or one of their aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges.
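//
// Inside the loop body, the pass conditionally frees the incoming iteration
// buffer while retaining whatever the nested if yields; a sketch (assuming
// the i1 ownership iter_arg convention, illustrative names):
//
//   %own = bufferization.dealloc (%base : memref<f32>) if (%iter_owned)
//            retain (%next : memref<2xf32>)
//   %agg = arith.ori %own, %next_owned : i1
//   scf.yield %next, %agg : memref<2xf32>, i1
//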
func.func @loop_nested_if_no_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
      iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      scf.yield %0 : memref<2xf32>
    } else {
      scf.yield %iterBuf : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_nested_if_no_alloc
// CHECK-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
// CHECK: [[V1:%.+]]:2 = scf.if
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: scf.yield [[ARG6]], %false
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]]) retain ([[V1]]#0 :
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
// CHECK: scf.yield [[V1]]#0, [[OWN_AGG]]
// CHECK: test.copy
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1)

// TODO: we know statically that the inner dealloc will never deallocate
// anything, i.e., we can optimize it away

// -----

// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.

func.func @loop_nested_if_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>) -> memref<2xf32> {
  %0 = memref.alloc() : memref<2xf32>
  %1 = scf.for %i = %lb to %ub step %step
      iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = arith.cmpi eq, %i, %ub : index
    %3 = scf.if %2 -> (memref<2xf32>) {
      %4 = memref.alloc() : memref<2xf32>
      scf.yield %4 : memref<2xf32>
    } else {
      scf.yield %0 : memref<2xf32>
    }
    scf.yield %3 : memref<2xf32>
  }
  return %1 : memref<2xf32>
}

// CHECK-LABEL: func @loop_nested_if_alloc
// CHECK-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG5:%.+]] = [[ARG3]], [[ARG6:%.+]] = %false
// CHECK: [[V1:%.+]]:2 = scf.if
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK: scf.yield [[ALLOC1]], %true
// CHECK: scf.yield [[ALLOC]], %false
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG5]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG6]]) retain ([[V1]]#0 :
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
// CHECK: scf.yield [[V1]]#0, [[OWN_AGG]]
// CHECK: }
// CHECK: [[V2:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
// CHECK: return [[V2]]

// -----

// Test Case: several nested structured control-flow loops with a deeply nested
// buffer allocation inside an if operation.
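//
// Each scf.for nesting level carries its own i1 ownership iter_arg and
// re-aggregates it on every backedge; schematically (a sketch, not the
// exact IR matched below):
//
//   %res:2 = scf.for %i = %lb to %ub step %step
//       iter_args(%buf = %init, %owned = %false) -> (memref<2xf32>, i1) {
//     ...
//     %agg = arith.ori %freed, %inner_owned : i1
//     scf.yield %inner_buf, %agg : memref<2xf32>, i1
//   }
//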
func.func @loop_nested_alloc(
  %lb: index,
  %ub: index,
  %step: index,
  %buf: memref<2xf32>,
  %res: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  "test.read_buffer"(%0) : (memref<2xf32>) -> ()
  %1 = scf.for %i = %lb to %ub step %step
      iter_args(%iterBuf = %buf) -> memref<2xf32> {
    %2 = scf.for %i2 = %lb to %ub step %step
        iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
      %3 = scf.for %i3 = %lb to %ub step %step
          iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
        %4 = memref.alloc() : memref<2xf32>
        "test.read_buffer"(%4) : (memref<2xf32>) -> ()
        %5 = arith.cmpi eq, %i, %ub : index
        %6 = scf.if %5 -> (memref<2xf32>) {
          %7 = memref.alloc() : memref<2xf32>
          scf.yield %7 : memref<2xf32>
        } else {
          scf.yield %iterBuf3 : memref<2xf32>
        }
        scf.yield %6 : memref<2xf32>
      }
      scf.yield %3 : memref<2xf32>
    }
    scf.yield %2 : memref<2xf32>
  }
  test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @loop_nested_alloc
// CHECK: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>, {{.*}}: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG6:%.+]] = [[ARG3]], [[ARG7:%.+]] = %false
// CHECK: [[V1:%.+]]:2 = scf.for {{.*}} iter_args([[ARG9:%.+]] = [[ARG6]], [[ARG10:%.+]] = %false
// CHECK: [[V2:%.+]]:2 = scf.for {{.*}} iter_args([[ARG12:%.+]] = [[ARG9]], [[ARG13:%.+]] = %false
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK: [[V3:%.+]]:2 = scf.if
// CHECK: [[ALLOC2:%.+]] = memref.alloc()
// CHECK: scf.yield [[ALLOC2]], %true
// CHECK: } else {
// CHECK: scf.yield [[ARG12]], %false
// CHECK: }
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG12]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG13]]) retain ([[V3]]#0 :
// CHECK: bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
// CHECK-NOT: retain
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V3]]#1
// CHECK: scf.yield [[V3]]#0, [[OWN_AGG]]
// CHECK: }
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG9]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG10]]) retain ([[V2]]#0 :
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V2]]#1
// CHECK: scf.yield [[V2]]#0, [[OWN_AGG]]
// CHECK: }
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG6]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG7]]) retain ([[V1]]#0 :
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
// CHECK: scf.yield [[V1]]#0, [[OWN_AGG]]
// CHECK: }
// CHECK: test.copy
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
// CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)

// TODO: all the retain operands could be removed by doing some more thorough analysis

// -----

func.func @affine_loop() -> f32 {
  %buffer = memref.alloc() : memref<1024xf32>
  %sum_init_0 = arith.constant 0.0 : f32
  %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = arith.addf %sum_iter, %t : f32
    affine.yield %sum_next : f32
  }
  return %res : f32
}

// CHECK-LABEL: func @affine_loop
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: affine.for {{.*}} iter_args(%arg1 = %cst)
// CHECK: affine.yield
// CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true

// -----

func.func @assumingOp(
  %arg0: !shape.witness,
  %arg2: memref<2xf32>,
  %arg3: memref<2xf32>) {
  // Confirm the alloc will be dealloc'ed in the block.
  %1 = shape.assuming %arg0 -> memref<2xf32> {
    %0 = memref.alloc() : memref<2xf32>
    "test.read_buffer"(%0) : (memref<2xf32>) -> ()
    shape.assuming_yield %arg2 : memref<2xf32>
  }
  // Confirm the alloc will be returned and dealloc'ed after its use.
  %3 = shape.assuming %arg0 -> memref<2xf32> {
    %2 = memref.alloc() : memref<2xf32>
    shape.assuming_yield %2 : memref<2xf32>
  }
  test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>)
  return
}

// CHECK-LABEL: func @assumingOp
// CHECK: ({{.*}}, [[ARG1:%.+]]: memref<2xf32>, {{.*}}: memref<2xf32>)
// CHECK: [[V0:%.+]]:2 = shape.assuming
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}})
// CHECK-NOT: retain
// CHECK: shape.assuming_yield [[ARG1]], %false
// CHECK: }
// CHECK: [[V1:%.+]]:2 = shape.assuming
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: shape.assuming_yield [[ALLOC]], %true
// CHECK: }
// CHECK: test.copy
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V1]]#0
// CHECK: bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[V1]]#1)
// CHECK-NOT: retain
// CHECK: bufferization.dealloc ([[BASE0]] :{{.*}}) if ([[V0]]#1)
// CHECK-NOT: retain
// CHECK: return

// -----

// Test Case: The op "test.one_region_with_recursive_memory_effects" does not
// implement the RegionBranchOpInterface. This is allowed during buffer
// deallocation because the operation's region does not deal with any MemRef
// values.

func.func @noRegionBranchOpInterface() {
  %0 = "test.one_region_with_recursive_memory_effects"() ({
    %1 = "test.one_region_with_recursive_memory_effects"() ({
      %2 = memref.alloc() : memref<2xi32>
      "test.read_buffer"(%2) : (memref<2xi32>) -> ()
      "test.return"() : () -> ()
    }) : () -> (i32)
    "test.return"() : () -> ()
  }) : () -> (i32)
  "test.return"() : () -> ()
}

// -----

// Test Case: The second op "test.one_region_with_recursive_memory_effects" does
// not implement the RegionBranchOpInterface but has buffer semantics. This is
// not allowed during buffer deallocation.

func.func @noRegionBranchOpInterface() {
  %0 = "test.one_region_with_recursive_memory_effects"() ({
    // expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
    %1 = "test.one_region_with_recursive_memory_effects"() ({
      %2 = memref.alloc() : memref<2xi32>
      "test.read_buffer"(%2) : (memref<2xi32>) -> ()
      "test.return"(%2) : (memref<2xi32>) -> ()
    }) : () -> (memref<2xi32>)
    "test.return"() : () -> ()
  }) : () -> (i32)
  "test.return"() : () -> ()
}

// -----

func.func @while_two_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
  scf.while (%arg1 = %a, %arg2 = %a) : (memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
    // This op has a side effect, but it's not an allocate/free side effect.
    %0 = "test.side_effect_op"() {effects = [{effect="read"}]} : () -> i1
    scf.condition(%0) %arg1, %arg2 : memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %arg1, %b : memref<?xf32>, memref<?xf32>
  }
  return
}

// CHECK-LABEL: func @while_two_arg
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: [[V0:%.+]]:4 = scf.while ({{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false{{[0-9_]*}})
// CHECK: scf.condition
// CHECK: ^bb0([[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: i1, [[ARG4:%.+]]: i1):
// CHECK: [[ALLOC1:%.+]] = memref.alloc(
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG2]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG4]]) retain ([[ARG1]] :
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[ARG3]]
// CHECK: scf.yield [[ARG1]], [[ALLOC1]], [[OWN_AGG]], %true
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#2, [[V0]]#3)

// -----

func.func @while_three_arg(%arg0: index) {
  %a = memref.alloc(%arg0) : memref<?xf32>
  scf.while (%arg1 = %a, %arg2 = %a, %arg3 = %a) : (memref<?xf32>, memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>, memref<?xf32>) {
    // This op has a side effect, but it's not an allocate/free side effect.
    %0 = "test.side_effect_op"() {effects = [{effect="read"}]} : () -> i1
    scf.condition(%0) %arg1, %arg2, %arg3 : memref<?xf32>, memref<?xf32>, memref<?xf32>
  } do {
  ^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>, %arg3: memref<?xf32>):
    %b = memref.alloc(%arg0) : memref<?xf32>
    %q = memref.alloc(%arg0) : memref<?xf32>
    scf.yield %q, %b, %arg2 : memref<?xf32>, memref<?xf32>, memref<?xf32>
  }
  return
}

// CHECK-LABEL: func @while_three_arg
// CHECK: [[ALLOC:%.+]] = memref.alloc(
// CHECK: [[V0:%.+]]:6 = scf.while ({{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = [[ALLOC]], {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false{{[0-9_]*}}, {{.*}} = %false
// CHECK: scf.condition
// CHECK: ^bb0([[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: memref<?xf32>, [[ARG4:%.+]]: i1, [[ARG5:%.+]]: i1, [[ARG6:%.+]]: i1):
// CHECK: [[ALLOC1:%.+]] = memref.alloc(
// CHECK: [[ALLOC2:%.+]] = memref.alloc(
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG1]]
// CHECK: [[BASE2:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG3]]
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[BASE0]], [[BASE2]] :{{.*}}) if ([[ARG4]], [[ARG6]]) retain ([[ARG2]] :
// CHECK: [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[ARG5]]
// CHECK: scf.yield [[ALLOC2]], [[ALLOC1]], [[ARG2]], %true{{[0-9_]*}}, %true{{[0-9_]*}}, [[OWN_AGG]] :
// CHECK: }
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
// CHECK: [[BASE2:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#2
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]], [[BASE2]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#3, [[V0]]#4, [[V0]]#5)

// TODO: better alias analysis could simplify the dealloc inside the body further
// -----

// Memref allocated in `then` region and passed back to the parent if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_1(%arg0: memref<10xf32>) -> memref<10xf32> {
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %arg0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_1
// CHECK-SAME: ([[ARG0:%.*]]: memref<10xf32>)
// CHECK: [[V0:%.+]]:2 = affine.if
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: affine.yield [[ALLOC]], %true
// CHECK: affine.yield [[ARG0]], %false
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
// CHECK: return [[V1]]

// TODO: the dealloc could be optimized away since the memref to be deallocated
// either aliases with V1 or the condition is false

// -----

// Memref allocated before parent IfOp and used in `then` region.
// Expected result: deallocation should happen after affine.if op.
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_2() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    affine.yield %alloc0 : memref<10xf32>
  } else {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_2
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: [[V0:%.+]]:2 = affine.if
// CHECK: affine.yield [[ALLOC]], %false
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK: affine.yield [[ALLOC1]], %true
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
// CHECK: return [[V1]]

// -----

// Memref allocated before parent IfOp and used in `else` region.
// Expected result: deallocation should happen after affine.if op.
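//
// The conditional dealloc after the affine.if roughly looks as follows
// (a sketch with illustrative names; %v:2 stands for the affine.if results):
//
//   %base, %off, %size, %stride = memref.extract_strided_metadata %v#0
//       : memref<10xf32> -> memref<f32>, index, index, index
//   %keep = bufferization.dealloc (%alloc0, %base : memref<10xf32>, memref<f32>)
//             if (%true, %v#1) retain (%ret : memref<10xf32>)
//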
#set = affine_set<() : (0 >= 0)>
func.func @test_affine_if_3() -> memref<10xf32> {
  %alloc0 = memref.alloc() : memref<10xf32>
  %0 = affine.if #set() -> memref<10xf32> {
    %alloc = memref.alloc() : memref<10xf32>
    affine.yield %alloc : memref<10xf32>
  } else {
    affine.yield %alloc0 : memref<10xf32>
  }
  return %0 : memref<10xf32>
}

// CHECK-LABEL: func @test_affine_if_3
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK: [[V0:%.+]]:2 = affine.if
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK: affine.yield [[ALLOC1]], %true
// CHECK: affine.yield [[ALLOC]], %false
// CHECK: [[V1:%.+]] = scf.if [[V0]]#1
// CHECK: scf.yield [[V0]]#0
// CHECK: [[CLONE:%.+]] = bufferization.clone [[V0]]#0
// CHECK: scf.yield [[CLONE]]
// CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
// CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
// CHECK: return [[V1]]