// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation \
// RUN:  -buffer-deallocation-simplification -split-input-file %s | FileCheck %s
// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null

// RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file > /dev/null

// The tests in this file exercise buffer deallocation across unstructured
// control flow built from cf.br / cf.cond_br. Only the first invocation above
// is verified with FileCheck; the other two only have to run successfully.

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation expected behavior: bb2 contains an AllocOp which is
// passed to bb3. In the latter block, there should be a deallocation.
// Since bb1 does not contain an adequate alloc, the deallocation has to be
// made conditional on the branch taken in bb0.
//
// NOTE: The diagram uses conceptual block names that do not match the labels
// in the IR below: here the allocation lives in ^bb1, the merge block is
// ^bb2, and the entry block forwards %arg1 to ^bb2 directly.

func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  // Either forward the function argument directly or allocate in ^bb1.
  cf.cond_br %arg0, ^bb2(%arg1 : memref<2xf32>), ^bb1
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  // %1 is either %arg1 or the buffer allocated in ^bb1.
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// The checks expect ^bb2 to receive an additional i1 operand (%false on the
// edge forwarding the function argument, %true on the edge forwarding the
// allocation) and that operand to guard the dealloc placed before the return.

// CHECK-LABEL: func @condBranch
// CHECK-SAME: ([[ARG0:%.+]]: i1,
// CHECK-SAME: [[ARG1:%.+]]: memref<2xf32>,
// CHECK-SAME: [[ARG2:%.+]]: memref<2xf32>)
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.cond_br{{.*}}, ^bb2([[ARG1]], %false{{[0-9_]*}} :{{.*}}), ^bb1
// CHECK: ^bb1:
// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2(%[[ALLOC1]], %true
// CHECK-NEXT: ^bb2([[ALLOC2:%.+]]: memref<2xf32>, [[COND1:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC2]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND1]])
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0
//   /   \
//  bb1  bb2 <- Initial position of AllocOp
//   \   /
//    bb3
// BufferDeallocation
// expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed
// to bb3 via the block argument %2, it is currently required to allocate a
// temporary buffer for %2 that gets copies of %arg0 and %1 with their
// appropriate shape dimensions. The copy buffer deallocation will be applied
// to %2 in block bb3.
//
// NOTE(review): the paragraph above appears to describe an older, copy-based
// strategy. The checks below only expect the allocation (or the function
// argument) to be forwarded to the merge block together with an i1 flag, and
// a dealloc guarded by that flag before the return. In the IR below the
// allocation lives in ^bb1 and the merge block is ^bb2.

func.func @condBranchDynamicType(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb2(%arg1 : memref<?xf32>), ^bb1(%arg3: index)
^bb1(%0: index):
  // The dynamic size of the allocation arrives as a block argument.
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cf.br ^bb2(%1 : memref<?xf32>)
^bb2(%2: memref<?xf32>):
  test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// The cond_br check refers to the captured [[ARG1]] rather than a literal SSA
// name so that it stays in sync with the signature match above it, like the
// other tests in this file.

// CHECK-LABEL: func @condBranchDynamicType
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: index)
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.cond_br{{.*}}^bb2([[ARG1]], %false{{[0-9_]*}} :{{.*}}), ^bb1
// CHECK: ^bb1([[IDX:%.*]]:{{.*}})
// CHECK: [[ALLOC1:%.*]] = memref.alloc([[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2([[ALLOC1]], %true
// CHECK-NEXT: ^bb2([[ALLOC3:%.*]]:{{.*}}, [[COND:%.+]]:{{.*}})
// CHECK: test.copy([[ALLOC3]],
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC3]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return

// -----

// Test case: See above.

// Same control flow as @condBranchDynamicType, but with unranked buffers: the
// ranked allocation is cast to memref<*xf32> before it is forwarded to ^bb2.

func.func @condBranchUnrankedType(
  %arg0: i1,
  %arg1: memref<*xf32>,
  %arg2: memref<*xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb2(%arg1 : memref<*xf32>), ^bb1(%arg3: index)
^bb1(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  // Only the unranked view of the allocation leaves this block.
  %2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
  test.buffer_based in(%arg1: memref<*xf32>) out(%2: memref<*xf32>)
  cf.br ^bb2(%2 : memref<*xf32>)
^bb2(%3: memref<*xf32>):
  test.copy(%3, %arg2) : (memref<*xf32>, memref<*xf32>)
  return
}

// In the merge block the checks expect a memref.reinterpret_cast of the
// unranked block argument before its base buffer is extracted and
// conditionally deallocated. Note that the CAST variable is deliberately
// rebound in ^bb2.

// CHECK-LABEL: func @condBranchUnrankedType
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<*xf32>, [[ARG2:%.+]]: memref<*xf32>, [[ARG3:%.+]]: index)
// CHECK-NOT: bufferization.dealloc
// CHECK: cf.cond_br{{.*}}^bb2([[ARG1]], %false{{[0-9_]*}} :{{.*}}), ^bb1
// CHECK: ^bb1([[IDX:%.*]]:{{.*}})
// CHECK: [[ALLOC1:%.*]] = memref.alloc([[IDX]])
// CHECK-NEXT: [[CAST:%.+]] = memref.cast [[ALLOC1]]
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2([[CAST]], %true
// CHECK-NEXT: ^bb2([[ALLOC3:%.*]]:{{.*}}, [[COND:%.+]]:{{.*}})
// CHECK: test.copy([[ALLOC3]],
// CHECK-NEXT: [[CAST:%.+]] = memref.reinterpret_cast [[ALLOC3]]
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[CAST]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return

// TODO: we can get rid of first dealloc by doing some must-alias analysis
// NOTE(review): the checks above match a single dealloc only, so it is unclear
// which "first dealloc" this TODO refers to; confirm whether it is still
// applicable.

// -----

// Test Case:
//      bb0
//     /    \
//   bb1    bb2 <- Initial position of AllocOp
//    |     /  \
//    |   bb3  bb4
//    |     \  /
//    \     bb5
//     \    /
//       bb6
//        |
//       bb7
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2.
// Since the dynamic type is passed to
// bb5 via the block argument %2 and to bb6 via block argument %3, it is
// currently required to pass along the condition under which the newly
// allocated buffer should be deallocated, since the path via bb1 does not
// allocate a buffer.

func.func @condBranchDynamicTypeNested(
  %arg0: i1,
  %arg1: memref<?xf32>,
  %arg2: memref<?xf32>,
  %arg3: index) {
  cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
  // Path without an allocation: the function argument is forwarded as is.
  cf.br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
  %1 = memref.alloc(%0) : memref<?xf32>
  test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
  cf.cond_br %arg0, ^bb3, ^bb4
^bb3:
  cf.br ^bb5(%1 : memref<?xf32>)
^bb4:
  cf.br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
  cf.br ^bb6(%2 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
  cf.br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
  test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
  return
}

// The checks follow the i1 flag that accompanies the buffer through every
// branch: %false from ^bb1, a value computed in ^bb2 for the allocation, and
// the result of the retaining dealloc in ^bb6 for the final edge to ^bb7. The
// OWN and BASE variables are rebound further down on purpose.

// CHECK-LABEL: func @condBranchDynamicTypeNested
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<?xf32>, [[ARG2:%.+]]: memref<?xf32>, [[ARG3:%.+]]: index)
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb6([[ARG1]], %false{{[0-9_]*}} :
// CHECK: ^bb2([[IDX:%.*]]:{{.*}})
// CHECK: [[ALLOC1:%.*]] = memref.alloc([[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: [[OWN:%.+]] = arith.select [[ARG0]], [[ARG0]], [[NOT_ARG0]]
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.cond_br{{.*}}, ^bb3, ^bb4
// CHECK-NEXT: ^bb3:
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb5([[ALLOC1]], [[OWN]]
// CHECK-NEXT: ^bb4:
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb5([[ALLOC1]], [[OWN]]
// CHECK-NEXT: ^bb5([[ALLOC2:%.*]]:{{.*}}, [[COND1:%.+]]:{{.*}})
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb6([[ALLOC2]], [[COND1]]
// CHECK-NEXT: ^bb6([[ALLOC4:%.*]]:{{.*}}, [[COND2:%.+]]:{{.*}})
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC4]]
// CHECK-NEXT: [[OWN:%.+]]:2 = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND2]]) retain ([[ALLOC4]], [[ARG2]] :
// CHECK: cf.br ^bb7([[ALLOC4]], [[OWN]]#0
// CHECK-NEXT: ^bb7([[ALLOC5:%.*]]:{{.*}}, [[COND3:%.+]]:{{.*}})
// CHECK: test.copy
// CHECK: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC5]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND3]])
// CHECK-NEXT: return

// TODO: the dealloc in bb5 can be optimized away by adding another
// canonicalization pattern
// NOTE(review): the checks above match the retaining dealloc in ^bb6 and
// forbid any dealloc in ^bb5, so the block name in this TODO looks outdated;
// confirm.

// -----

// Test Case:
//    bb0
//   /   \
//  |    bb1 <- Initial position of AllocOp
//   \   /
//    bb2
// BufferDeallocation expected behavior: It should insert a DeallocOp at the
// exit block after CopyOp since %1 is an alias for %0 and %arg1.

func.func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  // The false edge goes straight from the entry block to the merge block.
  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// The checks expect %false to accompany the function argument, %true to
// accompany the allocation, and the merged flag to guard the dealloc that
// follows test.copy.

// CHECK-LABEL: func @criticalEdge
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.cond_br{{.*}}, ^bb1, ^bb2([[ARG1]], %false
// CHECK: [[ALLOC1:%.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb2([[ALLOC1]], %true
// CHECK-NEXT: ^bb2([[ALLOC2:%.+]]:{{.*}}, [[COND:%.+]]: {{.*}})
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC2]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of AllocOp
//   /   \
//  |    bb1
//   \   /
//    bb2
// BufferDeallocation expected behavior: It only inserts a DeallocOp at the
// exit block after CopyOp since %1 is an alias for %0 and %arg1.
// NOTE(review): the checks for @invCriticalEdge additionally match a dealloc
// in the entry block guarded by the negated branch condition, so "only" above
// may be outdated; confirm.

func.func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  // The allocation happens before the branch, in the entry block.
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
  cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
  test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// The checks expect two deallocs: one in the entry block guarded by the
// negated branch condition (the edge on which %0 is not forwarded), and one
// after test.copy guarded by the flag passed into ^bb2.

// CHECK-LABEL: func @invCriticalEdge
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: bufferization.dealloc ([[ALLOC]] : {{.*}}) if ([[NOT_ARG0]])
// CHECK-NEXT: cf.cond_br{{.*}}^bb1, ^bb2([[ARG1]], %false
// CHECK-NEXT: ^bb1:
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK: cf.br ^bb2([[ALLOC]], [[ARG0]]
// CHECK-NEXT: ^bb2([[ALLOC1:%.+]]:{{.*}}, [[COND:%.+]]:{{.*}})
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[ALLOC1]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND]])
// CHECK-NEXT: return

// -----

// Test Case:
//    bb0 <- Initial position of the first AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: It only inserts two missing
// DeallocOps in the exit block. %5 is an alias for %0. Therefore, the
// DeallocOp for %0 should occur after the last BufferBasedOp. The Dealloc for
// %7 should happen after CopyOp.

func.func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  // Both successors receive %0 and %arg1, but in swapped operand positions.
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  %7 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
  test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// The checks expect one i1 flag per forwarded memref on each edge, an
// unconditional dealloc of the allocation made in ^bb3, and a combined dealloc
// of both ^bb3 block arguments guarded by their flags.

// CHECK-LABEL: func @ifElse
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NOT: bufferization.dealloc
// CHECK-NOT: bufferization.clone
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: cf.cond_br {{.*}}^bb1([[ARG1]], [[ALLOC0]], %false{{[0-9_]*}}, [[ARG0]] : {{.*}}), ^bb2([[ALLOC0]], [[ARG1]], [[NOT_ARG0]], %false{{[0-9_]*}} : {{.*}})
// CHECK: ^bb3([[A0:%.+]]:{{.*}}, [[A1:%.+]]:{{.*}}, [[COND0:%.+]]: i1, [[COND1:%.+]]: i1):
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: test.copy
// CHECK-NEXT: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
// CHECK-NEXT: bufferization.dealloc ([[ALLOC1]] : {{.*}}) if (%true
// CHECK-NOT: retain
// CHECK-NEXT: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[COND0]], [[COND1]])
// CHECK-NOT: retain
// CHECK-NEXT: return

// TODO: Instead of deallocating the bbarg memrefs, a slightly better analysis
// could do an
// unconditional deallocation on ALLOC0 and move it before the
// test.copy (dealloc of ALLOC1 would remain after the copy)

// -----

// Test Case: No users for buffer in if-else CFG
//    bb0 <- Initial position of AllocOp
//   /   \
//  bb1  bb2
//   \   /
//    bb3
// BufferDeallocation expected behavior: It only inserts a missing DeallocOp
// in the exit block since %5 or %6 are the latest aliases of %0.

func.func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
  // Neither %5 nor %6 is used; the copy reads the function argument instead.
  test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// Even though the ^bb3 block arguments are unused, the checks expect both of
// them to be deallocated after test.copy, each guarded by its own flag.

// CHECK-LABEL: func @ifElseNoUsers
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: cf.cond_br {{.*}}^bb1([[ARG1]], [[ALLOC]], %false{{[0-9_]*}}, [[ARG0]] : {{.*}}), ^bb2([[ALLOC]], [[ARG1]], [[NOT_ARG0]], %false{{[0-9_]*}} : {{.*}})
// CHECK: ^bb3([[A0:%.+]]:{{.*}}, [[A1:%.+]]:{{.*}}, [[COND0:%.+]]: i1, [[COND1:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
// CHECK-NEXT: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[COND0]], [[COND1]])
// CHECK-NOT: retain
// CHECK-NEXT: return

// TODO: slightly
// better analysis could just insert an unconditional dealloc on %0

// -----

// Test Case:
//      bb0 <- Initial position of the first AllocOp
//     /   \
//   bb1   bb2
//    |   /   \
//    |  bb3  bb4
//    \   \   /
//     \   /
//      bb5 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: Two missing DeallocOps should be
// inserted in the exit block.

func.func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
  cf.cond_br %arg0,
    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
  cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
  cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
  // %3 is defined in ^bb2 and used directly here, not via a block argument.
  cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
  cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
  %9 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
  test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
  return
}

// The checks only pin down the entry block and the exit block ^bb5; the
// intermediate blocks are skipped. In ^bb5 they expect an unconditional
// dealloc of the local allocation followed by a flag-guarded dealloc of both
// block arguments.

// CHECK-LABEL: func @ifElseNested
// CHECK-SAME: ([[ARG0:%.+]]: i1, [[ARG1:%.+]]: memref<2xf32>, [[ARG2:%.+]]: memref<2xf32>)
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: [[NOT_ARG0:%.+]] = arith.xori [[ARG0]], %true
// CHECK-NEXT: cf.cond_br {{.*}}^bb1([[ARG1]], [[ALLOC0]], %false{{[0-9_]*}}, [[ARG0]] : {{.*}}), ^bb2([[ALLOC0]], [[ARG1]], [[NOT_ARG0]], %false{{[0-9_]*}} :
// CHECK: ^bb5([[A0:%.+]]: memref<2xf32>, [[A1:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1, [[COND1:%.+]]: i1):
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: test.copy
// CHECK-NEXT: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
// CHECK-NEXT: bufferization.dealloc ([[ALLOC1]] : {{.*}}) if (%true
// CHECK-NOT: retain
// CHECK-NEXT: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[COND0]], [[COND1]])
// CHECK-NOT: retain
// CHECK-NEXT: return

// TODO: Instead of deallocating the bbarg memrefs, a slightly better analysis
// could do an unconditional deallocation on ALLOC0 and move it before the
// test.copy (dealloc of ALLOC1 would remain after the copy)

// -----

// Test Case:
//                                     bb0
//                                    /   \
// Initial pos of the 1st AllocOp -> bb1  bb2 <- Initial pos of the 2nd AllocOp
//                                    \   /
//                                     bb3
// BufferDeallocation expected behavior: We need to introduce a copy for each
// buffer since the buffers are passed to bb3. Both missing DeallocOps are
// inserted in the respective block of the allocs. The copy is freed in the exit
// block.
// NOTE(review): the checks for this test match neither a copy nor a dealloc
// in bb1/bb2; they expect both allocations to be forwarded to the exit block
// with a %true flag and a single guarded dealloc there. The description above
// looks outdated; confirm.

func.func @moving_alloc_and_inserting_missing_dealloc(
  %cond: i1,
  %arg0: memref<2xf32>,
  %arg1: memref<2xf32>) {
  cf.cond_br %cond, ^bb1, ^bb2
^bb1:
  %0 = memref.alloc() : memref<2xf32>
  test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
  cf.br ^exit(%0 : memref<2xf32>)
^bb2:
  %1 = memref.alloc() : memref<2xf32>
  // Unlike ^bb1, the fresh allocation is the input here and %arg0 the output.
  test.buffer_based in(%1: memref<2xf32>) out(%arg0: memref<2xf32>)
  cf.br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
  test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
  return
}

// Each function argument is bound to its own FileCheck variable (the signature
// match used to bind a single name three times). Both allocations are expected
// to be forwarded with a %true flag, and the ^exit block (matched as ^bb3 in
// the output) deallocates its argument guarded by that flag.

// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
// CHECK-SAME: ([[COND:%.+]]: i1, [[ARG0:%.+]]: memref<2xf32>, [[ARG1:%.+]]: memref<2xf32>)
// CHECK: ^bb1:
// CHECK: [[ALLOC0:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb3([[ALLOC0]], %true
// CHECK: ^bb2:
// CHECK: [[ALLOC1:%.+]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: cf.br ^bb3([[ALLOC1]], %true
// CHECK: ^bb3([[A0:%.+]]: memref<2xf32>, [[COND0:%.+]]: i1):
// CHECK: test.copy
// CHECK-NEXT: [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK-NEXT: bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
// CHECK-NEXT: return

// -----

// arith.select between two heap allocations within a single block: the checks
// expect both allocations to be deallocated unconditionally after the copy,
// without any retained values.

func.func @select_aliases(%arg0: index, %arg1: memref<?xi8>, %arg2: i1) {
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloc(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  test.copy(%2, %arg1) : (memref<?xi8>, memref<?xi8>)
  return
}

// CHECK-LABEL: func @select_aliases
// CHECK: [[ALLOC0:%.+]] = memref.alloc(
// CHECK: [[ALLOC1:%.+]] = memref.alloc(
// CHECK: arith.select
// CHECK: test.copy
// CHECK: bufferization.dealloc ([[ALLOC0]] : {{.*}}) if (%true
// CHECK-NOT: retain
// CHECK: bufferization.dealloc ([[ALLOC1]] : {{.*}}) if (%true
// CHECK-NOT: retain

// -----

// arith.select between a heap allocation (memref.alloc) and a stack allocation
// (memref.alloca), with the selected value forwarded to another block. The
// checks expect the heap allocation to be deallocated in the entry block while
// retaining the select result, and the resulting flag to be passed to ^bb1.

func.func @select_aliases_not_same_ownership(%arg0: index, %arg1: memref<?xi8>, %arg2: i1) {
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloca(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  cf.br ^bb1(%2 : memref<?xi8>)
^bb1(%arg3: memref<?xi8>):
  test.copy(%arg3, %arg1) : (memref<?xi8>, memref<?xi8>)
  return
}

// CHECK-LABEL: func @select_aliases_not_same_ownership
// CHECK: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: memref<?xi8>, [[ARG2:%.+]]: i1)
// CHECK: [[ALLOC0:%.+]] = memref.alloc(
// CHECK: [[ALLOC1:%.+]] = memref.alloca(
// CHECK: [[SELECT:%.+]] = arith.select
// CHECK: [[OWN:%.+]] = bufferization.dealloc ([[ALLOC0]] :{{.*}}) if (%true{{[0-9_]*}}) retain ([[SELECT]] :
// CHECK: cf.br ^bb1([[SELECT]], [[OWN]] :
// CHECK: ^bb1([[A0:%.+]]: memref<?xi8>, [[COND:%.+]]: i1)
// CHECK: test.copy
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK: bufferization.dealloc ([[BASE0]] : {{.*}}) if ([[COND]])
// CHECK-NOT: retain

// -----

// The select result is not forwarded as a block argument; it is used directly
// in the successor block, while %0 or %arg1 is forwarded depending on %arg3.
// Both cond_br edges target the same block ^bb1.

func.func @select_captured_in_next_block(%arg0: index, %arg1: memref<?xi8>, %arg2: i1, %arg3: i1) {
  %0 = memref.alloc(%arg0) : memref<?xi8>
  %1 = memref.alloca(%arg0) : memref<?xi8>
  %2 = arith.select %arg2, %0, %1 : memref<?xi8>
  cf.cond_br %arg3, ^bb1(%0 : memref<?xi8>), ^bb1(%arg1 : memref<?xi8>)
^bb1(%arg4: memref<?xi8>):
  test.copy(%arg4, %2) : (memref<?xi8>, memref<?xi8>)
  return
}

// CHECK-LABEL: func @select_captured_in_next_block
// CHECK: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: memref<?xi8>, [[ARG2:%.+]]: i1, [[ARG3:%.+]]: i1)
// CHECK: [[ALLOC0:%.+]] = memref.alloc(
// CHECK: [[ALLOC1:%.+]] = memref.alloca(
// CHECK: [[SELECT:%.+]] = arith.select
// CHECK: [[OWN0:%.+]]:2 = bufferization.dealloc ([[ALLOC0]] :{{.*}}) if ([[ARG3]]) retain ([[ALLOC0]], [[SELECT]] :
// CHECK: [[NOT_ARG3:%.+]] = arith.xori [[ARG3]], %true
// CHECK: [[OWN1:%.+]] = bufferization.dealloc ([[ALLOC0]] :{{.*}}) if ([[NOT_ARG3]]) retain ([[SELECT]] :
// CHECK: [[MERGED_OWN:%.+]] = arith.select [[ARG3]], [[OWN0]]#1, [[OWN1]]
// CHECK: cf.cond_br{{.*}}^bb1([[ALLOC0]], [[OWN0]]#0 :{{.*}}), ^bb1([[ARG1]], %false
// CHECK: ^bb1([[A0:%.+]]: memref<?xi8>, [[COND:%.+]]: i1)
// CHECK: test.copy
// CHECK: [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[SELECT]]
// CHECK: [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
// CHECK: bufferization.dealloc ([[BASE0]], [[BASE1]] : {{.*}}) if ([[MERGED_OWN]], [[COND]])

// There are two interesting parts here:
// * The dealloc condition of %0 in the second block should be the corresponding
// result of the dealloc operation of the first block, because %0 has unknown
// ownership status and thus would otherwise require a clone in the first
// block.
// * The dealloc of the first block must make sure that the branch condition and
// respective retained values are handled correctly, i.e., only the ones for the
// actual branch taken have to be retained.

// -----

// The blocks appear in a textual order that differs from execution order: ^bb2
// is listed first and reads %alloc, but the entry block branches to ^bb1,
// which defines %alloc and only then branches to ^bb2.

func.func @blocks_not_preordered_by_dominance() {
  cf.br ^bb1
^bb2:
  "test.read_buffer"(%alloc) : (memref<2xi32>) -> ()
  return
^bb1:
  %alloc = memref.alloc() : memref<2xi32>
  cf.br ^bb2
}

// The checks expect the block order to be preserved and an unconditional
// dealloc to be placed right after the read in the block that returns.

// CHECK-LABEL: func @blocks_not_preordered_by_dominance
// CHECK-NEXT: [[TRUE:%.+]] = arith.constant true
// CHECK-NEXT: cf.br [[BB1:\^.+]]
// CHECK-NEXT: [[BB2:\^[a-zA-Z0-9_]+]]:
// CHECK-NEXT: "test.read_buffer"([[ALLOC:%[a-zA-Z0-9_]+]])
// CHECK-NEXT: bufferization.dealloc ([[ALLOC]] : {{.*}}) if ([[TRUE]])
// CHECK-NOT: retain
// CHECK-NEXT: return
// CHECK-NEXT: [[BB1]]:
// CHECK-NEXT: [[ALLOC]] = memref.alloc()
// CHECK-NEXT: cf.br [[BB2]]
// CHECK-NEXT: }