// RUN: mlir-opt %s -convert-scf-to-cf -test-arm-sme-tile-allocation -split-input-file -verify-diagnostics | FileCheck %s
// RUN: mlir-opt %s -convert-scf-to-cf -test-arm-sme-tile-allocation=dump-tile-live-ranges -mlir-disable-threading -split-input-file -verify-diagnostics 2>&1 >/dev/null | FileCheck %s --check-prefix=CHECK-LIVE-RANGE

// This file tests some simple aspects of using liveness in the SME tile allocator.
// Note: We use -convert-scf-to-cf first as the tile allocator expects CF, but
// some of these tests are written in SCF (to make things easier to follow).

// A single `arm_sme.zero` feeds two `arm_sme.insert_tile_slice` ops. The checks
// below expect the zero to be duplicated so that each user gets its own tile
// (IDs 0 and 1).

// CHECK-LIVE-RANGE-LABEL: @constant_with_multiple_users
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE:      S  arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: || arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |E test.some_use
// CHECK-LIVE-RANGE-NEXT: E  test.some_use

// CHECK-LABEL: @constant_with_multiple_users(
// CHECK-SAME: %[[VECTOR_A:.*]]: vector<[4]xf32>, %[[VECTOR_B:.*]]: vector<[4]xf32>
func.func @constant_with_multiple_users(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %index: index) {
  // CHECK-NEXT: %[[ZERO_TILE_0:.*]] = arm_sme.zero {tile_id = 0 : i32} : vector<[4]x[4]xf32>
  // CHECK-NEXT: %[[ZERO_TILE_1:.*]] = arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
  // CHECK-NEXT: %[[INSERT_TILE_1:.*]] = arm_sme.insert_tile_slice %[[VECTOR_A]], %[[ZERO_TILE_1]][%{{.*}}] {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
  // CHECK-NEXT: %[[INSERT_TILE_0:.*]] = arm_sme.insert_tile_slice %[[VECTOR_B]], %[[ZERO_TILE_0]][%{{.*}}] {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
  %zero = arm_sme.zero : vector<[4]x[4]xf32>
  %tile_a = arm_sme.insert_tile_slice %a, %zero[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
  %tile_b = arm_sme.insert_tile_slice %b, %zero[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
  "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// Like the test above, but the shared tile is a block argument rather than a
// constant. An error diagnostic is expected here (a tile move would be
// required).
// NOTE(review): the diagnostic string below (including the spelling "virtial")
// presumably mirrors the message emitted by the pass verbatim; do not correct
// the spelling in this file alone.

// CHECK-LIVE-RANGE-LABEL: @value_with_multiple_users
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: || arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |E test.some_use
// CHECK-LIVE-RANGE-NEXT: E  test.some_use

// expected-note@below {{tile operand is: <block argument> of type 'vector<[4]x[4]xf32>'}}
func.func @value_with_multiple_users(%tile: vector<[4]x[4]xf32>, %a: vector<[4]xf32>, %b: vector<[4]xf32>, %index: index) {
  // expected-error@below {{op tile operand allocated to different SME virtial tile (move required)}}
  %tile_a = arm_sme.insert_tile_slice %a, %tile[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
  %tile_b = arm_sme.insert_tile_slice %b, %tile[%index] : vector<[4]xf32> into vector<[4]x[4]xf32>
  "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// Four tiles are live at once and then die, after which four more tiles are
// created. The checks below expect the second group to reuse tile IDs 0-3.

// CHECK-LIVE-RANGE-LABEL: @reuse_tiles_after_initial_use
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S    arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |S   arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: ||S  arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: E||| test.some_use
// CHECK-LIVE-RANGE-NEXT:  E|| test.some_use
// CHECK-LIVE-RANGE-NEXT:   E| test.some_use
// CHECK-LIVE-RANGE-NEXT:    E test.some_use
// CHECK-LIVE-RANGE-NEXT: S    arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |S   arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: ||S  arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: |||| test.dummy
// CHECK-LIVE-RANGE-NEXT: E||| test.some_use
// CHECK-LIVE-RANGE-NEXT:  E|| test.some_use
// CHECK-LIVE-RANGE-NEXT:   E| test.some_use
// CHECK-LIVE-RANGE-NEXT:    E test.some_use

// CHECK-LABEL: @reuse_tiles_after_initial_use
func.func @reuse_tiles_after_initial_use() {
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32}
  // CHECK: arm_sme.get_tile {tile_id = 1 : i32}
  // CHECK: arm_sme.get_tile {tile_id = 2 : i32}
  // CHECK: arm_sme.get_tile {tile_id = 3 : i32}
  %tile_a = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tile_b = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tile_c = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tile_d = arm_sme.get_tile : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_c) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_d) : (vector<[4]x[4]xf32>) -> ()
  // CHECK: arm_sme.zero {tile_id = 0 : i32}
  // CHECK: arm_sme.zero {tile_id = 1 : i32}
  // CHECK: arm_sme.zero {tile_id = 2 : i32}
  // CHECK: arm_sme.zero {tile_id = 3 : i32}
  %tile_1 = arm_sme.zero : vector<[4]x[4]xf32>
  %tile_2 = arm_sme.zero : vector<[4]x[4]xf32>
  %tile_3 = arm_sme.zero : vector<[4]x[4]xf32>
  %tile_4 = arm_sme.zero : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.some_use"(%tile_1) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_2) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_3) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_4) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// Two tiles are defined in ^bb0 and only used in ^bb3. The live range dump
// below shows both ranges covering the intermediate blocks ^bb1 and ^bb2.

// CHECK-LIVE-RANGE-LABEL: @tile_live_ins
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S  arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: EE cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb3:
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT:  E test.some_use

// CHECK-LABEL: @tile_live_ins
func.func @tile_live_ins()
{
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
  // CHECK: arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
  %tile_1 = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tile_2 = arm_sme.zero : vector<[4]x[4]xf32>
  cf.br ^bb1
^bb1:
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  cf.br ^bb2
^bb2:
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  cf.br ^bb3
^bb3:
  "test.some_use"(%tile_1) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tile_2) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// This is basically the same test as tile_live_ins but shows that the order of
// the blocks within the source does not relate to the liveness, which is based
// on successors and predecessors (not textual order).
//
// So %tile_1 is live on the path bb0 -> bb2 -> bb1 (and dies in bb1). The
// 'hole' when looking at the live range dump comes from the textual order
// (and would disappear if bb1 was moved before bb2 in the source).
//
// When looking at the live range dump (outside of straight-line code) it
// normally makes more sense to consider blocks in isolation (and how they
// relate to the CFG).

// CHECK-LIVE-RANGE-LABEL: @non_sequential_live_ins
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S  arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: |  test.dummy
// CHECK-LIVE-RANGE-NEXT: E  cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT:  | test.dummy
// CHECK-LIVE-RANGE-NEXT:  E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: || test.dummy
// CHECK-LIVE-RANGE-NEXT: EE cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb3:
// CHECK-LIVE-RANGE-NEXT:  | test.dummy
// CHECK-LIVE-RANGE-NEXT:  E test.some_use
// CHECK-LIVE-RANGE-NEXT:    func.return

// CHECK-LABEL: @non_sequential_live_ins
func.func @non_sequential_live_ins(%cond: i1) {
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
  // CHECK: arm_sme.zero {tile_id = 1 : i32} : vector<[4]x[4]xf32>
  %tile_1 = arm_sme.get_tile : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  cf.br ^bb2
^bb1:
  "test.some_use"(%tile_1) : (vector<[4]x[4]xf32>) -> ()
  "test.dummy"(): () -> ()
  cf.br ^bb3
^bb2:
  %tile_2 = arm_sme.zero : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  cf.cond_br %cond, ^bb1, ^bb3
^bb3:
  "test.dummy"(): () -> ()
  "test.some_use"(%tile_2) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// The two tiles are created in different branches of an `scf.if`. The checks
// below expect both of them to be assigned tile ID 0.

// CHECK-LIVE-RANGE-LABEL: @non_overlapping_branches
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb1:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.zero
// CHECK-LIVE-RANGE-NEXT: | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E cf.br

// CHECK-LABEL: @non_overlapping_branches
func.func @non_overlapping_branches(%cond: i1) {
  // CHECK: arm_sme.zero {tile_id = 0 : i32} : vector<[4]x[4]xf32>
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
  %tile = scf.if %cond -> vector<[4]x[4]xf32> {
    // ^bb1:
    %zero = arm_sme.zero : vector<[4]x[4]xf32>
    scf.yield %zero : vector<[4]x[4]xf32>
  } else {
    // ^bb2:
    %undef = arm_sme.get_tile : vector<[4]x[4]xf32>
    scf.yield %undef : vector<[4]x[4]xf32>
  }
  "test.some_use"(%tile) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// Here %vecA and %vecB are not merged into the same live range (as they are unknown values).
// This means that %vecA and %vecB are both allocated to different tiles (which is not legal).

// expected-note@below {{tile operand is: <block argument> of type 'vector<[4]x[4]xf32>'}}
func.func @overlapping_branches(%cond: i1, %vecA: vector<[4]x[4]xf32>, %vecB: vector<[4]x[4]xf32>) {
  // expected-error@below {{op tile operand allocated to different SME virtial tile (move required)}}
  %tile = scf.if %cond -> vector<[4]x[4]xf32> {
    scf.yield %vecA : vector<[4]x[4]xf32>
  } else {
    scf.yield %vecB : vector<[4]x[4]xf32>
  }
  "test.some_use"(%tile) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// CHECK-LIVE-RANGE-LABEL: @run_out_of_tiles_but_avoid_spill
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb2:
// CHECK-LIVE-RANGE-NEXT: |S    arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: ||S   arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||S  arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: ||||S arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: EEEEE cf.br

// Note in the live ranges (above) there are five tile values, but we only have four tiles.
// There is no 'real' spill as we spill the `arm_sme.zero` but are then able to clone it
// at each of its uses.

// CHECK-LABEL: @run_out_of_tiles_but_avoid_spill
func.func @run_out_of_tiles_but_avoid_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector<[4]xf32>, %d: vector<[4]xf32>) {
  %init = arm_sme.zero : vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  // Live = %init
  scf.for %i = %c0 to %c10 step %c1 {
    // CHECK: arm_sme.zero {tile_id = 1 : i32}
    // CHECK: arm_sme.zero {tile_id = 2 : i32}
    // CHECK: arm_sme.zero {tile_id = 3 : i32}
    // CHECK: arm_sme.zero {tile_id = 0 : i32}
    %tile_a, %tile_b, %tile_c, %tile_d = scf.for %j = %c0 to %c10 step %c1
      iter_args(%iter_a = %init, %iter_b = %init, %iter_c = %init, %iter_d = %init)
        -> (vector<[4]x[4]xf32>, vector<[4]x[4]xf32> , vector<[4]x[4]xf32> , vector<[4]x[4]xf32>) {
        // ^bb2:
        // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
        // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 2 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
        // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 3 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
        // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
        %new_a = arm_sme.insert_tile_slice %a, %iter_a[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
        %new_b = arm_sme.insert_tile_slice %b, %iter_b[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
        %new_c = arm_sme.insert_tile_slice %c, %iter_c[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
        %new_d = arm_sme.insert_tile_slice %d, %iter_d[%i] : vector<[4]xf32> into vector<[4]x[4]xf32>
        scf.yield %new_a, %new_b, %new_c, %new_d : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>
    }
    // Live = %init, %tile_a, %tile_b, %tile_c, %tile_d (out of tiles!)
    // This should be resolved by duplicating the arm_sme.zero (from folding
    // arm_sme.copy_tile operations inserted by the tile allocator).
    "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
    "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
    "test.some_use"(%tile_c) : (vector<[4]x[4]xf32>) -> ()
    "test.some_use"(%tile_d) : (vector<[4]x[4]xf32>) -> ()
  }
  return
}

// -----

// We should be able to avoid spills like this, but logic handling this case is
// not implemented yet. Note tile ID >= 16 means a spill/in-memory tile.

// CHECK-LIVE-RANGE-LABEL: @avoidable_spill
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb2:
// CHECK-LIVE-RANGE-NEXT: ||     test.some_use
// CHECK-LIVE-RANGE-NEXT: ||S    arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |||S   arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: ||||S  arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: |||||S arm_sme.insert_tile_slice
// CHECK-LIVE-RANGE-NEXT: ||E||| test.some_use
// CHECK-LIVE-RANGE-NEXT: || E|| test.some_use
// CHECK-LIVE-RANGE-NEXT: ||  E| test.some_use
// CHECK-LIVE-RANGE-NEXT: ||   E test.some_use
// CHECK-LIVE-RANGE-NEXT: ||     arith.addi
// CHECK-LIVE-RANGE-NEXT: EE     cf.br

// Note in the live ranges (above) there are two constant live-ins (first two ranges),
// which gives six overlapping live ranges (at the point where %tile_d is defined).
// The allocator currently will spill the first constant (which results in a real
// spill at its use), however, this could be avoided by using the knowledge that
// at the first "test.some_use" there's actually only two live ranges (so we can
// fix this by duplicating the constant).

// CHECK-LABEL: @avoidable_spill
func.func @avoidable_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector<[4]xf32>, %d: vector<[4]xf32>) {
  // CHECK: arm_sme.zero {tile_id = 16 : i32} : vector<[4]x[4]xf32>
  %zero = arm_sme.zero : vector<[4]x[4]xf32>
  %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  scf.for %i = %c0 to %c10 step %c1 {
    // So spilled here (unnecessarily).
    // The arm_sme.zero op could be moved into the loop to avoid this.
    "test.some_use"(%zero) : (vector<[4]x[4]xf32>) -> ()
    %tile_a = arm_sme.insert_tile_slice %a, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
    %tile_b = arm_sme.insert_tile_slice %b, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
    %tile_c = arm_sme.insert_tile_slice %c, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
    %tile_d = arm_sme.insert_tile_slice %d, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32>
    // %zero is still live here (due to the backedge)
    "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> ()
    "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> ()
    "test.some_use"(%tile_c) : (vector<[4]x[4]xf32>) -> ()
    "test.some_use"(%tile_d) : (vector<[4]x[4]xf32>) -> ()
  }
  return
}

// -----

// This test is a follow up to the test of the same name in `tile-allocation-copies.mlir`.
// This shows the live ranges (which are why we need to split the conditional branch).

// CHECK-LIVE-RANGE-LABEL: @cond_branch_with_backedge
// CHECK-LIVE-RANGE: ^bb1:
// CHECK-LIVE-RANGE-NEXT: ||| |     arith.cmpi
// CHECK-LIVE-RANGE-NEXT: EEE E     cf.cond_br
//
// CHECK-LIVE-RANGE-NEXT: ^[[BB3_COPIES:[[:alnum:]]+]]:
// CHECK-LIVE-RANGE-NEXT: ||| ES    arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: E|| |S    arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT:  E| ||S   arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT:   E |||S  arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT:     EEEE  cf.br
//
// It is important to note that the first three live ranges in ^bb1 do not end
// at the `cf.cond_br`; they are live-out via the backedge bb1 -> bb2 -> bb1.
// This means that if we placed the `arm_sme.copy_tile` operations before the
// `cf.cond_br` then those live ranges would not end at the copies, resulting in
// unwanted overlapping live ranges (and hence tile spills).
//
// With the conditional branch split and the copies placed in the BB3_COPIES
// block the first three live ranges end at the copy operations (as the
// BB3_COPIES block is on the path out of the loop and has no backedge). This
// means there are no overlaps and the live ranges all merge, as shown below.
//
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb1:
// CHECK-LIVE-RANGE-NEXT: |||| arith.cmpi
// CHECK-LIVE-RANGE-NEXT: EEEE cf.cond_br
//
// CHECK-LIVE-RANGE-NEXT: ^[[BB3_COPIES]]:
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT: EEEE cf.br

// CHECK-LABEL: @cond_branch_with_backedge
// CHECK-NOT: tile_id = 16
// CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 1 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 2 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.get_tile {tile_id = 3 : i32} : vector<[4]x[4]xf32>
// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
// CHECK-NOT: tile_id = 16
func.func @cond_branch_with_backedge(%slice: vector<[4]xf32>) {
  %tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
  %tileD = arm_sme.get_tile : vector<[4]x[4]xf32>
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  // Live here: %tileA, %tileB, %tileC, %tileD
  cf.br ^bb1(%c0, %tileA : index, vector<[4]x[4]xf32>)
^bb1(%currentIndex: index, %iterTile: vector<[4]x[4]xf32>):
  %continueLoop = arith.cmpi slt, %currentIndex, %c10 : index
  // Live here: %iterTile, %tileB, %tileC, %tileD
  cf.cond_br %continueLoop, ^bb2, ^bb3(%iterTile, %tileB, %tileC, %tileD : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>)
^bb2:
  // Live here: %iterTile, %tileB, %tileC, %tileD
  %nextTile = arm_sme.insert_tile_slice %slice, %iterTile[%currentIndex] : vector<[4]xf32> into vector<[4]x[4]xf32>
  %nextIndex = arith.addi %currentIndex, %c1 : index
  cf.br ^bb1(%nextIndex, %nextTile : index, vector<[4]x[4]xf32>)
^bb3(%finalTileA: vector<[4]x[4]xf32>, %finalTileB: vector<[4]x[4]xf32>, %finalTileC: vector<[4]x[4]xf32>, %finalTileD: vector<[4]x[4]xf32>):
  // Live here: %finalTileA, %finalTileB, %finalTileC, %finalTileD
  return
}

// -----

// CHECK-LIVE-RANGE-LABEL: @fill_holes_in_tile_liveness
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT: S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT:   cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: | test.dummy
// CHECK-LIVE-RANGE-NEXT: E test.some_use
// CHECK-LIVE-RANGE-NEXT:   cf.br

// Here there's a 'hole' in the liveness of %tileA (in bb1) where another value
// can reuse the tile ID assigned to %tileA. The liveness for %tileB is
// entirely within the 'hole' in %tileA's live range, so %tileB should get the
// same tile ID as %tileA.

// CHECK-LABEL: @fill_holes_in_tile_liveness
func.func @fill_holes_in_tile_liveness(%cond: i1) {
  // CHECK: arm_sme.get_tile {tile_id = [[TILE_ID_A:.*]] : i32}
  %tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
  cf.cond_br %cond, ^bb2, ^bb1
^bb1:
  // CHECK: arm_sme.get_tile {tile_id = [[TILE_ID_A]] : i32}
  %tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  "test.some_use"(%tileB) : (vector<[4]x[4]xf32>) -> ()
  cf.br ^bb3
^bb2:
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.some_use"(%tileA) : (vector<[4]x[4]xf32>) -> ()
  cf.br ^bb3
^bb3:
  return
}

// -----

// CHECK-LIVE-RANGE-LABEL: @holes_in_tile_liveness_inactive_overlaps
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S  arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E  cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT:  S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT:  | test.dummy
// CHECK-LIVE-RANGE-NEXT:  | test.some_use
// CHECK-LIVE-RANGE-NEXT:  | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT:  E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: |  test.dummy
// CHECK-LIVE-RANGE-NEXT: |  test.dummy
// CHECK-LIVE-RANGE-NEXT: |  test.dummy
// CHECK-LIVE-RANGE-NEXT: |S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E| test.some_use
// CHECK-LIVE-RANGE-NEXT:  | arm_sme.copy_tile
// CHECK-LIVE-RANGE-NEXT:  E cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb3:
// CHECK-LIVE-RANGE-NEXT:  E test.some_use
// CHECK-LIVE-RANGE-NEXT:    func.return

// This tests an edge case in inactive live ranges. The first live range is
// inactive at the start of ^bb1. If the tile allocator did not check if the
// second live range overlapped the first it would wrongly re-use tile ID 0
// (as the first live range is inactive so tile ID 0 is free). This would mean
// in ^bb2 two overlapping live ranges would have the same tile ID (bad!).

// CHECK-LABEL: @holes_in_tile_liveness_inactive_overlaps
func.func @holes_in_tile_liveness_inactive_overlaps(%cond: i1) {
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32}
  %tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
  cf.cond_br %cond, ^bb2, ^bb1
^bb1:
  // CHECK: arm_sme.get_tile {tile_id = 1 : i32}
  %tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  "test.some_use"(%tileB) : (vector<[4]x[4]xf32>) -> ()
  cf.br ^bb3(%tileB: vector<[4]x[4]xf32>)
^bb2:
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  // CHECK: arm_sme.get_tile {tile_id = 1 : i32}
  %tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
  "test.some_use"(%tileA) : (vector<[4]x[4]xf32>) -> ()
  cf.br ^bb3(%tileC: vector<[4]x[4]xf32>)
^bb3(%tile: vector<[4]x[4]xf32>):
  "test.some_use"(%tile) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// This is the same as the previous example, but changes the tile types to
// vector<[16]x[16]xi8>. This means in bb1 the allocator will need to spill the
// first live range (which is inactive).

// Note: The live ranges are the same as the previous example (so are not checked).

// CHECK-LABEL: @spill_inactive_live_range
func.func @spill_inactive_live_range(%cond: i1) {
  // CHECK: arm_sme.get_tile {tile_id = 16 : i32}
  %tileA = arm_sme.get_tile : vector<[16]x[16]xi8>
  cf.cond_br %cond, ^bb2, ^bb1
^bb1:
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32}
  %tileB = arm_sme.get_tile : vector<[16]x[16]xi8>
  "test.dummy"(): () -> ()
  "test.some_use"(%tileB) : (vector<[16]x[16]xi8>) -> ()
  cf.br ^bb3(%tileB: vector<[16]x[16]xi8>)
^bb2:
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32}
  %tileC = arm_sme.get_tile : vector<[16]x[16]xi8>
  "test.some_use"(%tileA) : (vector<[16]x[16]xi8>) -> ()
  cf.br ^bb3(%tileC: vector<[16]x[16]xi8>)
^bb3(%tile: vector<[16]x[16]xi8>):
  "test.some_use"(%tile) : (vector<[16]x[16]xi8>) -> ()
  return
}

// -----

// CHECK-LIVE-RANGE-LABEL: @reactivate_inactive_live_range
// CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
// CHECK-LIVE-RANGE: ^bb0:
// CHECK-LIVE-RANGE-NEXT: S   arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: E   cf.cond_br
// CHECK-LIVE-RANGE-NEXT: ^bb1:
// CHECK-LIVE-RANGE-NEXT:  S  arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT:  |  test.dummy
// CHECK-LIVE-RANGE-NEXT:  E  test.some_use
// CHECK-LIVE-RANGE-NEXT:     cf.br
// CHECK-LIVE-RANGE-NEXT: ^bb2:
// CHECK-LIVE-RANGE-NEXT: | S arm_sme.get_tile
// CHECK-LIVE-RANGE-NEXT: | | test.dummy
// CHECK-LIVE-RANGE-NEXT: | | test.dummy
// CHECK-LIVE-RANGE-NEXT: | E test.some_use
// CHECK-LIVE-RANGE-NEXT: E   test.some_use
// CHECK-LIVE-RANGE-NEXT:     cf.br

// Here the live range for %tileA becomes inactive in bb1 (so %tileB gets tile
// ID 0 too). Then in bb2 the live range for %tileA is reactivated as it overlaps
// with the start of %tileC's live range (which means %tileC gets tile ID 1).

// CHECK-LABEL: @reactivate_inactive_live_range
func.func @reactivate_inactive_live_range(%cond: i1) {
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32}
  %tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
  cf.cond_br %cond, ^bb2, ^bb1
^bb1:
  // CHECK: arm_sme.get_tile {tile_id = 0 : i32}
  %tileB = arm_sme.get_tile : vector<[16]x[16]xi8>
  "test.dummy"(): () -> ()
  "test.some_use"(%tileB) : (vector<[16]x[16]xi8>) -> ()
  cf.br ^bb3
^bb2:
  // CHECK: arm_sme.get_tile {tile_id = 1 : i32}
  %tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
  "test.dummy"(): () -> ()
  "test.dummy"(): () -> ()
  "test.some_use"(%tileC) : (vector<[4]x[4]xf32>) -> ()
  "test.some_use"(%tileA) : (vector<[4]x[4]xf32>) -> ()
  cf.br ^bb3
^bb3:
  return
}