1// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse --mlir-print-local-scope %s | FileCheck %s 2 3// CHECK-LABEL: @one_3d_nest 4func.func @one_3d_nest() { 5 // Capture original bounds. Note that for zero-based step-one loops, the 6 // upper bound is also the number of iterations. 7 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 8 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 9 // CHECK-DAG: %[[range:.*]] = arith.constant 7056 10 %c0 = arith.constant 0 : index 11 %c1 = arith.constant 1 : index 12 %c2 = arith.constant 2 : index 13 %c3 = arith.constant 3 : index 14 %c42 = arith.constant 42 : index 15 %c56 = arith.constant 56 : index 16 // The range of the new scf. 17 // Updated loop bounds. 18 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] 19 scf.for %i = %c0 to %c42 step %c1 { 20 // Inner loops must have been removed. 21 // CHECK-NOT: scf.for 22 23 // Reconstruct original IVs from the linearized one. 24 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] 25 // CHECK-SAME: into (42, 56, 3) 26 scf.for %j = %c0 to %c56 step %c1 { 27 scf.for %k = %c0 to %c3 step %c1 { 28 // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) 29 "use"(%i, %j, %k) : (index, index, index) -> () 30 } 31 } 32 } 33 return 34} 35 36// ----- 37 38// Check that there is no chasing the replacement of value uses by ensuring 39// multiple uses of loop induction variables get rewritten to the same values. 40 41// CHECK-LABEL: @multi_use 42func.func @multi_use() { 43 %c0 = arith.constant 0 : index 44 %c1 = arith.constant 1 : index 45 %c10 = arith.constant 10 : index 46 // CHECK: scf.for %[[iv:.*]] = 47 scf.for %i = %c1 to %c10 step %c1 { 48 scf.for %j = %c1 to %c10 step %c1 { 49 scf.for %k = %c1 to %c10 step %c1 { 50 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[iv]] 51 // CHECK: %[[k:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#2) 52 // CHECK: %[[j:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#1) 53 // CHECK: %[[i:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#0) 54 55 // CHECK: "use1"(%[[i]], %[[j]], %[[k]]) 56 "use1"(%i,%j,%k) : (index,index,index) -> () 57 // CHECK: "use2"(%[[i]], %[[k]], %[[j]]) 58 "use2"(%i,%k,%j) : (index,index,index) -> () 59 // CHECK: "use3"(%[[k]], %[[j]], %[[i]]) 60 "use3"(%k,%j,%i) : (index,index,index) -> () 61 } 62 } 63 } 64 return 65} 66 67// ----- 68 69func.func @unnormalized_loops() { 70 // Normalized lower bound and step for the outer scf. 71 // CHECK-DAG: %[[lb_i:.*]] = arith.constant 0 72 // CHECK-DAG: %[[step_i:.*]] = arith.constant 1 73 74 // CHECK-DAG: %[[range:.*]] = arith.constant 12 75 76 %c2 = arith.constant 2 : index 77 %c3 = arith.constant 3 : index 78 %c5 = arith.constant 5 : index 79 %c7 = arith.constant 7 : index 80 %c10 = arith.constant 10 : index 81 %c17 = arith.constant 17 : index 82 83 84 // New bounds of the outer scf. 85 // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]] 86 scf.for %i = %c5 to %c10 step %c2 { 87 // The inner loop has been removed. 88 // CHECK-NOT: scf.for 89 scf.for %j = %c7 to %c17 step %c3 { 90 // The IVs are rewritten. 91 // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]] 92 // CHECK-SAME: into (3, 4) 93 // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0) -> (d0 * 3 + 7)>(%[[delinearize]]#1) 94 // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0) -> (d0 * 2 + 5)>(%[[delinearize]]#0) 95 // CHECK: "use"(%[[orig_i]], %[[orig_j]]) 96 "use"(%i, %j) : (index, index) -> () 97 } 98 } 99 return 100} 101 102// ----- 103 104func.func @noramalized_loops_with_yielded_iter_args() { 105 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 106 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 107 // CHECK-DAG: %[[range:.*]] = arith.constant 7056 108 %c0 = arith.constant 0 : index 109 %c1 = arith.constant 1 : index 110 %c3 = arith.constant 3 : index 111 %c42 = arith.constant 42 : index 112 %c56 = arith.constant 56 : index 113 // The range of the new scf. 114 115 // Updated loop bounds. 116 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) { 117 %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) { 118 // Inner loops must have been removed. 119 // CHECK-NOT: scf.for 120 121 // Reconstruct original IVs from the linearized one. 122 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] into (42, 56, 3) 123 %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){ 124 %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) { 125 // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) 126 "use"(%i, %j, %k) : (index, index, index) -> () 127 // CHECK: scf.yield %[[VAL_1]] : index 128 scf.yield %arg2 : index 129 } 130 scf.yield %0#0 : index 131 } 132 scf.yield %1#0 : index 133 } 134 return 135} 136 137// ----- 138 139func.func @noramalized_loops_with_shuffled_yielded_iter_args() { 140 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 141 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 142 %c0 = arith.constant 0 : index 143 %c1 = arith.constant 1 : index 144 %c3 = arith.constant 3 : index 145 %c42 = arith.constant 42 : index 146 %c56 = arith.constant 56 : index 147 // The range of the new scf. 148 // CHECK-DAG:%[[range:.*]] = arith.constant 7056 149 150 // Updated loop bounds. 151 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]], %[[VAL_2:.*]] = %[[orig_lb]]) -> (index, index) { 152 %2:2 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0, %arg1 = %c0) -> (index, index) { 153 // Inner loops must have been removed. 154 // CHECK-NOT: scf.for 155 156 // Reconstruct original IVs from the linearized one. 157 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] 158 // CHECK-SAME: into (42, 56, 3) 159 %1:2 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (index, index){ 160 %0:2 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (index, index) { 161 // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) 162 "use"(%i, %j, %k) : (index, index, index) -> () 163 // CHECK: scf.yield %[[VAL_2]], %[[VAL_1]] : index, index 164 scf.yield %arg5, %arg4 : index, index 165 } 166 scf.yield %0#0, %0#1 : index, index 167 } 168 scf.yield %1#0, %1#1 : index, index 169 } 170 return 171} 172 173// ----- 174 175func.func @noramalized_loops_with_yielded_non_iter_args() { 176 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0 177 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1 178 %c0 = arith.constant 0 : index 179 %c1 = arith.constant 1 : index 180 %c3 = arith.constant 3 : index 181 %c42 = arith.constant 42 : index 182 %c56 = arith.constant 56 : index 183 // The range of the new scf. 184 // CHECK-DAG: %[[range:.*]] = arith.constant 7056 185 186 // Updated loop bounds. 187 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) { 188 %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) { 189 // Inner loops must have been removed. 190 // CHECK-NOT: scf.for 191 192 // Reconstruct original IVs from the linearized one. 193 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] 194 // CHECK-SAME: into (42, 56, 3) 195 %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){ 196 %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) { 197 // CHECK: %[[res:.*]] = "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2) 198 %res = "use"(%i, %j, %k) : (index, index, index) -> (index) 199 // CHECK: scf.yield %[[res]] : index 200 scf.yield %res : index 201 } 202 scf.yield %0#0 : index 203 } 204 scf.yield %1#0 : index 205 } 206 return 207} 208 209// ----- 210 211// Check with parametric loop bounds and steps, capture the bounds here. 212// CHECK-LABEL: @parametric 213// CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]: 214// CHECK-SAME: %[[orig_ub1:[A-Za-z0-9]+]]: 215// CHECK-SAME: %[[orig_step1:[A-Za-z0-9]+]]: 216// CHECK-SAME: %[[orig_lb2:[A-Za-z0-9]+]]: 217// CHECK-SAME: %[[orig_ub2:[A-Za-z0-9]+]]: 218// CHECK-SAME: %[[orig_step2:[A-Za-z0-9]+]]: 219func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index, 220 %lb2 : index, %ub2 : index, %step2 : index) { 221 // Compute the number of iterations for each of the loops and the total 222 // number of iterations. 223 // CHECK: %[[normalized_i:.*]] = affine.apply 224 // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]]] 225 // CHECK: %[[c0:.+]] = arith.constant 0 226 // CHECK: %[[c1:.+]] = arith.constant 1 227 // CHECK: %[[normalized_j:.*]] = affine.apply 228 // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]] 229 // CHECK: %[[range:.+]] = affine.apply 230 // CHECK-SAME: affine_map<()[s0, s1, s2, s3, s4, s5] -> (((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5))>() 231 // CHECK-SAME: [%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]], %[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]] 232 233 // Check that the outer loop is updated. 234 // CHECK: scf.for %[[i:.*]] = %[[c0]] to %[[range]] step %[[c1]] 235 scf.for %i = %lb1 to %ub1 step %step1 { 236 // Check that the inner loop is removed. 237 // CHECK-NOT: scf.for 238 scf.for %j = %lb2 to %ub2 step %step2 { 239 // Remapping of the induction variables. 240 // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]] into (%[[normalized_i]], %[[normalized_j]]) 241 // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> 242 // CHECK-SAME: (%[[delinearize]]#1)[%[[orig_lb2]], %[[orig_step2]]] 243 // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> 244 // CHECK-SAME: (%[[delinearize]]#0)[%[[orig_lb1]], %[[orig_step1]]] 245 246 // CHECK: "foo"(%[[orig_i]], %[[orig_j]]) 247 "foo"(%i, %j) : (index, index) -> () 248 } 249 } 250 return 251} 252 253// ----- 254 255// CHECK-LABEL: @two_bands 256func.func @two_bands() { 257 %c0 = arith.constant 0 : index 258 %c1 = arith.constant 1 : index 259 %c10 = arith.constant 10 : index 260 // CHECK: %[[outer_range:.*]] = arith.constant 100 261 // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]] 262 scf.for %i = %c0 to %c10 step %c1 { 263 // Check that the "j" loop was removed and that the inner loops were 264 // coalesced as well. The preparation step for coalescing will inject the 265 // subtraction operation unlike the IV remapping. 266 // CHECK-NOT: scf.for 267 // CHECK: affine.delinearize_index 268 scf.for %j = %c0 to %c10 step %c1 { 269 // The inner pair of loops is coalesced separately. 270 // CHECK: scf.for 271 scf.for %k = %i to %j step %c1 { 272 // CHECK-NOT: scf.for 273 scf.for %l = %i to %j step %c1 { 274 "foo"() : () -> () 275 } 276 } 277 } 278 } 279 return 280} 281 282// ----- 283 284// Check coalescing of affine.for loops when all the loops have constant upper bound. 285func.func @coalesce_affine_for() { 286 affine.for %i = 0 to 16 { 287 affine.for %j = 0 to 64 { 288 affine.for %k = 0 to 8 { 289 "test.foo"(%i, %j, %k) : (index, index, index) -> () 290 } 291 } 292 } 293 return 294} 295// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<() -> (16)>() 296// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<() -> (64)>() 297// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]] 298// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<() -> (8)>() 299// CHECK-DAG: %[[T4:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T3]]] 300// CHECK: affine.for %[[IV:.*]] = 0 to %[[T4]] 301// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T3]]] 302// CHECK-DAG: %[[T6:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T3]]] 303// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T6]])[%[[T1]]] 304// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T6]])[%[[T1]]] 305// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) 306// CHECK-NEXT: } 307// CHECK-NEXT: return 308 309// ----- 310 311// Check coalescing of affine.for loops when all the loops have non constant upper bounds. 312func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) { 313 %c0 = arith.constant 0 : index 314 %M = memref.dim %arg0, %c0 : memref<?x?xf32> 315 %N = memref.dim %arg0, %c0 : memref<?x?xf32> 316 %K = memref.dim %arg0, %c0 : memref<?x?xf32> 317 affine.for %i = 0 to %M { 318 affine.for %j = 0 to %N { 319 affine.for %k = 0 to %K { 320 "test.foo"(%i, %j, %k) : (index, index, index) -> () 321 } 322 } 323 } 324 return 325} 326// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32> 327// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]] 328// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]] 329// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T0]]] 330// CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]] 331// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T0]]] 332// CHECK-DAG: %[[T9:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T0]]] 333// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T9]])[%[[T0]]] 334// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T9]])[%[[T0]]] 335// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) 336// CHECK-NEXT: } 337// CHECK-NEXT: return 338 339// ----- 340 341// Check coalescing of affine.for loops when some of the loop has constant upper bounds while others have nin constant upper bounds. 342func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) { 343 %c0 = arith.constant 0 : index 344 %M = memref.dim %arg0, %c0 : memref<?x?xf32> 345 %N = memref.dim %arg0, %c0 : memref<?x?xf32> 346 affine.for %i = 0 to %M { 347 affine.for %j = 0 to %N { 348 affine.for %k = 0 to 64 { 349 "test.foo"(%i, %j, %k) : (index, index, index) -> () 350 } 351 } 352 } 353 return 354} 355// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32> 356// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]] 357// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]] 358// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<() -> (64)>() 359// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T2]]] 360// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] 361// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T2]]] 362// CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T2]]] 363// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T0]]] 364// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T0]]] 365// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) 366// CHECK-NEXT: } 367// CHECK-NEXT: return 368 369// ----- 370 371// Check coalescing of affine.for loops when upper bound contains multi result upper bound map. 372#myMap = affine_map<()[s1] -> (s1, -s1)> 373func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) { 374 %c0 = arith.constant 0 : index 375 %M = memref.dim %arg0, %c0 : memref<?x?xf32> 376 %N = memref.dim %arg0, %c0 : memref<?x?xf32> 377 %K = memref.dim %arg0, %c0 : memref<?x?xf32> 378 affine.for %i = 0 to min #myMap()[%M] { 379 affine.for %j = 0 to %N { 380 affine.for %k = 0 to %K { 381 "test.foo"(%i, %j, %k) : (index, index, index) -> () 382 } 383 } 384 } 385 return 386} 387// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32> 388// CHECK-DAG: %[[T0:.*]] = affine.min affine_map<()[s0] -> (s0, -s0)>()[%[[DIM]]] 389// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]] 390// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]] 391// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T1]]] 392// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] 393// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T1]]] 394// CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T1]]] 395// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T1]]] 396// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T1]]] 397// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) 398// CHECK-NEXT: } 399// CHECK-NEXT: return 400 401// ----- 402 403#map0 = affine_map<(d0) -> (d0 * 110)> 404#map1 = affine_map<(d0) -> (696, d0 * 110 + 110)> 405func.func @test_loops_do_not_get_coalesced() { 406 affine.for %i = 0 to 7 { 407 affine.for %j = #map0(%i) to min #map1(%i) { 408 "use"(%i, %j) : (index, index) -> () 409 } 410 } 411 return 412} 413// CHECK: affine.for %[[IV0:.*]] = 0 to 7 414// CHECK-NEXT: affine.for %[[IV1:.*]] = affine_map<(d0) -> (d0 * 110)>(%[[IV0]]) to min affine_map<(d0) -> (696, d0 * 110 + 110)>(%[[IV0]]) 415// CHECK-NEXT: "use"(%[[IV0]], %[[IV1]]) 416// CHECK-NEXT: } 417// CHECK-NEXT: } 418// CHECK-NEXT: return 419