// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
// RUN: mlir-opt %s --affine-loop-unroll --split-input-file | FileCheck %s

// The invocations above run this whole file through the scf test unrolling
// pass (factors 2 and 3, loop depths 0 and 1, annotation enabled) and through
// the affine unrolling pass (unroll-up-to-factor, cleanup-unroll, default
// options). Only the last invocation splits the input at the split markers.

// Loop whose bounds and step are all function arguments. The expected output
// computes the trip count at runtime, runs an unrolled main loop up to a
// rounded-down upper bound, and then runs a remainder loop with the original
// step from that bound to the original upper bound.
func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
                               %arg3: memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  scf.for %i0 = %arg0 to %arg1 step %arg2 {
    memref.store %0, %arg3[%i0] : memref<?xf32>
  }
  return
}
// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll
// UNROLL-BY-2-SAME: %[[LB:.*0]]: index,
// UNROLL-BY-2-SAME: %[[UB:.*1]]: index,
// UNROLL-BY-2-SAME: %[[STEP:.*2]]: index,
// UNROLL-BY-2-SAME: %[[MEM:.*3]]: memref<?xf32>
//
// UNROLL-BY-2-DAG: %[[V0:.*]] = arith.subi %[[UB]], %[[LB]] : index
// UNROLL-BY-2-DAG: %[[C1:.*]] = arith.constant 1 : index
// UNROLL-BY-2-DAG: %[[V1:.*]] = arith.subi %[[STEP]], %[[C1]] : index
// UNROLL-BY-2-DAG: %[[V2:.*]] = arith.addi %[[V0]], %[[V1]] : index
// Compute trip count in V3.
// UNROLL-BY-2-DAG: %[[V3:.*]] = arith.divui %[[V2]], %[[STEP]] : index
// Store unroll factor in C2.
// UNROLL-BY-2-DAG: %[[C2:.*]] = arith.constant 2 : index
// UNROLL-BY-2-DAG: %[[V4:.*]] = arith.remsi %[[V3]], %[[C2]] : index
// UNROLL-BY-2-DAG: %[[V5:.*]] = arith.subi %[[V3]], %[[V4]] : index
// UNROLL-BY-2-DAG: %[[V6:.*]] = arith.muli %[[V5]], %[[STEP]] : index
// Compute upper bound of unrolled loop in V7.
// UNROLL-BY-2-DAG: %[[V7:.*]] = arith.addi %[[LB]], %[[V6]] : index
// Compute step of unrolled loop in V8.
// UNROLL-BY-2-DAG: %[[V8:.*]] = arith.muli %[[STEP]], %[[C2]] : index
// UNROLL-BY-2: scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-2-NEXT:   %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-BY-2-NEXT:   %[[V9:.*]] = arith.muli %[[STEP]], %[[C1_IV]] : index
// UNROLL-BY-2-NEXT:   %[[V10:.*]] = arith.addi %[[IV]], %[[V9]] : index
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
// UNROLL-BY-2-NEXT: }
// UNROLL-BY-2-NEXT: scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-2-NEXT: }
// UNROLL-BY-2-NEXT: return

// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll
// UNROLL-BY-3-SAME: %[[LB:.*0]]: index,
// UNROLL-BY-3-SAME: %[[UB:.*1]]: index,
// UNROLL-BY-3-SAME: %[[STEP:.*2]]: index,
// UNROLL-BY-3-SAME: %[[MEM:.*3]]: memref<?xf32>
//
// UNROLL-BY-3-DAG: %[[V0:.*]] = arith.subi %[[UB]], %[[LB]] : index
// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : index
// UNROLL-BY-3-DAG: %[[V1:.*]] = arith.subi %[[STEP]], %[[C1]] : index
// UNROLL-BY-3-DAG: %[[V2:.*]] = arith.addi %[[V0]], %[[V1]] : index
// Compute trip count in V3.
// UNROLL-BY-3-DAG: %[[V3:.*]] = arith.divui %[[V2]], %[[STEP]] : index
// Store unroll factor in C3.
// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : index
// UNROLL-BY-3-DAG: %[[V4:.*]] = arith.remsi %[[V3]], %[[C3]] : index
// UNROLL-BY-3-DAG: %[[V5:.*]] = arith.subi %[[V3]], %[[V4]] : index
// UNROLL-BY-3-DAG: %[[V6:.*]] = arith.muli %[[V5]], %[[STEP]] : index
// Compute upper bound of unrolled loop in V7.
// UNROLL-BY-3-DAG: %[[V7:.*]] = arith.addi %[[LB]], %[[V6]] : index
// Compute step of unrolled loop in V8.
// UNROLL-BY-3-DAG: %[[V8:.*]] = arith.muli %[[STEP]], %[[C3]] : index
// UNROLL-BY-3: scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-3-NEXT:   %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-BY-3-NEXT:   %[[V9:.*]] = arith.muli %[[STEP]], %[[C1_IV]] : index
// UNROLL-BY-3-NEXT:   %[[V10:.*]] = arith.addi %[[IV]], %[[V9]] : index
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
// UNROLL-BY-3-NEXT:   %[[C2_IV:.*]] = arith.constant 2 : index
// UNROLL-BY-3-NEXT:   %[[V11:.*]] = arith.muli %[[STEP]], %[[C2_IV]] : index
// UNROLL-BY-3-NEXT:   %[[V12:.*]] = arith.addi %[[IV]], %[[V11]] : index
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V12]]] : memref<?xf32>
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return

// Two-deep loop nest with dynamic bounds. With loop-depth=0 only the outer
// loop is unrolled: the expected output holds two copies of the untouched
// inner loop in the main outer loop and one copy in the outer remainder loop.
func.func @dynamic_loop_unroll_outer_by_2(
  %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
  %arg5 : index, %arg6: memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  scf.for %i0 = %arg0 to %arg1 step %arg2 {
    scf.for %i1 = %arg3 to %arg4 step %arg5 {
      memref.store %0, %arg6[%i1] : memref<?xf32>
    }
  }
  return
}
// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2
// UNROLL-OUTER-BY-2-SAME: %[[LB0:.*0]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[UB0:.*1]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[STEP0:.*2]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[LB1:.*3]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[UB1:.*4]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[STEP1:.*5]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[MEM:.*6]]: memref<?xf32>
//
// UNROLL-OUTER-BY-2: scf.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
// UNROLL-OUTER-BY-2-NEXT:   scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
// UNROLL-OUTER-BY-2-NEXT:     memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
// UNROLL-OUTER-BY-2-NEXT:   }
// UNROLL-OUTER-BY-2-NEXT:   scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
// UNROLL-OUTER-BY-2-NEXT:     memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
// UNROLL-OUTER-BY-2-NEXT:   }
// UNROLL-OUTER-BY-2-NEXT: }
// UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
// UNROLL-OUTER-BY-2-NEXT:   scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
// UNROLL-OUTER-BY-2-NEXT:     memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
// UNROLL-OUTER-BY-2-NEXT:   }
// UNROLL-OUTER-BY-2-NEXT: }
// UNROLL-OUTER-BY-2-NEXT: return

// Same loop nest as above. With loop-depth=1 only the inner loop is unrolled:
// the outer loop keeps its original bounds and step, and contains the
// unrolled inner main loop followed by the inner remainder loop.
func.func @dynamic_loop_unroll_inner_by_2(
  %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
  %arg5 : index, %arg6: memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  scf.for %i0 = %arg0 to %arg1 step %arg2 {
    scf.for %i1 = %arg3 to %arg4 step %arg5 {
      memref.store %0, %arg6[%i1] : memref<?xf32>
    }
  }
  return
}
// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2
// UNROLL-INNER-BY-2-SAME: %[[LB0:.*0]]: index,
// UNROLL-INNER-BY-2-SAME: %[[UB0:.*1]]: index,
// UNROLL-INNER-BY-2-SAME: %[[STEP0:.*2]]: index,
// UNROLL-INNER-BY-2-SAME: %[[LB1:.*3]]: index,
// UNROLL-INNER-BY-2-SAME: %[[UB1:.*4]]: index,
// UNROLL-INNER-BY-2-SAME: %[[STEP1:.*5]]: index,
// UNROLL-INNER-BY-2-SAME: %[[MEM:.*6]]: memref<?xf32>
//
// UNROLL-INNER-BY-2: scf.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
// UNROLL-INNER-BY-2:   scf.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
// UNROLL-INNER-BY-2-NEXT:     memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
// UNROLL-INNER-BY-2-NEXT:     %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-INNER-BY-2-NEXT:     %[[V0:.*]] = arith.muli %[[STEP1]], %[[C1_IV]] : index
// UNROLL-INNER-BY-2-NEXT:     %[[V1:.*]] = arith.addi %[[IV1]], %[[V0]] : index
// UNROLL-INNER-BY-2-NEXT:     memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// UNROLL-INNER-BY-2-NEXT:   }
// UNROLL-INNER-BY-2-NEXT:   scf.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
// UNROLL-INNER-BY-2-NEXT:     memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
// UNROLL-INNER-BY-2-NEXT:   }
// UNROLL-INNER-BY-2-NEXT: }
// UNROLL-INNER-BY-2-NEXT: return

// Test that no epilogue clean-up loop is generated because the trip count is
// a multiple of the unroll factor.
func.func @static_loop_unroll_by_2(%arg0 : memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 20 : index
  %step = arith.constant 1 : index
  // Constant bounds [0, 20) with step 1: 20 iterations, evenly divisible by 2.
  scf.for %i0 = %lb to %ub step %step {
    memref.store %0, %arg0[%i0] : memref<?xf32>
  }
  return
}
// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2
// UNROLL-BY-2-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// UNROLL-BY-2-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-BY-2-DAG: %[[C1:.*]] = arith.constant 1 : index
// UNROLL-BY-2-DAG: %[[C20:.*]] = arith.constant 20 : index
// UNROLL-BY-2-DAG: %[[C2:.*]] = arith.constant 2 : index
// UNROLL-BY-2: scf.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-2-NEXT:   %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-BY-2-NEXT:   %[[V0:.*]] = arith.muli %[[C1]], %[[C1_IV]] : index
// UNROLL-BY-2-NEXT:   %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// UNROLL-BY-2-NEXT: }
// UNROLL-BY-2-NEXT: return

// With annotate=true each copy of the store is expected to carry an
// unrolled_iteration attribute giving its position in the unrolled body.
// UNROLL-BY-2-ANNOTATE-LABEL: func @static_loop_unroll_by_2
// UNROLL-BY-2-ANNOTATE: memref.store %{{.*}}, %[[MEM:.*0]][%{{.*}}] {unrolled_iteration = 0 : ui32} : memref<?xf32>
// UNROLL-BY-2-ANNOTATE: memref.store %{{.*}}, %[[MEM]][%{{.*}}] {unrolled_iteration = 1 : ui32} : memref<?xf32>

// Test that no epilogue clean-up loop is generated because the trip count
// (taking into account the non-unit step size) is a multiple of the unroll
// factor.
func.func @static_loop_step_2_unroll_by_2(%arg0 : memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 19 : index
  %step = arith.constant 2 : index
  // Constant bounds [0, 19) with step 2: 10 iterations (0, 2, ..., 18),
  // evenly divisible by 2.
  scf.for %i0 = %lb to %ub step %step {
    memref.store %0, %arg0[%i0] : memref<?xf32>
  }
  return
}

// The unrolled loop keeps the original upper bound (19) and uses step 4.
// UNROLL-BY-2-LABEL: func @static_loop_step_2_unroll_by_2
// UNROLL-BY-2-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// UNROLL-BY-2-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-BY-2-DAG: %[[C2:.*]] = arith.constant 2 : index
// UNROLL-BY-2-DAG: %[[C19:.*]] = arith.constant 19 : index
// UNROLL-BY-2-DAG: %[[C4:.*]] = arith.constant 4 : index
// UNROLL-BY-2: scf.for %[[IV:.*]] = %[[C0]] to %[[C19]] step %[[C4]] {
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-2-NEXT:   %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-BY-2-NEXT:   %[[V0:.*]] = arith.muli %[[C2]], %[[C1_IV]] : index
// UNROLL-BY-2-NEXT:   %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
// UNROLL-BY-2-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// UNROLL-BY-2-NEXT: }
// UNROLL-BY-2-NEXT: return

// Test that epilogue clean up loop is generated (trip count is not
// a multiple of unroll factor).
func.func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 20 : index
  %step = arith.constant 1 : index
  // 20 iterations are not divisible by 3: the expected main loop covers
  // [0, 18) with step 3 and a remainder loop covers [18, 20) with step 1.
  scf.for %i0 = %lb to %ub step %step {
    memref.store %0, %arg0[%i0] : memref<?xf32>
  }
  return
}

// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3
// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : index
// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : index
// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : index
// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : index
// UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-3-NEXT:   %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-BY-3-NEXT:   %[[V0:.*]] = arith.muli %[[C1]], %[[C1_IV]] : index
// UNROLL-BY-3-NEXT:   %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// UNROLL-BY-3-NEXT:   %[[C2_IV:.*]] = arith.constant 2 : index
// UNROLL-BY-3-NEXT:   %[[V2:.*]] = arith.muli %[[C1]], %[[C2_IV]] : index
// UNROLL-BY-3-NEXT:   %[[V3:.*]] = arith.addi %[[IV]], %[[V2]] : index
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: scf.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return

// Test that the single-iteration epilogue loop body is promoted to the loop's
// containing block.
func.func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 10 : index
  %step = arith.constant 1 : index
  // 10 iterations: the expected main loop covers [0, 9) with step 3, and the
  // one leftover iteration appears as a plain store at index 9, not as a loop.
  scf.for %i0 = %lb to %ub step %step {
    memref.store %0, %arg0[%i0] : memref<?xf32>
  }
  return
}
// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue
// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : index
// UNROLL-BY-3-DAG: %[[C10:.*]] = arith.constant 10 : index
// UNROLL-BY-3-DAG: %[[C9:.*]] = arith.constant 9 : index
// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : index
// UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
// UNROLL-BY-3-NEXT:   %[[C1_IV:.*]] = arith.constant 1 : index
// UNROLL-BY-3-NEXT:   %[[V0:.*]] = arith.muli %[[C1]], %[[C1_IV]] : index
// UNROLL-BY-3-NEXT:   %[[V1:.*]] = arith.addi %[[IV]], %[[V0]] : index
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// UNROLL-BY-3-NEXT:   %[[C2_IV:.*]] = arith.constant 2 : index
// UNROLL-BY-3-NEXT:   %[[V2:.*]] = arith.muli %[[C1]], %[[C2_IV]] : index
// UNROLL-BY-3-NEXT:   %[[V3:.*]] = arith.addi %[[IV]], %[[V2]] : index
// UNROLL-BY-3-NEXT:   memref.store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
// UNROLL-BY-3-NEXT: return

// Test unroll-up-to functionality.
func.func @static_loop_unroll_up_to_factor(%arg0 : memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 2 : index
  // Only 2 iterations while the requested factor is 6; the expected output is
  // two straight-line stores with no loop left.
  affine.for %i0 = %lb to %ub {
    affine.store %0, %arg0[%i0] : memref<?xf32>
  }
  return
}
// UNROLL-UP-TO-LABEL: func @static_loop_unroll_up_to_factor
// UNROLL-UP-TO-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// UNROLL-UP-TO-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-UP-TO-DAG: %[[C2:.*]] = arith.constant 2 : index
// UNROLL-UP-TO-NEXT: %[[V0:.*]] = affine.apply {{.*}}
// UNROLL-UP-TO-NEXT: affine.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
// UNROLL-UP-TO-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-UP-TO-NEXT: affine.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// UNROLL-UP-TO-NEXT: return

// Test that epilogue's arguments are correctly renamed.
func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 20 : index
  %step = arith.constant 1 : index
  // Loop-carried values: in the expected main loop each of the three copies
  // consumes the add/mul results of the previous copy, and the remainder loop
  // takes the main loop's two results as its initial iter_args.
  %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) {
    %add = arith.addf %arg0, %arg1 : f32
    %mul = arith.mulf %arg0, %arg1 : f32
    scf.yield %add, %mul : f32, f32
  }
  return %result#0, %result#1 : f32, f32
}
// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_rename_epilogue_arguments
//
// UNROLL-BY-3-DAG: %[[CST:.*]] = arith.constant {{.*}} : f32
// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : index
// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : index
// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : index
// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : index
// UNROLL-BY-3: %[[FOR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]]
// UNROLL-BY-3-SAME: iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) {
// UNROLL-BY-3-NEXT:   %[[ADD0:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT:   %[[MUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT:   %[[ADD1:.*]] = arith.addf %[[ADD0]], %[[MUL0]] : f32
// UNROLL-BY-3-NEXT:   %[[MUL1:.*]] = arith.mulf %[[ADD0]], %[[MUL0]] : f32
// UNROLL-BY-3-NEXT:   %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[MUL1]] : f32
// UNROLL-BY-3-NEXT:   %[[MUL2:.*]] = arith.mulf %[[ADD1]], %[[MUL1]] : f32
// UNROLL-BY-3-NEXT:   scf.yield %[[ADD2]], %[[MUL2]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3: %[[EFOR:.*]]:2 = scf.for %[[EIV:.*]] = %[[C18]] to %[[C20]] step %[[C1]]
// UNROLL-BY-3-SAME: iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) {
// UNROLL-BY-3-NEXT:   %[[EADD:.*]] = arith.addf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT:   %[[EMUL:.*]] = arith.mulf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT:   scf.yield %[[EADD]], %[[EMUL]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32

// Test that epilogue clean up loop is generated (trip count is less
// than an unroll factor).
func.func @static_loop_unroll_by_5_with_cleanup(%arg0 : memref<?xf32>) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : index
  %ub = arith.constant 3 : index
  // 3 iterations with factor 5 and cleanup-unroll=true: the expected output
  // is three straight-line stores with no loop left.
  affine.for %i0 = %lb to %ub {
    memref.store %0, %arg0[%i0] : memref<?xf32>
  }
  return
}

// CLEANUP-UNROLL-BY-5-LABEL: func @static_loop_unroll_by_5_with_cleanup
// CLEANUP-UNROLL-BY-5-SAME: %[[MEM:.*0]]: memref<?xf32>
//
// CLEANUP-UNROLL-BY-5-DAG: %[[C0:.*]] = arith.constant 0 : index
// CLEANUP-UNROLL-BY-5-DAG: %[[C3:.*]] = arith.constant 3 : index
// CLEANUP-UNROLL-BY-5-NEXT: %[[V0:.*]] = affine.apply {{.*}}
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
// CLEANUP-UNROLL-BY-5-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
// CLEANUP-UNROLL-BY-5-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V2]]] : memref<?xf32>
// CLEANUP-UNROLL-BY-5-NEXT: return

// -----

// Test loop unrolling when the yielded value remains unchanged.
// CHECK: [[$MAP:#map]] = affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>
// The label pattern below is a prefix of the function name, which is enough
// for it to match.
// CHECK-LABEL: func @loop_unroll_static_yield_value
func.func @loop_unroll_static_yield_value_test1() {
  %true_4 = arith.constant true
  %c1 = arith.constant 1 : index
  // The yielded value is a constant defined outside the loop. With default
  // pass options the expected output has step 4 and four copies of the body,
  // and still yields that same constant.
  %103 = affine.for %arg2 = 0 to 40 iter_args(%arg3 = %true_4) -> (i1) {
    %324 = affine.max affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>(%c1)
    affine.yield %true_4 : i1
  }
  return
}
// CHECK: %[[TRUE:.*]] = arith.constant true
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: affine.for %{{.*}} = 0 to 40 step 4 iter_args(%{{.*}} = %[[TRUE]]) -> (i1) {
// CHECK-NEXT:   affine.max [[$MAP]](%[[C1]])
// CHECK-NEXT:   affine.max [[$MAP]](%[[C1]])
// CHECK-NEXT:   affine.max [[$MAP]](%[[C1]])
// CHECK-NEXT:   affine.max [[$MAP]](%[[C1]])
// CHECK-NEXT:   affine.yield %[[TRUE]] : i1
// CHECK-NEXT: }
// CHECK-NEXT: return

// -----

// Loop unrolling when the yielded value is loop iv.
// CHECK: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>
// CHECK: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
// CHECK: [[$MAP2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 4)>
// CHECK: [[$MAP3:#map[0-9]*]] = affine_map<(d0) -> (d0 + 6)>
// CHECK-LABEL: func @loop_unroll_yield_loop_iv
func.func @loop_unroll_yield_loop_iv() {
  %c1 = arith.constant 1 : index
  // Step-2 loop that yields its own induction variable. The expected output
  // has step 8; copies 2-4 of the body use the induction variable offset by
  // 2, 4 and 6 through affine.apply.
  %103 = affine.for %arg2 = 0 to 40 step 2 iter_args(%arg3 = %c1) -> (index) {
    %324 = affine.max affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>(%arg2)
    affine.yield %arg2 : index
  }
  return
}
// NOTE(review): the expected yield below names the unrolled loop's own
// induction variable, not the value offset by 6 that the last body copy uses.
// This presumably pins how the pass currently treats a yielded block
// argument; confirm that this is the intended result.
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: affine.for %[[LOOP_IV:.*]] = 0 to 40 step 8 iter_args(%{{.*}} = %[[C1]]) -> (index) {
// CHECK-NEXT:   affine.max [[$MAP0]](%[[LOOP_IV]])
// CHECK-NEXT:   %[[LOOP_IV_PLUS_2:.*]] = affine.apply [[$MAP1]](%[[LOOP_IV]])
// CHECK-NEXT:   affine.max [[$MAP0]](%[[LOOP_IV_PLUS_2]])
// CHECK-NEXT:   %[[LOOP_IV_PLUS_4:.*]] = affine.apply [[$MAP2]](%[[LOOP_IV]])
// CHECK-NEXT:   affine.max [[$MAP0]](%[[LOOP_IV_PLUS_4]])
// CHECK-NEXT:   %[[LOOP_IV_PLUS_6:.*]] = affine.apply [[$MAP3]](%[[LOOP_IV]])
// CHECK-NEXT:   affine.max [[$MAP0]](%[[LOOP_IV_PLUS_6]])
// CHECK-NEXT:   affine.yield %[[LOOP_IV]] : index
// CHECK-NEXT: }
// CHECK-NEXT: return

// -----

// Loop unrolling when the yielded value is iter_arg.
// CHECK: [[$MAP:#map]] = affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>
// CHECK-LABEL: func @loop_unroll_yield_iter_arg
func.func @loop_unroll_yield_iter_arg() {
  %c1 = arith.constant 1 : index
  // The body reads the iter_arg and yields it unchanged. In the expected
  // output all four body copies use the same iter_arg and the yield passes it
  // straight through.
  %103 = affine.for %arg2 = 0 to 40 step 2 iter_args(%arg3 = %c1) -> (index) {
    %324 = affine.max affine_map<(d0) -> (-d0 + 64, (d0 floordiv 8) ceildiv 64, -d0 - 16, d0 * -64)>(%arg3)
    affine.yield %arg3 : index
  }
  return
}
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: affine.for %{{.*}} = 0 to 40 step 8 iter_args(%[[ITER_ARG:.*]] = %[[C1]]) -> (index) {
// CHECK-NEXT:   affine.max [[$MAP]](%[[ITER_ARG]])
// CHECK-NEXT:   affine.max [[$MAP]](%[[ITER_ARG]])
// CHECK-NEXT:   affine.max [[$MAP]](%[[ITER_ARG]])
// CHECK-NEXT:   affine.max [[$MAP]](%[[ITER_ARG]])
// CHECK-NEXT:   affine.yield %[[ITER_ARG]] : index
// CHECK-NEXT: }
// CHECK-NEXT: return

// -----

// Test the loop unroller works with integer IV type.
func.func @static_loop_unroll_with_integer_iv() -> (f32, f32) {
  %0 = arith.constant 7.0 : f32
  %lb = arith.constant 0 : i32
  %ub = arith.constant 20 : i32
  %step = arith.constant 1 : i32
  // Bounds and step are i32 rather than index. The expected main and
  // remainder loops both keep the `: i32` type annotation, and the generated
  // bound/step constants (18 and 3) are i32 as well.
  %result:2 = scf.for %i0 = %lb to %ub step %step iter_args(%arg0 = %0, %arg1 = %0) -> (f32, f32) : i32{
    %add = arith.addf %arg0, %arg1 : f32
    %mul = arith.mulf %arg0, %arg1 : f32
    scf.yield %add, %mul : f32, f32
  }
  return %result#0, %result#1 : f32, f32
}
// UNROLL-BY-3-LABEL: func @static_loop_unroll_with_integer_iv
//
// UNROLL-BY-3-DAG: %[[CST:.*]] = arith.constant {{.*}} : f32
// UNROLL-BY-3-DAG: %[[C0:.*]] = arith.constant 0 : i32
// UNROLL-BY-3-DAG: %[[C1:.*]] = arith.constant 1 : i32
// UNROLL-BY-3-DAG: %[[C20:.*]] = arith.constant 20 : i32
// UNROLL-BY-3-DAG: %[[C18:.*]] = arith.constant 18 : i32
// UNROLL-BY-3-DAG: %[[C3:.*]] = arith.constant 3 : i32
// UNROLL-BY-3: %[[FOR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]]
// UNROLL-BY-3-SAME: iter_args(%[[ARG0:.*]] = %[[CST]], %[[ARG1:.*]] = %[[CST]]) -> (f32, f32) : i32 {
// UNROLL-BY-3-NEXT:   %[[ADD0:.*]] = arith.addf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT:   %[[MUL0:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
// UNROLL-BY-3-NEXT:   %[[ADD1:.*]] = arith.addf %[[ADD0]], %[[MUL0]] : f32
// UNROLL-BY-3-NEXT:   %[[MUL1:.*]] = arith.mulf %[[ADD0]], %[[MUL0]] : f32
// UNROLL-BY-3-NEXT:   %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[MUL1]] : f32
// UNROLL-BY-3-NEXT:   %[[MUL2:.*]] = arith.mulf %[[ADD1]], %[[MUL1]] : f32
// UNROLL-BY-3-NEXT:   scf.yield %[[ADD2]], %[[MUL2]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3: %[[EFOR:.*]]:2 = scf.for %[[EIV:.*]] = %[[C18]] to %[[C20]] step %[[C1]]
// UNROLL-BY-3-SAME: iter_args(%[[EARG0:.*]] = %[[FOR]]#0, %[[EARG1:.*]] = %[[FOR]]#1) -> (f32, f32) : i32 {
// UNROLL-BY-3-NEXT:   %[[EADD:.*]] = arith.addf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT:   %[[EMUL:.*]] = arith.mulf %[[EARG0]], %[[EARG1]] : f32
// UNROLL-BY-3-NEXT:   scf.yield %[[EADD]], %[[EMUL]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32

// -----

// Test loop unrolling when the yielded value is defined above the loop.
func.func @loop_unroll_static_yield_value_defined_above(%init: i32) {
  %c42 = arith.constant 42 : i32
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  // The second yielded value is %init, which is defined outside the loop.
  // In the expected output the second body copy therefore reads %init
  // directly in place of the second iter_arg, and the yield still passes
  // %init as its second operand.
  %103:2 = scf.for %i = %c0 to %c4 step %c1
      iter_args(%iter1 = %c42, %iter2 = %init) -> (i32, i32) {
    %0 = arith.andi %iter2, %iter1 : i32
    scf.yield %0, %init : i32, i32
  }
  return
}
// UNROLL-OUTER-BY-2-LABEL: @loop_unroll_static_yield_value_defined_above(
// UNROLL-OUTER-BY-2-SAME: %[[INIT:.*]]: i32) {
// UNROLL-OUTER-BY-2-DAG: %[[C42:.*]] = arith.constant 42 : i32
// UNROLL-OUTER-BY-2-DAG: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-OUTER-BY-2-DAG: %[[C4:.*]] = arith.constant 4 : index
// UNROLL-OUTER-BY-2-DAG: %[[C2:.*]] = arith.constant 2 : index
// UNROLL-OUTER-BY-2: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C2]]
// UNROLL-OUTER-BY-2-SAME: iter_args(%[[ITER1:.*]] = %[[C42]],
// UNROLL-OUTER-BY-2-SAME: %[[ITER2:.*]] = %[[INIT]])
// UNROLL-OUTER-BY-2:   %[[SUM:.*]] = arith.andi %[[ITER2]], %[[ITER1]]
// UNROLL-OUTER-BY-2:   %[[SUM1:.*]] = arith.andi %[[INIT]], %[[SUM]]
// UNROLL-OUTER-BY-2:   scf.yield %[[SUM1]], %[[INIT]] : i32, i32
