// RUN: mlir-opt %s -test-scf-pipelining -split-input-file -verify-diagnostics | FileCheck %s
// RUN: mlir-opt %s -test-scf-pipelining=annotate -split-input-file | FileCheck %s --check-prefix ANNOTATE
// RUN: mlir-opt %s -test-scf-pipelining=no-epilogue-peeling -split-input-file | FileCheck %s --check-prefix NOEPILOGUE

// Two-stage schedule over a 4-iteration loop: the load is in stage 0, the
// addf and store are in stage 1. The expected output has one prologue load,
// a kernel loop running from 0 to 3, and one peeled epilogue iteration.
// CHECK-LABEL: simple_pipeline(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[L1:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LARG:.*]] = %[[L0]]) -> (f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[LR:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LR]] : f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L1]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C3]]] : memref<?xf32>
func.func @simple_pipeline(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c4 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}


// -----

// Same schedule as a plain two-stage pipeline, but the stage-0 load and the
// stage-1 addf are each wrapped in an scf.execute_region, so the operands
// nested inside the regions must be remapped as well.
// CHECK-LABEL: simple_pipeline_region(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// Prologue:
// CHECK: %[[L0:.*]] = scf.execute_region
// CHECK-NEXT: memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK: %[[L1:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LARG:.*]] = %[[L0]]) -> (f32) {
// CHECK-NEXT: %[[ADD0:.*]] = scf.execute_region
// CHECK-NEXT: arith.addf %[[LARG]], %{{.*}} : f32
// CHECK: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[LR:.*]] = scf.execute_region
// CHECK-NEXT: memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK: scf.yield %[[LR]] : f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = scf.execute_region
// CHECK-NEXT: arith.addf %[[L1]], %{{.*}} : f32
// CHECK: memref.store %[[ADD1]], %[[R]][%[[C3]]] : memref<?xf32>
func.func @simple_pipeline_region(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c4 step %c1 {

    %A_elem = scf.execute_region -> f32 {
      %A_elem1 = memref.load %A[%i0] : memref<?xf32>
      scf.yield %A_elem1 : f32
    } { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 }

    %A1_elem = scf.execute_region -> f32 {
      %A1_elem1 = arith.addf %A_elem, %cf : f32
      scf.yield %A1_elem1 : f32
    } { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 }

    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// Loop with step 3 and upper bound 11; the load is in stage 0 and its users
// are in stage 2, so two loads are hoisted into the prologue and two
// iterations are peeled into the epilogue.
// CHECK-LABEL: simple_pipeline_step(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK: %[[L1:.*]] = memref.load %[[A]][%[[C3]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[L2:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C5]]
// CHECK-SAME: step %[[C3]] iter_args(%[[LARG0:.*]] = %[[L0]], %[[LARG1:.*]] = %[[L1]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG0]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C6]] : index
// CHECK-NEXT: %[[LR:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LARG1]], %[[LR]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L2]]#0, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C6]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[L2]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C9]]] : memref<?xf32>
func.func @simple_pipeline_step(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c3 = arith.constant 3 : index
  %c11 = arith.constant 11 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c11 step %c3 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// Three-stage schedule (load in stage 0, addf in stage 1, store in stage 2).
// This case is verified under all three run configurations: the default one,
// the annotating one, and the one that disables epilogue peeling.
// CHECK-LABEL: three_stage(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C2]]
// CHECK-SAME: step %[[C1]] iter_args(%[[ADDARG:.*]] = %[[ADD0]],
// CHECK-SAME: %[[LARG:.*]] = %[[L1]]) -> (f32, f32) {
// CHECK-NEXT: memref.store %[[ADDARG]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LARG]], %{{.*}} : f32
// CHECK-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[ADD1]], %[[L3]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: memref.store %[[LR]]#0, %[[R]][%[[C2]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C3]]] : memref<?xf32>

// Prologue:
// ANNOTATE: memref.load {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "prologue"}
// ANNOTATE: memref.load {{.*}} {__test_pipelining_iteration = 1 : i32, __test_pipelining_part = "prologue"}
// Kernel:
// ANNOTATE: scf.for
// ANNOTATE: memref.store {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "kernel"}
// ANNOTATE: arith.addf {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "kernel"}
// ANNOTATE: memref.load {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "kernel"}
// ANNOTATE: scf.yield
// ANNOTATE: }
// Epilogue:
// ANNOTATE: memref.store {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "epilogue"}
// ANNOTATE: arith.addf {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "epilogue"}
// ANNOTATE: memref.store {{.*}} {__test_pipelining_iteration = 1 : i32, __test_pipelining_part = "epilogue"}

// NOEPILOGUE-LABEL: three_stage(
// NOEPILOGUE-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// NOEPILOGUE-DAG: %[[C0:.*]] = arith.constant 0 : index
// NOEPILOGUE-DAG: %[[C1:.*]] = arith.constant 1 : index
// NOEPILOGUE-DAG: %[[C2:.*]] = arith.constant 2 : index
// NOEPILOGUE-DAG: %[[C3:.*]] = arith.constant 3 : index
// NOEPILOGUE-DAG: %[[C4:.*]] = arith.constant 4 : index
// NOEPILOGUE-DAG: %[[CF:.*]] = arith.constant 0.000000e+00 : f32
// Prologue:
// NOEPILOGUE: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// NOEPILOGUE-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// NOEPILOGUE-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// NOEPILOGUE-NEXT: %[[LR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C4]]
// NOEPILOGUE-SAME: step %[[C1]] iter_args(%[[ADDARG:.*]] = %[[ADD0]],
// NOEPILOGUE-SAME: %[[LARG:.*]] = %[[L1]]) -> (f32, f32) {
// NOEPILOGUE-DAG: %[[S0:.*]] = arith.cmpi slt, %[[IV]], %[[C2]] : index
// NOEPILOGUE-DAG: %[[S1:.*]] = arith.cmpi slt, %[[IV]], %[[C3]] : index
// NOEPILOGUE-NEXT: memref.store %[[ADDARG]], %[[R]][%[[IV]]] : memref<?xf32>
// NOEPILOGUE-NEXT: %[[ADD1:.*]] = scf.if %[[S1]] -> (f32) {
// NOEPILOGUE-NEXT: %[[PADD:.*]] = arith.addf %[[LARG]], %{{.*}} : f32
// NOEPILOGUE-NEXT: scf.yield %[[PADD]] : f32
// NOEPILOGUE-NEXT: } else {
// NOEPILOGUE-NEXT: scf.yield %[[CF]] : f32
// NOEPILOGUE-NEXT: }
// NOEPILOGUE-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// NOEPILOGUE-NEXT: %[[L3:.*]] = scf.if %[[S0]] -> (f32) {
// NOEPILOGUE-NEXT: %[[PL:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// NOEPILOGUE-NEXT: scf.yield %[[PL]] : f32
// NOEPILOGUE-NEXT: } else {
// NOEPILOGUE-NEXT: scf.yield %[[CF]] : f32
// NOEPILOGUE-NEXT: }
// NOEPILOGUE-NEXT: scf.yield %[[ADD1]], %[[L3]] : f32, f32
// NOEPILOGUE-NEXT: }
// No epilogue should be generated.
// NOEPILOGUE-NOT: memref.store
// NOEPILOGUE: return

func.func @three_stage(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c4 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// The load is in stage 0 and its users are in stage 4, so four loaded values
// are live across the kernel and are rotated through the loop-carried
// arguments.
// CHECK-LABEL: long_liverange(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[C2]]] : memref<?xf32>
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[C3]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:4 = scf.for %[[IV:.*]] = %[[C0]] to %[[C6]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LA0:.*]] = %[[L0]],
// CHECK-SAME: %[[LA1:.*]] = %[[L1]], %[[LA2:.*]] = %[[L2]],
// CHECK-SAME: %[[LA3:.*]] = %[[L3]]) -> (f32, f32, f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LA0]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV4:.*]] = arith.addi %[[IV]], %[[C4]] : index
// CHECK-NEXT: %[[L4:.*]] = memref.load %[[A]][%[[IV4]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LA1]], %[[LA2]], %[[LA3]], %[[L4]] : f32, f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LR]]#0, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C6]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C7]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD3:.*]] = arith.addf %[[LR]]#2, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD3]], %[[R]][%[[C8]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD4:.*]] = arith.addf %[[LR]]#3, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD4]], %[[R]][%[[C9]]] : memref<?xf32>
func.func @long_liverange(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c10 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 4, __test_pipelining_op_order__ = 0 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 4, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// The stage-0 load has users in two different later stages (the addf in
// stage 1 and the mulf in stage 2); the store is in stage 3.
// CHECK-LABEL: multiple_uses(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L1]], %{{.*}} : f32
// CHECK-NEXT: %[[MUL0:.*]] = arith.mulf %[[ADD0]], %[[L0]] : f32
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[C2]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:4 = scf.for %[[IV:.*]] = %[[C0]] to %[[C7]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LA1:.*]] = %[[L1]],
// CHECK-SAME: %[[LA2:.*]] = %[[L2]], %[[ADDARG1:.*]] = %[[ADD1]],
// CHECK-SAME: %[[MULARG0:.*]] = %[[MUL0]]) -> (f32, f32, f32, f32) {
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LA2]], %{{.*}} : f32
// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[ADDARG1]], %[[LA1]] : f32
// CHECK-NEXT: memref.store %[[MULARG0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV3:.*]] = arith.addi %[[IV]], %[[C3]] : index
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[IV3]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LA2]], %[[L3]], %[[ADD2]], %[[MUL1]] : f32, f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD3:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: %[[MUL2:.*]] = arith.mulf %[[LR]]#2, %[[LR]]#0 : f32
// CHECK-NEXT: memref.store %[[LR]]#3, %[[R]][%[[C7]]] : memref<?xf32>
// CHECK-NEXT: %[[MUL3:.*]] = arith.mulf %[[ADD3]], %[[LR]]#1 : f32
// CHECK-NEXT: memref.store %[[MUL2]], %[[R]][%[[C8]]] : memref<?xf32>
// CHECK-NEXT: memref.store %[[MUL3]], %[[R]][%[[C9]]] : memref<?xf32>
func.func @multiple_uses(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c10 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    %A2_elem = arith.mulf %A1_elem, %A_elem { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 } : f32
    memref.store %A2_elem, %result[%i0] { __test_pipelining_stage__ = 3, __test_pipelining_op_order__ = 2 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// Variant of the multiple-uses case where the stage-2 mulf lives inside an
// scf.execute_region. The mulf nested in each cloned region is verified on
// the line directly following the region op.
// CHECK-LABEL: region_multiple_uses(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L1]], %{{.*}} : f32
// CHECK-NEXT: %[[MUL0:.*]] = scf.execute_region
// CHECK-NEXT: arith.mulf %[[ADD0]], %[[L0]] : f32
// CHECK: %[[L2:.*]] = memref.load %[[A]][%[[C2]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:4 = scf.for %[[IV:.*]] = %[[C0]] to %[[C7]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LA1:.*]] = %[[L1]],
// CHECK-SAME: %[[LA2:.*]] = %[[L2]], %[[ADDARG1:.*]] = %[[ADD1]],
// CHECK-SAME: %[[MULARG0:.*]] = %[[MUL0]]) -> (f32, f32, f32, f32) {
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LA2]], %{{.*}} : f32
// CHECK-NEXT: %[[MUL1:.*]] = scf.execute_region
// CHECK-NEXT: arith.mulf %[[ADDARG1]], %[[LA1]] : f32
// CHECK: memref.store %[[MULARG0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV3:.*]] = arith.addi %[[IV]], %[[C3]] : index
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[IV3]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LA2]], %[[L3]], %[[ADD2]], %[[MUL1]] : f32, f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD3:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: %[[MUL2:.*]] = scf.execute_region
// CHECK-NEXT: arith.mulf %[[LR]]#2, %[[LR]]#0 : f32
// CHECK: memref.store %[[LR]]#3, %[[R]][%[[C7]]] : memref<?xf32>
// CHECK-NEXT: %[[MUL3:.*]] = scf.execute_region
// CHECK-NEXT: arith.mulf %[[ADD3]], %[[LR]]#1 : f32
// CHECK: memref.store %[[MUL2]], %[[R]][%[[C8]]] : memref<?xf32>
// CHECK-NEXT: memref.store %[[MUL3]], %[[R]][%[[C9]]] : memref<?xf32>

func.func @region_multiple_uses(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c10 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    %A2_elem = scf.execute_region -> f32 {
      %A2_elem1 = arith.mulf %A1_elem, %A_elem : f32
      scf.yield %A2_elem1 : f32
    } { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 }
    memref.store %A2_elem, %result[%i0] { __test_pipelining_stage__ = 3, __test_pipelining_op_order__ = 2 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// The source loop already carries a value through iter_args; the stage-1
// addf consumes that loop-carried argument.
// CHECK-LABEL: loop_carried(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 1.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[LARG:.*]] = %[[L0]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG]], %[[C]] : f32
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[ADD0]], %[[L1]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LR]]#1, %[[LR]]#0 : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C0]]] : memref<?xf32>
func.func @loop_carried(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 1 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    scf.yield %A1_elem : f32
  } { __test_pipelining_loop__ }
  memref.store %r, %result[%c0] : memref<?xf32>
  return
}

// -----

// The yielded value is produced by the stage-2 mulf, while the loop-carried
// argument is consumed by the stage-1 addf, i.e. the backedge connects two
// different stages.
// CHECK-LABEL: backedge_different_stage
// CHECK-SAME: (%[[A:.*]]: memref<?xf32>) -> f32 {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 2.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %[[CSTF]] : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[R:.*]]:3 = scf.for %[[IV:.*]] = %[[C0]] to %[[C2]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[ADDARG:.*]] = %[[ADD0]], %[[LARG:.*]] = %[[L1]]) -> (f32, f32, f32) {
// CHECK-NEXT: %[[MUL0:.*]] = arith.mulf %[[ADDARG]], %[[CSTF]] : f32
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LARG]], %[[MUL0]] : f32
// CHECK-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[MUL0]], %[[ADD1]], %[[L2]] : f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[R]]#1, %[[CSTF]] : f32
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[R]]#2, %[[MUL1]] : f32
// CHECK-NEXT: %[[MUL2:.*]] = arith.mulf %[[ADD2]], %[[CSTF]] : f32
// CHECK-NEXT: return %[[MUL2]] : f32
func.func @backedge_different_stage(%A: memref<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 2.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    %A2_elem = arith.mulf %cf, %A1_elem { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
    scf.yield %A2_elem : f32
  } { __test_pipelining_loop__ }
  return %r : f32
}

// -----

// Same backedge-across-stages scenario, with the stage-0 load and the
// stage-1 addf each wrapped in an scf.execute_region.
// CHECK-LABEL: region_backedge_different_stage
// CHECK-SAME: (%[[A:.*]]: memref<?xf32>) -> f32 {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 2.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = scf.execute_region
// CHECK-NEXT: memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK: %[[ADD0:.*]] = scf.execute_region
// CHECK-NEXT: arith.addf %[[L0]], %[[CSTF]] : f32
// CHECK: %[[L1:.*]] = scf.execute_region
// CHECK-NEXT: memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// CHECK: %[[R:.*]]:3 = scf.for %[[IV:.*]] = %[[C0]] to %[[C2]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[ADDARG:.*]] = %[[ADD0]], %[[LARG:.*]] = %[[L1]]) -> (f32, f32, f32) {
// CHECK: %[[MUL0:.*]] = arith.mulf %[[ADDARG]], %[[CSTF]] : f32
// CHECK: %[[ADD1:.*]] = scf.execute_region
// CHECK-NEXT: arith.addf %[[LARG]], %[[MUL0]] : f32
// CHECK: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// CHECK: %[[L2:.*]] = scf.execute_region
// CHECK-NEXT: memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// CHECK: scf.yield %[[MUL0]], %[[ADD1]], %[[L2]] : f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK: %[[MUL1:.*]] = arith.mulf %[[R]]#1, %[[CSTF]] : f32
// CHECK: %[[ADD2:.*]] = scf.execute_region
// CHECK-NEXT: arith.addf %[[R]]#2, %[[MUL1]] : f32
// CHECK: %[[MUL2:.*]] = arith.mulf %[[ADD2]], %[[CSTF]] : f32
// CHECK: return %[[MUL2]] : f32

func.func @region_backedge_different_stage(%A: memref<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 2.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = scf.execute_region -> f32 {
      %A_elem1 = memref.load %A[%i0] : memref<?xf32>
      scf.yield %A_elem1 : f32
    } { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 }
    %A1_elem = scf.execute_region -> f32 {
      %inner = arith.addf %A_elem, %arg0 : f32
      scf.yield %inner : f32
    } { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 }
    %A2_elem = arith.mulf %cf, %A1_elem { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
    scf.yield %A2_elem : f32
  } { __test_pipelining_loop__ }
  return %r : f32
}


// -----

// The producer of the yielded value (mulf) and the consumer of the
// loop-carried argument (addf) are both scheduled in stage 1.
// CHECK-LABEL: backedge_same_stage
// CHECK-SAME: (%[[A:.*]]: memref<?xf32>) -> f32 {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 2.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[R:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[LARG:.*]] = %[[L0]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG]], %[[C]] : f32
// CHECK-NEXT: %[[MUL0:.*]] = arith.mulf %[[ADD0]], %[[CSTF]] : f32
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[MUL0]], %[[L2]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[R]]#1, %[[R]]#0 : f32
// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[ADD1]], %[[CSTF]] : f32
// CHECK-NEXT: return %[[MUL1]] : f32
func.func @backedge_same_stage(%A: memref<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 2.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    %A2_elem = arith.mulf %cf, %A1_elem { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    scf.yield %A2_elem : f32
  } { __test_pipelining_loop__ }
  return %r : f32
}

// -----

// Stage 0 copies subviews of the two inputs into rows of 2x8 buffers;
// stage 1 runs a linalg.generic (an op carrying a region) on the buffer
// views. The generic's body also reads %scalar, a value defined in the loop
// body outside of the generic's own region.
// CHECK: @pipeline_op_with_region(%[[ARG0:.+]]: memref<?xf32>, %[[ARG1:.+]]: memref<?xf32>, %[[ARG2:.+]]: memref<?xf32>, %[[CF:.*]]: f32) {
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 :
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 :
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 :
// CHECK: %[[APRO:.+]] = memref.alloc() :
// CHECK: %[[BPRO:.+]] = memref.alloc() :
// CHECK: %[[ASV0:.+]] = memref.subview %[[ARG0]][%[[C0]]] [8] [1] :
// CHECK: %[[BSV0:.+]] = memref.subview %[[ARG1]][%[[C0]]] [8] [1] :

// Prologue:
// CHECK: %[[PAV0:.+]] = memref.subview %[[APRO]][%[[C0]], 0] [1, 8] [1, 1] :
// CHECK: %[[PBV0:.+]] = memref.subview %[[BPRO]][%[[C0]], 0] [1, 8] [1, 1] :
// CHECK: memref.copy %[[ASV0]], %[[PAV0]] :
// CHECK: memref.copy %[[BSV0]], %[[PBV0]] :

// Kernel:
// CHECK: %[[R:.+]]:2 = scf.for %[[IV:.+]] = %[[C0]] to %[[C3]] step %[[C1]]
// CHECK-SAME: iter_args(%[[IA:.+]] = %[[PAV0]], %[[IB:.+]] = %[[PBV0]])
// CHECK: %[[CV:.+]] = memref.subview %[[ARG2]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) outs(%[[CV]] :
// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]]
// CHECK: %[[ASV:.+]] = memref.subview %[[ARG0]][%[[NEXT]]] [8] [1] :
// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] :
// CHECK: %[[BSV:.+]] = memref.subview %[[ARG1]][%[[NEXT]]] [8] [1] :
// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] :
// CHECK: %[[BUFIDX:.+]] = affine.apply
// CHECK: %[[APROSV:.+]] = memref.subview %[[APRO]][%[[BUFIDX]], 0] [1, 8] [1, 1] :
// CHECK: %[[BPROSV:.+]] = memref.subview %[[BPRO]][%[[BUFIDX]], 0] [1, 8] [1, 1] :
// CHECK: memref.copy %[[ASV]], %[[APROSV]] :
// CHECK: memref.copy %[[BSV]], %[[BPROSV]] :
// CHECK: scf.yield %[[APROSV]], %[[BPROSV]] :
// CHECK: }
// CHECK: %[[CV:.+]] = memref.subview %[[ARG2]][%[[C3]]] [8] [1] :
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) outs(%[[CV]] :


#map = affine_map<(d0)[s0]->(d0 + s0)>
#map1 = affine_map<(d0)->(d0)>
#map2 = affine_map<(d0)->()>
#linalg_attrs = {
  indexing_maps = [
    #map1,
    #map1,
    #map2,
    #map1
  ],
  iterator_types = ["parallel"],
  __test_pipelining_stage__ = 1,
  __test_pipelining_op_order__ = 2
}
func.func @pipeline_op_with_region(%A: memref<?xf32>, %B: memref<?xf32>, %result: memref<?xf32>, %cf: f32) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %a_buf = memref.alloc() : memref<2x8xf32>
  %b_buf = memref.alloc() : memref<2x8xf32>
  scf.for %i0 = %c0 to %c4 step %c1 {
    %A_view = memref.subview %A[%i0][8][1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32> to memref<8xf32, #map>
    %B_view = memref.subview %B[%i0][8][1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 4 } : memref<?xf32> to memref<8xf32, #map>
    %buf_idx = affine.apply affine_map<(d0)->(d0 mod 2)> (%i0)[] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 5 }
    %a_buf_view = memref.subview %a_buf[%buf_idx,0][1,8][1,1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 6 } : memref<2x8xf32> to memref<8xf32, #map>
    %b_buf_view = memref.subview %b_buf[%buf_idx,0][1,8][1,1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 7 } : memref<2x8xf32> to memref<8xf32, #map>
    memref.copy %A_view , %a_buf_view {__test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 8} : memref<8xf32, #map> to memref<8xf32, #map>
    memref.copy %B_view , %b_buf_view {__test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 9} : memref<8xf32, #map> to memref<8xf32, #map>
    %C_view = memref.subview %result[%i0][8][1] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : memref<?xf32> to memref<8xf32, #map>
    %scalar = arith.addf %cf, %cf {__test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1} : f32
    linalg.generic #linalg_attrs ins(%a_buf_view, %b_buf_view, %scalar : memref<8xf32, #map>, memref<8xf32, #map>, f32)
      outs(%C_view: memref<8xf32, #map>) {
      ^bb0(%a: f32, %b: f32, %s: f32, %c: f32):
        %add = arith.addf %a, %b : f32
        %accum = arith.addf %add, %c : f32
        %accum1 = arith.addf %scalar, %accum : f32
        %accum2 = arith.addf %s, %accum1 : f32
        linalg.yield %accum2 : f32
    }
    scf.yield
  } { __test_pipelining_loop__ }
  return
}

// -----

// The requested op order interleaves stage-0 and stage-1 ops (orders 0..4
// alternate between the two stages) in a loop that carries a value through
// iter_args.
// CHECK-LABEL: @backedge_mix_order
// CHECK-SAME: (%[[A:.*]]: memref<?xf32>) -> f32 {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 2.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[R:.*]]:3 = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[ARG1:.*]] = %[[L0]], %[[ARG2:.*]] = %[[L1]]) -> (f32, f32, f32) {
// CHECK-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// CHECK-NEXT: %[[MUL0:.*]] = arith.mulf %[[C]], %[[ARG1]] : f32
// CHECK-NEXT: %[[IV3:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[IV4:.*]] = arith.addi %[[IV3]], %[[C1]] : index
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[IV4]]] : memref<?xf32>
// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[ARG2]], %[[MUL0]] : f32
// CHECK-NEXT: scf.yield %[[MUL1]], %[[L2]], %[[L3]] : f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[R]]#0, %[[R]]#1 : f32
// CHECK-NEXT: %[[MUL2:.*]] = arith.mulf %[[R]]#2, %[[MUL1]] : f32
// CHECK-NEXT: return %[[MUL2]] : f32
func.func @backedge_mix_order(%A: memref<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 2.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 0 } : memref<?xf32>
    %A2_elem = arith.mulf %arg0, %A_elem { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    %i1 = arith.addi %i0, %c1 { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : index
    %A1_elem = memref.load %A[%i1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32>
    %A3_elem = arith.mulf %A1_elem, %A2_elem { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 4 } : f32
    scf.yield %A3_elem : f32
  } { __test_pipelining_loop__ }
  return %r : f32
}

// -----

// The loop-carried index %idx is read by the stage-0 load and addi and also
// by the stage-2 store.
// CHECK-LABEL: @distance_1_use
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// Prologue:
// CHECK: %[[L0:.+]] = memref.load %{{.*}}[%[[C0]]] : memref<?xf32>
// CHECK: %[[L1:.+]] = memref.load %{{.*}}[%[[C1]]] : memref<?xf32>
// CHECK: %[[R:.+]]:5 = scf.for {{.*}} iter_args(%[[IDX0:.+]] = %[[C2]], %[[L2:.+]] = %[[L0]], %[[L3:.+]] = %[[L1]]
// CHECK: %[[L4:.+]] = memref.load %{{.*}}[%[[IDX0]]] : memref<?xf32>
// CHECK: %[[IDX1:.+]] = arith.addi %[[IDX0]], %[[C1]] : index
// CHECK: memref.store %[[L2]]
// CHECK: scf.yield %[[IDX1]], %[[L3]], %[[L4]]
func.func @distance_1_use(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%idx = %c0) -> (index) {
    %A_elem = memref.load %A[%idx] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 0 } : memref<?xf32>
    %idx1 = arith.addi %idx, %c1 { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 1 } : index
    memref.store %A_elem, %result[%idx] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    scf.yield %idx1 : index
  } { __test_pipelining_loop__ }
  return
}

// -----

// NOEPILOGUE-LABEL: stage_0_value_escape(
func.func @stage_0_value_escape(%A: memref<?xf32>, %result: memref<?xf32>, %ub: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %cf = arith.constant 1.0 : f32
// NOEPILOGUE: %[[UB:[^,]+]]: index)
// NOEPILOGUE-DAG: %[[C0:.+]] = arith.constant 0 : index
// NOEPILOGUE-DAG: %[[C1:.+]] = arith.constant 1 : index
// NOEPILOGUE-DAG: %[[CF:.+]] = arith.constant 1.000000e+00
// NOEPILOGUE: %[[CND0:.+]] = arith.cmpi sgt, %[[UB]], %[[C0]]
// NOEPILOGUE: scf.if
// NOEPILOGUE: %[[IF:.+]] = scf.if %[[CND0]]
// NOEPILOGUE: %[[A:.+]] = arith.addf
// NOEPILOGUE: scf.yield %[[A]]
// NOEPILOGUE: %[[S0:.+]] = arith.select %[[CND0]], %[[IF]], %[[CF]]
// NOEPILOGUE: scf.for %[[IV:.+]] = {{.*}} iter_args(%[[ARG:.+]] = %[[S0]],
// NOEPILOGUE: %[[UB_1:.+]] = arith.subi %[[UB]], %[[C1]] : index
// NOEPILOGUE: %[[CND1:.+]] = arith.cmpi slt, %[[IV]], %[[UB_1]] : index
// NOEPILOGUE: %[[S1:.+]] = arith.select %[[CND1]], %{{.+}}, %[[ARG]] : f32
// NOEPILOGUE: scf.yield %[[S1]]
725 %r = scf.for %i0 = %c0 to %ub step %c1 iter_args(%arg0 = %cf) -> (f32) { 726 %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 1 } : memref<?xf32> 727 %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32 728 memref.store %A1_elem, %result[%c0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 2 } : memref<?xf32> 729 scf.yield %A1_elem : f32 730 } { __test_pipelining_loop__ } 731 memref.store %r, %result[%c1] : memref<?xf32> 732 return 733} 734 735// ----- 736 737// NOEPILOGUE-LABEL: dynamic_loop( 738// NOEPILOGUE-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>, %[[LB:.+]]: index, %[[UB:.+]]: index, %[[STEP:.+]]: index) { 739// NOEPILOGUE-DAG: %[[C2:.+]] = arith.constant 2 : index 740// NOEPILOGUE-DAG: %[[CSTF:.+]] = arith.constant 1.000000e+00 : f32 741// Prologue: 742// NOEPILOGUE: %[[P_I0:.+]] = arith.cmpi slt, %[[LB]], %[[UB]] : index 743// NOEPILOGUE: %[[L0:.+]] = scf.if %[[P_I0]] -> (f32) { 744// NOEPILOGUE-NEXT: memref.load %[[A]][%[[LB]]] : memref<?xf32> 745// NOEPILOGUE: %[[IV1:.+]] = arith.addi %[[LB]], %[[STEP]] : index 746// NOEPILOGUE: %[[P_I1:.+]] = arith.cmpi slt, %[[IV1]], %[[UB]] : index 747// NOEPILOGUE: %[[IV1_2:.+]] = arith.addi %[[LB]], %[[STEP]] : index 748// NOEPILOGUE: %[[V0:.+]] = scf.if %[[P_I0]] -> (f32) { 749// NOEPILOGUE-NEXT: arith.addf %[[L0]], %[[CSTF]] : f32 750// NOEPILOGUE: %[[L1:.+]] = scf.if %[[P_I1]] -> (f32) { 751// NOEPILOGUE-NEXT: memref.load %[[A]][%[[IV1_2]]] : memref<?xf32> 752// NOEPILOGUE: scf.for %[[IV2:.+]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[V1:.+]] = %[[V0]], %[[L2:.+]] = %[[L1]]) -> (f32, f32) { 753// NOEPILOGUE-DAG: %[[S2:.+]] = arith.muli %[[STEP]], %[[C2]] : index 754// NOEPILOGUE-DAG: %[[IT2:.+]] = arith.subi %[[UB]], %[[S2]] : index 755// NOEPILOGUE-DAG: %[[P_I2:.+]] = arith.cmpi slt, %[[IV2]], %[[IT2]] : index 756// NOEPILOGUE-DAG: %[[IT3:.+]] = arith.subi %[[UB]], 
%[[STEP]] : index 757// NOEPILOGUE-DAG: %[[P_I3:.+]] = arith.cmpi slt, %[[IV2]], %[[IT3]] : index 758// NOEPILOGUE: memref.store %[[V1]], %[[R]][%[[IV2]]] : memref<?xf32> 759// NOEPILOGUE: %[[V2:.+]] = scf.if %[[P_I3]] -> (f32) { 760// NOEPILOGUE: arith.addf %[[L2]], %[[CSTF]] : f32 761// NOEPILOGUE: %[[IT4:.+]] = arith.muli %[[STEP]], %[[C2]] : index 762// NOEPILOGUE: %[[IV3:.+]] = arith.addi %[[IV2]], %[[IT4]] : index 763// NOEPILOGUE: %[[L3:.+]] = scf.if %[[P_I2]] -> (f32) { 764// NOEPILOGUE: memref.load %[[A]][%[[IV3]]] : memref<?xf32> 765// NOEPILOGUE: scf.yield %[[V2]], %[[L3]] : f32, f32 766 767// Check for predicated epilogue for dynamic loop. 768// CHECK-LABEL: dynamic_loop( 769// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index 770// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index 771// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 772// CHECK-DAG: %[[CM1:.*]] = arith.constant -1 : index 773// CHECK: %[[UBM:.*]] = arith.subi %[[UB:.*]], %{{.*}} 774// CHECK: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %[[LB:.*]] to %[[UBM]] step %[[STEP:.*]] iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) 775// CHECK: memref.store %[[ARG6]], %{{.*}}[%[[ARG5]]] 776// CHECK: %[[ADDF_24:.*]] = arith.addf %[[ARG7]], %{{.*}} 777// CHECK: %[[MULI_25:.*]] = arith.muli %{{.*}}, %{{.*}} 778// CHECK: %[[ADDI_26:.*]] = arith.addi %[[ARG5]], %[[MULI_25]] 779// CHECK: %[[LOAD_27:.*]] = memref.load %{{.*}}[%[[ADDI_26]]] 780// CHECK: scf.yield %[[ADDF_24]], %[[LOAD_27]] 781// CHECK: } 782// CHECK: %[[CMPI_10:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] 783// CHECK: %[[SELECT_11:.*]] = arith.select %[[CMPI_10]], %[[C1]], %[[CM1]] 784// CHECK: %[[SUBI_12:.*]] = arith.subi %[[UB]], %[[LB]] 785// CHECK: %[[ADDI_13:.*]] = arith.addi %[[SUBI_12]], %[[STEP]] 786// CHECK: %[[ADDI_14:.*]] = arith.addi %[[ADDI_13]], %[[SELECT_11]] 787// CHECK: %[[DIVSI_15:.*]] = arith.divsi %[[ADDI_14]], %[[STEP]] 788// CHECK: %[[SUBI_17:.*]] = arith.subi %[[DIVSI_15]], %[[C2]] 789// CHECK: %[[MAXSI_18:.*]] = 
arith.maxsi %[[SUBI_17]], %[[C0]] 790// CHECK: %[[MULI_19:.*]] = arith.muli %[[STEP]], %[[MAXSI_18]] 791// CHECK: %[[ADDI_20:.*]] = arith.addi %[[LB]], %[[MULI_19]] 792// CHECK: %[[ADDI_21:.*]] = arith.addi %[[MAXSI_18]], %[[C1]] 793// CHECK: %[[CMPI_22:.*]] = arith.cmpi sge, %[[DIVSI_15]], %[[C1]] 794// CHECK: %[[MULI_23:.*]] = arith.muli %[[STEP]], %[[ADDI_21]] 795// CHECK: %[[ADDI_24:.*]] = arith.addi %[[LB]], %[[MULI_23]] 796// CHECK: %[[CMPI_25:.*]] = arith.cmpi sge, %[[DIVSI_15]], %[[C2]] 797// CHECK: scf.if %[[CMPI_22]] { 798// CHECK: memref.store %{{.*}}#0, %{{.*}}[%[[ADDI_20]]] 799// CHECK: } else { 800// CHECK: } 801// CHECK: %[[IF_26:.*]] = scf.if %[[CMPI_25]] 802// CHECK: %[[ADDF_27:.*]] = arith.addf %{{.*}}#1, %{{.*}} 803// CHECK: scf.yield %[[ADDF_27]] 804// CHECK: } else { 805// CHECK: scf.yield %{{.*}} 806// CHECK: } 807// CHECK: scf.if %[[CMPI_25]] { 808// CHECK: memref.store %[[IF_26]], %{{.*}}[%[[ADDI_24]]] 809// CHECK: } else { 810// CHECK: } 811// CHECK: return 812func.func @dynamic_loop(%A: memref<?xf32>, %result: memref<?xf32>, %lb: index, %ub: index, %step: index) { 813 %cf = arith.constant 1.0 : f32 814 scf.for %i0 = %lb to %ub step %step { 815 %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32> 816 %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32 817 memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : memref<?xf32> 818 } { __test_pipelining_loop__ } 819 return 820} 821 822// ----- 823 824// NOEPILOGUE-LABEL: func.func @dynamic_loop_result 825// NOEPILOGUE: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) 826// NOEPILOGUE: %[[SUBI_3:.*]] = arith.subi %{{.*}}, %{{.*}} 827// NOEPILOGUE: %[[CMPI_4:.*]] = arith.cmpi slt, %[[ARG5]], %[[SUBI_3]] 828// NOEPILOGUE: %[[ADDF_5:.*]] = arith.addf 
%[[ARG7]], %[[ARG6]] 829// NOEPILOGUE: %[[MULF_6:.*]] = arith.mulf %[[ADDF_5]], %{{.*}} 830// NOEPILOGUE: %[[ADDI_7:.*]] = arith.addi %[[ARG5]], %{{.*}} 831// NOEPILOGUE: %[[IF_8:.*]] = scf.if %[[CMPI_4]] 832// NOEPILOGUE: %[[LOAD_9:.*]] = memref.load %{{.*}}[%[[ADDI_7]]] 833// NOEPILOGUE: scf.yield %[[LOAD_9]] 834// NOEPILOGUE: } else { 835// NOEPILOGUE: scf.yield %{{.*}} 836// NOEPILOGUE: } 837// NOEPILOGUE: scf.yield %[[MULF_6]], %[[IF_8]] 838// NOEPILOGUE: } 839// NOEPILOGUE: memref.store %{{.*}}#0, %{{.*}}[%{{.*}}] 840 841// Check for predicated epilogue for dynamic loop. 842// CHECK-LABEL: func.func @dynamic_loop_result 843// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index 844// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 845// CHECK-DAG: %[[CM1:.*]] = arith.constant -1 : index 846// CHECK-DAG: %[[CF0:.*]] = arith.constant 0.000000e+00 847// CHECK: %[[UBM:.*]] = arith.subi %[[UB:.*]], %{{.*}} 848// CHECK: %{{.*}}:2 = scf.for %[[ARG5:.*]] = %[[LB:.*]] to %[[UBM]] step %[[STEP:.*]] iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}) 849// CHECK: %[[ADDF_13:.*]] = arith.addf %[[ARG7]], %[[ARG6]] 850// CHECK: %[[MULF_14:.*]] = arith.mulf %[[ADDF_13]], %{{.*}} 851// CHECK: %[[ADDI_15:.*]] = arith.addi %[[ARG5]], %{{.*}} 852// CHECK: %[[LOAD_16:.*]] = memref.load %{{.*}}[%[[ADDI_15]]] 853// CHECK: scf.yield %[[MULF_14]], %[[LOAD_16]] 854// CHECK: } 855// CHECK: %[[CMPI_4:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] 856// CHECK: %[[SELECT_5:.*]] = arith.select %[[CMPI_4]], %[[C1]], %[[CM1]] 857// CHECK: %[[SUBI_6:.*]] = arith.subi %[[UB]], %[[LB]] 858// CHECK: %[[ADDI_7:.*]] = arith.addi %[[SUBI_6]], %[[STEP]] 859// CHECK: %[[ADDI_8:.*]] = arith.addi %[[ADDI_7]], %[[SELECT_5]] 860// CHECK: %[[DIVSI_9:.*]] = arith.divsi %[[ADDI_8]], %[[STEP]] 861// CHECK: %[[CMPI_10:.*]] = arith.cmpi sge, %[[DIVSI_9]], %[[C1]] 862// CHECK: %[[IF_11:.*]] = scf.if %[[CMPI_10]] 863// CHECK: %[[ADDF_14:.*]] = arith.addf %{{.*}}#1, %{{.*}}#0 864// CHECK: scf.yield %[[ADDF_14]] 
865// CHECK: } else { 866// CHECK: scf.yield %[[CF0]] 867// CHECK: } 868// CHECK: %[[IF_12:.*]] = scf.if %[[CMPI_10]] 869// CHECK: %[[MULF_14:.*]] = arith.mulf %[[IF_11]], %{{.*}} 870// CHECK: scf.yield %[[MULF_14]] 871// CHECK: } else { 872// CHECK: scf.yield %[[CF0]] 873// CHECK: } 874// CHECK: %[[SELECT_13:.*]] = arith.select %[[CMPI_10]], %[[IF_12]], %{{.*}}#0 875// CHECK: memref.store %[[SELECT_13]], %{{.*}}[%[[C0]]] 876func.func @dynamic_loop_result(%A: memref<?xf32>, %result: memref<?xf32>, %lb: index, %ub: index, %step: index) { 877 %cf0 = arith.constant 1.0 : f32 878 %cf1 = arith.constant 33.0 : f32 879 %cst = arith.constant 0 : index 880 %res:1 = scf.for %i0 = %lb to %ub step %step iter_args (%arg0 = %cf0) -> (f32) { 881 %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32> 882 %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32 883 %A2_elem = arith.mulf %A1_elem, %cf1 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32 884 scf.yield %A2_elem : f32 885 } { __test_pipelining_loop__ } 886 memref.store %res#0, %result[%cst] : memref<?xf32> 887 return 888} 889 890// ----- 891 892// CHECK-LABEL: yield_constant_loop( 893// CHECK-SAME: %[[A:.*]]: memref<?xf32>) -> f32 { 894// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 895// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index 896// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index 897// CHECK-DAG: %[[CST0:.*]] = arith.constant 0.000000e+00 : f32 898// CHECK-DAG: %[[CST2:.*]] = arith.constant 2.000000e+00 : f32 899// Prologue: 900// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32> 901// Kernel: 902// CHECK-NEXT: %[[L1:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]] 903// CHECK-SAME: step %[[C1]] iter_args(%[[ARG0:.*]] = %[[CST2]], %[[ARG1:.*]] = %[[L0]]) -> (f32, f32) { 904// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[ARG1]], %[[ARG0]] : f32 905// CHECK-NEXT: 
%[[MUL0:.*]] = arith.mulf %[[ADD0]], %[[CST0]] : f32 906// CHECK-NEXT: memref.store %[[MUL0]], %[[A]][%[[IV]]] : memref<?xf32> 907// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index 908// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32> 909// CHECK-NEXT: scf.yield %[[CST0]], %[[L2]] : f32 910// CHECK-NEXT: } 911// Epilogue: 912// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L1]]#1, %[[CST0]] : f32 913// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[ADD1]], %[[CST0]] : f32 914// CHECK-NEXT: memref.store %[[MUL1]], %[[A]][%[[C3]]] : memref<?xf32> 915// CHECK-NEXT: return %[[L1]]#0 : f32 916 917func.func @yield_constant_loop(%A: memref<?xf32>) -> f32 { 918 %c0 = arith.constant 0 : index 919 %c1 = arith.constant 1 : index 920 %c4 = arith.constant 4 : index 921 %cf0 = arith.constant 0.0 : f32 922 %cf2 = arith.constant 2.0 : f32 923 %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf2) -> f32 { 924 %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32> 925 %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32 926 %A2_elem = arith.mulf %cf0, %A1_elem { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32 927 memref.store %A2_elem, %A[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 2 } : memref<?xf32> 928 scf.yield %cf0: f32 929 } { __test_pipelining_loop__ } 930 return %r : f32 931} 932 933// ----- 934 935func.func @invalid_schedule(%A: memref<?xf32>, %result: memref<?xf32>) { 936 %c0 = arith.constant 0 : index 937 %c1 = arith.constant 1 : index 938 %c4 = arith.constant 4 : index 939 %cf = arith.constant 1.0 : f32 940 scf.for %i0 = %c0 to %c4 step %c1 { 941 %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32> 942 %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32 943 
// expected-error@+1 {{operation scheduled before its operands}} 944 memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : memref<?xf32> 945 } { __test_pipelining_loop__ } 946 return 947} 948 949// ----- 950 951func.func @invalid_schedule2(%A: memref<?xf32>, %result: memref<?xf32>) { 952 %c0 = arith.constant 0 : index 953 %c1 = arith.constant 1 : index 954 %c4 = arith.constant 4 : index 955 %cf = arith.constant 1.0 : f32 956 %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%idx = %c0) -> (index) { 957 // expected-error@+1 {{operation scheduled before its operands}} 958 %A_elem = memref.load %A[%idx] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 0 } : memref<?xf32> 959 %idx1 = arith.addi %idx, %c1 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : index 960 memref.store %A_elem, %result[%idx] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 2 } : memref<?xf32> 961 scf.yield %idx1 : index 962 } { __test_pipelining_loop__ } 963 return 964} 965 966// ----- 967 968func.func @invalid_schedule3(%A: memref<?xf32>, %result: memref<?xf32>, %ext: index) { 969 %c0 = arith.constant 0 : index 970 %c1 = arith.constant 1 : index 971 %c4 = arith.constant 4 : index 972 %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%idx = %c0) -> (index) { 973 %cnd = arith.cmpi slt, %ext, %c4 { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 0 } : index 974 // expected-error@+1 {{operation scheduled before its operands}} 975 %idx1 = scf.if %cnd -> (index) { 976 %idxinc = arith.addi %idx, %c1 : index 977 scf.yield %idxinc : index 978 } else { 979 scf.yield %idx : index 980 } { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 1 } 981 %A_elem = memref.load %A[%idx1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32> 982 %idx2 = arith.addi %idx1, %c1 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 3 } : index 983 
memref.store %A_elem, %result[%idx1] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 4 } : memref<?xf32> 984 scf.yield %idx2 : index 985 } { __test_pipelining_loop__ } 986 return 987} 988