// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Only check the overall shape of the code and the presence of relevant
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// CHECK-LABEL: @simple_reduction
llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %2 = llvm.load %prv : !llvm.ptr -> f32
        %3 = llvm.fadd %1, %2 : f32
        llvm.store %3, %prv : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// When the same reduction declaration is used several times, its regions
// are translated several times, which shouldn't lead to value/block
// remapping assertions.
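// As a hypothetical C analogue (illustrative only, not part of the test),
// this corresponds to reducing two variables with the same operator:
//
//   float x = 0.0f, y = 0.0f;
//   #pragma omp parallel for reduction(+ : x, y)
//   for (int64_t i = lb; i < ub; i += step) { x += 2.0f; y += 2.0f; }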
// CHECK-LABEL: @reuse_declaration
llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv0, @add_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %3 = llvm.load %prv0 : !llvm.ptr -> f32
        %4 = llvm.fadd %3, %1 : f32
        llvm.store %4, %prv0 : f32, !llvm.ptr
        %5 = llvm.load %prv1 : !llvm.ptr -> f32
        %6 = llvm.fadd %5, %1 : f32
        llvm.store %6, %prv1 : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL2]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: %[[UPDATED2:.+]] = fadd float %[[PARTIAL2]], 2.000000e+00
// CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// It's okay not to reference the reduction variable in the body.
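// In hypothetical C terms, this is a variable that appears in the reduction
// clause but is never updated in the loop body (illustrative only, not part
// of the test):
//
//   float x = 0.0f, y = 0.0f;
//   #pragma omp parallel for reduction(+ : x, y)
//   for (int64_t i = lb; i < ub; i += step) { x += 2.0f; /* y untouched */ }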
// CHECK-LABEL: @missing_omp_reduction
llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv0, @add_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %3 = llvm.load %prv0 : !llvm.ptr -> f32
        %4 = llvm.fadd %3, %1 : f32
        llvm.store %4, %prv0 : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL2]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
// CHECK-NOT: %{{.*}} = load float, ptr %[[PRIVATE2]]
// CHECK-NOT: %{{.*}} = fadd float %[[PARTIAL2]], 2.000000e+00

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// It's okay to refer to the same reduction variable more than once in the
// body.
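// A hypothetical C analogue, updating the same reduction variable twice per
// iteration (illustrative only, not part of the test):
//
//   float x = 0.0f;
//   #pragma omp parallel for reduction(+ : x)
//   for (int64_t i = lb; i < ub; i += step) { x += 2.0f; x += 2.0f; }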
// CHECK-LABEL: @double_reference
llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %2 = llvm.load %prv : !llvm.ptr -> f32
        %3 = llvm.fadd %2, %1 : f32
        llvm.store %3, %prv : f32, !llvm.ptr
        %4 = llvm.load %prv : !llvm.ptr -> f32
        %5 = llvm.fadd %4, %1 : f32
        llvm.store %5, %prv : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

omp.declare_reduction @mul_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(1.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fmul %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}

// CHECK-LABEL: @no_atomic
llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv0, @mul_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %3 = llvm.load %prv0 : !llvm.ptr -> f32
        %4 = llvm.fadd %3, %1 : f32
        llvm.store %4, %prv0 : f32, !llvm.ptr
        %5 = llvm.load %prv1 : !llvm.ptr -> f32
        %6 = llvm.fmul %5, %1 : f32
        llvm.store %6, %prv1 : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
// CHECK: store float 1.000000e+00, ptr %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction not provided.
// CHECK: unreachable

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fmul float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
// CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]
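// Because @mul_f32 declares no atomic region, the atomic codepath for this
// construct degenerates to 'unreachable' (checked above) and only the
// non-atomic path combines the partial results. The single reduction function
// below still handles both variables: an fadd for @add_f32 and an fmul for
// @mul_f32.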
// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fmul float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// CHECK-LABEL: @simple_reduction_parallel
llvm.func @simple_reduction_parallel() {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
    %1 = llvm.mlir.constant(2.0 : f32) : f32
    %2 = llvm.load %prv : !llvm.ptr -> f32
    %3 = llvm.fadd %2, %1 : f32
    llvm.store %3, %prv : f32, !llvm.ptr
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Update of the private variable
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.declare_reduction @add_i32 : i32
init {
^bb0(%arg: i32):
  %0 = llvm.mlir.constant(0 : i32) : i32
  omp.yield (%0 : i32)
}
combiner {
^bb1(%arg0: i32, %arg1: i32):
  %1 = llvm.add %arg0, %arg1 : i32
  omp.yield (%1 : i32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> i32
  llvm.atomicrmw add %arg2, %2 monotonic : !llvm.ptr, i32
  omp.yield
}

// CHECK-LABEL: @parallel_nested_workshare_reduction
llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr

  %lb = llvm.mlir.constant(1 : i64) : i64
  %step = llvm.mlir.constant(1 : i64) : i64

  omp.parallel {
    omp.wsloop reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %ival = llvm.trunc %iv : i64 to i32
        %lprv = llvm.load %prv : !llvm.ptr -> i32
        %add = llvm.add %lprv, %ival : i32
        llvm.store %add, %prv : i32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }

  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
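// __kmpc_fork_call runs the outlined function as the parallel region's
// microtask on every thread of the team, so all of the reduction bookkeeping
// checked below is emitted inside the outlined body.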
// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:[0-9]+]] = alloca i32
// CHECK: store i32 0, ptr %[[PRIVATE]]

// Loop exit:
// CHECK: call void @__kmpc_barrier

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction:
// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
// CHECK: atomicrmw add ptr %{{.*}}, i32 %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: add i32
// CHECK: call void @__kmpc_end_reduce

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = add i32 %[[PARTIAL]], {{.*}}
// CHECK: store i32 %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: add i32

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// CHECK-LABEL: @wsloop_simd_reduction
llvm.func @wsloop_simd_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv1 : !llvm.ptr) {
      omp.simd reduction(@add_f32 %prv1 -> %prv2 : !llvm.ptr) {
        omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
          %1 = llvm.mlir.constant(2.0 : f32) : f32
          %2 = llvm.load %prv2 : !llvm.ptr -> f32
          %3 = llvm.fadd %1, %2 : f32
          llvm.store %3, %prv2 : f32, !llvm.ptr
          omp.yield
        }
      } {omp.composite}
    } {omp.composite}
    omp.terminator
  }
  llvm.return
}

// Same checks as for wsloop reduction, because currently omp.simd is ignored
// in a composite 'do/for simd' construct.
// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
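// A hypothetical C analogue of the composite construct above (illustrative
// only, not part of the test):
//
//   float sum = 0.0f;
//   #pragma omp parallel
//   #pragma omp for simd reduction(+ : sum)
//   for (int64_t i = lb; i < ub; i += step)
//     sum += 2.0f;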
// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float