1 //===-- OpenMP.cpp -- Open MP directive lowering --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "flang/Lower/OpenMP.h" 14 15 #include "ClauseProcessor.h" 16 #include "Clauses.h" 17 #include "DataSharingProcessor.h" 18 #include "Decomposer.h" 19 #include "ReductionProcessor.h" 20 #include "Utils.h" 21 #include "flang/Common/OpenMP-utils.h" 22 #include "flang/Common/idioms.h" 23 #include "flang/Lower/Bridge.h" 24 #include "flang/Lower/ConvertExpr.h" 25 #include "flang/Lower/ConvertVariable.h" 26 #include "flang/Lower/DirectivesCommon.h" 27 #include "flang/Lower/StatementContext.h" 28 #include "flang/Lower/SymbolMap.h" 29 #include "flang/Optimizer/Builder/BoxValue.h" 30 #include "flang/Optimizer/Builder/FIRBuilder.h" 31 #include "flang/Optimizer/Builder/Todo.h" 32 #include "flang/Optimizer/Dialect/FIRType.h" 33 #include "flang/Optimizer/HLFIR/HLFIROps.h" 34 #include "flang/Parser/characters.h" 35 #include "flang/Parser/parse-tree.h" 36 #include "flang/Semantics/openmp-directive-sets.h" 37 #include "flang/Semantics/tools.h" 38 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" 39 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 40 #include "mlir/Transforms/RegionUtils.h" 41 #include "llvm/ADT/STLExtras.h" 42 #include "llvm/Frontend/OpenMP/OMPConstants.h" 43 44 using namespace Fortran::lower::omp; 45 using namespace Fortran::common::openmp; 46 47 //===----------------------------------------------------------------------===// 48 // Code generation helper functions 49 //===----------------------------------------------------------------------===// 50 51 static void genOMPDispatch(lower::AbstractConverter &converter, 52 lower::SymMap &symTable, 53 semantics::SemanticsContext &semaCtx, 54 lower::pft::Evaluation &eval, mlir::Location loc, 55 const ConstructQueue &queue, 56 ConstructQueue::const_iterator item); 57 58 static void processHostEvalClauses(lower::AbstractConverter &converter, 59 semantics::SemanticsContext &semaCtx, 60 lower::StatementContext &stmtCtx, 61 lower::pft::Evaluation &eval, 62 mlir::Location loc); 63 64 namespace { 65 /// Structure holding information that is needed to pass host-evaluated 66 /// information to later lowering stages. 67 class HostEvalInfo { 68 public: 69 // Allow this function access to private members in order to initialize them. 70 friend void ::processHostEvalClauses(lower::AbstractConverter &, 71 semantics::SemanticsContext &, 72 lower::StatementContext &, 73 lower::pft::Evaluation &, 74 mlir::Location); 75 76 /// Fill \c vars with values stored in \c ops. 77 /// 78 /// The order in which values are stored matches the one expected by \see 79 /// bindOperands(). 80 void collectValues(llvm::SmallVectorImpl<mlir::Value> &vars) const { 81 vars.append(ops.loopLowerBounds); 82 vars.append(ops.loopUpperBounds); 83 vars.append(ops.loopSteps); 84 85 if (ops.numTeamsLower) 86 vars.push_back(ops.numTeamsLower); 87 88 if (ops.numTeamsUpper) 89 vars.push_back(ops.numTeamsUpper); 90 91 if (ops.numThreads) 92 vars.push_back(ops.numThreads); 93 94 if (ops.threadLimit) 95 vars.push_back(ops.threadLimit); 96 } 97 98 /// Update \c ops, replacing all values with the corresponding block argument 99 /// in \c args. 100 /// 101 /// The order in which values are stored in \c args is the same as the one 102 /// used by \see collectValues(). 103 void bindOperands(llvm::ArrayRef<mlir::BlockArgument> args) { 104 assert(args.size() == 105 ops.loopLowerBounds.size() + ops.loopUpperBounds.size() + 106 ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) + 107 (ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) + 108 (ops.threadLimit ? 1 : 0) && 109 "invalid block argument list"); 110 int argIndex = 0; 111 for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i) 112 ops.loopLowerBounds[i] = args[argIndex++]; 113 114 for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i) 115 ops.loopUpperBounds[i] = args[argIndex++]; 116 117 for (size_t i = 0; i < ops.loopSteps.size(); ++i) 118 ops.loopSteps[i] = args[argIndex++]; 119 120 if (ops.numTeamsLower) 121 ops.numTeamsLower = args[argIndex++]; 122 123 if (ops.numTeamsUpper) 124 ops.numTeamsUpper = args[argIndex++]; 125 126 if (ops.numThreads) 127 ops.numThreads = args[argIndex++]; 128 129 if (ops.threadLimit) 130 ops.threadLimit = args[argIndex++]; 131 } 132 133 /// Update \p clauseOps and \p ivOut with the corresponding host-evaluated 134 /// values and Fortran symbols, respectively, if they have already been 135 /// initialized but not yet applied. 136 /// 137 /// \returns whether an update was performed. If not, these clauses were not 138 /// evaluated in the host device. 139 bool apply(mlir::omp::LoopNestOperands &clauseOps, 140 llvm::SmallVectorImpl<const semantics::Symbol *> &ivOut) { 141 if (iv.empty() || loopNestApplied) { 142 loopNestApplied = true; 143 return false; 144 } 145 146 loopNestApplied = true; 147 clauseOps.loopLowerBounds = ops.loopLowerBounds; 148 clauseOps.loopUpperBounds = ops.loopUpperBounds; 149 clauseOps.loopSteps = ops.loopSteps; 150 ivOut.append(iv); 151 return true; 152 } 153 154 /// Update \p clauseOps with the corresponding host-evaluated values if they 155 /// have already been initialized but not yet applied. 156 /// 157 /// \returns whether an update was performed. If not, these clauses were not 158 /// evaluated in the host device. 159 bool apply(mlir::omp::ParallelOperands &clauseOps) { 160 if (!ops.numThreads || parallelApplied) { 161 parallelApplied = true; 162 return false; 163 } 164 165 parallelApplied = true; 166 clauseOps.numThreads = ops.numThreads; 167 return true; 168 } 169 170 /// Update \p clauseOps with the corresponding host-evaluated values if they 171 /// have already been initialized. 172 /// 173 /// \returns whether an update was performed. If not, these clauses were not 174 /// evaluated in the host device. 175 bool apply(mlir::omp::TeamsOperands &clauseOps) { 176 if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit) 177 return false; 178 179 clauseOps.numTeamsLower = ops.numTeamsLower; 180 clauseOps.numTeamsUpper = ops.numTeamsUpper; 181 clauseOps.threadLimit = ops.threadLimit; 182 return true; 183 } 184 185 private: 186 mlir::omp::HostEvaluatedOperands ops; 187 llvm::SmallVector<const semantics::Symbol *> iv; 188 bool loopNestApplied = false, parallelApplied = false; 189 }; 190 } // namespace 191 192 /// Stack of \see HostEvalInfo to represent the current nest of \c omp.target 193 /// operations being created. 194 /// 195 /// The current implementation prevents nested 'target' regions from breaking 196 /// the handling of the outer region by keeping a stack of information 197 /// structures, but it will probably still require some further work to support 198 /// reverse offloading. 199 static llvm::SmallVector<HostEvalInfo, 0> hostEvalInfo; 200 201 /// Bind symbols to their corresponding entry block arguments. 202 /// 203 /// The binding will be performed inside of the current block, which does not 204 /// necessarily have to be part of the operation for which the binding is done. 205 /// However, block arguments must be accessible. This enables controlling the 206 /// insertion point of any new MLIR operations related to the binding of 207 /// arguments of a loop wrapper operation. 208 /// 209 /// \param [in] converter - PFT to MLIR conversion interface. 210 /// \param [in] op - owner operation of the block arguments to bind. 211 /// \param [in] args - entry block arguments information for the given 212 /// operation. 213 static void bindEntryBlockArgs(lower::AbstractConverter &converter, 214 mlir::omp::BlockArgOpenMPOpInterface op, 215 const EntryBlockArgs &args) { 216 assert(op != nullptr && "invalid block argument-defining operation"); 217 assert(args.isValid() && "invalid args"); 218 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 219 220 auto bindSingleMapLike = [&converter, 221 &firOpBuilder](const semantics::Symbol &sym, 222 const mlir::BlockArgument &arg) { 223 // Clones the `bounds` placing them inside the entry block and returns 224 // them. 225 auto cloneBound = [&](mlir::Value bound) { 226 if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { 227 mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp()); 228 return clonedOp->getResult(0); 229 } 230 TODO(converter.getCurrentLocation(), 231 "target map-like clause operand unsupported bound type"); 232 }; 233 234 auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) { 235 llvm::SmallVector<mlir::Value> clonedBounds; 236 llvm::transform(bounds, std::back_inserter(clonedBounds), 237 [&](mlir::Value bound) { return cloneBound(bound); }); 238 return clonedBounds; 239 }; 240 241 fir::ExtendedValue extVal = converter.getSymbolExtendedValue(sym); 242 auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType()); 243 if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) { 244 converter.bindSymbol(sym, arg); 245 } else { 246 extVal.match( 247 [&](const fir::BoxValue &v) { 248 converter.bindSymbol(sym, 249 fir::BoxValue(arg, cloneBounds(v.getLBounds()), 250 v.getExplicitParameters(), 251 v.getExplicitExtents())); 252 }, 253 [&](const fir::MutableBoxValue &v) { 254 converter.bindSymbol( 255 sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()), 256 v.getMutableProperties())); 257 }, 258 [&](const fir::ArrayBoxValue &v) { 259 converter.bindSymbol( 260 sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()), 261 cloneBounds(v.getLBounds()), 262 v.getSourceBox())); 263 }, 264 [&](const fir::CharArrayBoxValue &v) { 265 converter.bindSymbol( 266 sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()), 267 cloneBounds(v.getExtents()), 268 cloneBounds(v.getLBounds()))); 269 }, 270 [&](const fir::CharBoxValue &v) { 271 converter.bindSymbol( 272 sym, fir::CharBoxValue(arg, cloneBound(v.getLen()))); 273 }, 274 [&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); }, 275 [&](const auto &) { 276 TODO(converter.getCurrentLocation(), 277 "target map clause operand unsupported type"); 278 }); 279 } 280 }; 281 282 auto bindMapLike = 283 [&bindSingleMapLike](llvm::ArrayRef<const semantics::Symbol *> syms, 284 llvm::ArrayRef<mlir::BlockArgument> args) { 285 // Structure component symbols don't have bindings, and can only be 286 // explicitly mapped individually. If a member is captured implicitly 287 // we map the entirety of the derived type when we find its symbol. 288 llvm::SmallVector<const semantics::Symbol *> processedSyms; 289 llvm::copy_if(syms, std::back_inserter(processedSyms), 290 [](auto *sym) { return !sym->owner().IsDerivedType(); }); 291 292 for (auto [sym, arg] : llvm::zip_equal(processedSyms, args)) 293 bindSingleMapLike(*sym, arg); 294 }; 295 296 auto bindPrivateLike = [&converter, &firOpBuilder]( 297 llvm::ArrayRef<const semantics::Symbol *> syms, 298 llvm::ArrayRef<mlir::Value> vars, 299 llvm::ArrayRef<mlir::BlockArgument> args) { 300 llvm::SmallVector<const semantics::Symbol *> processedSyms; 301 for (auto *sym : syms) { 302 if (const auto *commonDet = 303 sym->detailsIf<semantics::CommonBlockDetails>()) { 304 llvm::transform(commonDet->objects(), std::back_inserter(processedSyms), 305 [&](const auto &mem) { return &*mem; }); 306 } else { 307 processedSyms.push_back(sym); 308 } 309 } 310 311 for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args)) 312 converter.bindSymbol( 313 *sym, 314 hlfir::translateToExtendedValue( 315 var.getLoc(), firOpBuilder, hlfir::Entity{arg}, 316 /*contiguousHint=*/ 317 evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext())) 318 .first); 319 }; 320 321 // Process in clause name alphabetical order to match block arguments order. 322 // Do not bind host_eval variables because they cannot be used inside of the 323 // corresponding region, except for very specific cases handled separately. 324 bindPrivateLike(args.inReduction.syms, args.inReduction.vars, 325 op.getInReductionBlockArgs()); 326 bindMapLike(args.map.syms, op.getMapBlockArgs()); 327 bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs()); 328 bindPrivateLike(args.reduction.syms, args.reduction.vars, 329 op.getReductionBlockArgs()); 330 bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars, 331 op.getTaskReductionBlockArgs()); 332 bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs()); 333 bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs()); 334 } 335 336 /// Get the list of base values that the specified map-like variables point to. 337 /// 338 /// This function must be kept in sync with changes to the `createMapInfoOp` 339 /// utility function, since it must take into account the potential introduction 340 /// of levels of indirection (i.e. intermediate ops). 341 /// 342 /// \param [in] vars - list of values passed to map-like clauses, returned 343 /// by an `omp.map.info` operation. 344 /// \param [out] baseOps - populated with the `var_ptr` values of the 345 /// corresponding defining operations. 346 static void 347 extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars, 348 llvm::SmallVectorImpl<mlir::Value> &baseOps) { 349 llvm::transform(vars, std::back_inserter(baseOps), [](mlir::Value map) { 350 auto mapInfo = map.getDefiningOp<mlir::omp::MapInfoOp>(); 351 assert(mapInfo && "expected all map vars to be defined by omp.map.info"); 352 353 mlir::Value varPtr = mapInfo.getVarPtr(); 354 if (auto boxAddr = varPtr.getDefiningOp<fir::BoxAddrOp>()) 355 return boxAddr.getVal(); 356 357 return varPtr; 358 }); 359 } 360 361 /// Get the directive enumeration value corresponding to the given OpenMP 362 /// construct PFT node. 363 llvm::omp::Directive 364 extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) { 365 return common::visit( 366 common::visitors{ 367 [](const parser::OpenMPAllocatorsConstruct &c) { 368 return llvm::omp::OMPD_allocators; 369 }, 370 [](const parser::OpenMPAtomicConstruct &c) { 371 return llvm::omp::OMPD_atomic; 372 }, 373 [](const parser::OpenMPBlockConstruct &c) { 374 return std::get<parser::OmpBlockDirective>( 375 std::get<parser::OmpBeginBlockDirective>(c.t).t) 376 .v; 377 }, 378 [](const parser::OpenMPCriticalConstruct &c) { 379 return llvm::omp::OMPD_critical; 380 }, 381 [](const parser::OpenMPDeclarativeAllocate &c) { 382 return llvm::omp::OMPD_allocate; 383 }, 384 [](const parser::OpenMPDispatchConstruct &c) { 385 return llvm::omp::OMPD_dispatch; 386 }, 387 [](const parser::OpenMPExecutableAllocate &c) { 388 return llvm::omp::OMPD_allocate; 389 }, 390 [](const parser::OpenMPLoopConstruct &c) { 391 return std::get<parser::OmpLoopDirective>( 392 std::get<parser::OmpBeginLoopDirective>(c.t).t) 393 .v; 394 }, 395 [](const parser::OpenMPSectionConstruct &c) { 396 return llvm::omp::OMPD_section; 397 }, 398 [](const parser::OpenMPSectionsConstruct &c) { 399 return std::get<parser::OmpSectionsDirective>( 400 std::get<parser::OmpBeginSectionsDirective>(c.t).t) 401 .v; 402 }, 403 [](const parser::OpenMPStandaloneConstruct &c) { 404 return common::visit( 405 common::visitors{ 406 [](const parser::OpenMPSimpleStandaloneConstruct &c) { 407 return std::get<parser::OmpSimpleStandaloneDirective>(c.t) 408 .v; 409 }, 410 [](const parser::OpenMPFlushConstruct &c) { 411 return llvm::omp::OMPD_flush; 412 }, 413 [](const parser::OpenMPCancelConstruct &c) { 414 return llvm::omp::OMPD_cancel; 415 }, 416 [](const parser::OpenMPCancellationPointConstruct &c) { 417 return llvm::omp::OMPD_cancellation_point; 418 }, 419 [](const parser::OmpMetadirectiveDirective &c) { 420 return llvm::omp::OMPD_metadirective; 421 }, 422 [](const parser::OpenMPDepobjConstruct &c) { 423 return llvm::omp::OMPD_depobj; 424 }}, 425 c.u); 426 }, 427 [](const parser::OpenMPUtilityConstruct &c) { 428 return common::visit( 429 common::visitors{[](const parser::OmpErrorDirective &c) { 430 return llvm::omp::OMPD_error; 431 }, 432 [](const parser::OmpNothingDirective &c) { 433 return llvm::omp::OMPD_nothing; 434 }}, 435 c.u); 436 }}, 437 ompConstruct.u); 438 } 439 440 /// Populate the global \see hostEvalInfo after processing clauses for the given 441 /// \p eval OpenMP target construct, or nested constructs, if these must be 442 /// evaluated outside of the target region per the spec. 443 /// 444 /// In particular, this will ensure that in 'target teams' and equivalent nested 445 /// constructs, the \c thread_limit and \c num_teams clauses will be evaluated 446 /// in the host. Additionally, loop bounds, steps and the \c num_threads clause 447 /// will also be evaluated in the host if a target SPMD construct is detected 448 /// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting). 449 /// 450 /// The result, stored as a global, is intended to be used to populate the \c 451 /// host_eval operands of the associated \c omp.target operation, and also to be 452 /// checked and used by later lowering steps to populate the corresponding 453 /// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest 454 /// operations. 455 static void processHostEvalClauses(lower::AbstractConverter &converter, 456 semantics::SemanticsContext &semaCtx, 457 lower::StatementContext &stmtCtx, 458 lower::pft::Evaluation &eval, 459 mlir::Location loc) { 460 // Obtain the list of clauses of the given OpenMP block or loop construct 461 // evaluation. Other evaluations passed to this lambda keep `clauses` 462 // unchanged. 463 auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval, 464 List<Clause> &clauses) { 465 const auto *ompEval = eval.getIf<parser::OpenMPConstruct>(); 466 if (!ompEval) 467 return; 468 469 const parser::OmpClauseList *beginClauseList = nullptr; 470 const parser::OmpClauseList *endClauseList = nullptr; 471 common::visit( 472 common::visitors{ 473 [&](const parser::OpenMPBlockConstruct &ompConstruct) { 474 const auto &beginDirective = 475 std::get<parser::OmpBeginBlockDirective>(ompConstruct.t); 476 beginClauseList = 477 &std::get<parser::OmpClauseList>(beginDirective.t); 478 endClauseList = &std::get<parser::OmpClauseList>( 479 std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t); 480 }, 481 [&](const parser::OpenMPLoopConstruct &ompConstruct) { 482 const auto &beginDirective = 483 std::get<parser::OmpBeginLoopDirective>(ompConstruct.t); 484 beginClauseList = 485 &std::get<parser::OmpClauseList>(beginDirective.t); 486 487 if (auto &endDirective = 488 std::get<std::optional<parser::OmpEndLoopDirective>>( 489 ompConstruct.t)) 490 endClauseList = 491 &std::get<parser::OmpClauseList>(endDirective->t); 492 }, 493 [&](const auto &) {}}, 494 ompEval->u); 495 496 assert(beginClauseList && "expected begin directive"); 497 clauses.append(makeClauses(*beginClauseList, semaCtx)); 498 499 if (endClauseList) 500 clauses.append(makeClauses(*endClauseList, semaCtx)); 501 }; 502 503 // Return the directive that is immediately nested inside of the given 504 // `parent` evaluation, if it is its only non-end-statement nested evaluation 505 // and it represents an OpenMP construct. 506 auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent) 507 -> std::optional<llvm::omp::Directive> { 508 if (!parent.hasNestedEvaluations()) 509 return std::nullopt; 510 511 llvm::omp::Directive dir; 512 auto &nested = parent.getFirstNestedEvaluation(); 513 if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>()) 514 dir = extractOmpDirective(*ompEval); 515 else 516 return std::nullopt; 517 518 for (auto &sibling : parent.getNestedEvaluations()) 519 if (&sibling != &nested && !sibling.isEndStmt()) 520 return std::nullopt; 521 522 return dir; 523 }; 524 525 // Process the given evaluation assuming it's part of a 'target' construct or 526 // captured by one, and store results in the global `hostEvalInfo`. 527 std::function<void(lower::pft::Evaluation &, const List<Clause> &)> 528 processEval; 529 processEval = [&](lower::pft::Evaluation &eval, const List<Clause> &clauses) { 530 using namespace llvm::omp; 531 ClauseProcessor cp(converter, semaCtx, clauses); 532 533 // Call `processEval` recursively with the immediately nested evaluation and 534 // its corresponding clauses if there is a single nested evaluation 535 // representing an OpenMP directive that passes the given test. 536 auto processSingleNestedIf = [&](llvm::function_ref<bool(Directive)> test) { 537 std::optional<Directive> nestedDir = extractOnlyOmpNestedDir(eval); 538 if (!nestedDir || !test(*nestedDir)) 539 return; 540 541 lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation(); 542 List<lower::omp::Clause> nestedClauses; 543 extractClauses(nestedEval, nestedClauses); 544 processEval(nestedEval, nestedClauses); 545 }; 546 547 const auto *ompEval = eval.getIf<parser::OpenMPConstruct>(); 548 if (!ompEval) 549 return; 550 551 HostEvalInfo &hostInfo = hostEvalInfo.back(); 552 553 switch (extractOmpDirective(*ompEval)) { 554 // Cases where 'teams' and target SPMD clauses might be present. 555 case OMPD_teams_distribute_parallel_do: 556 case OMPD_teams_distribute_parallel_do_simd: 557 cp.processThreadLimit(stmtCtx, hostInfo.ops); 558 [[fallthrough]]; 559 case OMPD_target_teams_distribute_parallel_do: 560 case OMPD_target_teams_distribute_parallel_do_simd: 561 cp.processNumTeams(stmtCtx, hostInfo.ops); 562 [[fallthrough]]; 563 case OMPD_distribute_parallel_do: 564 case OMPD_distribute_parallel_do_simd: 565 cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv); 566 cp.processNumThreads(stmtCtx, hostInfo.ops); 567 break; 568 569 // Cases where 'teams' clauses might be present, and target SPMD is 570 // possible by looking at nested evaluations. 571 case OMPD_teams: 572 cp.processThreadLimit(stmtCtx, hostInfo.ops); 573 [[fallthrough]]; 574 case OMPD_target_teams: 575 cp.processNumTeams(stmtCtx, hostInfo.ops); 576 processSingleNestedIf([](Directive nestedDir) { 577 return nestedDir == OMPD_distribute_parallel_do || 578 nestedDir == OMPD_distribute_parallel_do_simd; 579 }); 580 break; 581 582 // Cases where only 'teams' host-evaluated clauses might be present. 583 case OMPD_teams_distribute: 584 case OMPD_teams_distribute_simd: 585 cp.processThreadLimit(stmtCtx, hostInfo.ops); 586 [[fallthrough]]; 587 case OMPD_target_teams_distribute: 588 case OMPD_target_teams_distribute_simd: 589 cp.processNumTeams(stmtCtx, hostInfo.ops); 590 break; 591 592 // Standalone 'target' case. 593 case OMPD_target: { 594 processSingleNestedIf( 595 [](Directive nestedDir) { return topTeamsSet.test(nestedDir); }); 596 break; 597 } 598 default: 599 break; 600 } 601 }; 602 603 assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure"); 604 605 const auto *ompEval = eval.getIf<parser::OpenMPConstruct>(); 606 assert(ompEval && 607 llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) && 608 "expected TARGET construct evaluation"); 609 (void)ompEval; 610 611 // Use the whole list of clauses passed to the construct here, rather than the 612 // ones only applied to omp.target. 613 List<lower::omp::Clause> clauses; 614 extractClauses(eval, clauses); 615 processEval(eval, clauses); 616 } 617 618 static lower::pft::Evaluation * 619 getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { 620 // Return the Evaluation of the innermost collapsed loop, or the current one 621 // if there was no COLLAPSE. 622 if (collapseValue == 0) 623 return &eval; 624 625 lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); 626 for (int i = 1; i < collapseValue; i++) { 627 // The nested evaluations should be DoConstructs (i.e. they should form 628 // a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block, 629 // EndDoStmt>. 630 assert(curEval->isA<parser::DoConstruct>()); 631 curEval = &*std::next(curEval->getNestedEvaluations().begin()); 632 } 633 return curEval; 634 } 635 636 static void genNestedEvaluations(lower::AbstractConverter &converter, 637 lower::pft::Evaluation &eval, 638 int collapseValue = 0) { 639 lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue); 640 641 for (lower::pft::Evaluation &e : curEval->getNestedEvaluations()) 642 converter.genEval(e); 643 } 644 645 static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter, 646 fir::FirOpBuilder &firOpBuilder, 647 const semantics::Symbol &sym, 648 const lower::pft::Variable &var, 649 mlir::Location currentLocation) { 650 mlir::Type ty = converter.genType(sym); 651 std::string globalName = converter.mangleName(sym); 652 mlir::StringAttr linkage = firOpBuilder.createInternalLinkage(); 653 fir::GlobalOp global = 654 firOpBuilder.createGlobal(currentLocation, ty, globalName, linkage); 655 656 // Create default initialization for non-character scalar. 657 if (semantics::IsAllocatableOrObjectPointer(&sym)) { 658 mlir::Type baseAddrType = mlir::dyn_cast<fir::BoxType>(ty).getEleTy(); 659 lower::createGlobalInitialization( 660 firOpBuilder, global, [&](fir::FirOpBuilder &b) { 661 mlir::Value nullAddr = 662 b.createNullConstant(currentLocation, baseAddrType); 663 mlir::Value box = 664 b.create<fir::EmboxOp>(currentLocation, ty, nullAddr); 665 b.create<fir::HasValueOp>(currentLocation, box); 666 }); 667 } else { 668 lower::createGlobalInitialization( 669 firOpBuilder, global, [&](fir::FirOpBuilder &b) { 670 mlir::Value undef = b.create<fir::UndefOp>(currentLocation, ty); 671 b.create<fir::HasValueOp>(currentLocation, undef); 672 }); 673 } 674 675 return global; 676 } 677 678 // Get the extended value for \p val by extracting additional variable 679 // information from \p base. 680 static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base, 681 mlir::Value val) { 682 return base.match( 683 [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue { 684 return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {}); 685 }, 686 [&](const auto &) -> fir::ExtendedValue { 687 return fir::substBase(base, val); 688 }); 689 } 690 691 #ifndef NDEBUG 692 static bool isThreadPrivate(lower::SymbolRef sym) { 693 if (const auto *details = sym->detailsIf<semantics::CommonBlockDetails>()) { 694 for (const auto &obj : details->objects()) 695 if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate)) 696 return false; 697 return true; 698 } 699 return sym->test(semantics::Symbol::Flag::OmpThreadprivate); 700 } 701 #endif 702 703 static void threadPrivatizeVars(lower::AbstractConverter &converter, 704 lower::pft::Evaluation &eval) { 705 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 706 mlir::Location currentLocation = converter.getCurrentLocation(); 707 mlir::OpBuilder::InsertionGuard guard(firOpBuilder); 708 firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); 709 710 // If the symbol corresponds to the original ThreadprivateOp, use the symbol 711 // value from that operation to create one ThreadprivateOp copy operation 712 // inside the parallel region. 713 // In some cases, however, the symbol will correspond to the original, 714 // non-threadprivate variable. This can happen, for instance, with a common 715 // block, declared in a separate module, used by a parent procedure and 716 // privatized in its child procedure. 717 auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value { 718 assert(isThreadPrivate(sym)); 719 mlir::Value symValue = converter.getSymbolAddress(sym); 720 mlir::Operation *op = symValue.getDefiningOp(); 721 if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op)) 722 op = declOp.getMemref().getDefiningOp(); 723 if (mlir::isa<mlir::omp::ThreadprivateOp>(op)) 724 symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr(); 725 return firOpBuilder.create<mlir::omp::ThreadprivateOp>( 726 currentLocation, symValue.getType(), symValue); 727 }; 728 729 llvm::SetVector<const semantics::Symbol *> threadprivateSyms; 730 converter.collectSymbolSet(eval, threadprivateSyms, 731 semantics::Symbol::Flag::OmpThreadprivate, 732 /*collectSymbols=*/true, 733 /*collectHostAssociatedSymbols=*/true); 734 std::set<semantics::SourceName> threadprivateSymNames; 735 736 // For a COMMON block, the ThreadprivateOp is generated for itself instead of 737 // its members, so only bind the value of the new copied ThreadprivateOp 738 // inside the parallel region to the common block symbol only once for 739 // multiple members in one COMMON block. 740 llvm::SetVector<const semantics::Symbol *> commonSyms; 741 for (std::size_t i = 0; i < threadprivateSyms.size(); i++) { 742 const semantics::Symbol *sym = threadprivateSyms[i]; 743 mlir::Value symThreadprivateValue; 744 // The variable may be used more than once, and each reference has one 745 // symbol with the same name. Only do once for references of one variable. 746 if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end()) 747 continue; 748 threadprivateSymNames.insert(sym->name()); 749 if (const semantics::Symbol *common = 750 semantics::FindCommonBlockContaining(sym->GetUltimate())) { 751 mlir::Value commonThreadprivateValue; 752 if (commonSyms.contains(common)) { 753 commonThreadprivateValue = converter.getSymbolAddress(*common); 754 } else { 755 commonThreadprivateValue = genThreadprivateOp(*common); 756 converter.bindSymbol(*common, commonThreadprivateValue); 757 commonSyms.insert(common); 758 } 759 symThreadprivateValue = lower::genCommonBlockMember( 760 converter, currentLocation, *sym, commonThreadprivateValue); 761 } else { 762 symThreadprivateValue = genThreadprivateOp(*sym); 763 } 764 765 fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym); 766 fir::ExtendedValue symThreadprivateExv = 767 getExtendedValue(sexv, symThreadprivateValue); 768 converter.bindSymbol(*sym, symThreadprivateExv); 769 } 770 } 771 772 static mlir::Operation * 773 createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, 774 mlir::Location loc, mlir::Value indexVal, 775 const semantics::Symbol *sym) { 776 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 777 mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); 778 firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); 779 780 mlir::Type tempTy = converter.genType(*sym); 781 782 assert(converter.isPresentShallowLookup(*sym) && 783 "Expected symbol to be in symbol table."); 784 785 firOpBuilder.restoreInsertionPoint(insPt); 786 mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); 787 mlir::Operation *storeOp = firOpBuilder.create<fir::StoreOp>( 788 loc, cvtVal, converter.getSymbolAddress(*sym)); 789 return storeOp; 790 } 791 792 // This helper function implements the functionality of "promoting" non-CPTR 793 // arguments of use_device_ptr to use_device_addr arguments (automagic 794 // conversion of use_device_ptr -> use_device_addr in these cases). The way we 795 // do so currently is through the shuffling of operands from the 796 // devicePtrOperands to deviceAddrOperands, as well as the types, locations and 797 // symbols. 798 // 799 // This effectively implements some deprecated OpenMP functionality that some 800 // legacy applications unfortunately depend on (deprecated in specification 801 // version 5.2): 802 // 803 // "If a list item in a use_device_ptr clause is not of type C_PTR, the behavior 804 // is as if the list item appeared in a use_device_addr clause. Support for 805 // such list items in a use_device_ptr clause is deprecated." 806 static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( 807 llvm::SmallVectorImpl<mlir::Value> &useDeviceAddrVars, 808 llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms, 809 llvm::SmallVectorImpl<mlir::Value> &useDevicePtrVars, 810 llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) { 811 // Iterate over our use_device_ptr list and shift all non-cptr arguments into 812 // use_device_addr. 813 auto *varIt = useDevicePtrVars.begin(); 814 auto *symIt = useDevicePtrSyms.begin(); 815 while (varIt != useDevicePtrVars.end()) { 816 if (fir::isa_builtin_cptr_type(fir::unwrapRefType(varIt->getType()))) { 817 ++varIt; 818 ++symIt; 819 continue; 820 } 821 822 useDeviceAddrVars.push_back(*varIt); 823 useDeviceAddrSyms.push_back(*symIt); 824 825 varIt = useDevicePtrVars.erase(varIt); 826 symIt = useDevicePtrSyms.erase(symIt); 827 } 828 } 829 830 /// Extract the list of function and variable symbols affected by the given 831 /// 'declare target' directive and return the intended device type for them. 832 static void getDeclareTargetInfo( 833 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 834 lower::pft::Evaluation &eval, 835 const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, 836 mlir::omp::DeclareTargetOperands &clauseOps, 837 llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) { 838 const auto &spec = 839 std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t); 840 if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) { 841 ObjectList objects{makeObjects(*objectList, semaCtx)}; 842 // Case: declare target(func, var1, var2) 843 gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, 844 symbolAndClause); 845 } else if (const auto *clauseList{ 846 parser::Unwrap<parser::OmpClauseList>(spec.u)}) { 847 List<Clause> clauses = makeClauses(*clauseList, semaCtx); 848 if (clauses.empty()) { 849 Fortran::lower::pft::FunctionLikeUnit *owningProc = 850 eval.getOwningProcedure(); 851 if (owningProc && (!owningProc->isMainProgram() || 852 owningProc->getMainProgramSymbol())) { 853 // Case: declare target, implicit capture of function 854 symbolAndClause.emplace_back(mlir::omp::DeclareTargetCaptureClause::to, 855 owningProc->getSubprogramSymbol()); 856 } 857 } 858 859 ClauseProcessor cp(converter, semaCtx, clauses); 860 cp.processDeviceType(clauseOps); 861 cp.processEnter(symbolAndClause); 862 cp.processLink(symbolAndClause); 863 cp.processTo(symbolAndClause); 864 865 cp.processTODO<clause::Indirect>(converter.getCurrentLocation(), 866 llvm::omp::Directive::OMPD_declare_target); 867 } 868 } 869 870 static void collectDeferredDeclareTargets( 871 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 872 lower::pft::Evaluation &eval, 873 const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, 874 llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo> 875 &deferredDeclareTarget) { 876 mlir::omp::DeclareTargetOperands clauseOps; 877 llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause; 878 getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, 879 clauseOps, symbolAndClause); 880 // Return the device type only if at least one of the targets for the 881 // directive is a function or subroutine 882 mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); 883 884 for (const DeclareTargetCapturePair &symClause : symbolAndClause) { 885 mlir::Operation *op = mod.lookupSymbol( 886 converter.mangleName(std::get<const semantics::Symbol &>(symClause))); 887 888 if (!op) { 889 deferredDeclareTarget.push_back({std::get<0>(symClause), 890 clauseOps.deviceType, 891 std::get<1>(symClause)}); 892 } 893 } 894 } 895 896 static std::optional<mlir::omp::DeclareTargetDeviceType> 897 getDeclareTargetFunctionDevice( 898 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 899 lower::pft::Evaluation &eval, 900 const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { 901 mlir::omp::DeclareTargetOperands clauseOps; 902 llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause; 903 getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, 904 clauseOps, symbolAndClause); 905 906 // Return the device type only if at least one of the targets for the 907 // directive is a function or subroutine 908 mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); 909 for (const DeclareTargetCapturePair &symClause : symbolAndClause) { 910 mlir::Operation *op = mod.lookupSymbol( 911 converter.mangleName(std::get<const semantics::Symbol &>(symClause))); 912 913 if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op)) 914 return clauseOps.deviceType; 915 } 916 917 return std::nullopt; 918 } 919 920 /// Set up the entry block of the given `omp.loop_nest` operation, adding a 921 /// block argument for each loop induction variable and allocating and 922 /// initializing a private value to hold each of them. 923 /// 924 /// This function can also bind the symbols of any variables that should match 925 /// block arguments on parent loop wrapper operations attached to the same 926 /// loop. This allows the introduction of any necessary `hlfir.declare` 927 /// operations inside of the entry block of the `omp.loop_nest` operation and 928 /// not directly under any of the wrappers, which would invalidate them. 929 /// 930 /// \param [in] op - the loop nest operation. 931 /// \param [in] converter - PFT to MLIR conversion interface. 932 /// \param [in] loc - location. 933 /// \param [in] args - symbols of induction variables. 934 /// \param [in] wrapperArgs - list of parent loop wrappers and their associated 935 /// entry block arguments. 936 static void genLoopVars( 937 mlir::Operation *op, lower::AbstractConverter &converter, 938 mlir::Location &loc, llvm::ArrayRef<const semantics::Symbol *> args, 939 llvm::ArrayRef< 940 std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>> 941 wrapperArgs = {}) { 942 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 943 auto ®ion = op->getRegion(0); 944 945 std::size_t loopVarTypeSize = 0; 946 for (const semantics::Symbol *arg : args) 947 loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); 948 mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); 949 llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType); 950 llvm::SmallVector<mlir::Location> locs(args.size(), loc); 951 firOpBuilder.createBlock(®ion, {}, tiv, locs); 952 953 // Update nested wrapper operands if parent wrappers have mapped these values 954 // to block arguments. 955 // 956 // Binding these values earlier would take care of this, but we cannot rely on 957 // that approach because binding in between the creation of a wrapper and the 958 // next one would result in 'hlfir.declare' operations being introduced inside 959 // of a wrapper, which is illegal. 960 mlir::IRMapping mapper; 961 for (auto [argGeneratingOp, blockArgs] : wrapperArgs) { 962 for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands()) 963 operand.set(mapper.lookupOrDefault(operand.get())); 964 965 for (const auto [arg, var] : llvm::zip_equal( 966 argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars())) 967 mapper.map(var, arg); 968 } 969 970 // Bind the entry block arguments of parent wrappers to the corresponding 971 // symbols. 972 for (auto [argGeneratingOp, blockArgs] : wrapperArgs) 973 bindEntryBlockArgs(converter, argGeneratingOp, blockArgs); 974 975 // The argument is not currently in memory, so make a temporary for the 976 // argument, and store it there, then bind that location to the argument. 977 mlir::Operation *storeOp = nullptr; 978 for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { 979 mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); 980 storeOp = 981 createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); 982 } 983 firOpBuilder.setInsertionPointAfter(storeOp); 984 } 985 986 static void 987 markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter, 988 mlir::omp::DeclareTargetCaptureClause captureClause, 989 mlir::omp::DeclareTargetDeviceType deviceType) { 990 // TODO: Add support for program local variables with declare target applied 991 auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op); 992 if (!declareTargetOp) 993 fir::emitFatalError( 994 converter.getCurrentLocation(), 995 "Attempt to apply declare target on unsupported operation"); 996 997 // The function or global already has a declare target applied to it, very 998 // likely through implicit capture (usage in another declare target 999 // function/subroutine). It should be marked as any if it has been assigned 1000 // both host and nohost, else we skip, as there is no change 1001 if (declareTargetOp.isDeclareTarget()) { 1002 if (declareTargetOp.getDeclareTargetDeviceType() != deviceType) 1003 declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, 1004 captureClause); 1005 return; 1006 } 1007 1008 declareTargetOp.setDeclareTarget(deviceType, captureClause); 1009 } 1010 1011 //===----------------------------------------------------------------------===// 1012 // Op body generation helper structures and functions 1013 //===----------------------------------------------------------------------===// 1014 1015 struct OpWithBodyGenInfo { 1016 /// A type for a code-gen callback function. This takes as argument the op for 1017 /// which the code is being generated and returns the arguments of the op's 1018 /// region. 1019 using GenOMPRegionEntryCBFn = 1020 std::function<llvm::SmallVector<const semantics::Symbol *>( 1021 mlir::Operation *)>; 1022 1023 OpWithBodyGenInfo(lower::AbstractConverter &converter, 1024 lower::SymMap &symTable, 1025 semantics::SemanticsContext &semaCtx, mlir::Location loc, 1026 lower::pft::Evaluation &eval, llvm::omp::Directive dir) 1027 : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc), 1028 eval(eval), dir(dir) {} 1029 1030 OpWithBodyGenInfo &setClauses(const List<Clause> *value) { 1031 clauses = value; 1032 return *this; 1033 } 1034 1035 OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { 1036 dsp = value; 1037 return *this; 1038 } 1039 1040 OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { 1041 genRegionEntryCB = value; 1042 return *this; 1043 } 1044 1045 OpWithBodyGenInfo &setGenSkeletonOnly(bool value) { 1046 genSkeletonOnly = value; 1047 return *this; 1048 } 1049 1050 /// [inout] converter to use for the clauses. 1051 lower::AbstractConverter &converter; 1052 /// [in] Symbol table 1053 lower::SymMap &symTable; 1054 /// [in] Semantics context 1055 semantics::SemanticsContext &semaCtx; 1056 /// [in] location in source code. 1057 mlir::Location loc; 1058 /// [in] current PFT node/evaluation. 1059 lower::pft::Evaluation &eval; 1060 /// [in] leaf directive for which to generate the op body. 1061 llvm::omp::Directive dir; 1062 /// [in] list of clauses to process. 1063 const List<Clause> *clauses = nullptr; 1064 /// [in] if provided, processes the construct's data-sharing attributes. 1065 DataSharingProcessor *dsp = nullptr; 1066 /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block 1067 /// is created in the region. 1068 GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; 1069 /// [in] if set to `true`, skip generating nested evaluations and dispatching 1070 /// any further leaf constructs. 1071 bool genSkeletonOnly = false; 1072 }; 1073 1074 /// Create the body (block) for an OpenMP Operation. 1075 /// 1076 /// \param [in] op - the operation the body belongs to. 1077 /// \param [in] info - options controlling code-gen for the construction. 1078 /// \param [in] queue - work queue with nested constructs. 1079 /// \param [in] item - item in the queue to generate body for. 1080 static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, 1081 const ConstructQueue &queue, 1082 ConstructQueue::const_iterator item) { 1083 fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); 1084 1085 auto insertMarker = [](fir::FirOpBuilder &builder) { 1086 mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(), 1087 builder.getIndexType()); 1088 return undef.getDefiningOp(); 1089 }; 1090 1091 // If an argument for the region is provided then create the block with that 1092 // argument. Also update the symbol's address with the mlir argument value. 1093 // e.g. For loops the argument is the induction variable. And all further 1094 // uses of the induction variable should use this mlir value. 1095 auto regionArgs = [&]() -> llvm::SmallVector<const semantics::Symbol *> { 1096 if (info.genRegionEntryCB != nullptr) { 1097 return info.genRegionEntryCB(&op); 1098 } 1099 1100 firOpBuilder.createBlock(&op.getRegion(0)); 1101 return {}; 1102 }(); 1103 // Mark the earliest insertion point. 1104 mlir::Operation *marker = insertMarker(firOpBuilder); 1105 1106 // If it is an unstructured region, create empty blocks for all evaluations. 1107 if (lower::omp::isLastItemInQueue(item, queue) && 1108 info.eval.lowerAsUnstructured()) { 1109 lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>( 1110 firOpBuilder, info.eval.getNestedEvaluations()); 1111 } 1112 1113 // Start with privatization, so that the lowering of the nested 1114 // code will use the right symbols. 1115 bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == 1116 llvm::omp::Association::Loop; 1117 bool privatize = info.clauses; 1118 1119 firOpBuilder.setInsertionPoint(marker); 1120 std::optional<DataSharingProcessor> tempDsp; 1121 if (privatize && !info.dsp) { 1122 tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, 1123 Fortran::lower::omp::isLastItemInQueue(item, queue), 1124 /*useDelayedPrivatization=*/false, info.symTable); 1125 tempDsp->processStep1(); 1126 } 1127 1128 if (info.dir == llvm::omp::Directive::OMPD_parallel) { 1129 threadPrivatizeVars(info.converter, info.eval); 1130 if (info.clauses) { 1131 firOpBuilder.setInsertionPoint(marker); 1132 ClauseProcessor(info.converter, info.semaCtx, *info.clauses) 1133 .processCopyin(); 1134 } 1135 } 1136 1137 if (!info.genSkeletonOnly) { 1138 if (ConstructQueue::const_iterator next = std::next(item); 1139 next != queue.end()) { 1140 genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, 1141 info.loc, queue, next); 1142 } else { 1143 // genFIR(Evaluation&) tries to patch up unterminated blocks, causing 1144 // a lot of complications for our approach if the terminator generation 1145 // is delayed past this point. Insert a temporary terminator here, then 1146 // delete it. 1147 firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back()); 1148 auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); 1149 firOpBuilder.setInsertionPointAfter(marker); 1150 genNestedEvaluations(info.converter, info.eval); 1151 temp->erase(); 1152 } 1153 } 1154 1155 // Get or create a unique exiting block from the given region, or 1156 // return nullptr if there is no exiting block. 1157 auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * { 1158 // Find the blocks where the OMP terminator should go. In simple cases 1159 // it is the single block in the operation's region. When the region 1160 // is more complicated, especially with unstructured control flow, there 1161 // may be multiple blocks, and some of them may have non-OMP terminators 1162 // resulting from lowering of the code contained within the operation. 1163 // All the remaining blocks are potential exit points from the op's region. 1164 // 1165 // Explicit control flow cannot exit any OpenMP region (other than via 1166 // STOP), and that is enforced by semantic checks prior to lowering. STOP 1167 // statements are lowered to a function call. 1168 1169 // Collect unterminated blocks. 1170 llvm::SmallVector<mlir::Block *> exits; 1171 for (mlir::Block &b : region) { 1172 if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>()) 1173 exits.push_back(&b); 1174 } 1175 1176 if (exits.empty()) 1177 return nullptr; 1178 // If there already is a unique exiting block, do not create another one. 1179 // Additionally, some ops (e.g. omp.sections) require only 1 block in 1180 // its region. 1181 if (exits.size() == 1) 1182 return exits[0]; 1183 mlir::Block *exit = firOpBuilder.createBlock(®ion); 1184 for (mlir::Block *b : exits) { 1185 firOpBuilder.setInsertionPointToEnd(b); 1186 firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit); 1187 } 1188 return exit; 1189 }; 1190 1191 if (auto *exitBlock = getUniqueExit(op.getRegion(0))) { 1192 firOpBuilder.setInsertionPointToEnd(exitBlock); 1193 auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); 1194 // Only insert lastprivate code when there actually is an exit block. 1195 // Such a block may not exist if the nested code produced an infinite 1196 // loop (this may not make sense in production code, but a user could 1197 // write that and we should handle it). 1198 firOpBuilder.setInsertionPoint(term); 1199 if (privatize) { 1200 // DataSharingProcessor::processStep2() may create operations before/after 1201 // the one passed as argument. We need to treat loop wrappers and their 1202 // nested loop as a unit, so we need to pass the top level wrapper (if 1203 // present). Otherwise, these operations will be inserted within a 1204 // wrapper region. 1205 mlir::Operation *privatizationTopLevelOp = &op; 1206 if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) { 1207 llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers; 1208 loopNest.gatherWrappers(wrappers); 1209 if (!wrappers.empty()) 1210 privatizationTopLevelOp = &*wrappers.back(); 1211 } 1212 1213 if (!info.dsp) { 1214 assert(tempDsp.has_value()); 1215 tempDsp->processStep2(privatizationTopLevelOp, isLoop); 1216 } else { 1217 if (isLoop && regionArgs.size() > 0) { 1218 for (const auto ®ionArg : regionArgs) { 1219 info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg)); 1220 } 1221 } 1222 info.dsp->processStep2(privatizationTopLevelOp, isLoop); 1223 } 1224 } 1225 } 1226 1227 firOpBuilder.setInsertionPointAfter(marker); 1228 marker->erase(); 1229 } 1230 1231 static void genBodyOfTargetDataOp( 1232 lower::AbstractConverter &converter, lower::SymMap &symTable, 1233 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1234 mlir::omp::TargetDataOp &dataOp, const EntryBlockArgs &args, 1235 const mlir::Location ¤tLocation, const ConstructQueue &queue, 1236 ConstructQueue::const_iterator item) { 1237 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 1238 1239 genEntryBlock(firOpBuilder, args, dataOp.getRegion()); 1240 bindEntryBlockArgs(converter, dataOp, args); 1241 1242 // Insert dummy instruction to remember the insertion position. The 1243 // marker will be deleted by clean up passes since there are no uses. 1244 // Remembering the position for further insertion is important since 1245 // there are hlfir.declares inserted above while setting block arguments 1246 // and new code from the body should be inserted after that. 1247 mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>( 1248 dataOp.getLoc(), firOpBuilder.getIndexType()); 1249 1250 // Create blocks for unstructured regions. This has to be done since 1251 // blocks are initially allocated with the function as the parent region. 1252 if (eval.lowerAsUnstructured()) { 1253 lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>( 1254 firOpBuilder, eval.getNestedEvaluations()); 1255 } 1256 1257 firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation); 1258 1259 // Set the insertion point after the marker. 1260 firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); 1261 1262 if (ConstructQueue::const_iterator next = std::next(item); 1263 next != queue.end()) { 1264 genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, 1265 next); 1266 } else { 1267 genNestedEvaluations(converter, eval); 1268 } 1269 } 1270 1271 // This generates intermediate common block member accesses within a region 1272 // and then rebinds the members symbol to the intermediate accessors we have 1273 // generated so that subsequent code generation will utilise these instead. 1274 // 1275 // When the scope changes, the bindings to the intermediate accessors should 1276 // be dropped in place of the original symbol bindings. 1277 // 1278 // This is for utilisation with TargetOp. 1279 static void genIntermediateCommonBlockAccessors( 1280 Fortran::lower::AbstractConverter &converter, 1281 const mlir::Location ¤tLocation, 1282 llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs, 1283 llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms) { 1284 // Iterate over the symbol list, which will be shorter than the list of 1285 // arguments if new entry block arguments were introduced to implicitly map 1286 // outside values used by the bounds cloned into the target region. In that 1287 // case, the additional block arguments do not need processing here. 1288 for (auto [mapSym, mapArg] : llvm::zip_first(mapSyms, mapBlockArgs)) { 1289 auto *details = mapSym->detailsIf<Fortran::semantics::CommonBlockDetails>(); 1290 if (!details) 1291 continue; 1292 1293 for (auto obj : details->objects()) { 1294 auto targetCBMemberBind = Fortran::lower::genCommonBlockMember( 1295 converter, currentLocation, *obj, mapArg); 1296 fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj); 1297 fir::ExtendedValue targetCBExv = 1298 getExtendedValue(sexv, targetCBMemberBind); 1299 converter.bindSymbol(*obj, targetCBExv); 1300 } 1301 } 1302 } 1303 1304 // This functions creates a block for the body of the targetOp's region. It adds 1305 // all the symbols present in mapSymbols as block arguments to this block. 1306 static void genBodyOfTargetOp( 1307 lower::AbstractConverter &converter, lower::SymMap &symTable, 1308 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1309 mlir::omp::TargetOp &targetOp, const EntryBlockArgs &args, 1310 const mlir::Location ¤tLocation, const ConstructQueue &queue, 1311 ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { 1312 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 1313 auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp); 1314 1315 mlir::Region ®ion = targetOp.getRegion(); 1316 mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region); 1317 bindEntryBlockArgs(converter, targetOp, args); 1318 if (!hostEvalInfo.empty()) 1319 hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs()); 1320 1321 // Check if cloning the bounds introduced any dependency on the outer region. 1322 // If so, then either clone them as well if they are MemoryEffectFree, or else 1323 // copy them to a new temporary and add them to the map and block_argument 1324 // lists and replace their uses with the new temporary. 1325 llvm::SetVector<mlir::Value> valuesDefinedAbove; 1326 mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); 1327 while (!valuesDefinedAbove.empty()) { 1328 for (mlir::Value val : valuesDefinedAbove) { 1329 mlir::Operation *valOp = val.getDefiningOp(); 1330 assert(valOp != nullptr); 1331 1332 // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the 1333 // indices separately, as the alternative is to eventually map the Box, 1334 // which comes with a fairly large overhead comparatively. We could be 1335 // more robust about this and check using a BackwardsSlice to see if we 1336 // run the risk of mapping a box. 1337 if (mlir::isMemoryEffectFree(valOp) && 1338 !mlir::isa<fir::BoxDimsOp>(valOp)) { 1339 mlir::Operation *clonedOp = valOp->clone(); 1340 entryBlock->push_front(clonedOp); 1341 1342 auto replace = [entryBlock](mlir::OpOperand &use) { 1343 return use.getOwner()->getBlock() == entryBlock; 1344 }; 1345 1346 valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace); 1347 valOp->replaceUsesWithIf(clonedOp, replace); 1348 } else { 1349 auto savedIP = firOpBuilder.getInsertionPoint(); 1350 firOpBuilder.setInsertionPointAfter(valOp); 1351 auto copyVal = 1352 firOpBuilder.createTemporary(val.getLoc(), val.getType()); 1353 firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); 1354 1355 fir::factory::AddrAndBoundsInfo info = 1356 fir::factory::getDataOperandBaseAddr( 1357 firOpBuilder, val, /*isOptional=*/false, val.getLoc()); 1358 llvm::SmallVector<mlir::Value> bounds = 1359 fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, 1360 mlir::omp::MapBoundsType>( 1361 firOpBuilder, info, 1362 hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder, 1363 hlfir::Entity{val}) 1364 .first, 1365 /*dataExvIsAssumedSize=*/false, val.getLoc()); 1366 1367 std::stringstream name; 1368 firOpBuilder.setInsertionPoint(targetOp); 1369 1370 llvm::omp::OpenMPOffloadMappingFlags mapFlag = 1371 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; 1372 mlir::omp::VariableCaptureKind captureKind = 1373 mlir::omp::VariableCaptureKind::ByRef; 1374 1375 mlir::Type eleType = copyVal.getType(); 1376 if (auto refType = 1377 mlir::dyn_cast<fir::ReferenceType>(copyVal.getType())) 1378 eleType = refType.getElementType(); 1379 1380 if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { 1381 captureKind = mlir::omp::VariableCaptureKind::ByCopy; 1382 } else if (!fir::isa_builtin_cptr_type(eleType)) { 1383 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; 1384 } 1385 1386 mlir::Value mapOp = createMapInfoOp( 1387 firOpBuilder, copyVal.getLoc(), copyVal, 1388 /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, 1389 /*members=*/llvm::SmallVector<mlir::Value>{}, 1390 /*membersIndex=*/mlir::ArrayAttr{}, 1391 static_cast< 1392 std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( 1393 mapFlag), 1394 captureKind, copyVal.getType()); 1395 1396 // Get the index of the first non-map argument before modifying mapVars, 1397 // then append an element to mapVars and an associated entry block 1398 // argument at that index. 1399 unsigned insertIndex = 1400 argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs(); 1401 targetOp.getMapVarsMutable().append(mapOp); 1402 mlir::Value clonedValArg = region.insertArgument( 1403 insertIndex, copyVal.getType(), copyVal.getLoc()); 1404 1405 firOpBuilder.setInsertionPointToStart(entryBlock); 1406 auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(), 1407 clonedValArg); 1408 val.replaceUsesWithIf(loadOp->getResult(0), 1409 [entryBlock](mlir::OpOperand &use) { 1410 return use.getOwner()->getBlock() == entryBlock; 1411 }); 1412 firOpBuilder.setInsertionPoint(entryBlock, savedIP); 1413 } 1414 } 1415 valuesDefinedAbove.clear(); 1416 mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); 1417 } 1418 1419 // Insert dummy instruction to remember the insertion position. The 1420 // marker will be deleted since there are not uses. 1421 // In the HLFIR flow there are hlfir.declares inserted above while 1422 // setting block arguments. 1423 mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>( 1424 targetOp.getLoc(), firOpBuilder.getIndexType()); 1425 1426 // Create blocks for unstructured regions. This has to be done since 1427 // blocks are initially allocated with the function as the parent region. 1428 if (lower::omp::isLastItemInQueue(item, queue) && 1429 eval.lowerAsUnstructured()) { 1430 lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>( 1431 firOpBuilder, eval.getNestedEvaluations()); 1432 } 1433 1434 firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation); 1435 1436 // Create the insertion point after the marker. 1437 firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); 1438 1439 // If we map a common block using it's symbol e.g. map(tofrom: /common_block/) 1440 // and accessing its members within the target region, there is a large 1441 // chance we will end up with uses external to the region accessing the common 1442 // resolve these, we do so by generating new common block member accesses 1443 // within the region, binding them to the member symbol for the scope of the 1444 // region so that subsequent code generation within the region will utilise 1445 // our new member accesses we have created. 1446 genIntermediateCommonBlockAccessors( 1447 converter, currentLocation, argIface.getMapBlockArgs(), args.map.syms); 1448 1449 if (ConstructQueue::const_iterator next = std::next(item); 1450 next != queue.end()) { 1451 genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, 1452 next); 1453 } else { 1454 genNestedEvaluations(converter, eval); 1455 } 1456 1457 dsp.processStep2(targetOp, /*isLoop=*/false); 1458 } 1459 1460 template <typename OpTy, typename... Args> 1461 static OpTy genOpWithBody(const OpWithBodyGenInfo &info, 1462 const ConstructQueue &queue, 1463 ConstructQueue::const_iterator item, Args &&...args) { 1464 auto op = info.converter.getFirOpBuilder().create<OpTy>( 1465 info.loc, std::forward<Args>(args)...); 1466 createBodyOfOp(*op, info, queue, item); 1467 return op; 1468 } 1469 1470 template <typename OpTy, typename ClauseOpsTy> 1471 static OpTy genWrapperOp(lower::AbstractConverter &converter, 1472 mlir::Location loc, const ClauseOpsTy &clauseOps, 1473 const EntryBlockArgs &args) { 1474 static_assert( 1475 OpTy::template hasTrait<mlir::omp::LoopWrapperInterface::Trait>(), 1476 "expected a loop wrapper"); 1477 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 1478 1479 // Create wrapper. 1480 auto op = firOpBuilder.create<OpTy>(loc, clauseOps); 1481 1482 // Create entry block with arguments. 1483 genEntryBlock(firOpBuilder, args, op.getRegion()); 1484 1485 return op; 1486 } 1487 1488 //===----------------------------------------------------------------------===// 1489 // Code generation functions for clauses 1490 //===----------------------------------------------------------------------===// 1491 1492 static void genCriticalDeclareClauses( 1493 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1494 const List<Clause> &clauses, mlir::Location loc, 1495 mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) { 1496 ClauseProcessor cp(converter, semaCtx, clauses); 1497 cp.processHint(clauseOps); 1498 clauseOps.symName = 1499 mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name); 1500 } 1501 1502 static void genDistributeClauses(lower::AbstractConverter &converter, 1503 semantics::SemanticsContext &semaCtx, 1504 lower::StatementContext &stmtCtx, 1505 const List<Clause> &clauses, 1506 mlir::Location loc, 1507 mlir::omp::DistributeOperands &clauseOps) { 1508 ClauseProcessor cp(converter, semaCtx, clauses); 1509 cp.processAllocate(clauseOps); 1510 cp.processDistSchedule(stmtCtx, clauseOps); 1511 cp.processOrder(clauseOps); 1512 } 1513 1514 static void genFlushClauses(lower::AbstractConverter &converter, 1515 semantics::SemanticsContext &semaCtx, 1516 const ObjectList &objects, 1517 const List<Clause> &clauses, mlir::Location loc, 1518 llvm::SmallVectorImpl<mlir::Value> &operandRange) { 1519 if (!objects.empty()) 1520 genObjectList(objects, converter, operandRange); 1521 1522 ClauseProcessor cp(converter, semaCtx, clauses); 1523 cp.processTODO<clause::AcqRel, clause::Acquire, clause::Release, 1524 clause::SeqCst>(loc, llvm::omp::OMPD_flush); 1525 } 1526 1527 static void 1528 genLoopNestClauses(lower::AbstractConverter &converter, 1529 semantics::SemanticsContext &semaCtx, 1530 lower::pft::Evaluation &eval, const List<Clause> &clauses, 1531 mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, 1532 llvm::SmallVectorImpl<const semantics::Symbol *> &iv) { 1533 ClauseProcessor cp(converter, semaCtx, clauses); 1534 1535 if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv)) 1536 cp.processCollapse(loc, eval, clauseOps, iv); 1537 1538 clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); 1539 } 1540 1541 static void genLoopClauses( 1542 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1543 const List<Clause> &clauses, mlir::Location loc, 1544 mlir::omp::LoopOperands &clauseOps, 1545 llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) { 1546 ClauseProcessor cp(converter, semaCtx, clauses); 1547 cp.processBind(clauseOps); 1548 cp.processOrder(clauseOps); 1549 cp.processReduction(loc, clauseOps, reductionSyms); 1550 cp.processTODO<clause::Lastprivate>(loc, llvm::omp::Directive::OMPD_loop); 1551 } 1552 1553 static void genMaskedClauses(lower::AbstractConverter &converter, 1554 semantics::SemanticsContext &semaCtx, 1555 lower::StatementContext &stmtCtx, 1556 const List<Clause> &clauses, mlir::Location loc, 1557 mlir::omp::MaskedOperands &clauseOps) { 1558 ClauseProcessor cp(converter, semaCtx, clauses); 1559 cp.processFilter(stmtCtx, clauseOps); 1560 } 1561 1562 static void 1563 genOrderedRegionClauses(lower::AbstractConverter &converter, 1564 semantics::SemanticsContext &semaCtx, 1565 const List<Clause> &clauses, mlir::Location loc, 1566 mlir::omp::OrderedRegionOperands &clauseOps) { 1567 ClauseProcessor cp(converter, semaCtx, clauses); 1568 cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered); 1569 } 1570 1571 static void genParallelClauses( 1572 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1573 lower::StatementContext &stmtCtx, const List<Clause> &clauses, 1574 mlir::Location loc, mlir::omp::ParallelOperands &clauseOps, 1575 llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) { 1576 ClauseProcessor cp(converter, semaCtx, clauses); 1577 cp.processAllocate(clauseOps); 1578 cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); 1579 1580 if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) 1581 cp.processNumThreads(stmtCtx, clauseOps); 1582 1583 cp.processProcBind(clauseOps); 1584 cp.processReduction(loc, clauseOps, reductionSyms); 1585 } 1586 1587 static void genSectionsClauses( 1588 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1589 const List<Clause> &clauses, mlir::Location loc, 1590 mlir::omp::SectionsOperands &clauseOps, 1591 llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) { 1592 ClauseProcessor cp(converter, semaCtx, clauses); 1593 cp.processAllocate(clauseOps); 1594 cp.processNowait(clauseOps); 1595 cp.processReduction(loc, clauseOps, reductionSyms); 1596 // TODO Support delayed privatization. 1597 } 1598 1599 static void genSimdClauses( 1600 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1601 const List<Clause> &clauses, mlir::Location loc, 1602 mlir::omp::SimdOperands &clauseOps, 1603 llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) { 1604 ClauseProcessor cp(converter, semaCtx, clauses); 1605 cp.processAligned(clauseOps); 1606 cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); 1607 cp.processNontemporal(clauseOps); 1608 cp.processOrder(clauseOps); 1609 cp.processReduction(loc, clauseOps, reductionSyms); 1610 cp.processSafelen(clauseOps); 1611 cp.processSimdlen(clauseOps); 1612 1613 cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd); 1614 } 1615 1616 static void genSingleClauses(lower::AbstractConverter &converter, 1617 semantics::SemanticsContext &semaCtx, 1618 const List<Clause> &clauses, mlir::Location loc, 1619 mlir::omp::SingleOperands &clauseOps) { 1620 ClauseProcessor cp(converter, semaCtx, clauses); 1621 cp.processAllocate(clauseOps); 1622 cp.processCopyprivate(loc, clauseOps); 1623 cp.processNowait(clauseOps); 1624 // TODO Support delayed privatization. 1625 } 1626 1627 static void genTargetClauses( 1628 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1629 lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval, 1630 const List<Clause> &clauses, mlir::Location loc, 1631 mlir::omp::TargetOperands &clauseOps, 1632 llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceAddrSyms, 1633 llvm::SmallVectorImpl<const semantics::Symbol *> &isDevicePtrSyms, 1634 llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms) { 1635 ClauseProcessor cp(converter, semaCtx, clauses); 1636 cp.processBare(clauseOps); 1637 cp.processDepend(clauseOps); 1638 cp.processDevice(stmtCtx, clauseOps); 1639 cp.processHasDeviceAddr(clauseOps, hasDeviceAddrSyms); 1640 if (!hostEvalInfo.empty()) { 1641 // Only process host_eval if compiling for the host device. 1642 processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc); 1643 hostEvalInfo.back().collectValues(clauseOps.hostEvalVars); 1644 } 1645 cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); 1646 cp.processIsDevicePtr(clauseOps, isDevicePtrSyms); 1647 cp.processMap(loc, stmtCtx, clauseOps, &mapSyms); 1648 cp.processNowait(clauseOps); 1649 cp.processThreadLimit(stmtCtx, clauseOps); 1650 1651 cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate, 1652 clause::InReduction, clause::UsesAllocators>( 1653 loc, llvm::omp::Directive::OMPD_target); 1654 1655 // `target private(..)` is only supported in delayed privatization mode. 1656 if (!enableDelayedPrivatizationStaging) 1657 cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target); 1658 } 1659 1660 static void genTargetDataClauses( 1661 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1662 lower::StatementContext &stmtCtx, const List<Clause> &clauses, 1663 mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps, 1664 llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms, 1665 llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) { 1666 ClauseProcessor cp(converter, semaCtx, clauses); 1667 cp.processDevice(stmtCtx, clauseOps); 1668 cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps); 1669 cp.processMap(loc, stmtCtx, clauseOps); 1670 cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceAddrSyms); 1671 cp.processUseDevicePtr(stmtCtx, clauseOps, useDevicePtrSyms); 1672 1673 // This function implements the deprecated functionality of use_device_ptr 1674 // that allows users to provide non-CPTR arguments to it with the caveat 1675 // that the compiler will treat them as use_device_addr. A lot of legacy 1676 // code may still depend on this functionality, so we should support it 1677 // in some manner. We do so currently by simply shifting non-cptr operands 1678 // from the use_device_ptr lists into the use_device_addr lists. 1679 // TODO: Perhaps create a user provideable compiler option that will 1680 // re-introduce a hard-error rather than a warning in these cases. 1681 promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( 1682 clauseOps.useDeviceAddrVars, useDeviceAddrSyms, 1683 clauseOps.useDevicePtrVars, useDevicePtrSyms); 1684 } 1685 1686 static void genTargetEnterExitUpdateDataClauses( 1687 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1688 lower::StatementContext &stmtCtx, const List<Clause> &clauses, 1689 mlir::Location loc, llvm::omp::Directive directive, 1690 mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) { 1691 ClauseProcessor cp(converter, semaCtx, clauses); 1692 cp.processDepend(clauseOps); 1693 cp.processDevice(stmtCtx, clauseOps); 1694 cp.processIf(directive, clauseOps); 1695 1696 if (directive == llvm::omp::Directive::OMPD_target_update) 1697 cp.processMotionClauses(stmtCtx, clauseOps); 1698 else 1699 cp.processMap(loc, stmtCtx, clauseOps); 1700 1701 cp.processNowait(clauseOps); 1702 } 1703 1704 static void genTaskClauses(lower::AbstractConverter &converter, 1705 semantics::SemanticsContext &semaCtx, 1706 lower::StatementContext &stmtCtx, 1707 const List<Clause> &clauses, mlir::Location loc, 1708 mlir::omp::TaskOperands &clauseOps) { 1709 ClauseProcessor cp(converter, semaCtx, clauses); 1710 cp.processAllocate(clauseOps); 1711 cp.processDepend(clauseOps); 1712 cp.processFinal(stmtCtx, clauseOps); 1713 cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps); 1714 cp.processMergeable(clauseOps); 1715 cp.processPriority(stmtCtx, clauseOps); 1716 cp.processUntied(clauseOps); 1717 cp.processDetach(clauseOps); 1718 // TODO Support delayed privatization. 1719 1720 cp.processTODO<clause::Affinity, clause::InReduction>( 1721 loc, llvm::omp::Directive::OMPD_task); 1722 } 1723 1724 static void genTaskgroupClauses(lower::AbstractConverter &converter, 1725 semantics::SemanticsContext &semaCtx, 1726 const List<Clause> &clauses, mlir::Location loc, 1727 mlir::omp::TaskgroupOperands &clauseOps) { 1728 ClauseProcessor cp(converter, semaCtx, clauses); 1729 cp.processAllocate(clauseOps); 1730 cp.processTODO<clause::TaskReduction>(loc, 1731 llvm::omp::Directive::OMPD_taskgroup); 1732 } 1733 1734 static void genTaskwaitClauses(lower::AbstractConverter &converter, 1735 semantics::SemanticsContext &semaCtx, 1736 const List<Clause> &clauses, mlir::Location loc, 1737 mlir::omp::TaskwaitOperands &clauseOps) { 1738 ClauseProcessor cp(converter, semaCtx, clauses); 1739 cp.processTODO<clause::Depend, clause::Nowait>( 1740 loc, llvm::omp::Directive::OMPD_taskwait); 1741 } 1742 1743 static void genWorkshareClauses(lower::AbstractConverter &converter, 1744 semantics::SemanticsContext &semaCtx, 1745 lower::StatementContext &stmtCtx, 1746 const List<Clause> &clauses, mlir::Location loc, 1747 mlir::omp::WorkshareOperands &clauseOps) { 1748 ClauseProcessor cp(converter, semaCtx, clauses); 1749 cp.processNowait(clauseOps); 1750 } 1751 1752 static void genTeamsClauses( 1753 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1754 lower::StatementContext &stmtCtx, const List<Clause> &clauses, 1755 mlir::Location loc, mlir::omp::TeamsOperands &clauseOps, 1756 llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) { 1757 ClauseProcessor cp(converter, semaCtx, clauses); 1758 cp.processAllocate(clauseOps); 1759 cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); 1760 1761 if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) { 1762 cp.processNumTeams(stmtCtx, clauseOps); 1763 cp.processThreadLimit(stmtCtx, clauseOps); 1764 } 1765 1766 cp.processReduction(loc, clauseOps, reductionSyms); 1767 // TODO Support delayed privatization. 1768 } 1769 1770 static void genWsloopClauses( 1771 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 1772 lower::StatementContext &stmtCtx, const List<Clause> &clauses, 1773 mlir::Location loc, mlir::omp::WsloopOperands &clauseOps, 1774 llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) { 1775 ClauseProcessor cp(converter, semaCtx, clauses); 1776 cp.processNowait(clauseOps); 1777 cp.processOrder(clauseOps); 1778 cp.processOrdered(clauseOps); 1779 cp.processReduction(loc, clauseOps, reductionSyms); 1780 cp.processSchedule(stmtCtx, clauseOps); 1781 1782 cp.processTODO<clause::Allocate, clause::Linear>( 1783 loc, llvm::omp::Directive::OMPD_do); 1784 } 1785 1786 //===----------------------------------------------------------------------===// 1787 // Code generation functions for leaf constructs 1788 //===----------------------------------------------------------------------===// 1789 1790 static mlir::omp::BarrierOp 1791 genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1792 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1793 mlir::Location loc, const ConstructQueue &queue, 1794 ConstructQueue::const_iterator item) { 1795 return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc); 1796 } 1797 1798 static mlir::omp::CriticalOp 1799 genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1800 semantics::SemanticsContext &semaCtx, 1801 lower::pft::Evaluation &eval, mlir::Location loc, 1802 const ConstructQueue &queue, ConstructQueue::const_iterator item, 1803 const std::optional<parser::Name> &name) { 1804 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 1805 mlir::FlatSymbolRefAttr nameAttr; 1806 1807 if (name) { 1808 std::string nameStr = name->ToString(); 1809 mlir::ModuleOp mod = firOpBuilder.getModule(); 1810 auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr); 1811 if (!global) { 1812 mlir::omp::CriticalDeclareOperands clauseOps; 1813 genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc, 1814 clauseOps, nameStr); 1815 1816 mlir::OpBuilder modBuilder(mod.getBodyRegion()); 1817 global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps); 1818 } 1819 nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), 1820 global.getSymName()); 1821 } 1822 1823 return genOpWithBody<mlir::omp::CriticalOp>( 1824 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 1825 llvm::omp::Directive::OMPD_critical), 1826 queue, item, nameAttr); 1827 } 1828 1829 static mlir::omp::FlushOp 1830 genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1831 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1832 mlir::Location loc, const ObjectList &objects, 1833 const ConstructQueue &queue, ConstructQueue::const_iterator item) { 1834 llvm::SmallVector<mlir::Value> operandRange; 1835 genFlushClauses(converter, semaCtx, objects, item->clauses, loc, 1836 operandRange); 1837 1838 return converter.getFirOpBuilder().create<mlir::omp::FlushOp>( 1839 converter.getCurrentLocation(), operandRange); 1840 } 1841 1842 static mlir::omp::LoopNestOp genLoopNestOp( 1843 lower::AbstractConverter &converter, lower::SymMap &symTable, 1844 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1845 mlir::Location loc, const ConstructQueue &queue, 1846 ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps, 1847 llvm::ArrayRef<const semantics::Symbol *> iv, 1848 llvm::ArrayRef< 1849 std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>> 1850 wrapperArgs, 1851 llvm::omp::Directive directive, DataSharingProcessor &dsp) { 1852 auto ivCallback = [&](mlir::Operation *op) { 1853 genLoopVars(op, converter, loc, iv, wrapperArgs); 1854 return llvm::SmallVector<const semantics::Symbol *>(iv); 1855 }; 1856 1857 auto *nestedEval = 1858 getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); 1859 1860 return genOpWithBody<mlir::omp::LoopNestOp>( 1861 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, 1862 directive) 1863 .setClauses(&item->clauses) 1864 .setDataSharingProcessor(&dsp) 1865 .setGenRegionEntryCb(ivCallback), 1866 queue, item, clauseOps); 1867 } 1868 1869 static void genLoopOp(lower::AbstractConverter &converter, 1870 lower::SymMap &symTable, 1871 semantics::SemanticsContext &semaCtx, 1872 lower::pft::Evaluation &eval, mlir::Location loc, 1873 const ConstructQueue &queue, 1874 ConstructQueue::const_iterator item) { 1875 mlir::omp::LoopOperands loopClauseOps; 1876 llvm::SmallVector<const semantics::Symbol *> loopReductionSyms; 1877 genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, 1878 loopReductionSyms); 1879 1880 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 1881 /*shouldCollectPreDeterminedSymbols=*/true, 1882 /*useDelayedPrivatization=*/true, symTable); 1883 dsp.processStep1(&loopClauseOps); 1884 1885 mlir::omp::LoopNestOperands loopNestClauseOps; 1886 llvm::SmallVector<const semantics::Symbol *> iv; 1887 genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, 1888 loopNestClauseOps, iv); 1889 1890 EntryBlockArgs loopArgs; 1891 loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); 1892 loopArgs.priv.vars = loopClauseOps.privateVars; 1893 loopArgs.reduction.syms = loopReductionSyms; 1894 loopArgs.reduction.vars = loopClauseOps.reductionVars; 1895 1896 auto loopOp = 1897 genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs); 1898 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, 1899 loopNestClauseOps, iv, {{loopOp, loopArgs}}, 1900 llvm::omp::Directive::OMPD_loop, dsp); 1901 } 1902 1903 static mlir::omp::MaskedOp 1904 genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1905 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1906 mlir::Location loc, const ConstructQueue &queue, 1907 ConstructQueue::const_iterator item) { 1908 lower::StatementContext stmtCtx; 1909 mlir::omp::MaskedOperands clauseOps; 1910 genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); 1911 1912 return genOpWithBody<mlir::omp::MaskedOp>( 1913 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 1914 llvm::omp::Directive::OMPD_masked), 1915 queue, item, clauseOps); 1916 } 1917 1918 static mlir::omp::MasterOp 1919 genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1920 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1921 mlir::Location loc, const ConstructQueue &queue, 1922 ConstructQueue::const_iterator item) { 1923 return genOpWithBody<mlir::omp::MasterOp>( 1924 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 1925 llvm::omp::Directive::OMPD_master), 1926 queue, item); 1927 } 1928 1929 static mlir::omp::OrderedOp 1930 genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1931 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 1932 mlir::Location loc, const ConstructQueue &queue, 1933 ConstructQueue::const_iterator item) { 1934 TODO(loc, "OMPD_ordered"); 1935 return nullptr; 1936 } 1937 1938 static mlir::omp::OrderedRegionOp 1939 genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1940 semantics::SemanticsContext &semaCtx, 1941 lower::pft::Evaluation &eval, mlir::Location loc, 1942 const ConstructQueue &queue, 1943 ConstructQueue::const_iterator item) { 1944 mlir::omp::OrderedRegionOperands clauseOps; 1945 genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps); 1946 1947 return genOpWithBody<mlir::omp::OrderedRegionOp>( 1948 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 1949 llvm::omp::Directive::OMPD_ordered), 1950 queue, item, clauseOps); 1951 } 1952 1953 static mlir::omp::ParallelOp 1954 genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1955 semantics::SemanticsContext &semaCtx, 1956 lower::pft::Evaluation &eval, mlir::Location loc, 1957 const ConstructQueue &queue, ConstructQueue::const_iterator item, 1958 mlir::omp::ParallelOperands &clauseOps, 1959 const EntryBlockArgs &args, DataSharingProcessor *dsp, 1960 bool isComposite = false) { 1961 auto genRegionEntryCB = [&](mlir::Operation *op) { 1962 genEntryBlock(converter.getFirOpBuilder(), args, op->getRegion(0)); 1963 bindEntryBlockArgs( 1964 converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args); 1965 return llvm::to_vector(args.getSyms()); 1966 }; 1967 1968 assert((!enableDelayedPrivatization || dsp) && 1969 "expected valid DataSharingProcessor"); 1970 OpWithBodyGenInfo genInfo = 1971 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 1972 llvm::omp::Directive::OMPD_parallel) 1973 .setClauses(&item->clauses) 1974 .setGenRegionEntryCb(genRegionEntryCB) 1975 .setGenSkeletonOnly(isComposite) 1976 .setDataSharingProcessor(dsp); 1977 1978 auto parallelOp = 1979 genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps); 1980 parallelOp.setComposite(isComposite); 1981 return parallelOp; 1982 } 1983 1984 /// This breaks the normal prototype of the gen*Op functions: adding the 1985 /// sectionBlocks argument so that the enclosed section constructs can be 1986 /// lowered here with correct reduction symbol remapping. 1987 static mlir::omp::SectionsOp 1988 genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 1989 semantics::SemanticsContext &semaCtx, 1990 lower::pft::Evaluation &eval, mlir::Location loc, 1991 const ConstructQueue &queue, ConstructQueue::const_iterator item, 1992 const parser::OmpSectionBlocks §ionBlocks) { 1993 mlir::omp::SectionsOperands clauseOps; 1994 llvm::SmallVector<const semantics::Symbol *> reductionSyms; 1995 genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps, 1996 reductionSyms); 1997 1998 auto &builder = converter.getFirOpBuilder(); 1999 2000 // Insert privatizations before SECTIONS 2001 lower::SymMapScope scope(symTable); 2002 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 2003 lower::omp::isLastItemInQueue(item, queue), 2004 /*useDelayedPrivatization=*/false, symTable); 2005 dsp.processStep1(); 2006 2007 List<Clause> nonDsaClauses; 2008 List<const clause::Lastprivate *> lastprivates; 2009 2010 for (const Clause &clause : item->clauses) { 2011 if (clause.id == llvm::omp::Clause::OMPC_lastprivate) { 2012 auto &lastp = std::get<clause::Lastprivate>(clause.u); 2013 lastprivateModifierNotSupported(lastp, converter.getCurrentLocation()); 2014 lastprivates.push_back(&lastp); 2015 } else { 2016 switch (clause.id) { 2017 case llvm::omp::Clause::OMPC_firstprivate: 2018 case llvm::omp::Clause::OMPC_private: 2019 case llvm::omp::Clause::OMPC_shared: 2020 break; 2021 default: 2022 nonDsaClauses.push_back(clause); 2023 } 2024 } 2025 } 2026 2027 // SECTIONS construct. 2028 auto sectionsOp = builder.create<mlir::omp::SectionsOp>(loc, clauseOps); 2029 2030 // Create entry block with reduction variables as arguments. 2031 EntryBlockArgs args; 2032 // TODO: Add private syms and vars. 2033 args.reduction.syms = reductionSyms; 2034 args.reduction.vars = clauseOps.reductionVars; 2035 2036 genEntryBlock(builder, args, sectionsOp.getRegion()); 2037 mlir::Operation *terminator = 2038 lower::genOpenMPTerminator(builder, sectionsOp, loc); 2039 2040 auto genRegionEntryCB = [&](mlir::Operation *op) { 2041 genEntryBlock(builder, args, op->getRegion(0)); 2042 bindEntryBlockArgs( 2043 converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args); 2044 return llvm::to_vector(args.getSyms()); 2045 }; 2046 2047 // Generate nested SECTION constructs. 2048 // This is done here rather than in genOMP([...], OpenMPSectionConstruct ) 2049 // because we need to run genReductionVars on each omp.section so that the 2050 // reduction variable gets mapped to the private version 2051 for (auto [construct, nestedEval] : 2052 llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { 2053 const auto *sectionConstruct = 2054 std::get_if<parser::OpenMPSectionConstruct>(&construct.u); 2055 if (!sectionConstruct) { 2056 assert(false && 2057 "unexpected construct nested inside of SECTIONS construct"); 2058 continue; 2059 } 2060 2061 ConstructQueue sectionQueue{buildConstructQueue( 2062 converter.getFirOpBuilder().getModule(), semaCtx, nestedEval, 2063 sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})}; 2064 2065 builder.setInsertionPoint(terminator); 2066 genOpWithBody<mlir::omp::SectionOp>( 2067 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, 2068 llvm::omp::Directive::OMPD_section) 2069 .setClauses(§ionQueue.begin()->clauses) 2070 .setGenRegionEntryCb(genRegionEntryCB), 2071 sectionQueue, sectionQueue.begin()); 2072 } 2073 2074 if (!lastprivates.empty()) { 2075 mlir::Region §ionsBody = sectionsOp.getRegion(); 2076 assert(sectionsBody.hasOneBlock()); 2077 mlir::Block &body = sectionsBody.front(); 2078 2079 auto lastSectionOp = llvm::find_if( 2080 llvm::reverse(body.getOperations()), [](const mlir::Operation &op) { 2081 return llvm::isa<mlir::omp::SectionOp>(op); 2082 }); 2083 assert(lastSectionOp != body.rend()); 2084 2085 for (const clause::Lastprivate *lastp : lastprivates) { 2086 builder.setInsertionPoint( 2087 lastSectionOp->getRegion(0).back().getTerminator()); 2088 mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint(); 2089 const auto &objList = std::get<ObjectList>(lastp->t); 2090 for (const Object &object : objList) { 2091 semantics::Symbol *sym = object.sym(); 2092 converter.copyHostAssociateVar(*sym, &insp, /*hostIsSource=*/false); 2093 } 2094 } 2095 } 2096 2097 // Perform DataSharingProcessor's step2 out of SECTIONS 2098 builder.setInsertionPointAfter(sectionsOp.getOperation()); 2099 dsp.processStep2(sectionsOp, false); 2100 // Emit implicit barrier to synchronize threads and avoid data 2101 // races on post-update of lastprivate variables when `nowait` 2102 // clause is present. 2103 if (clauseOps.nowait && !lastprivates.empty()) 2104 builder.create<mlir::omp::BarrierOp>(loc); 2105 2106 return sectionsOp; 2107 } 2108 2109 static void genScopeOp(lower::AbstractConverter &converter, 2110 lower::SymMap &symTable, 2111 semantics::SemanticsContext &semaCtx, 2112 lower::pft::Evaluation &eval, mlir::Location loc, 2113 const ConstructQueue &queue, 2114 ConstructQueue::const_iterator item) { 2115 TODO(loc, "Scope construct"); 2116 } 2117 2118 static mlir::omp::SingleOp 2119 genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2120 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 2121 mlir::Location loc, const ConstructQueue &queue, 2122 ConstructQueue::const_iterator item) { 2123 mlir::omp::SingleOperands clauseOps; 2124 genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps); 2125 2126 return genOpWithBody<mlir::omp::SingleOp>( 2127 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 2128 llvm::omp::Directive::OMPD_single) 2129 .setClauses(&item->clauses), 2130 queue, item, clauseOps); 2131 } 2132 2133 static mlir::omp::TargetOp 2134 genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2135 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 2136 mlir::Location loc, const ConstructQueue &queue, 2137 ConstructQueue::const_iterator item) { 2138 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 2139 lower::StatementContext stmtCtx; 2140 bool isTargetDevice = 2141 llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp()) 2142 .getIsTargetDevice(); 2143 2144 // Introduce a new host_eval information structure for this target region. 2145 if (!isTargetDevice) 2146 hostEvalInfo.emplace_back(); 2147 2148 mlir::omp::TargetOperands clauseOps; 2149 llvm::SmallVector<const semantics::Symbol *> mapSyms, isDevicePtrSyms, 2150 hasDeviceAddrSyms; 2151 genTargetClauses(converter, semaCtx, stmtCtx, eval, item->clauses, loc, 2152 clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms); 2153 2154 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 2155 /*shouldCollectPreDeterminedSymbols=*/ 2156 lower::omp::isLastItemInQueue(item, queue), 2157 /*useDelayedPrivatization=*/true, symTable); 2158 dsp.processStep1(&clauseOps); 2159 2160 // 5.8.1 Implicit Data-Mapping Attribute Rules 2161 // The following code follows the implicit data-mapping rules to map all the 2162 // symbols used inside the region that do not have explicit data-environment 2163 // attribute clauses (neither data-sharing; e.g. `private`, nor `map` 2164 // clauses). 2165 auto captureImplicitMap = [&](const semantics::Symbol &sym) { 2166 if (dsp.getAllSymbolsToPrivatize().contains(&sym)) 2167 return; 2168 2169 // Structure component symbols don't have bindings, and can only be 2170 // explicitly mapped individually. If a member is captured implicitly 2171 // we map the entirety of the derived type when we find its symbol. 2172 if (sym.owner().IsDerivedType()) 2173 return; 2174 2175 // if the symbol is part of an already mapped common block, do not make a 2176 // map for it. 2177 if (const Fortran::semantics::Symbol *common = 2178 Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate())) 2179 if (llvm::is_contained(mapSyms, common)) 2180 return; 2181 2182 // If we come across a symbol without a symbol address, we 2183 // return as we cannot process it, this is intended as a 2184 // catch all early exit for symbols that do not have a 2185 // corresponding extended value. Such as subroutines, 2186 // interfaces and named blocks. 2187 if (!converter.getSymbolAddress(sym)) 2188 return; 2189 2190 if (!llvm::is_contained(mapSyms, &sym)) { 2191 if (const auto *details = 2192 sym.template detailsIf<semantics::HostAssocDetails>()) 2193 converter.copySymbolBinding(details->symbol(), sym); 2194 std::stringstream name; 2195 fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); 2196 name << sym.name().ToString(); 2197 2198 fir::factory::AddrAndBoundsInfo info = 2199 Fortran::lower::getDataOperandBaseAddr( 2200 converter, firOpBuilder, sym, converter.getCurrentLocation()); 2201 llvm::SmallVector<mlir::Value> bounds = 2202 fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp, 2203 mlir::omp::MapBoundsType>( 2204 firOpBuilder, info, dataExv, 2205 semantics::IsAssumedSizeArray(sym.GetUltimate()), 2206 converter.getCurrentLocation()); 2207 2208 llvm::omp::OpenMPOffloadMappingFlags mapFlag = 2209 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; 2210 mlir::omp::VariableCaptureKind captureKind = 2211 mlir::omp::VariableCaptureKind::ByRef; 2212 2213 mlir::Value baseOp = info.rawInput; 2214 mlir::Type eleType = baseOp.getType(); 2215 if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType())) 2216 eleType = refType.getElementType(); 2217 2218 // If a variable is specified in declare target link and if device 2219 // type is not specified as `nohost`, it needs to be mapped tofrom 2220 mlir::ModuleOp mod = firOpBuilder.getModule(); 2221 mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); 2222 auto declareTargetOp = 2223 llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op); 2224 if (declareTargetOp && declareTargetOp.isDeclareTarget()) { 2225 if (declareTargetOp.getDeclareTargetCaptureClause() == 2226 mlir::omp::DeclareTargetCaptureClause::link && 2227 declareTargetOp.getDeclareTargetDeviceType() != 2228 mlir::omp::DeclareTargetDeviceType::nohost) { 2229 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; 2230 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; 2231 } 2232 } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { 2233 captureKind = mlir::omp::VariableCaptureKind::ByCopy; 2234 } else if (!fir::isa_builtin_cptr_type(eleType)) { 2235 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; 2236 mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; 2237 } 2238 auto location = 2239 mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(), 2240 sym.name().ToString()), 2241 baseOp.getLoc()); 2242 mlir::Value mapOp = createMapInfoOp( 2243 firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, 2244 name.str(), bounds, /*members=*/{}, 2245 /*membersIndex=*/mlir::ArrayAttr{}, 2246 static_cast< 2247 std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( 2248 mapFlag), 2249 captureKind, baseOp.getType()); 2250 2251 clauseOps.mapVars.push_back(mapOp); 2252 mapSyms.push_back(&sym); 2253 } 2254 }; 2255 lower::pft::visitAllSymbols(eval, captureImplicitMap); 2256 2257 auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps); 2258 2259 llvm::SmallVector<mlir::Value> mapBaseValues; 2260 extractMappedBaseValues(clauseOps.mapVars, mapBaseValues); 2261 2262 EntryBlockArgs args; 2263 args.hostEvalVars = clauseOps.hostEvalVars; 2264 // TODO: Add in_reduction syms and vars. 2265 args.map.syms = mapSyms; 2266 args.map.vars = mapBaseValues; 2267 args.priv.syms = dsp.getDelayedPrivSymbols(); 2268 args.priv.vars = clauseOps.privateVars; 2269 2270 genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc, 2271 queue, item, dsp); 2272 2273 // Remove the host_eval information structure created for this target region. 2274 if (!isTargetDevice) 2275 hostEvalInfo.pop_back(); 2276 return targetOp; 2277 } 2278 2279 static mlir::omp::TargetDataOp 2280 genTargetDataOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2281 semantics::SemanticsContext &semaCtx, 2282 lower::pft::Evaluation &eval, mlir::Location loc, 2283 const ConstructQueue &queue, 2284 ConstructQueue::const_iterator item) { 2285 lower::StatementContext stmtCtx; 2286 mlir::omp::TargetDataOperands clauseOps; 2287 llvm::SmallVector<const semantics::Symbol *> useDeviceAddrSyms, 2288 useDevicePtrSyms; 2289 genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 2290 clauseOps, useDeviceAddrSyms, useDevicePtrSyms); 2291 2292 auto targetDataOp = 2293 converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc, 2294 clauseOps); 2295 2296 llvm::SmallVector<mlir::Value> useDeviceAddrBaseValues, 2297 useDevicePtrBaseValues; 2298 extractMappedBaseValues(clauseOps.useDeviceAddrVars, useDeviceAddrBaseValues); 2299 extractMappedBaseValues(clauseOps.useDevicePtrVars, useDevicePtrBaseValues); 2300 2301 EntryBlockArgs args; 2302 args.useDeviceAddr.syms = useDeviceAddrSyms; 2303 args.useDeviceAddr.vars = useDeviceAddrBaseValues; 2304 args.useDevicePtr.syms = useDevicePtrSyms; 2305 args.useDevicePtr.vars = useDevicePtrBaseValues; 2306 2307 genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, args, 2308 loc, queue, item); 2309 return targetDataOp; 2310 } 2311 2312 template <typename OpTy> 2313 static OpTy genTargetEnterExitUpdateDataOp( 2314 lower::AbstractConverter &converter, lower::SymMap &symTable, 2315 semantics::SemanticsContext &semaCtx, mlir::Location loc, 2316 const ConstructQueue &queue, ConstructQueue::const_iterator item) { 2317 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 2318 lower::StatementContext stmtCtx; 2319 2320 // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. 2321 [[maybe_unused]] llvm::omp::Directive directive; 2322 if constexpr (std::is_same_v<OpTy, mlir::omp::TargetEnterDataOp>) { 2323 directive = llvm::omp::Directive::OMPD_target_enter_data; 2324 } else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetExitDataOp>) { 2325 directive = llvm::omp::Directive::OMPD_target_exit_data; 2326 } else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetUpdateOp>) { 2327 directive = llvm::omp::Directive::OMPD_target_update; 2328 } else { 2329 llvm_unreachable("Unexpected TARGET DATA construct"); 2330 } 2331 2332 mlir::omp::TargetEnterExitUpdateDataOperands clauseOps; 2333 genTargetEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, 2334 item->clauses, loc, directive, clauseOps); 2335 2336 return firOpBuilder.create<OpTy>(loc, clauseOps); 2337 } 2338 2339 static mlir::omp::TaskOp 2340 genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2341 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 2342 mlir::Location loc, const ConstructQueue &queue, 2343 ConstructQueue::const_iterator item) { 2344 lower::StatementContext stmtCtx; 2345 mlir::omp::TaskOperands clauseOps; 2346 genTaskClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); 2347 2348 if (!enableDelayedPrivatization) 2349 return genOpWithBody<mlir::omp::TaskOp>( 2350 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 2351 llvm::omp::Directive::OMPD_task) 2352 .setClauses(&item->clauses), 2353 queue, item, clauseOps); 2354 2355 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 2356 lower::omp::isLastItemInQueue(item, queue), 2357 /*useDelayedPrivatization=*/true, symTable); 2358 dsp.processStep1(&clauseOps); 2359 2360 EntryBlockArgs taskArgs; 2361 taskArgs.priv.syms = dsp.getDelayedPrivSymbols(); 2362 taskArgs.priv.vars = clauseOps.privateVars; 2363 2364 auto genRegionEntryCB = [&](mlir::Operation *op) { 2365 genEntryBlock(converter.getFirOpBuilder(), taskArgs, op->getRegion(0)); 2366 bindEntryBlockArgs(converter, 2367 llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), 2368 taskArgs); 2369 return llvm::to_vector(taskArgs.priv.syms); 2370 }; 2371 2372 return genOpWithBody<mlir::omp::TaskOp>( 2373 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 2374 llvm::omp::Directive::OMPD_task) 2375 .setClauses(&item->clauses) 2376 .setDataSharingProcessor(&dsp) 2377 .setGenRegionEntryCb(genRegionEntryCB), 2378 queue, item, clauseOps); 2379 } 2380 2381 static mlir::omp::TaskgroupOp 2382 genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2383 semantics::SemanticsContext &semaCtx, 2384 lower::pft::Evaluation &eval, mlir::Location loc, 2385 const ConstructQueue &queue, 2386 ConstructQueue::const_iterator item) { 2387 mlir::omp::TaskgroupOperands clauseOps; 2388 genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps); 2389 2390 return genOpWithBody<mlir::omp::TaskgroupOp>( 2391 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 2392 llvm::omp::Directive::OMPD_taskgroup) 2393 .setClauses(&item->clauses), 2394 queue, item, clauseOps); 2395 } 2396 2397 static mlir::omp::TaskwaitOp 2398 genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2399 semantics::SemanticsContext &semaCtx, 2400 lower::pft::Evaluation &eval, mlir::Location loc, 2401 const ConstructQueue &queue, 2402 ConstructQueue::const_iterator item) { 2403 mlir::omp::TaskwaitOperands clauseOps; 2404 genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps); 2405 return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc, 2406 clauseOps); 2407 } 2408 2409 static mlir::omp::TaskyieldOp 2410 genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2411 semantics::SemanticsContext &semaCtx, 2412 lower::pft::Evaluation &eval, mlir::Location loc, 2413 const ConstructQueue &queue, 2414 ConstructQueue::const_iterator item) { 2415 return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc); 2416 } 2417 2418 static mlir::omp::WorkshareOp 2419 genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2420 semantics::SemanticsContext &semaCtx, 2421 lower::pft::Evaluation &eval, mlir::Location loc, 2422 const ConstructQueue &queue, 2423 ConstructQueue::const_iterator item) { 2424 lower::StatementContext stmtCtx; 2425 mlir::omp::WorkshareOperands clauseOps; 2426 genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 2427 clauseOps); 2428 2429 return genOpWithBody<mlir::omp::WorkshareOp>( 2430 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 2431 llvm::omp::Directive::OMPD_workshare) 2432 .setClauses(&item->clauses), 2433 queue, item, clauseOps); 2434 } 2435 2436 static mlir::omp::TeamsOp 2437 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, 2438 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 2439 mlir::Location loc, const ConstructQueue &queue, 2440 ConstructQueue::const_iterator item) { 2441 lower::StatementContext stmtCtx; 2442 2443 mlir::omp::TeamsOperands clauseOps; 2444 llvm::SmallVector<const semantics::Symbol *> reductionSyms; 2445 genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, 2446 reductionSyms); 2447 2448 EntryBlockArgs args; 2449 // TODO: Add private syms and vars. 2450 args.reduction.syms = reductionSyms; 2451 args.reduction.vars = clauseOps.reductionVars; 2452 2453 auto genRegionEntryCB = [&](mlir::Operation *op) { 2454 genEntryBlock(converter.getFirOpBuilder(), args, op->getRegion(0)); 2455 bindEntryBlockArgs( 2456 converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args); 2457 return llvm::to_vector(args.getSyms()); 2458 }; 2459 2460 return genOpWithBody<mlir::omp::TeamsOp>( 2461 OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, 2462 llvm::omp::Directive::OMPD_teams) 2463 .setClauses(&item->clauses) 2464 .setGenRegionEntryCb(genRegionEntryCB), 2465 queue, item, clauseOps); 2466 } 2467 2468 //===----------------------------------------------------------------------===// 2469 // Code generation functions for the standalone version of constructs that can 2470 // also be a leaf of a composite construct 2471 //===----------------------------------------------------------------------===// 2472 2473 static void genStandaloneDistribute(lower::AbstractConverter &converter, 2474 lower::SymMap &symTable, 2475 semantics::SemanticsContext &semaCtx, 2476 lower::pft::Evaluation &eval, 2477 mlir::Location loc, 2478 const ConstructQueue &queue, 2479 ConstructQueue::const_iterator item) { 2480 lower::StatementContext stmtCtx; 2481 2482 mlir::omp::DistributeOperands distributeClauseOps; 2483 genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 2484 distributeClauseOps); 2485 2486 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 2487 /*shouldCollectPreDeterminedSymbols=*/true, 2488 enableDelayedPrivatizationStaging, symTable); 2489 dsp.processStep1(&distributeClauseOps); 2490 2491 mlir::omp::LoopNestOperands loopNestClauseOps; 2492 llvm::SmallVector<const semantics::Symbol *> iv; 2493 genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, 2494 loopNestClauseOps, iv); 2495 2496 EntryBlockArgs distributeArgs; 2497 distributeArgs.priv.syms = dsp.getDelayedPrivSymbols(); 2498 distributeArgs.priv.vars = distributeClauseOps.privateVars; 2499 auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>( 2500 converter, loc, distributeClauseOps, distributeArgs); 2501 2502 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, 2503 loopNestClauseOps, iv, {{distributeOp, distributeArgs}}, 2504 llvm::omp::Directive::OMPD_distribute, dsp); 2505 } 2506 2507 static void genStandaloneDo(lower::AbstractConverter &converter, 2508 lower::SymMap &symTable, 2509 semantics::SemanticsContext &semaCtx, 2510 lower::pft::Evaluation &eval, mlir::Location loc, 2511 const ConstructQueue &queue, 2512 ConstructQueue::const_iterator item) { 2513 lower::StatementContext stmtCtx; 2514 2515 mlir::omp::WsloopOperands wsloopClauseOps; 2516 llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms; 2517 genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 2518 wsloopClauseOps, wsloopReductionSyms); 2519 2520 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 2521 /*shouldCollectPreDeterminedSymbols=*/true, 2522 enableDelayedPrivatizationStaging, symTable); 2523 dsp.processStep1(&wsloopClauseOps); 2524 2525 mlir::omp::LoopNestOperands loopNestClauseOps; 2526 llvm::SmallVector<const semantics::Symbol *> iv; 2527 genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, 2528 loopNestClauseOps, iv); 2529 2530 EntryBlockArgs wsloopArgs; 2531 wsloopArgs.priv.syms = dsp.getDelayedPrivSymbols(); 2532 wsloopArgs.priv.vars = wsloopClauseOps.privateVars; 2533 wsloopArgs.reduction.syms = wsloopReductionSyms; 2534 wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; 2535 auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>( 2536 converter, loc, wsloopClauseOps, wsloopArgs); 2537 2538 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, 2539 loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}}, 2540 llvm::omp::Directive::OMPD_do, dsp); 2541 } 2542 2543 static void genStandaloneParallel(lower::AbstractConverter &converter, 2544 lower::SymMap &symTable, 2545 semantics::SemanticsContext &semaCtx, 2546 lower::pft::Evaluation &eval, 2547 mlir::Location loc, 2548 const ConstructQueue &queue, 2549 ConstructQueue::const_iterator item) { 2550 lower::StatementContext stmtCtx; 2551 2552 mlir::omp::ParallelOperands parallelClauseOps; 2553 llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms; 2554 genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, 2555 parallelClauseOps, parallelReductionSyms); 2556 2557 std::optional<DataSharingProcessor> dsp; 2558 if (enableDelayedPrivatization) { 2559 dsp.emplace(converter, semaCtx, item->clauses, eval, 2560 lower::omp::isLastItemInQueue(item, queue), 2561 /*useDelayedPrivatization=*/true, symTable); 2562 dsp->processStep1(¶llelClauseOps); 2563 } 2564 2565 EntryBlockArgs parallelArgs; 2566 if (dsp) 2567 parallelArgs.priv.syms = dsp->getDelayedPrivSymbols(); 2568 parallelArgs.priv.vars = parallelClauseOps.privateVars; 2569 parallelArgs.reduction.syms = parallelReductionSyms; 2570 parallelArgs.reduction.vars = parallelClauseOps.reductionVars; 2571 genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, 2572 parallelClauseOps, parallelArgs, 2573 enableDelayedPrivatization ? &dsp.value() : nullptr); 2574 } 2575 2576 static void genStandaloneSimd(lower::AbstractConverter &converter, 2577 lower::SymMap &symTable, 2578 semantics::SemanticsContext &semaCtx, 2579 lower::pft::Evaluation &eval, mlir::Location loc, 2580 const ConstructQueue &queue, 2581 ConstructQueue::const_iterator item) { 2582 mlir::omp::SimdOperands simdClauseOps; 2583 llvm::SmallVector<const semantics::Symbol *> simdReductionSyms; 2584 genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps, 2585 simdReductionSyms); 2586 2587 DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, 2588 /*shouldCollectPreDeterminedSymbols=*/true, 2589 enableDelayedPrivatization, symTable); 2590 dsp.processStep1(&simdClauseOps); 2591 2592 mlir::omp::LoopNestOperands loopNestClauseOps; 2593 llvm::SmallVector<const semantics::Symbol *> iv; 2594 genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, 2595 loopNestClauseOps, iv); 2596 2597 EntryBlockArgs simdArgs; 2598 simdArgs.priv.syms = dsp.getDelayedPrivSymbols(); 2599 simdArgs.priv.vars = simdClauseOps.privateVars; 2600 simdArgs.reduction.syms = simdReductionSyms; 2601 simdArgs.reduction.vars = simdClauseOps.reductionVars; 2602 auto simdOp = 2603 genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs); 2604 2605 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, 2606 loopNestClauseOps, iv, {{simdOp, simdArgs}}, 2607 llvm::omp::Directive::OMPD_simd, dsp); 2608 } 2609 2610 static void genStandaloneTaskloop(lower::AbstractConverter &converter, 2611 lower::SymMap &symTable, 2612 semantics::SemanticsContext &semaCtx, 2613 lower::pft::Evaluation &eval, 2614 mlir::Location loc, 2615 const ConstructQueue &queue, 2616 ConstructQueue::const_iterator item) { 2617 TODO(loc, "Taskloop construct"); 2618 } 2619 2620 //===----------------------------------------------------------------------===// 2621 // Code generation functions for composite constructs 2622 //===----------------------------------------------------------------------===// 2623 2624 static void genCompositeDistributeParallelDo( 2625 lower::AbstractConverter &converter, lower::SymMap &symTable, 2626 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 2627 mlir::Location loc, const ConstructQueue &queue, 2628 ConstructQueue::const_iterator item) { 2629 lower::StatementContext stmtCtx; 2630 2631 assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); 2632 ConstructQueue::const_iterator distributeItem = item; 2633 ConstructQueue::const_iterator parallelItem = std::next(distributeItem); 2634 ConstructQueue::const_iterator doItem = std::next(parallelItem); 2635 2636 // Create parent omp.parallel first. 2637 mlir::omp::ParallelOperands parallelClauseOps; 2638 llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms; 2639 genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc, 2640 parallelClauseOps, parallelReductionSyms); 2641 2642 DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval, 2643 /*shouldCollectPreDeterminedSymbols=*/true, 2644 /*useDelayedPrivatization=*/true, symTable); 2645 dsp.processStep1(¶llelClauseOps); 2646 2647 EntryBlockArgs parallelArgs; 2648 parallelArgs.priv.syms = dsp.getDelayedPrivSymbols(); 2649 parallelArgs.priv.vars = parallelClauseOps.privateVars; 2650 parallelArgs.reduction.syms = parallelReductionSyms; 2651 parallelArgs.reduction.vars = parallelClauseOps.reductionVars; 2652 genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem, 2653 parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true); 2654 2655 // Clause processing. 2656 mlir::omp::DistributeOperands distributeClauseOps; 2657 genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses, 2658 loc, distributeClauseOps); 2659 2660 mlir::omp::WsloopOperands wsloopClauseOps; 2661 llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms; 2662 genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc, 2663 wsloopClauseOps, wsloopReductionSyms); 2664 2665 mlir::omp::LoopNestOperands loopNestClauseOps; 2666 llvm::SmallVector<const semantics::Symbol *> iv; 2667 genLoopNestClauses(converter, semaCtx, eval, doItem->clauses, loc, 2668 loopNestClauseOps, iv); 2669 2670 // Operation creation. 2671 EntryBlockArgs distributeArgs; 2672 // TODO: Add private syms and vars. 2673 auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>( 2674 converter, loc, distributeClauseOps, distributeArgs); 2675 distributeOp.setComposite(/*val=*/true); 2676 2677 EntryBlockArgs wsloopArgs; 2678 // TODO: Add private syms and vars. 2679 wsloopArgs.reduction.syms = wsloopReductionSyms; 2680 wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; 2681 auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>( 2682 converter, loc, wsloopClauseOps, wsloopArgs); 2683 wsloopOp.setComposite(/*val=*/true); 2684 2685 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem, 2686 loopNestClauseOps, iv, 2687 {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, 2688 llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); 2689 } 2690 2691 static void genCompositeDistributeParallelDoSimd( 2692 lower::AbstractConverter &converter, lower::SymMap &symTable, 2693 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 2694 mlir::Location loc, const ConstructQueue &queue, 2695 ConstructQueue::const_iterator item) { 2696 lower::StatementContext stmtCtx; 2697 2698 assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); 2699 ConstructQueue::const_iterator distributeItem = item; 2700 ConstructQueue::const_iterator parallelItem = std::next(distributeItem); 2701 ConstructQueue::const_iterator doItem = std::next(parallelItem); 2702 ConstructQueue::const_iterator simdItem = std::next(doItem); 2703 2704 // Create parent omp.parallel first. 2705 mlir::omp::ParallelOperands parallelClauseOps; 2706 llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms; 2707 genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc, 2708 parallelClauseOps, parallelReductionSyms); 2709 2710 DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, 2711 /*shouldCollectPreDeterminedSymbols=*/true, 2712 /*useDelayedPrivatization=*/true, symTable); 2713 dsp.processStep1(¶llelClauseOps); 2714 2715 EntryBlockArgs parallelArgs; 2716 parallelArgs.priv.syms = dsp.getDelayedPrivSymbols(); 2717 parallelArgs.priv.vars = parallelClauseOps.privateVars; 2718 parallelArgs.reduction.syms = parallelReductionSyms; 2719 parallelArgs.reduction.vars = parallelClauseOps.reductionVars; 2720 genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem, 2721 parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true); 2722 2723 // Clause processing. 2724 mlir::omp::DistributeOperands distributeClauseOps; 2725 genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses, 2726 loc, distributeClauseOps); 2727 2728 mlir::omp::WsloopOperands wsloopClauseOps; 2729 llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms; 2730 genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc, 2731 wsloopClauseOps, wsloopReductionSyms); 2732 2733 mlir::omp::SimdOperands simdClauseOps; 2734 llvm::SmallVector<const semantics::Symbol *> simdReductionSyms; 2735 genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps, 2736 simdReductionSyms); 2737 2738 mlir::omp::LoopNestOperands loopNestClauseOps; 2739 llvm::SmallVector<const semantics::Symbol *> iv; 2740 genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc, 2741 loopNestClauseOps, iv); 2742 2743 // Operation creation. 2744 EntryBlockArgs distributeArgs; 2745 // TODO: Add private syms and vars. 2746 auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>( 2747 converter, loc, distributeClauseOps, distributeArgs); 2748 distributeOp.setComposite(/*val=*/true); 2749 2750 EntryBlockArgs wsloopArgs; 2751 // TODO: Add private syms and vars. 2752 wsloopArgs.reduction.syms = wsloopReductionSyms; 2753 wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; 2754 auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>( 2755 converter, loc, wsloopClauseOps, wsloopArgs); 2756 wsloopOp.setComposite(/*val=*/true); 2757 2758 EntryBlockArgs simdArgs; 2759 // TODO: Add private syms and vars. 2760 simdArgs.reduction.syms = simdReductionSyms; 2761 simdArgs.reduction.vars = simdClauseOps.reductionVars; 2762 auto simdOp = 2763 genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs); 2764 simdOp.setComposite(/*val=*/true); 2765 2766 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, 2767 loopNestClauseOps, iv, 2768 {{distributeOp, distributeArgs}, 2769 {wsloopOp, wsloopArgs}, 2770 {simdOp, simdArgs}}, 2771 llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp); 2772 } 2773 2774 static void genCompositeDistributeSimd(lower::AbstractConverter &converter, 2775 lower::SymMap &symTable, 2776 semantics::SemanticsContext &semaCtx, 2777 lower::pft::Evaluation &eval, 2778 mlir::Location loc, 2779 const ConstructQueue &queue, 2780 ConstructQueue::const_iterator item) { 2781 lower::StatementContext stmtCtx; 2782 2783 assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); 2784 ConstructQueue::const_iterator distributeItem = item; 2785 ConstructQueue::const_iterator simdItem = std::next(distributeItem); 2786 2787 // Clause processing. 2788 mlir::omp::DistributeOperands distributeClauseOps; 2789 genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses, 2790 loc, distributeClauseOps); 2791 2792 mlir::omp::SimdOperands simdClauseOps; 2793 llvm::SmallVector<const semantics::Symbol *> simdReductionSyms; 2794 genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps, 2795 simdReductionSyms); 2796 2797 // TODO: Support delayed privatization. 2798 DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, 2799 /*shouldCollectPreDeterminedSymbols=*/true, 2800 /*useDelayedPrivatization=*/false, symTable); 2801 dsp.processStep1(); 2802 2803 // Pass the innermost leaf construct's clauses because that's where COLLAPSE 2804 // is placed by construct decomposition. 2805 mlir::omp::LoopNestOperands loopNestClauseOps; 2806 llvm::SmallVector<const semantics::Symbol *> iv; 2807 genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc, 2808 loopNestClauseOps, iv); 2809 2810 // Operation creation. 2811 EntryBlockArgs distributeArgs; 2812 // TODO: Add private syms and vars. 2813 auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>( 2814 converter, loc, distributeClauseOps, distributeArgs); 2815 distributeOp.setComposite(/*val=*/true); 2816 2817 EntryBlockArgs simdArgs; 2818 // TODO: Add private syms and vars. 2819 simdArgs.reduction.syms = simdReductionSyms; 2820 simdArgs.reduction.vars = simdClauseOps.reductionVars; 2821 auto simdOp = 2822 genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs); 2823 simdOp.setComposite(/*val=*/true); 2824 2825 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, 2826 loopNestClauseOps, iv, 2827 {{distributeOp, distributeArgs}, {simdOp, simdArgs}}, 2828 llvm::omp::Directive::OMPD_distribute_simd, dsp); 2829 } 2830 2831 static void genCompositeDoSimd(lower::AbstractConverter &converter, 2832 lower::SymMap &symTable, 2833 semantics::SemanticsContext &semaCtx, 2834 lower::pft::Evaluation &eval, mlir::Location loc, 2835 const ConstructQueue &queue, 2836 ConstructQueue::const_iterator item) { 2837 lower::StatementContext stmtCtx; 2838 2839 assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); 2840 ConstructQueue::const_iterator doItem = item; 2841 ConstructQueue::const_iterator simdItem = std::next(doItem); 2842 2843 // Clause processing. 2844 mlir::omp::WsloopOperands wsloopClauseOps; 2845 llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms; 2846 genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc, 2847 wsloopClauseOps, wsloopReductionSyms); 2848 2849 mlir::omp::SimdOperands simdClauseOps; 2850 llvm::SmallVector<const semantics::Symbol *> simdReductionSyms; 2851 genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps, 2852 simdReductionSyms); 2853 2854 // TODO: Support delayed privatization. 2855 DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, 2856 /*shouldCollectPreDeterminedSymbols=*/true, 2857 /*useDelayedPrivatization=*/false, symTable); 2858 dsp.processStep1(); 2859 2860 // Pass the innermost leaf construct's clauses because that's where COLLAPSE 2861 // is placed by construct decomposition. 2862 mlir::omp::LoopNestOperands loopNestClauseOps; 2863 llvm::SmallVector<const semantics::Symbol *> iv; 2864 genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc, 2865 loopNestClauseOps, iv); 2866 2867 // Operation creation. 2868 EntryBlockArgs wsloopArgs; 2869 // TODO: Add private syms and vars. 2870 wsloopArgs.reduction.syms = wsloopReductionSyms; 2871 wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; 2872 auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>( 2873 converter, loc, wsloopClauseOps, wsloopArgs); 2874 wsloopOp.setComposite(/*val=*/true); 2875 2876 EntryBlockArgs simdArgs; 2877 // TODO: Add private syms and vars. 2878 simdArgs.reduction.syms = simdReductionSyms; 2879 simdArgs.reduction.vars = simdClauseOps.reductionVars; 2880 auto simdOp = 2881 genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs); 2882 simdOp.setComposite(/*val=*/true); 2883 2884 genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, 2885 loopNestClauseOps, iv, 2886 {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, 2887 llvm::omp::Directive::OMPD_do_simd, dsp); 2888 } 2889 2890 static void genCompositeTaskloopSimd(lower::AbstractConverter &converter, 2891 lower::SymMap &symTable, 2892 semantics::SemanticsContext &semaCtx, 2893 lower::pft::Evaluation &eval, 2894 mlir::Location loc, 2895 const ConstructQueue &queue, 2896 ConstructQueue::const_iterator item) { 2897 assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); 2898 TODO(loc, "Composite TASKLOOP SIMD"); 2899 } 2900 2901 //===----------------------------------------------------------------------===// 2902 // Dispatch 2903 //===----------------------------------------------------------------------===// 2904 2905 static bool genOMPCompositeDispatch(lower::AbstractConverter &converter, 2906 lower::SymMap &symTable, 2907 semantics::SemanticsContext &semaCtx, 2908 lower::pft::Evaluation &eval, 2909 mlir::Location loc, 2910 const ConstructQueue &queue, 2911 ConstructQueue::const_iterator item) { 2912 using llvm::omp::Directive; 2913 using lower::omp::matchLeafSequence; 2914 2915 // TODO: Privatization for composite constructs is currently only done based 2916 // on the clauses for their last leaf construct, which may not always be 2917 // correct. Consider per-leaf privatization of composite constructs once 2918 // delayed privatization is supported by all participating ops. 2919 if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do)) 2920 genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, 2921 queue, item); 2922 else if (matchLeafSequence(item, queue, 2923 Directive::OMPD_distribute_parallel_do_simd)) 2924 genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, 2925 loc, queue, item); 2926 else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd)) 2927 genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, 2928 item); 2929 else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd)) 2930 genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item); 2931 else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd)) 2932 genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, 2933 item); 2934 else 2935 return false; 2936 2937 return true; 2938 } 2939 2940 static void genOMPDispatch(lower::AbstractConverter &converter, 2941 lower::SymMap &symTable, 2942 semantics::SemanticsContext &semaCtx, 2943 lower::pft::Evaluation &eval, mlir::Location loc, 2944 const ConstructQueue &queue, 2945 ConstructQueue::const_iterator item) { 2946 assert(item != queue.end()); 2947 2948 bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) == 2949 llvm::omp::Association::Loop; 2950 if (loopLeaf) { 2951 symTable.pushScope(); 2952 if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue, 2953 item)) { 2954 symTable.popScope(); 2955 return; 2956 } 2957 } 2958 2959 switch (llvm::omp::Directive dir = item->id) { 2960 case llvm::omp::Directive::OMPD_barrier: 2961 genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item); 2962 break; 2963 case llvm::omp::Directive::OMPD_distribute: 2964 genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue, 2965 item); 2966 break; 2967 case llvm::omp::Directive::OMPD_do: 2968 genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item); 2969 break; 2970 case llvm::omp::Directive::OMPD_loop: 2971 genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); 2972 break; 2973 case llvm::omp::Directive::OMPD_masked: 2974 genMaskedOp(converter, symTable, semaCtx, eval, loc, queue, item); 2975 break; 2976 case llvm::omp::Directive::OMPD_master: 2977 genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item); 2978 break; 2979 case llvm::omp::Directive::OMPD_ordered: 2980 // Block-associated "ordered" construct. 2981 genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue, item); 2982 break; 2983 case llvm::omp::Directive::OMPD_parallel: 2984 genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item); 2985 break; 2986 case llvm::omp::Directive::OMPD_scan: 2987 TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir)); 2988 break; 2989 case llvm::omp::Directive::OMPD_section: 2990 llvm_unreachable("genOMPDispatch: OMPD_section"); 2991 // Lowered in the enclosing genSectionsOp. 2992 break; 2993 case llvm::omp::Directive::OMPD_sections: 2994 // Called directly from genOMP([...], OpenMPSectionsConstruct) because it 2995 // has a different prototype. 2996 // This code path is still taken when iterating through the construct queue 2997 // in genBodyOfOp 2998 break; 2999 case llvm::omp::Directive::OMPD_simd: 3000 genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item); 3001 break; 3002 case llvm::omp::Directive::OMPD_scope: 3003 genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item); 3004 break; 3005 case llvm::omp::Directive::OMPD_single: 3006 genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); 3007 break; 3008 case llvm::omp::Directive::OMPD_target: 3009 genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item); 3010 break; 3011 case llvm::omp::Directive::OMPD_target_data: 3012 genTargetDataOp(converter, symTable, semaCtx, eval, loc, queue, item); 3013 break; 3014 case llvm::omp::Directive::OMPD_target_enter_data: 3015 genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>( 3016 converter, symTable, semaCtx, loc, queue, item); 3017 break; 3018 case llvm::omp::Directive::OMPD_target_exit_data: 3019 genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>( 3020 converter, symTable, semaCtx, loc, queue, item); 3021 break; 3022 case llvm::omp::Directive::OMPD_target_update: 3023 genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>( 3024 converter, symTable, semaCtx, loc, queue, item); 3025 break; 3026 case llvm::omp::Directive::OMPD_task: 3027 genTaskOp(converter, symTable, semaCtx, eval, loc, queue, item); 3028 break; 3029 case llvm::omp::Directive::OMPD_taskgroup: 3030 genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item); 3031 break; 3032 case llvm::omp::Directive::OMPD_taskloop: 3033 genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item); 3034 break; 3035 case llvm::omp::Directive::OMPD_taskwait: 3036 genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item); 3037 break; 3038 case llvm::omp::Directive::OMPD_taskyield: 3039 genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item); 3040 break; 3041 case llvm::omp::Directive::OMPD_teams: 3042 genTeamsOp(converter, symTable, semaCtx, eval, loc, queue, item); 3043 break; 3044 case llvm::omp::Directive::OMPD_tile: 3045 case llvm::omp::Directive::OMPD_unroll: 3046 TODO(loc, "Unhandled loop directive (" + 3047 llvm::omp::getOpenMPDirectiveName(dir) + ")"); 3048 // case llvm::omp::Directive::OMPD_workdistribute: 3049 case llvm::omp::Directive::OMPD_workshare: 3050 genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); 3051 break; 3052 default: 3053 // Combined and composite constructs should have been split into a sequence 3054 // of leaf constructs when building the construct queue. 3055 assert(!llvm::omp::isLeafConstruct(dir) && 3056 "Unexpected compound construct."); 3057 break; 3058 } 3059 3060 if (loopLeaf) 3061 symTable.popScope(); 3062 } 3063 3064 //===----------------------------------------------------------------------===// 3065 // OpenMPDeclarativeConstruct visitors 3066 //===----------------------------------------------------------------------===// 3067 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3068 semantics::SemanticsContext &semaCtx, 3069 lower::pft::Evaluation &eval, 3070 const parser::OpenMPUtilityConstruct &); 3071 3072 static void 3073 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3074 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3075 const parser::OpenMPDeclarativeAllocate &declarativeAllocate) { 3076 TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate"); 3077 } 3078 3079 static void genOMP( 3080 lower::AbstractConverter &converter, lower::SymMap &symTable, 3081 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3082 const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) { 3083 TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct"); 3084 } 3085 3086 static void 3087 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3088 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3089 const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) { 3090 TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct"); 3091 } 3092 3093 static void 3094 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3095 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3096 const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) { 3097 TODO(converter.getCurrentLocation(), "OpenMPDeclareMapperConstruct"); 3098 } 3099 3100 static void 3101 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3102 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3103 const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { 3104 mlir::omp::DeclareTargetOperands clauseOps; 3105 llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause; 3106 mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); 3107 getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, 3108 clauseOps, symbolAndClause); 3109 3110 for (const DeclareTargetCapturePair &symClause : symbolAndClause) { 3111 mlir::Operation *op = mod.lookupSymbol( 3112 converter.mangleName(std::get<const semantics::Symbol &>(symClause))); 3113 3114 // Some symbols are deferred until later in the module, these are handled 3115 // upon finalization of the module for OpenMP inside of Bridge, so we simply 3116 // skip for now. 3117 if (!op) 3118 continue; 3119 3120 markDeclareTarget( 3121 op, converter, 3122 std::get<mlir::omp::DeclareTargetCaptureClause>(symClause), 3123 clauseOps.deviceType); 3124 } 3125 } 3126 3127 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3128 semantics::SemanticsContext &semaCtx, 3129 lower::pft::Evaluation &eval, 3130 const parser::OpenMPRequiresConstruct &requiresConstruct) { 3131 // Requires directives are gathered and processed in semantics and 3132 // then combined in the lowering bridge before triggering codegen 3133 // just once. Hence, there is no need to lower each individual 3134 // occurrence here. 3135 } 3136 3137 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3138 semantics::SemanticsContext &semaCtx, 3139 lower::pft::Evaluation &eval, 3140 const parser::OpenMPThreadprivate &threadprivate) { 3141 // The directive is lowered when instantiating the variable to 3142 // support the case of threadprivate variable declared in module. 3143 } 3144 3145 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3146 semantics::SemanticsContext &semaCtx, 3147 lower::pft::Evaluation &eval, 3148 const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) { 3149 Fortran::common::visit( 3150 [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, 3151 ompDeclConstruct.u); 3152 } 3153 3154 //===----------------------------------------------------------------------===// 3155 // OpenMPStandaloneConstruct visitors 3156 //===----------------------------------------------------------------------===// 3157 3158 static void genOMP( 3159 lower::AbstractConverter &converter, lower::SymMap &symTable, 3160 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3161 const parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { 3162 const auto &directive = std::get<parser::OmpSimpleStandaloneDirective>( 3163 simpleStandaloneConstruct.t); 3164 List<Clause> clauses = makeClauses( 3165 std::get<parser::OmpClauseList>(simpleStandaloneConstruct.t), semaCtx); 3166 mlir::Location currentLocation = converter.genLocation(directive.source); 3167 3168 ConstructQueue queue{ 3169 buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, 3170 eval, directive.source, directive.v, clauses)}; 3171 if (directive.v == llvm::omp::Directive::OMPD_ordered) { 3172 // Standalone "ordered" directive. 3173 genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue, 3174 queue.begin()); 3175 } else { 3176 // Dispatch handles the "block-associated" variant of "ordered". 3177 genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, 3178 queue.begin()); 3179 } 3180 } 3181 3182 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3183 semantics::SemanticsContext &semaCtx, 3184 lower::pft::Evaluation &eval, 3185 const parser::OpenMPFlushConstruct &flushConstruct) { 3186 const auto &verbatim = std::get<parser::Verbatim>(flushConstruct.t); 3187 const auto &objectList = 3188 std::get<std::optional<parser::OmpObjectList>>(flushConstruct.t); 3189 const auto &clauseList = 3190 std::get<std::optional<std::list<parser::OmpMemoryOrderClause>>>( 3191 flushConstruct.t); 3192 ObjectList objects = 3193 objectList ? makeObjects(*objectList, semaCtx) : ObjectList{}; 3194 List<Clause> clauses = 3195 clauseList ? makeList(*clauseList, 3196 [&](auto &&s) { return makeClause(s.v, semaCtx); }) 3197 : List<Clause>{}; 3198 mlir::Location currentLocation = converter.genLocation(verbatim.source); 3199 3200 ConstructQueue queue{buildConstructQueue( 3201 converter.getFirOpBuilder().getModule(), semaCtx, eval, verbatim.source, 3202 llvm::omp::Directive::OMPD_flush, clauses)}; 3203 genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects, 3204 queue, queue.begin()); 3205 } 3206 3207 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3208 semantics::SemanticsContext &semaCtx, 3209 lower::pft::Evaluation &eval, 3210 const parser::OpenMPCancelConstruct &cancelConstruct) { 3211 TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); 3212 } 3213 3214 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3215 semantics::SemanticsContext &semaCtx, 3216 lower::pft::Evaluation &eval, 3217 const parser::OpenMPCancellationPointConstruct 3218 &cancellationPointConstruct) { 3219 TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); 3220 } 3221 3222 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3223 semantics::SemanticsContext &semaCtx, 3224 lower::pft::Evaluation &eval, 3225 const parser::OpenMPDepobjConstruct &construct) { 3226 // These values will be ignored until the construct itself is implemented, 3227 // but run them anyway for the sake of testing (via a Todo test). 3228 auto &ompObj = std::get<parser::OmpObject>(construct.t); 3229 const Object &depObj = makeObject(ompObj, semaCtx); 3230 Clause clause = makeClause(std::get<parser::OmpClause>(construct.t), semaCtx); 3231 (void)depObj; 3232 (void)clause; 3233 3234 TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct"); 3235 } 3236 3237 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3238 semantics::SemanticsContext &semaCtx, 3239 lower::pft::Evaluation &eval, 3240 const parser::OmpMetadirectiveDirective &construct) {} 3241 3242 static void 3243 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3244 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3245 const parser::OpenMPStandaloneConstruct &standaloneConstruct) { 3246 Fortran::common::visit( 3247 [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, 3248 standaloneConstruct.u); 3249 } 3250 3251 //===----------------------------------------------------------------------===// 3252 // OpenMPConstruct visitors 3253 //===----------------------------------------------------------------------===// 3254 3255 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3256 semantics::SemanticsContext &semaCtx, 3257 lower::pft::Evaluation &eval, 3258 const parser::OpenMPAllocatorsConstruct &allocsConstruct) { 3259 TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct"); 3260 } 3261 3262 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3263 semantics::SemanticsContext &semaCtx, 3264 lower::pft::Evaluation &eval, 3265 const parser::OpenMPAtomicConstruct &atomicConstruct) { 3266 Fortran::common::visit( 3267 common::visitors{ 3268 [&](const parser::OmpAtomicRead &atomicRead) { 3269 mlir::Location loc = converter.genLocation(atomicRead.source); 3270 lower::genOmpAccAtomicRead<parser::OmpAtomicRead, 3271 parser::OmpAtomicClauseList>( 3272 converter, atomicRead, loc); 3273 }, 3274 [&](const parser::OmpAtomicWrite &atomicWrite) { 3275 mlir::Location loc = converter.genLocation(atomicWrite.source); 3276 lower::genOmpAccAtomicWrite<parser::OmpAtomicWrite, 3277 parser::OmpAtomicClauseList>( 3278 converter, atomicWrite, loc); 3279 }, 3280 [&](const parser::OmpAtomic &atomicConstruct) { 3281 mlir::Location loc = converter.genLocation(atomicConstruct.source); 3282 lower::genOmpAtomic<parser::OmpAtomic, parser::OmpAtomicClauseList>( 3283 converter, atomicConstruct, loc); 3284 }, 3285 [&](const parser::OmpAtomicUpdate &atomicUpdate) { 3286 mlir::Location loc = converter.genLocation(atomicUpdate.source); 3287 lower::genOmpAccAtomicUpdate<parser::OmpAtomicUpdate, 3288 parser::OmpAtomicClauseList>( 3289 converter, atomicUpdate, loc); 3290 }, 3291 [&](const parser::OmpAtomicCapture &atomicCapture) { 3292 mlir::Location loc = converter.genLocation(atomicCapture.source); 3293 lower::genOmpAccAtomicCapture<parser::OmpAtomicCapture, 3294 parser::OmpAtomicClauseList>( 3295 converter, atomicCapture, loc); 3296 }, 3297 [&](const parser::OmpAtomicCompare &atomicCompare) { 3298 mlir::Location loc = converter.genLocation(atomicCompare.source); 3299 TODO(loc, "OpenMP atomic compare"); 3300 }, 3301 }, 3302 atomicConstruct.u); 3303 } 3304 3305 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3306 semantics::SemanticsContext &semaCtx, 3307 lower::pft::Evaluation &eval, 3308 const parser::OpenMPBlockConstruct &blockConstruct) { 3309 const auto &beginBlockDirective = 3310 std::get<parser::OmpBeginBlockDirective>(blockConstruct.t); 3311 const auto &endBlockDirective = 3312 std::get<parser::OmpEndBlockDirective>(blockConstruct.t); 3313 mlir::Location currentLocation = 3314 converter.genLocation(beginBlockDirective.source); 3315 const auto origDirective = 3316 std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v; 3317 List<Clause> clauses = makeClauses( 3318 std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx); 3319 clauses.append(makeClauses( 3320 std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx)); 3321 3322 assert(llvm::omp::blockConstructSet.test(origDirective) && 3323 "Expected block construct"); 3324 (void)origDirective; 3325 3326 for (const Clause &clause : clauses) { 3327 mlir::Location clauseLocation = converter.genLocation(clause.source); 3328 if (!std::holds_alternative<clause::Affinity>(clause.u) && 3329 !std::holds_alternative<clause::Allocate>(clause.u) && 3330 !std::holds_alternative<clause::Copyin>(clause.u) && 3331 !std::holds_alternative<clause::Copyprivate>(clause.u) && 3332 !std::holds_alternative<clause::Default>(clause.u) && 3333 !std::holds_alternative<clause::Depend>(clause.u) && 3334 !std::holds_alternative<clause::Filter>(clause.u) && 3335 !std::holds_alternative<clause::Final>(clause.u) && 3336 !std::holds_alternative<clause::Firstprivate>(clause.u) && 3337 !std::holds_alternative<clause::HasDeviceAddr>(clause.u) && 3338 !std::holds_alternative<clause::If>(clause.u) && 3339 !std::holds_alternative<clause::IsDevicePtr>(clause.u) && 3340 !std::holds_alternative<clause::Map>(clause.u) && 3341 !std::holds_alternative<clause::Nowait>(clause.u) && 3342 !std::holds_alternative<clause::NumTeams>(clause.u) && 3343 !std::holds_alternative<clause::NumThreads>(clause.u) && 3344 !std::holds_alternative<clause::OmpxBare>(clause.u) && 3345 !std::holds_alternative<clause::Priority>(clause.u) && 3346 !std::holds_alternative<clause::Private>(clause.u) && 3347 !std::holds_alternative<clause::ProcBind>(clause.u) && 3348 !std::holds_alternative<clause::Reduction>(clause.u) && 3349 !std::holds_alternative<clause::Shared>(clause.u) && 3350 !std::holds_alternative<clause::Simd>(clause.u) && 3351 !std::holds_alternative<clause::ThreadLimit>(clause.u) && 3352 !std::holds_alternative<clause::Threads>(clause.u) && 3353 !std::holds_alternative<clause::UseDeviceAddr>(clause.u) && 3354 !std::holds_alternative<clause::UseDevicePtr>(clause.u) && 3355 !std::holds_alternative<clause::InReduction>(clause.u) && 3356 !std::holds_alternative<clause::Mergeable>(clause.u) && 3357 !std::holds_alternative<clause::Untied>(clause.u) && 3358 !std::holds_alternative<clause::TaskReduction>(clause.u) && 3359 !std::holds_alternative<clause::Detach>(clause.u)) { 3360 std::string name = 3361 parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id)); 3362 TODO(clauseLocation, name + " clause is not implemented yet"); 3363 } 3364 } 3365 3366 llvm::omp::Directive directive = 3367 std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v; 3368 const parser::CharBlock &source = 3369 std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source; 3370 ConstructQueue queue{ 3371 buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, 3372 eval, source, directive, clauses)}; 3373 genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, 3374 queue.begin()); 3375 } 3376 3377 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3378 semantics::SemanticsContext &semaCtx, 3379 lower::pft::Evaluation &eval, 3380 const parser::OpenMPCriticalConstruct &criticalConstruct) { 3381 const auto &cd = std::get<parser::OmpCriticalDirective>(criticalConstruct.t); 3382 List<Clause> clauses = 3383 makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx); 3384 3385 ConstructQueue queue{buildConstructQueue( 3386 converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source, 3387 llvm::omp::Directive::OMPD_critical, clauses)}; 3388 3389 const auto &name = std::get<std::optional<parser::Name>>(cd.t); 3390 mlir::Location currentLocation = converter.getCurrentLocation(); 3391 genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue, 3392 queue.begin(), name); 3393 } 3394 3395 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3396 semantics::SemanticsContext &semaCtx, 3397 lower::pft::Evaluation &eval, 3398 const parser::OpenMPUtilityConstruct &) { 3399 TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct"); 3400 } 3401 3402 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3403 semantics::SemanticsContext &semaCtx, 3404 lower::pft::Evaluation &eval, 3405 const parser::OpenMPDispatchConstruct &) { 3406 TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct"); 3407 } 3408 3409 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3410 semantics::SemanticsContext &semaCtx, 3411 lower::pft::Evaluation &eval, 3412 const parser::OpenMPExecutableAllocate &execAllocConstruct) { 3413 TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate"); 3414 } 3415 3416 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3417 semantics::SemanticsContext &semaCtx, 3418 lower::pft::Evaluation &eval, 3419 const parser::OpenMPLoopConstruct &loopConstruct) { 3420 const auto &beginLoopDirective = 3421 std::get<parser::OmpBeginLoopDirective>(loopConstruct.t); 3422 List<Clause> clauses = makeClauses( 3423 std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx); 3424 if (auto &endLoopDirective = 3425 std::get<std::optional<parser::OmpEndLoopDirective>>( 3426 loopConstruct.t)) { 3427 clauses.append(makeClauses( 3428 std::get<parser::OmpClauseList>(endLoopDirective->t), semaCtx)); 3429 } 3430 3431 mlir::Location currentLocation = 3432 converter.genLocation(beginLoopDirective.source); 3433 3434 llvm::omp::Directive directive = 3435 std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v; 3436 const parser::CharBlock &source = 3437 std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source; 3438 ConstructQueue queue{ 3439 buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, 3440 eval, source, directive, clauses)}; 3441 genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, 3442 queue.begin()); 3443 } 3444 3445 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3446 semantics::SemanticsContext &semaCtx, 3447 lower::pft::Evaluation &eval, 3448 const parser::OpenMPSectionConstruct §ionConstruct) { 3449 // Do nothing here. SECTION is lowered inside of the lowering for Sections 3450 } 3451 3452 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3453 semantics::SemanticsContext &semaCtx, 3454 lower::pft::Evaluation &eval, 3455 const parser::OpenMPSectionsConstruct §ionsConstruct) { 3456 const auto &beginSectionsDirective = 3457 std::get<parser::OmpBeginSectionsDirective>(sectionsConstruct.t); 3458 List<Clause> clauses = makeClauses( 3459 std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx); 3460 const auto &endSectionsDirective = 3461 std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t); 3462 const auto §ionBlocks = 3463 std::get<parser::OmpSectionBlocks>(sectionsConstruct.t); 3464 clauses.append(makeClauses( 3465 std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx)); 3466 mlir::Location currentLocation = converter.getCurrentLocation(); 3467 3468 llvm::omp::Directive directive = 3469 std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v; 3470 const parser::CharBlock &source = 3471 std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source; 3472 ConstructQueue queue{ 3473 buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, 3474 eval, source, directive, clauses)}; 3475 ConstructQueue::iterator next = queue.begin(); 3476 // Generate constructs that come first e.g. Parallel 3477 while (next != queue.end() && 3478 next->id != llvm::omp::Directive::OMPD_sections) { 3479 genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, 3480 next); 3481 next = std::next(next); 3482 } 3483 3484 // call genSectionsOp directly (not via genOMPDispatch) so that we can add the 3485 // sectionBlocks argument 3486 assert(next != queue.end()); 3487 assert(next->id == llvm::omp::Directive::OMPD_sections); 3488 genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue, 3489 next, sectionBlocks); 3490 assert(std::next(next) == queue.end()); 3491 } 3492 3493 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, 3494 semantics::SemanticsContext &semaCtx, 3495 lower::pft::Evaluation &eval, 3496 const parser::OpenMPConstruct &ompConstruct) { 3497 Fortran::common::visit( 3498 [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, 3499 ompConstruct.u); 3500 } 3501 3502 //===----------------------------------------------------------------------===// 3503 // Public functions 3504 //===----------------------------------------------------------------------===// 3505 3506 mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, 3507 mlir::Operation *op, 3508 mlir::Location loc) { 3509 if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp, 3510 mlir::omp::LoopNestOp>(op)) 3511 return builder.create<mlir::omp::YieldOp>(loc); 3512 return builder.create<mlir::omp::TerminatorOp>(loc); 3513 } 3514 3515 void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter, 3516 lower::SymMap &symTable, 3517 semantics::SemanticsContext &semaCtx, 3518 lower::pft::Evaluation &eval, 3519 const parser::OpenMPConstruct &omp) { 3520 lower::SymMapScope scope(symTable); 3521 genOMP(converter, symTable, semaCtx, eval, omp); 3522 } 3523 3524 void Fortran::lower::genOpenMPDeclarativeConstruct( 3525 lower::AbstractConverter &converter, lower::SymMap &symTable, 3526 semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, 3527 const parser::OpenMPDeclarativeConstruct &omp) { 3528 genOMP(converter, symTable, semaCtx, eval, omp); 3529 genNestedEvaluations(converter, eval); 3530 } 3531 3532 void Fortran::lower::genOpenMPSymbolProperties( 3533 lower::AbstractConverter &converter, const lower::pft::Variable &var) { 3534 assert(var.hasSymbol() && "Expecting Symbol"); 3535 const semantics::Symbol &sym = var.getSymbol(); 3536 3537 if (sym.test(semantics::Symbol::Flag::OmpThreadprivate)) 3538 lower::genThreadprivateOp(converter, var); 3539 3540 if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget)) 3541 lower::genDeclareTargetIntGlobal(converter, var); 3542 } 3543 3544 int64_t 3545 Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { 3546 for (const parser::OmpClause &clause : clauseList.v) { 3547 if (const auto &collapseClause = 3548 std::get_if<parser::OmpClause::Collapse>(&clause.u)) { 3549 const auto *expr = semantics::GetExpr(collapseClause->v); 3550 return evaluate::ToInt64(*expr).value(); 3551 } 3552 } 3553 return 1; 3554 } 3555 3556 void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, 3557 const lower::pft::Variable &var) { 3558 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 3559 mlir::Location currentLocation = converter.getCurrentLocation(); 3560 3561 const semantics::Symbol &sym = var.getSymbol(); 3562 mlir::Value symThreadprivateValue; 3563 if (const semantics::Symbol *common = 3564 semantics::FindCommonBlockContaining(sym.GetUltimate())) { 3565 mlir::Value commonValue = converter.getSymbolAddress(*common); 3566 if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) { 3567 // Generate ThreadprivateOp for a common block instead of its members and 3568 // only do it once for a common block. 3569 return; 3570 } 3571 // Generate ThreadprivateOp and rebind the common block. 3572 mlir::Value commonThreadprivateValue = 3573 firOpBuilder.create<mlir::omp::ThreadprivateOp>( 3574 currentLocation, commonValue.getType(), commonValue); 3575 converter.bindSymbol(*common, commonThreadprivateValue); 3576 // Generate the threadprivate value for the common block member. 3577 symThreadprivateValue = genCommonBlockMember(converter, currentLocation, 3578 sym, commonThreadprivateValue); 3579 } else if (!var.isGlobal()) { 3580 // Non-global variable which can be in threadprivate directive must be one 3581 // variable in main program, and it has implicit SAVE attribute. Take it as 3582 // with SAVE attribute, so to create GlobalOp for it to simplify the 3583 // translation to LLVM IR. 3584 // Avoids performing multiple globalInitializations. 3585 fir::GlobalOp global; 3586 auto module = converter.getModuleOp(); 3587 std::string globalName = converter.mangleName(sym); 3588 if (module.lookupSymbol<fir::GlobalOp>(globalName)) 3589 global = module.lookupSymbol<fir::GlobalOp>(globalName); 3590 else 3591 global = globalInitialization(converter, firOpBuilder, sym, var, 3592 currentLocation); 3593 3594 mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>( 3595 currentLocation, global.resultType(), global.getSymbol()); 3596 symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>( 3597 currentLocation, symValue.getType(), symValue); 3598 } else { 3599 mlir::Value symValue = converter.getSymbolAddress(sym); 3600 3601 // The symbol may be use-associated multiple times, and nothing needs to be 3602 // done after the original symbol is mapped to the threadprivatized value 3603 // for the first time. Use the threadprivatized value directly. 3604 mlir::Operation *op; 3605 if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>()) 3606 op = declOp.getMemref().getDefiningOp(); 3607 else 3608 op = symValue.getDefiningOp(); 3609 if (mlir::isa<mlir::omp::ThreadprivateOp>(op)) 3610 return; 3611 3612 symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>( 3613 currentLocation, symValue.getType(), symValue); 3614 } 3615 3616 fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym); 3617 fir::ExtendedValue symThreadprivateExv = 3618 getExtendedValue(sexv, symThreadprivateValue); 3619 converter.bindSymbol(sym, symThreadprivateExv); 3620 } 3621 3622 // This function replicates threadprivate's behaviour of generating 3623 // an internal fir.GlobalOp for non-global variables in the main program 3624 // that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR 3625 // generation. 3626 void Fortran::lower::genDeclareTargetIntGlobal( 3627 lower::AbstractConverter &converter, const lower::pft::Variable &var) { 3628 if (!var.isGlobal()) { 3629 // A non-global variable which can be in a declare target directive must 3630 // be a variable in the main program, and it has the implicit SAVE 3631 // attribute. We create a GlobalOp for it to simplify the translation to 3632 // LLVM IR. 3633 globalInitialization(converter, converter.getFirOpBuilder(), 3634 var.getSymbol(), var, converter.getCurrentLocation()); 3635 } 3636 } 3637 3638 bool Fortran::lower::isOpenMPTargetConstruct( 3639 const parser::OpenMPConstruct &omp) { 3640 llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; 3641 if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) { 3642 const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t); 3643 dir = std::get<parser::OmpBlockDirective>(begin.t).v; 3644 } else if (const auto *loop = 3645 std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) { 3646 const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t); 3647 dir = std::get<parser::OmpLoopDirective>(begin.t).v; 3648 } 3649 return llvm::omp::allTargetSet.test(dir); 3650 } 3651 3652 void Fortran::lower::gatherOpenMPDeferredDeclareTargets( 3653 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 3654 lower::pft::Evaluation &eval, 3655 const parser::OpenMPDeclarativeConstruct &ompDecl, 3656 llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> 3657 &deferredDeclareTarget) { 3658 Fortran::common::visit( 3659 common::visitors{ 3660 [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { 3661 collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq, 3662 deferredDeclareTarget); 3663 }, 3664 [&](const auto &) {}, 3665 }, 3666 ompDecl.u); 3667 } 3668 3669 bool Fortran::lower::isOpenMPDeviceDeclareTarget( 3670 lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, 3671 lower::pft::Evaluation &eval, 3672 const parser::OpenMPDeclarativeConstruct &ompDecl) { 3673 return Fortran::common::visit( 3674 common::visitors{ 3675 [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { 3676 mlir::omp::DeclareTargetDeviceType targetType = 3677 getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq) 3678 .value_or(mlir::omp::DeclareTargetDeviceType::host); 3679 return targetType != mlir::omp::DeclareTargetDeviceType::host; 3680 }, 3681 [&](const auto &) { return false; }, 3682 }, 3683 ompDecl.u); 3684 } 3685 3686 // In certain cases such as subroutine or function interfaces which declare 3687 // but do not define or directly call the subroutine or function in the same 3688 // module, their lowering is delayed until after the declare target construct 3689 // itself is processed, so there symbol is not within the table. 3690 // 3691 // This function will also return true if we encounter any device declare 3692 // target cases, to satisfy checking if we require the requires attributes 3693 // on the module. 3694 bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions( 3695 mlir::Operation *mod, 3696 llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets, 3697 AbstractConverter &converter) { 3698 bool deviceCodeFound = false; 3699 auto modOp = llvm::cast<mlir::ModuleOp>(mod); 3700 for (auto declTar : deferredDeclareTargets) { 3701 mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym)); 3702 3703 // Due to interfaces being optionally emitted on usage in a module, 3704 // not finding an operation at this point cannot be a hard error, we 3705 // simply ignore it for now. 3706 // TODO: Add semantic checks for detecting cases where an erronous 3707 // (undefined) symbol has been supplied to a declare target clause 3708 if (!op) 3709 continue; 3710 3711 auto devType = declTar.declareTargetDeviceType; 3712 if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host) 3713 deviceCodeFound = true; 3714 3715 markDeclareTarget(op, converter, declTar.declareTargetCaptureClause, 3716 devType); 3717 } 3718 3719 return deviceCodeFound; 3720 } 3721 3722 void Fortran::lower::genOpenMPRequires(mlir::Operation *mod, 3723 const semantics::Symbol *symbol) { 3724 using MlirRequires = mlir::omp::ClauseRequires; 3725 using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag; 3726 3727 if (auto offloadMod = 3728 llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) { 3729 semantics::WithOmpDeclarative::RequiresFlags semaFlags; 3730 if (symbol) { 3731 common::visit( 3732 [&](const auto &details) { 3733 if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative, 3734 std::decay_t<decltype(details)>>) { 3735 if (details.has_ompRequires()) 3736 semaFlags = *details.ompRequires(); 3737 } 3738 }, 3739 symbol->details()); 3740 } 3741 3742 // Use pre-populated omp.requires module attribute if it was set, so that 3743 // the "-fopenmp-force-usm" compiler option is honored. 3744 MlirRequires mlirFlags = offloadMod.getRequires(); 3745 if (semaFlags.test(SemaRequires::ReverseOffload)) 3746 mlirFlags = mlirFlags | MlirRequires::reverse_offload; 3747 if (semaFlags.test(SemaRequires::UnifiedAddress)) 3748 mlirFlags = mlirFlags | MlirRequires::unified_address; 3749 if (semaFlags.test(SemaRequires::UnifiedSharedMemory)) 3750 mlirFlags = mlirFlags | MlirRequires::unified_shared_memory; 3751 if (semaFlags.test(SemaRequires::DynamicAllocators)) 3752 mlirFlags = mlirFlags | MlirRequires::dynamic_allocators; 3753 3754 offloadMod.setRequires(mlirFlags); 3755 } 3756 } 3757