//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);
static OpenMPDirectiveKind
getEffectiveDirectiveKind(const OMPExecutableDirective &S);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    return !(isOpenMPTargetExecutionDirective(EKind) ||
             isOpenMPLoopBoundSharingDirective(EKind)) &&
           isOpenMPParallelDirective(EKind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    return !isOpenMPTargetExecutionDirective(EKind) &&
           isOpenMPTeamsDirective(EKind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlist()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible to the following
      // statements that use them, unpack the CompoundStmt they are nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but they need to be
        // emitted here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(getEffectiveDirectiveKind(S)))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they
    // are not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

// The loop directive with a bind clause will be mapped to a different
// directive with corresponding semantics.
static OpenMPDirectiveKind
getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
  OpenMPDirectiveKind Kind = S.getDirectiveKind();
  if (Kind != OMPD_loop)
    return Kind;

  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
  if (const auto *C = S.getSingleClause<OMPBindClause>())
    BindKind = C->getBindKind();

  switch (BindKind) {
  case OMPC_BIND_parallel:
    return OMPD_for;
  case OMPC_BIND_teams:
    return OMPD_distribute;
  case OMPC_BIND_thread:
    return OMPD_simd;
  default:
    return OMPD_loop;
  }
}
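
// For example, a generic loop directive such as
//   #pragma omp loop bind(parallel)
//   for (int i = 0; i < N; ++i) ...
// is emitted with 'for' semantics, while bind(teams) and bind(thread) select
// 'distribute' and 'simd' semantics, respectively; without a bind clause the
// directive keeps its OMPD_loop kind.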

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
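
// For a VLA such as 'int a[n][m]', the size computed above is conceptually
//   (n * m) * sizeof(int)
// with the multiplications emitted as no-unsigned-wrap ('mul nuw') products
// of the VLA extents and the constant element size.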

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}
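
// A sketch of the by-copy capture protocol implemented by
// GenerateOpenMPCapturedVars and castValueFromUintptr above: a non-pointer
// value captured by copy (e.g. an 'int') is stored into a uintptr-sized
// temporary through a pointer of its original type and reloaded as uintptr,
// since the runtime passes all captures through pointer-sized arguments;
// castValueFromUintptr performs the inverse conversion so the outlined body
// can address the value with its original type again.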

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, {}, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the
    // outlined function argument type should be uintptr and the value
    // properly cast to uintptr. This is necessary given that the runtime
    // library is only able to deal with pointers. VLA type sizes can be
    // passed to the outlined function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything,
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
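
// Note on the wrapper scheme used by GenerateOpenMPCapturedStmtFunction
// below: when debug info is requested, the body is emitted into a "_debug__"
// suffixed function with the original (non-uintptr) parameter types so that
// captured variables remain inspectable, and a thin wrapper with the
// runtime-mandated uintptr-based signature is emitted under the helper name
// that simply forwards its arguments to the debug version.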

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args, WrapperArgs;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
      WrapperLocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
      WrapperVLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();

  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  llvm::Function *WrapperF = nullptr;
  if (NeedWrapperFunction) {
    // Emit the final kernel early to allow attributes to be added by the
    // OpenMP IRBuilder.
    FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                              /*RegisterCastedArgsOnly=*/true,
                              CapturedStmtInfo->getHelperName(), Loc);
    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
    WrapperF =
        emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                     WrapperCGF.CXXThisValue, WrapperFO);
    Out << "_debug__";
  }
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(
      *this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : WrapperLocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : WrapperVLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Reverse the order.
  WrapperF->removeFromParent();
  F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);

  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
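
// The element-wise copy loop emitted by EmitOMPAggregateAssign above has,
// roughly, this shape:
//   entry:             br i1 (DestBegin == DestEnd), done, body
//   omp.arraycpy.body: src/dest element PHIs; CopyGen(dest, src);
//                      advance both pointers;
//                      br i1 (next == DestEnd), done, body
//   omp.arraycpy.done: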

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                           isOpenMPTargetExecutionDirective(EKind);
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlist())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, EKind);
  // Force emission of the firstprivate copy if the directive does not emit
  // an outlined function, e.g. 'omp for', 'omp simd', 'omp distribute'.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables captured by
      // reference in target regions.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
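
// For example, given
//   #pragma omp parallel firstprivate(a)
// each thread gets a private copy of 'a' that is copy-initialized from the
// original variable before the region runs, in contrast to 'private(a)'
// (handled below), which only default-initializes the private copy.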

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
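
// Rationale for the address comparison above: on the master thread the
// threadprivate instance and the copyin source (the master's own variable)
// are the same object, so comparing the two addresses lets the master skip
// the self-copy while every other thread takes the copyin.not.master path
// and copies the master's values into its own threadprivate instances.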

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in
      // the runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit
        // is not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
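
// For example, given
//   #pragma omp for lastprivate(x)
// only the thread that executes the sequentially last iteration takes the
// ThenBB path above and copies its private 'x' back into the original
// variable; with the 'conditional' modifier the runtime instead tracks the
// last actual assignment to 'x'.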

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction = isOpenMPWorksharingDirective(EKind);
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (EKind) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel_for:
      TaskRedRef =
          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_distribute_parallel_for:
      TaskRedRef =
          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_target_teams_distribute_parallel_for:
      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
                       .getTaskReductionRefExpr();
      break;
    case OMPD_simd:
    case OMPD_for_simd:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_parallel_for_simd:
    case OMPD_task:
    case OMPD_taskyield:
    case OMPD_error:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_ordered:
    case OMPD_atomic:
    case OMPD_teams:
    case OMPD_target:
    case OMPD_cancellation_point:
    case OMPD_cancel:
    case OMPD_target_data:
    case OMPD_target_enter_data:
    case OMPD_target_exit_data:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_distribute:
    case OMPD_target_update:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_distribute_simd:
    case OMPD_target_parallel_for_simd:
    case OMPD_target_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams_distribute_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_declare_simd:
    case OMPD_requires:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive with task reductions.");
    }

    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
    EmitVarDecl(*VD);
    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
                      /*Volatile=*/false, TaskRedRef->getType());
  }
}

void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  bool IsReductionWithTaskMod = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    // Do not emit for inscan reductions.
    if (C->getModifier() == OMPC_REDUCTION_inscan)
      continue;
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    IsReductionWithTaskMod =
        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
  }
  if (HasAtLeastOneReduction) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
    if (IsReductionWithTaskMod) {
      CGM.getOpenMPRuntime().emitTaskReductionFini(
          *this, D.getBeginLoc(), isOpenMPWorksharingDirective(EKind));
    }
    bool TeamsLoopCanBeParallel = false;
    if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
      TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(EKind) ||
                      TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if nowait clause is present or directive is a
    // parallel directive (it always has implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
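
// For example, for
//   #pragma omp parallel for reduction(+:sum)
// the runtime reduction is emitted in nowait form because the parallel
// directive already ends with an implicit barrier, while for 'simd'
// reductions (SimpleReduction) the final combination is emitted inline
// without a runtime call.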
1500 CGM.getOpenMPRuntime().emitReduction(
1501 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1502 {WithNowait, SimpleReduction, ReductionKind});
1503 }
1504 }
1505 
1506 static void emitPostUpdateForReductionClause(
1507 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1508 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1509 if (!CGF.HaveInsertPoint())
1510 return;
1511 llvm::BasicBlock *DoneBB = nullptr;
1512 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1513 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1514 if (!DoneBB) {
1515 if (llvm::Value *Cond = CondGen(CGF)) {
1516 // If the first post-update expression is found, emit conditional
1517 // block if it was requested.
1518 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1519 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1520 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1521 CGF.EmitBlock(ThenBB);
1522 }
1523 }
1524 CGF.EmitIgnoredExpr(PostUpdate);
1525 }
1526 }
1527 if (DoneBB)
1528 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1529 }
1530 
1531 namespace {
1532 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1533 /// parallel function. This is necessary for combined constructs such as
1534 /// 'distribute parallel for'.
1535 typedef llvm::function_ref<void(CodeGenFunction &,
1536 const OMPExecutableDirective &,
1537 llvm::SmallVectorImpl<llvm::Value *> &)>
1538 CodeGenBoundParametersTy;
1539 } // anonymous namespace
1540 
1541 static void
1542 checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1543 const OMPExecutableDirective &S) {
1544 if (CGF.getLangOpts().OpenMP < 50)
1545 return;
1546 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1547 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1548 for (const Expr *Ref : C->varlist()) {
1549 if (!Ref->getType()->isScalarType())
1550 continue;
1551 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1552 if (!DRE)
1553 continue;
1554 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1555 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1556 }
1557 }
1558 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1559 for (const Expr *Ref : C->varlist()) {
1560 if (!Ref->getType()->isScalarType())
1561 continue;
1562 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1563 if (!DRE)
1564 continue;
1565 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1566 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1567 }
1568 }
1569 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1570 for (const Expr *Ref : C->varlist()) {
1571 if (!Ref->getType()->isScalarType())
1572 continue;
1573 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1574 if (!DRE)
1575 continue;
1576 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1577 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1578 }
1579 }
1580 // Privates need not be analyzed since they are not captured at all.
1581 // Task reductions may be skipped - tasks are ignored.
1582 // Firstprivates do not return a value but may be passed by reference - no need
1583 // to check for updated lastprivate conditional.
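// For example, given
//   #pragma omp parallel for lastprivate(conditional: x) firstprivate(y)
// x is checked for conditional updates above, while y is only recorded in
// PrivateDecls below so that the final shared-variable analysis skips it.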
1584 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1585 for (const Expr *Ref : C->varlist()) {
1586 if (!Ref->getType()->isScalarType())
1587 continue;
1588 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1589 if (!DRE)
1590 continue;
1591 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1592 }
1593 }
1594 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1595 CGF, S, PrivateDecls);
1596 }
1597 
1598 static void emitCommonOMPParallelDirective(
1599 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1600 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1601 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1602 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1603 llvm::Value *NumThreads = nullptr;
1604 llvm::Function *OutlinedFn =
1605 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1606 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1607 CodeGen);
1608 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1609 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1610 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1611 /*IgnoreResultAssign=*/true);
1612 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1613 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1614 }
1615 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1616 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1617 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1618 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1619 }
1620 const Expr *IfCond = nullptr;
1621 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1622 if (C->getNameModifier() == OMPD_unknown ||
1623 C->getNameModifier() == OMPD_parallel) {
1624 IfCond = C->getCondition();
1625 break;
1626 }
1627 }
1628 
1629 OMPParallelScope Scope(CGF, S);
1630 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1631 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk's
1632 // lower and upper bounds with the pragma 'for' chunking mechanism.
1633 // The following lambda takes care of appending the lower and upper bound
1634 // parameters when necessary.
1635 CodeGenBoundParameters(CGF, S, CapturedVars);
1636 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1637 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1638 CapturedVars, IfCond, NumThreads);
1639 }
1640 
1641 static bool isAllocatableDecl(const VarDecl *VD) {
1642 const VarDecl *CVD = VD->getCanonicalDecl();
1643 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1644 return false;
1645 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1646 // Variables that merely use the default allocation are not treated as allocatable.
1647 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1648 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1649 !AA->getAllocator());
1650 }
1651 
1652 static void emitEmptyBoundParameters(CodeGenFunction &,
1653 const OMPExecutableDirective &,
1654 llvm::SmallVectorImpl<llvm::Value *> &) {}
1655 
1656 static void emitOMPCopyinClause(CodeGenFunction &CGF,
1657 const OMPExecutableDirective &S) {
1658 bool Copyins = CGF.EmitOMPCopyinClause(S);
1659 if (Copyins) {
1660 // Emit an implicit barrier to synchronize threads and avoid data races when
1661 // propagating the master thread's values of threadprivate variables to the
1662 // local instances of those variables in all other implicit threads.
1663 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1664 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1665 /*ForceSimpleCall=*/true);
1666 }
1667 }
1668 
1669 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1670 CodeGenFunction &CGF, const VarDecl *VD) {
1671 CodeGenModule &CGM = CGF.CGM;
1672 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1673 
1674 if (!VD)
1675 return Address::invalid();
1676 const VarDecl *CVD = VD->getCanonicalDecl();
1677 if (!isAllocatableDecl(CVD))
1678 return Address::invalid();
1679 llvm::Value *Size;
1680 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1681 if (CVD->getType()->isVariablyModifiedType()) {
1682 Size = CGF.getTypeSize(CVD->getType());
1683 // Align the size: ((size + align - 1) / align) * align
1684 Size = CGF.Builder.CreateNUWAdd(
1685 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1686 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1687 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1688 } else {
1689 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1690 Size = CGM.getSize(Sz.alignTo(Align));
1691 }
1692 
1693 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1694 assert(AA->getAllocator() &&
1695 "Expected allocator expression for non-default allocator.");
1696 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1697 // According to the standard, the original allocator type is an enum (integer).
1698 // Convert it to a pointer type, if required.
1699 if (Allocator->getType()->isIntegerTy())
1700 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1701 else if (Allocator->getType()->isPointerTy())
1702 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1703 CGM.VoidPtrTy);
1704 
1705 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1706 CGF.Builder, Size, Allocator,
1707 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1708 llvm::CallInst *FreeCI =
1709 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1710 
1711 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1712 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1713 Addr,
1714 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1715 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1716 return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
1717 }
1718 
1719 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1720 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1721 SourceLocation Loc) {
1722 CodeGenModule &CGM = CGF.CGM;
1723 if (CGM.getLangOpts().OpenMPUseTLS &&
1724 CGM.getContext().getTargetInfo().isTLSSupported())
1725 return VDAddr;
1726 
1727 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1728 
1729 llvm::Type *VarTy = VDAddr.getElementType();
1730 llvm::Value *Data =
1731 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy);
1732 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1733 std::string Suffix = getNameWithSeparators({"cache", ""});
1734 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1735 
1736 llvm::CallInst *ThreadPrivateCacheCall =
1737 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1738 
1739 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1740 }
1741 
1742 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1743 ArrayRef<StringRef> Parts,
StringRef FirstSeparator, StringRef Separator) { 1744 SmallString<128> Buffer; 1745 llvm::raw_svector_ostream OS(Buffer); 1746 StringRef Sep = FirstSeparator; 1747 for (StringRef Part : Parts) { 1748 OS << Sep << Part; 1749 Sep = Separator; 1750 } 1751 return OS.str().str(); 1752 } 1753 1754 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 1755 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, 1756 InsertPointTy CodeGenIP, Twine RegionName) { 1757 CGBuilderTy &Builder = CGF.Builder; 1758 Builder.restoreIP(CodeGenIP); 1759 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, 1760 "." + RegionName + ".after"); 1761 1762 { 1763 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); 1764 CGF.EmitStmt(RegionBodyStmt); 1765 } 1766 1767 if (Builder.saveIP().isSet()) 1768 Builder.CreateBr(FiniBB); 1769 } 1770 1771 void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( 1772 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, 1773 InsertPointTy CodeGenIP, Twine RegionName) { 1774 CGBuilderTy &Builder = CGF.Builder; 1775 Builder.restoreIP(CodeGenIP); 1776 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, 1777 "." + RegionName + ".after"); 1778 1779 { 1780 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); 1781 CGF.EmitStmt(RegionBodyStmt); 1782 } 1783 1784 if (Builder.saveIP().isSet()) 1785 Builder.CreateBr(FiniBB); 1786 } 1787 1788 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1789 if (CGM.getLangOpts().OpenMPIRBuilder) { 1790 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1791 // Check if we have any if clause associated with the directive. 1792 llvm::Value *IfCond = nullptr; 1793 if (const auto *C = S.getSingleClause<OMPIfClause>()) 1794 IfCond = EmitScalarExpr(C->getCondition(), 1795 /*IgnoreResultAssign=*/true); 1796 1797 llvm::Value *NumThreads = nullptr; 1798 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) 1799 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(), 1800 /*IgnoreResultAssign=*/true); 1801 1802 ProcBindKind ProcBind = OMP_PROC_BIND_default; 1803 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) 1804 ProcBind = ProcBindClause->getProcBindKind(); 1805 1806 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 1807 1808 // The cleanup callback that finalizes all variables at the given location, 1809 // thus calls destructors etc. 1810 auto FiniCB = [this](InsertPointTy IP) { 1811 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 1812 return llvm::Error::success(); 1813 }; 1814 1815 // Privatization callback that performs appropriate action for 1816 // shared/private/firstprivate/lastprivate/copyin/... variables. 1817 // 1818 // TODO: This defaults to shared right now. 1819 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1820 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 1821 // The next line is appropriate only for variables (Val) with the 1822 // data-sharing attribute "shared". 
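// As a sketch of what a privatizing callback could do instead (hypothetical;
// the callback interface is unchanged): allocate a copy at AllocaIP,
// optionally copy-initialize it from Val for firstprivate semantics, and
// hand that allocation back through ReplVal instead of aliasing Val.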
1823 ReplVal = &Val; 1824 1825 return CodeGenIP; 1826 }; 1827 1828 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1829 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); 1830 1831 auto BodyGenCB = [&, this](InsertPointTy AllocaIP, 1832 InsertPointTy CodeGenIP) { 1833 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( 1834 *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); 1835 return llvm::Error::success(); 1836 }; 1837 1838 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 1839 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 1840 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 1841 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 1842 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 1843 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1844 IfCond, NumThreads, ProcBind, S.hasCancel())); 1845 Builder.restoreIP(AfterIP); 1846 return; 1847 } 1848 1849 // Emit parallel region as a standalone region. 1850 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1851 Action.Enter(CGF); 1852 OMPPrivateScope PrivateScope(CGF); 1853 emitOMPCopyinClause(CGF, S); 1854 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1855 CGF.EmitOMPPrivateClause(S, PrivateScope); 1856 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1857 (void)PrivateScope.Privatize(); 1858 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); 1859 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1860 }; 1861 { 1862 auto LPCRegion = 1863 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 1864 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1865 emitEmptyBoundParameters); 1866 emitPostUpdateForReductionClause(*this, S, 1867 [](CodeGenFunction &) { return nullptr; }); 1868 } 1869 // Check for outer lastprivate conditional update. 1870 checkForLastprivateConditionalUpdate(*this, S); 1871 } 1872 1873 void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { 1874 EmitStmt(S.getIfStmt()); 1875 } 1876 1877 namespace { 1878 /// RAII to handle scopes for loop transformation directives. 
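/// For a transformation such as '#pragma omp tile' or '#pragma omp unroll',
/// this keeps an OMPLoopScope and a captured-statement context alive while
/// the transformed loop nest is emitted, so captured expressions are
/// resolved correctly.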
1879 class OMPTransformDirectiveScopeRAII {
1880 OMPLoopScope *Scope = nullptr;
1881 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1882 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1883 
1884 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1885 delete;
1886 OMPTransformDirectiveScopeRAII &
1887 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1888 
1889 public:
1890 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1891 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1892 Scope = new OMPLoopScope(CGF, *Dir);
1893 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1894 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1895 }
1896 }
1897 ~OMPTransformDirectiveScopeRAII() {
1898 if (!Scope)
1899 return;
1900 delete CapInfoRAII;
1901 delete CGSI;
1902 delete Scope;
1903 }
1904 };
1905 } // namespace
1906 
1907 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1908 int MaxLevel, int Level = 0) {
1909 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1910 const Stmt *SimplifiedS = S->IgnoreContainers();
1911 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1912 PrettyStackTraceLoc CrashInfo(
1913 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1914 "LLVM IR generation of compound statement ('{}')");
1915 
1916 // Keep track of the current cleanup stack depth, including debug scopes.
1917 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1918 for (const Stmt *CurStmt : CS->body())
1919 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1920 return;
1921 }
1922 if (SimplifiedS == NextLoop) {
1923 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1924 SimplifiedS = Dir->getTransformedStmt();
1925 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1926 SimplifiedS = CanonLoop->getLoopStmt();
1927 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1928 S = For->getBody();
1929 } else {
1930 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1931 "Expected canonical for loop or range-based for loop.");
1932 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1933 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1934 S = CXXFor->getBody();
1935 }
1936 if (Level + 1 < MaxLevel) {
1937 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1938 S, /*TryImperfectlyNestedLoops=*/true);
1939 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1940 return;
1941 }
1942 }
1943 CGF.EmitStmt(S);
1944 }
1945 
1946 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1947 JumpDest LoopExit) {
1948 RunCleanupsScope BodyScope(*this);
1949 // Update counter values on the current iteration.
1950 for (const Expr *UE : D.updates())
1951 EmitIgnoredExpr(UE);
1952 // Update the linear variables.
1953 // In distribute directives only loop counters may be marked as linear, so
1954 // there is no need to generate the code for them.
1955 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
1956 if (!isOpenMPDistributeDirective(EKind)) {
1957 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1958 for (const Expr *UE : C->updates())
1959 EmitIgnoredExpr(UE);
1960 }
1961 }
1962 
1963 // On a continue in the body, jump to the end.
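// That is, a 'continue' in the body branches to the omp.body.continue block
// emitted below, so BodyScope's pending cleanups still run before control
// reaches the enclosing loop's increment.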
1964 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1965 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1966 for (const Expr *E : D.finals_conditions()) {
1967 if (!E)
1968 continue;
1969 // Check that loop counter in non-rectangular nest fits into the iteration
1970 // space.
1971 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1972 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1973 getProfileCount(D.getBody()));
1974 EmitBlock(NextBB);
1975 }
1976 
1977 OMPPrivateScope InscanScope(*this);
1978 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1979 bool IsInscanRegion = InscanScope.Privatize();
1980 if (IsInscanRegion) {
1981 // Need to remember the blocks before and after the scan directive
1982 // so they can be dispatched correctly depending on the clause used in
1983 // this directive, inclusive or exclusive. For an inclusive scan the natural
1984 // order of the blocks is used; for an exclusive clause the blocks must be
1985 // executed in reverse order.
1986 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1987 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1988 // No need to allocate the inscan exit block here; in simd mode it is
1989 // selected in the codegen for the scan directive.
1990 if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
1991 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1992 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1993 EmitBranch(OMPScanDispatch);
1994 EmitBlock(OMPBeforeScanBlock);
1995 }
1996 
1997 // Emit loop variables for C++ range loops.
1998 const Stmt *Body =
1999 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
2000 // Emit loop body.
2001 emitBody(*this, Body,
2002 OMPLoopBasedDirective::tryToFindNextInnerLoop(
2003 Body, /*TryImperfectlyNestedLoops=*/true),
2004 D.getLoopsNumber());
2005 
2006 // Jump to the dispatcher at the end of the loop body.
2007 if (IsInscanRegion)
2008 EmitBranch(OMPScanExitBlock);
2009 
2010 // The end (updates/cleanups).
2011 EmitBlock(Continue.getBlock());
2012 BreakContinueStack.pop_back();
2013 }
2014 
2015 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
2016 
2017 /// Emit a captured statement and return the function as well as its captured
2018 /// closure context.
2019 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
2020 const CapturedStmt *S) {
2021 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
2022 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
2023 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
2024 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
2025 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
2026 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
2027 
2028 return {F, CapStruct.getPointer(ParentCGF)};
2029 }
2030 
2031 /// Emit a call to a previously captured closure.
2032 static llvm::CallInst *
2033 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
2034 llvm::ArrayRef<llvm::Value *> Args) {
2035 // Append the closure context to the argument list.
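// That is, the callee is invoked as F(Args..., Context); the closure context
// produced by emitCapturedStmtFunc is always passed as the trailing argument.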
2036 SmallVector<llvm::Value *> EffectiveArgs; 2037 EffectiveArgs.reserve(Args.size() + 1); 2038 llvm::append_range(EffectiveArgs, Args); 2039 EffectiveArgs.push_back(Cap.second); 2040 2041 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); 2042 } 2043 2044 llvm::CanonicalLoopInfo * 2045 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { 2046 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); 2047 2048 // The caller is processing the loop-associated directive processing the \p 2049 // Depth loops nested in \p S. Put the previous pending loop-associated 2050 // directive to the stack. If the current loop-associated directive is a loop 2051 // transformation directive, it will push its generated loops onto the stack 2052 // such that together with the loops left here they form the combined loop 2053 // nest for the parent loop-associated directive. 2054 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; 2055 ExpectedOMPLoopDepth = Depth; 2056 2057 EmitStmt(S); 2058 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); 2059 2060 // The last added loop is the outermost one. 2061 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); 2062 2063 // Pop the \p Depth loops requested by the call from that stack and restore 2064 // the previous context. 2065 OMPLoopNestStack.pop_back_n(Depth); 2066 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; 2067 2068 return Result; 2069 } 2070 2071 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { 2072 const Stmt *SyntacticalLoop = S->getLoopStmt(); 2073 if (!getLangOpts().OpenMPIRBuilder) { 2074 // Ignore if OpenMPIRBuilder is not enabled. 2075 EmitStmt(SyntacticalLoop); 2076 return; 2077 } 2078 2079 LexicalScope ForScope(*this, S->getSourceRange()); 2080 2081 // Emit init statements. The Distance/LoopVar funcs may reference variable 2082 // declarations they contain. 2083 const Stmt *BodyStmt; 2084 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { 2085 if (const Stmt *InitStmt = For->getInit()) 2086 EmitStmt(InitStmt); 2087 BodyStmt = For->getBody(); 2088 } else if (const auto *RangeFor = 2089 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { 2090 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) 2091 EmitStmt(RangeStmt); 2092 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) 2093 EmitStmt(BeginStmt); 2094 if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) 2095 EmitStmt(EndStmt); 2096 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) 2097 EmitStmt(LoopVarStmt); 2098 BodyStmt = RangeFor->getBody(); 2099 } else 2100 llvm_unreachable("Expected for-stmt or range-based for-stmt"); 2101 2102 // Emit closure for later use. By-value captures will be captured here. 2103 const CapturedStmt *DistanceFunc = S->getDistanceFunc(); 2104 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); 2105 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); 2106 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); 2107 2108 // Call the distance function to get the number of iterations of the loop to 2109 // come. 2110 QualType LogicalTy = DistanceFunc->getCapturedDecl() 2111 ->getParam(0) 2112 ->getType() 2113 .getNonReferenceType(); 2114 RawAddress CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); 2115 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); 2116 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); 2117 2118 // Emit the loop structure. 
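// Conceptually, the emitted structure is (a sketch; names illustrative):
//   DistanceFunc(&Count, &closure);         // trip count, loaded above
//   for (IV = 0; IV < Count; ++IV) {
//     LoopVarFunc(&LoopVar, IV, &closure);  // materialize the user's loop var
//     <Body>;
//   }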
2119 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2120 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, 2121 llvm::Value *IndVar) { 2122 Builder.restoreIP(CodeGenIP); 2123 2124 // Emit the loop body: Convert the logical iteration number to the loop 2125 // variable and emit the body. 2126 const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); 2127 LValue LCVal = EmitLValue(LoopVarRef); 2128 Address LoopVarAddress = LCVal.getAddress(); 2129 emitCapturedStmtCall(*this, LoopVarClosure, 2130 {LoopVarAddress.emitRawPointer(*this), IndVar}); 2131 2132 RunCleanupsScope BodyScope(*this); 2133 EmitStmt(BodyStmt); 2134 return llvm::Error::success(); 2135 }; 2136 2137 llvm::CanonicalLoopInfo *CL = 2138 cantFail(OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal)); 2139 2140 // Finish up the loop. 2141 Builder.restoreIP(CL->getAfterIP()); 2142 ForScope.ForceCleanup(); 2143 2144 // Remember the CanonicalLoopInfo for parent AST nodes consuming it. 2145 OMPLoopNestStack.push_back(CL); 2146 } 2147 2148 void CodeGenFunction::EmitOMPInnerLoop( 2149 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, 2150 const Expr *IncExpr, 2151 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 2152 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 2153 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 2154 2155 // Start the loop with a block that tests the condition. 2156 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 2157 EmitBlock(CondBlock); 2158 const SourceRange R = S.getSourceRange(); 2159 2160 // If attributes are attached, push to the basic block with them. 2161 const auto &OMPED = cast<OMPExecutableDirective>(S); 2162 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 2163 const Stmt *SS = ICS->getCapturedStmt(); 2164 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 2165 OMPLoopNestStack.clear(); 2166 if (AS) 2167 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 2168 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 2169 SourceLocToDebugLoc(R.getEnd())); 2170 else 2171 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2172 SourceLocToDebugLoc(R.getEnd())); 2173 2174 // If there are any cleanups between here and the loop-exit scope, 2175 // create a block to stage a loop exit along. 2176 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2177 if (RequiresCleanup) 2178 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 2179 2180 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 2181 2182 // Emit condition. 2183 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 2184 if (ExitBlock != LoopExit.getBlock()) { 2185 EmitBlock(ExitBlock); 2186 EmitBranchThroughCleanup(LoopExit); 2187 } 2188 2189 EmitBlock(LoopBody); 2190 incrementProfileCounter(&S); 2191 2192 // Create a block for the increment. 2193 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 2194 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2195 2196 BodyGen(*this); 2197 2198 // Emit "IV = IV + 1" and a back-edge to the condition block. 2199 EmitBlock(Continue.getBlock()); 2200 EmitIgnoredExpr(IncExpr); 2201 PostIncGen(*this); 2202 BreakContinueStack.pop_back(); 2203 EmitBranch(CondBlock); 2204 LoopStack.pop(); 2205 // Emit the fall-through block. 
2206 EmitBlock(LoopExit.getBlock()); 2207 } 2208 2209 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 2210 if (!HaveInsertPoint()) 2211 return false; 2212 // Emit inits for the linear variables. 2213 bool HasLinears = false; 2214 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2215 for (const Expr *Init : C->inits()) { 2216 HasLinears = true; 2217 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 2218 if (const auto *Ref = 2219 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 2220 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 2221 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 2222 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2223 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2224 VD->getInit()->getType(), VK_LValue, 2225 VD->getInit()->getExprLoc()); 2226 EmitExprAsInit( 2227 &DRE, VD, 2228 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), 2229 /*capturedByInit=*/false); 2230 EmitAutoVarCleanups(Emission); 2231 } else { 2232 EmitVarDecl(*VD); 2233 } 2234 } 2235 // Emit the linear steps for the linear clauses. 2236 // If a step is not constant, it is pre-calculated before the loop. 2237 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 2238 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 2239 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 2240 // Emit calculation of the linear step. 2241 EmitIgnoredExpr(CS); 2242 } 2243 } 2244 return HasLinears; 2245 } 2246 2247 void CodeGenFunction::EmitOMPLinearClauseFinal( 2248 const OMPLoopDirective &D, 2249 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2250 if (!HaveInsertPoint()) 2251 return; 2252 llvm::BasicBlock *DoneBB = nullptr; 2253 // Emit the final values of the linear variables. 2254 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2255 auto IC = C->varlist_begin(); 2256 for (const Expr *F : C->finals()) { 2257 if (!DoneBB) { 2258 if (llvm::Value *Cond = CondGen(*this)) { 2259 // If the first post-update expression is found, emit conditional 2260 // block if it was requested. 
2261 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 2262 DoneBB = createBasicBlock(".omp.linear.pu.done"); 2263 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2264 EmitBlock(ThenBB); 2265 } 2266 } 2267 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 2268 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 2269 CapturedStmtInfo->lookup(OrigVD) != nullptr, 2270 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 2271 Address OrigAddr = EmitLValue(&DRE).getAddress(); 2272 CodeGenFunction::OMPPrivateScope VarScope(*this); 2273 VarScope.addPrivate(OrigVD, OrigAddr); 2274 (void)VarScope.Privatize(); 2275 EmitIgnoredExpr(F); 2276 ++IC; 2277 } 2278 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 2279 EmitIgnoredExpr(PostUpdate); 2280 } 2281 if (DoneBB) 2282 EmitBlock(DoneBB, /*IsFinished=*/true); 2283 } 2284 2285 static void emitAlignedClause(CodeGenFunction &CGF, 2286 const OMPExecutableDirective &D) { 2287 if (!CGF.HaveInsertPoint()) 2288 return; 2289 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 2290 llvm::APInt ClauseAlignment(64, 0); 2291 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 2292 auto *AlignmentCI = 2293 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 2294 ClauseAlignment = AlignmentCI->getValue(); 2295 } 2296 for (const Expr *E : Clause->varlist()) { 2297 llvm::APInt Alignment(ClauseAlignment); 2298 if (Alignment == 0) { 2299 // OpenMP [2.8.1, Description] 2300 // If no optional parameter is specified, implementation-defined default 2301 // alignments for SIMD instructions on the target platforms are assumed. 2302 Alignment = 2303 CGF.getContext() 2304 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 2305 E->getType()->getPointeeType())) 2306 .getQuantity(); 2307 } 2308 assert((Alignment == 0 || Alignment.isPowerOf2()) && 2309 "alignment is not power of 2"); 2310 if (Alignment != 0) { 2311 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 2312 CGF.emitAlignmentAssumption( 2313 PtrValue, E, /*No second loc needed*/ SourceLocation(), 2314 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 2315 } 2316 } 2317 } 2318 } 2319 2320 void CodeGenFunction::EmitOMPPrivateLoopCounters( 2321 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 2322 if (!HaveInsertPoint()) 2323 return; 2324 auto I = S.private_counters().begin(); 2325 for (const Expr *E : S.counters()) { 2326 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2327 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 2328 // Emit var without initialization. 2329 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 2330 EmitAutoVarCleanups(VarEmission); 2331 LocalDeclMap.erase(PrivateVD); 2332 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress()); 2333 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 2334 VD->hasGlobalStorage()) { 2335 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 2336 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 2337 E->getType(), VK_LValue, E->getExprLoc()); 2338 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress()); 2339 } else { 2340 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress()); 2341 } 2342 ++I; 2343 } 2344 // Privatize extra loop counters used in loops for ordered(n) clauses. 
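// For example, '#pragma omp for ordered(2)' with a single associated loop
// still carries the second loop's counter in the clause; that counter gets a
// private copy here if it was captured from an enclosing scope.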
2345 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 2346 if (!C->getNumForLoops()) 2347 continue; 2348 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); 2349 I < E; ++I) { 2350 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 2351 const auto *VD = cast<VarDecl>(DRE->getDecl()); 2352 // Override only those variables that can be captured to avoid re-emission 2353 // of the variables declared within the loops. 2354 if (DRE->refersToEnclosingVariableOrCapture()) { 2355 (void)LoopScope.addPrivate( 2356 VD, CreateMemTemp(DRE->getType(), VD->getName())); 2357 } 2358 } 2359 } 2360 } 2361 2362 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 2363 const Expr *Cond, llvm::BasicBlock *TrueBlock, 2364 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 2365 if (!CGF.HaveInsertPoint()) 2366 return; 2367 { 2368 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 2369 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 2370 (void)PreCondScope.Privatize(); 2371 // Get initial values of real counters. 2372 for (const Expr *I : S.inits()) { 2373 CGF.EmitIgnoredExpr(I); 2374 } 2375 } 2376 // Create temp loop control variables with their init values to support 2377 // non-rectangular loops. 2378 CodeGenFunction::OMPMapVars PreCondVars; 2379 for (const Expr *E : S.dependent_counters()) { 2380 if (!E) 2381 continue; 2382 assert(!E->getType().getNonReferenceType()->isRecordType() && 2383 "dependent counter must not be an iterator."); 2384 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2385 Address CounterAddr = 2386 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 2387 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 2388 } 2389 (void)PreCondVars.apply(CGF); 2390 for (const Expr *E : S.dependent_inits()) { 2391 if (!E) 2392 continue; 2393 CGF.EmitIgnoredExpr(E); 2394 } 2395 // Check that loop is executed at least one time. 2396 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 2397 PreCondVars.restore(CGF); 2398 } 2399 2400 void CodeGenFunction::EmitOMPLinearClause( 2401 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 2402 if (!HaveInsertPoint()) 2403 return; 2404 llvm::DenseSet<const VarDecl *> SIMDLCVs; 2405 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D); 2406 if (isOpenMPSimdDirective(EKind)) { 2407 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 2408 for (const Expr *C : LoopDirective->counters()) { 2409 SIMDLCVs.insert( 2410 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 2411 } 2412 } 2413 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 2414 auto CurPrivate = C->privates().begin(); 2415 for (const Expr *E : C->varlist()) { 2416 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 2417 const auto *PrivateVD = 2418 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 2419 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 2420 // Emit private VarDecl with copy init. 2421 EmitVarDecl(*PrivateVD); 2422 bool IsRegistered = 2423 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD)); 2424 assert(IsRegistered && "linear var already registered as private"); 2425 // Silence the warning about unused variable. 
2426 (void)IsRegistered;
2427 } else {
2428 EmitVarDecl(*PrivateVD);
2429 }
2430 ++CurPrivate;
2431 }
2432 }
2433 }
2434 
2435 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2436 const OMPExecutableDirective &D) {
2437 if (!CGF.HaveInsertPoint())
2438 return;
2439 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2440 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2441 /*ignoreResult=*/true);
2442 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2443 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2444 // In the presence of a finite 'safelen', it may be unsafe to mark all
2445 // the memory instructions parallel, because loop-carried
2446 // dependences of up to 'safelen' iterations are possible.
2447 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2448 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2449 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2450 /*ignoreResult=*/true);
2451 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2452 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2453 // In the presence of a finite 'safelen', it may be unsafe to mark all
2454 // the memory instructions parallel, because loop-carried
2455 // dependences of up to 'safelen' iterations are possible.
2456 CGF.LoopStack.setParallel(/*Enable=*/false);
2457 }
2458 }
2459 
2460 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2461 // Walk the clauses and process safelen/simdlen/order and inscan reductions.
2462 LoopStack.setParallel(/*Enable=*/true);
2463 LoopStack.setVectorizeEnable();
2464 emitSimdlenSafelenClause(*this, D);
2465 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2466 if (C->getKind() == OMPC_ORDER_concurrent)
2467 LoopStack.setParallel(/*Enable=*/true);
2468 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
2469 if ((EKind == OMPD_simd ||
2470 (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(EKind))) &&
2471 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2472 [](const OMPReductionClause *C) {
2473 return C->getModifier() == OMPC_REDUCTION_inscan;
2474 }))
2475 // Disable parallel access in case of prefix sum.
2476 LoopStack.setParallel(/*Enable=*/false);
2477 }
2478 
2479 void CodeGenFunction::EmitOMPSimdFinal(
2480 const OMPLoopDirective &D,
2481 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2482 if (!HaveInsertPoint())
2483 return;
2484 llvm::BasicBlock *DoneBB = nullptr;
2485 auto IC = D.counters().begin();
2486 auto IPC = D.private_counters().begin();
2487 for (const Expr *F : D.finals()) {
2488 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2489 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2490 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2491 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2492 OrigVD->hasGlobalStorage() || CED) {
2493 if (!DoneBB) {
2494 if (llvm::Value *Cond = CondGen(*this)) {
2495 // If the first post-update expression is found, emit conditional
2496 // block if it was requested.
2497 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 2498 DoneBB = createBasicBlock(".omp.final.done"); 2499 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 2500 EmitBlock(ThenBB); 2501 } 2502 } 2503 Address OrigAddr = Address::invalid(); 2504 if (CED) { 2505 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 2506 } else { 2507 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 2508 /*RefersToEnclosingVariableOrCapture=*/false, 2509 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 2510 OrigAddr = EmitLValue(&DRE).getAddress(); 2511 } 2512 OMPPrivateScope VarScope(*this); 2513 VarScope.addPrivate(OrigVD, OrigAddr); 2514 (void)VarScope.Privatize(); 2515 EmitIgnoredExpr(F); 2516 } 2517 ++IC; 2518 ++IPC; 2519 } 2520 if (DoneBB) 2521 EmitBlock(DoneBB, /*IsFinished=*/true); 2522 } 2523 2524 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 2525 const OMPLoopDirective &S, 2526 CodeGenFunction::JumpDest LoopExit) { 2527 CGF.EmitOMPLoopBody(S, LoopExit); 2528 CGF.EmitStopPoint(&S); 2529 } 2530 2531 /// Emit a helper variable and return corresponding lvalue. 2532 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 2533 const DeclRefExpr *Helper) { 2534 auto VDecl = cast<VarDecl>(Helper->getDecl()); 2535 CGF.EmitVarDecl(*VDecl); 2536 return CGF.EmitLValue(Helper); 2537 } 2538 2539 static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, 2540 const RegionCodeGenTy &SimdInitGen, 2541 const RegionCodeGenTy &BodyCodeGen) { 2542 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, 2543 PrePostActionTy &) { 2544 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); 2545 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2546 SimdInitGen(CGF); 2547 2548 BodyCodeGen(CGF); 2549 }; 2550 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 2551 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 2552 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); 2553 2554 BodyCodeGen(CGF); 2555 }; 2556 const Expr *IfCond = nullptr; 2557 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 2558 if (isOpenMPSimdDirective(EKind)) { 2559 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 2560 if (CGF.getLangOpts().OpenMP >= 50 && 2561 (C->getNameModifier() == OMPD_unknown || 2562 C->getNameModifier() == OMPD_simd)) { 2563 IfCond = C->getCondition(); 2564 break; 2565 } 2566 } 2567 } 2568 if (IfCond) { 2569 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2570 } else { 2571 RegionCodeGenTy ThenRCG(ThenGen); 2572 ThenRCG(CGF); 2573 } 2574 } 2575 2576 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 2577 PrePostActionTy &Action) { 2578 Action.Enter(CGF); 2579 OMPLoopScope PreInitScope(CGF, S); 2580 // if (PreCond) { 2581 // for (IV in 0..LastIteration) BODY; 2582 // <Final counter/linear vars updates>; 2583 // } 2584 2585 // The presence of lower/upper bound variable depends on the actual directive 2586 // kind in the AST node. The variables must be emitted because some of the 2587 // expressions associated with the loop will use them. 
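// For example, a 'for simd' reaching this simd path still has its
// iteration-space expressions built by Sema in terms of the lower/upper
// bound variables, so they must be materialized even though the simd
// lowering itself performs no chunking.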
2588 OpenMPDirectiveKind DKind = S.getDirectiveKind(); 2589 if (isOpenMPDistributeDirective(DKind) || 2590 isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || 2591 isOpenMPGenericLoopDirective(DKind)) { 2592 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 2593 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 2594 } 2595 2596 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 2597 // Emit: if (PreCond) - begin. 2598 // If the condition constant folds and can be elided, avoid emitting the 2599 // whole loop. 2600 bool CondConstant; 2601 llvm::BasicBlock *ContBlock = nullptr; 2602 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2603 if (!CondConstant) 2604 return; 2605 } else { 2606 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 2607 ContBlock = CGF.createBasicBlock("simd.if.end"); 2608 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 2609 CGF.getProfileCount(&S)); 2610 CGF.EmitBlock(ThenBlock); 2611 CGF.incrementProfileCounter(&S); 2612 } 2613 2614 // Emit the loop iteration variable. 2615 const Expr *IVExpr = S.getIterationVariable(); 2616 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 2617 CGF.EmitVarDecl(*IVDecl); 2618 CGF.EmitIgnoredExpr(S.getInit()); 2619 2620 // Emit the iterations count variable. 2621 // If it is not a variable, Sema decided to calculate iterations count on 2622 // each iteration (e.g., it is foldable into a constant). 2623 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2624 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2625 // Emit calculation of the iterations count. 2626 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 2627 } 2628 2629 emitAlignedClause(CGF, S); 2630 (void)CGF.EmitOMPLinearClauseInit(S); 2631 { 2632 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2633 CGF.EmitOMPPrivateClause(S, LoopScope); 2634 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 2635 CGF.EmitOMPLinearClause(S, LoopScope); 2636 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2637 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 2638 CGF, S, CGF.EmitLValue(S.getIterationVariable())); 2639 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2640 (void)LoopScope.Privatize(); 2641 if (isOpenMPTargetExecutionDirective(EKind)) 2642 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2643 2644 emitCommonSimdLoop( 2645 CGF, S, 2646 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2647 CGF.EmitOMPSimdInit(S); 2648 }, 2649 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2650 CGF.EmitOMPInnerLoop( 2651 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2652 [&S](CodeGenFunction &CGF) { 2653 emitOMPLoopBodyWithStopPoint(CGF, S, 2654 CodeGenFunction::JumpDest()); 2655 }, 2656 [](CodeGenFunction &) {}); 2657 }); 2658 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2659 // Emit final copy of the lastprivate variables at the end of loops. 2660 if (HasLastprivateClause) 2661 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 2662 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 2663 emitPostUpdateForReductionClause(CGF, S, 2664 [](CodeGenFunction &) { return nullptr; }); 2665 LoopScope.restoreMap(); 2666 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 2667 } 2668 // Emit: if (PreCond) - end. 
2669 if (ContBlock) {
2670 CGF.EmitBranch(ContBlock);
2671 CGF.EmitBlock(ContBlock, true);
2672 }
2673 }
2674 
2675 // Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2676 // available for "loop bind(thread)", which maps to "simd".
2677 static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
2678 // Check for unsupported clauses.
2679 for (OMPClause *C : S.clauses()) {
2680 // Currently only the order, simdlen, safelen and aligned clauses are supported.
2681 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2682 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2683 return false;
2684 }
2685 
2686 // Check if we have a statement with the ordered directive.
2687 // Visit the statement hierarchy to find a compound statement
2688 // with an ordered directive in it.
2689 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2690 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2691 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2692 if (!SubStmt)
2693 continue;
2694 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2695 for (const Stmt *CSSubStmt : CS->children()) {
2696 if (!CSSubStmt)
2697 continue;
2698 if (isa<OMPOrderedDirective>(CSSubStmt)) {
2699 return false;
2700 }
2701 }
2702 }
2703 }
2704 }
2705 }
2706 return true;
2707 }
2708 
2709 static llvm::MapVector<llvm::Value *, llvm::Value *>
2710 GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
2711 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2712 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2713 llvm::APInt ClauseAlignment(64, 0);
2714 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2715 auto *AlignmentCI =
2716 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2717 ClauseAlignment = AlignmentCI->getValue();
2718 }
2719 for (const Expr *E : Clause->varlist()) {
2720 llvm::APInt Alignment(ClauseAlignment);
2721 if (Alignment == 0) {
2722 // OpenMP [2.8.1, Description]
2723 // If no optional parameter is specified, implementation-defined default
2724 // alignments for SIMD instructions on the target platforms are assumed.
2725 Alignment =
2726 CGF.getContext()
2727 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2728 E->getType()->getPointeeType()))
2729 .getQuantity();
2730 }
2731 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2732 "alignment is not power of 2");
2733 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2734 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2735 }
2736 }
2737 return AlignedVars;
2738 }
2739 
2740 // Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2741 // available for "loop bind(thread)", which maps to "simd".
2742 static void emitOMPSimdDirective(const OMPLoopDirective &S,
2743 CodeGenFunction &CGF, CodeGenModule &CGM) {
2744 bool UseOMPIRBuilder =
2745 CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
2746 if (UseOMPIRBuilder) {
2747 auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
2748 PrePostActionTy &) {
2749 // Use the OpenMPIRBuilder if enabled.
2750 if (UseOMPIRBuilder) {
2751 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2752 GetAlignedMapping(S, CGF);
2753 // Emit the associated statement and get its loop representation.
2754 const Stmt *Inner = S.getRawStmt(); 2755 llvm::CanonicalLoopInfo *CLI = 2756 CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2757 2758 llvm::OpenMPIRBuilder &OMPBuilder = 2759 CGM.getOpenMPRuntime().getOMPBuilder(); 2760 // Add SIMD specific metadata 2761 llvm::ConstantInt *Simdlen = nullptr; 2762 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { 2763 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2764 /*ignoreResult=*/true); 2765 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2766 Simdlen = Val; 2767 } 2768 llvm::ConstantInt *Safelen = nullptr; 2769 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { 2770 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2771 /*ignoreResult=*/true); 2772 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2773 Safelen = Val; 2774 } 2775 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; 2776 if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 2777 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) { 2778 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; 2779 } 2780 } 2781 // Add simd metadata to the collapsed loop. Do not generate 2782 // another loop for if clause. Support for if clause is done earlier. 2783 OMPBuilder.applySimd(CLI, AlignedVars, 2784 /*IfCond*/ nullptr, Order, Simdlen, Safelen); 2785 return; 2786 } 2787 }; 2788 { 2789 auto LPCRegion = 2790 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); 2791 OMPLexicalScope Scope(CGF, S, OMPD_unknown); 2792 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 2793 CodeGenIRBuilder); 2794 } 2795 return; 2796 } 2797 2798 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); 2799 CGF.OMPFirstScanLoop = true; 2800 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2801 emitOMPSimdRegion(CGF, S, Action); 2802 }; 2803 { 2804 auto LPCRegion = 2805 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); 2806 OMPLexicalScope Scope(CGF, S, OMPD_unknown); 2807 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, CodeGen); 2808 } 2809 // Check for outer lastprivate conditional update. 2810 checkForLastprivateConditionalUpdate(CGF, S); 2811 } 2812 2813 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2814 emitOMPSimdDirective(S, *this, CGM); 2815 } 2816 2817 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { 2818 // Emit the de-sugared statement. 2819 OMPTransformDirectiveScopeRAII TileScope(*this, &S); 2820 EmitStmt(S.getTransformedStmt()); 2821 } 2822 2823 void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { 2824 // Emit the de-sugared statement. 2825 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); 2826 EmitStmt(S.getTransformedStmt()); 2827 } 2828 2829 void CodeGenFunction::EmitOMPInterchangeDirective( 2830 const OMPInterchangeDirective &S) { 2831 // Emit the de-sugared statement. 2832 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S); 2833 EmitStmt(S.getTransformedStmt()); 2834 } 2835 2836 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { 2837 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; 2838 2839 if (UseOMPIRBuilder) { 2840 auto DL = SourceLocToDebugLoc(S.getBeginLoc()); 2841 const Stmt *Inner = S.getRawStmt(); 2842 2843 // Consume nested loop. Clear the entire remaining loop stack because a 2844 // fully unrolled loop is non-transformable. 
For partial unrolling the 2845 // generated outer loop is pushed back to the stack. 2846 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 2847 OMPLoopNestStack.clear(); 2848 2849 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 2850 2851 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; 2852 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; 2853 2854 if (S.hasClausesOfKind<OMPFullClause>()) { 2855 assert(ExpectedOMPLoopDepth == 0); 2856 OMPBuilder.unrollLoopFull(DL, CLI); 2857 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2858 uint64_t Factor = 0; 2859 if (Expr *FactorExpr = PartialClause->getFactor()) { 2860 Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2861 assert(Factor >= 1 && "Only positive factors are valid"); 2862 } 2863 OMPBuilder.unrollLoopPartial(DL, CLI, Factor, 2864 NeedsUnrolledCLI ? &UnrolledCLI : nullptr); 2865 } else { 2866 OMPBuilder.unrollLoopHeuristic(DL, CLI); 2867 } 2868 2869 assert((!NeedsUnrolledCLI || UnrolledCLI) && 2870 "NeedsUnrolledCLI implies UnrolledCLI to be set"); 2871 if (UnrolledCLI) 2872 OMPLoopNestStack.push_back(UnrolledCLI); 2873 2874 return; 2875 } 2876 2877 // This function is only called if the unrolled loop is not consumed by any 2878 // other loop-associated construct. Such a loop-associated construct will have 2879 // used the transformed AST. 2880 2881 // Set the unroll metadata for the next emitted loop. 2882 LoopStack.setUnrollState(LoopAttributes::Enable); 2883 2884 if (S.hasClausesOfKind<OMPFullClause>()) { 2885 LoopStack.setUnrollState(LoopAttributes::Full); 2886 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { 2887 if (Expr *FactorExpr = PartialClause->getFactor()) { 2888 uint64_t Factor = 2889 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); 2890 assert(Factor >= 1 && "Only positive factors are valid"); 2891 LoopStack.setUnrollCount(Factor); 2892 } 2893 } 2894 2895 EmitStmt(S.getAssociatedStmt()); 2896 } 2897 2898 void CodeGenFunction::EmitOMPOuterLoop( 2899 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2900 CodeGenFunction::OMPPrivateScope &LoopScope, 2901 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2902 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2903 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 2904 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2905 2906 const Expr *IVExpr = S.getIterationVariable(); 2907 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2908 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2909 2910 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 2911 2912 // Start the loop with a block that tests the condition. 2913 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2914 EmitBlock(CondBlock); 2915 const SourceRange R = S.getSourceRange(); 2916 OMPLoopNestStack.clear(); 2917 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2918 SourceLocToDebugLoc(R.getEnd())); 2919 2920 llvm::Value *BoolCondVal = nullptr; 2921 if (!DynamicOrOrdered) { 2922 // UB = min(UB, GlobalUB) or 2923 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 
2924 // 'distribute parallel for') 2925 EmitIgnoredExpr(LoopArgs.EUB); 2926 // IV = LB 2927 EmitIgnoredExpr(LoopArgs.Init); 2928 // IV < UB 2929 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 2930 } else { 2931 BoolCondVal = 2932 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 2933 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 2934 } 2935 2936 // If there are any cleanups between here and the loop-exit scope, 2937 // create a block to stage a loop exit along. 2938 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 2939 if (LoopScope.requiresCleanups()) 2940 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 2941 2942 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 2943 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 2944 if (ExitBlock != LoopExit.getBlock()) { 2945 EmitBlock(ExitBlock); 2946 EmitBranchThroughCleanup(LoopExit); 2947 } 2948 EmitBlock(LoopBody); 2949 2950 // Emit "IV = LB" (in case of static schedule, we have already calculated new 2951 // LB for loop condition and emitted it above). 2952 if (DynamicOrOrdered) 2953 EmitIgnoredExpr(LoopArgs.Init); 2954 2955 // Create a block for the increment. 2956 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 2957 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 2958 2959 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 2960 emitCommonSimdLoop( 2961 *this, S, 2962 [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) { 2963 // Generate !llvm.loop.parallel metadata for loads and stores for loops 2964 // with dynamic/guided scheduling and without ordered clause. 2965 if (!isOpenMPSimdDirective(EKind)) { 2966 CGF.LoopStack.setParallel(!IsMonotonic); 2967 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2968 if (C->getKind() == OMPC_ORDER_concurrent) 2969 CGF.LoopStack.setParallel(/*Enable=*/true); 2970 } else { 2971 CGF.EmitOMPSimdInit(S); 2972 } 2973 }, 2974 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2975 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2976 SourceLocation Loc = S.getBeginLoc(); 2977 // when 'distribute' is not combined with a 'for': 2978 // while (idx <= UB) { BODY; ++idx; } 2979 // when 'distribute' is combined with a 'for' 2980 // (e.g. 'distribute parallel for') 2981 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 2982 CGF.EmitOMPInnerLoop( 2983 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 2984 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 2985 CodeGenLoop(CGF, S, LoopExit); 2986 }, 2987 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 2988 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 2989 }); 2990 }); 2991 2992 EmitBlock(Continue.getBlock()); 2993 BreakContinueStack.pop_back(); 2994 if (!DynamicOrOrdered) { 2995 // Emit "LB = LB + Stride", "UB = UB + Stride". 2996 EmitIgnoredExpr(LoopArgs.NextLB); 2997 EmitIgnoredExpr(LoopArgs.NextUB); 2998 } 2999 3000 EmitBranch(CondBlock); 3001 OMPLoopNestStack.clear(); 3002 LoopStack.pop(); 3003 // Emit the fall-through block. 3004 EmitBlock(LoopExit.getBlock()); 3005 3006 // Tell the runtime we are done. 
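// For a statically scheduled loop this amounts to a single libomp call; a
// minimal sketch of what emitForStaticFinish() produces (assuming the usual
// ident_t location and thread-id values; names illustrative):
// \code
// call void @__kmpc_for_static_fini(ptr @loc, i32 %tid)
// \endcode
// Dynamic and ordered schedules need no finish call here: their dispatch
// loop terminates once __kmpc_dispatch_next() returns 0.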
3007 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) { 3008 if (!DynamicOrOrdered) 3009 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3010 LoopArgs.DKind); 3011 }; 3012 OMPCancelStack.emitExit(*this, EKind, CodeGen); 3013 } 3014 3015 void CodeGenFunction::EmitOMPForOuterLoop( 3016 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 3017 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 3018 const OMPLoopArguments &LoopArgs, 3019 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 3020 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3021 3022 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 3023 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); 3024 3025 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, 3026 LoopArgs.Chunk != nullptr)) && 3027 "static non-chunked schedule does not need outer loop"); 3028 3029 // Emit outer loop. 3030 // 3031 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3032 // When schedule(dynamic,chunk_size) is specified, the iterations are 3033 // distributed to threads in the team in chunks as the threads request them. 3034 // Each thread executes a chunk of iterations, then requests another chunk, 3035 // until no chunks remain to be distributed. Each chunk contains chunk_size 3036 // iterations, except for the last chunk to be distributed, which may have 3037 // fewer iterations. When no chunk_size is specified, it defaults to 1. 3038 // 3039 // When schedule(guided,chunk_size) is specified, the iterations are assigned 3040 // to threads in the team in chunks as the executing threads request them. 3041 // Each thread executes a chunk of iterations, then requests another chunk, 3042 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 3043 // each chunk is proportional to the number of unassigned iterations divided 3044 // by the number of threads in the team, decreasing to 1. For a chunk_size 3045 // with value k (greater than 1), the size of each chunk is determined in the 3046 // same way, with the restriction that the chunks do not contain fewer than k 3047 // iterations (except for the last chunk to be assigned, which may have fewer 3048 // than k iterations). 3049 // 3050 // When schedule(auto) is specified, the decision regarding scheduling is 3051 // delegated to the compiler and/or runtime system. The programmer gives the 3052 // implementation the freedom to choose any possible mapping of iterations to 3053 // threads in the team. 3054 // 3055 // When schedule(runtime) is specified, the decision regarding scheduling is 3056 // deferred until run time, and the schedule and chunk size are taken from the 3057 // run-sched-var ICV. If the ICV is set to auto, the schedule is 3058 // implementation defined 3059 // 3060 // __kmpc_dispatch_init(); 3061 // while(__kmpc_dispatch_next(&LB, &UB)) { 3062 // idx = LB; 3063 // while (idx <= UB) { BODY; ++idx; 3064 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 3065 // } // inner loop 3066 // } 3067 // __kmpc_dispatch_deinit(); 3068 // 3069 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3070 // When schedule(static, chunk_size) is specified, iterations are divided into 3071 // chunks of size chunk_size, and the chunks are assigned to the threads in 3072 // the team in a round-robin fashion in the order of the thread number. 
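// For example (illustrative only): schedule(static, 2) over 8 iterations on
// a team of 2 threads forms the chunks {0,1} {2,3} {4,5} {6,7}; thread 0
// executes {0,1} and {4,5}, thread 1 executes {2,3} and {6,7}, with each
// pass of the outer loop below advancing LB and UB by the stride ST.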
3073 // 3074 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 3075 // while (idx <= UB) { BODY; ++idx; } // inner loop 3076 // LB = LB + ST; 3077 // UB = UB + ST; 3078 // } 3079 // 3080 3081 const Expr *IVExpr = S.getIterationVariable(); 3082 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3083 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3084 3085 if (DynamicOrOrdered) { 3086 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = 3087 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 3088 llvm::Value *LBVal = DispatchBounds.first; 3089 llvm::Value *UBVal = DispatchBounds.second; 3090 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal, 3091 LoopArgs.Chunk}; 3092 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, 3093 IVSigned, Ordered, DispatchRTInputValues); 3094 } else { 3095 CGOpenMPRuntime::StaticRTInput StaticInit( 3096 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 3097 LoopArgs.ST, LoopArgs.Chunk); 3098 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3099 RT.emitForStaticInit(*this, S.getBeginLoc(), EKind, ScheduleKind, 3100 StaticInit); 3101 } 3102 3103 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 3104 const unsigned IVSize, 3105 const bool IVSigned) { 3106 if (Ordered) { 3107 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 3108 IVSigned); 3109 } 3110 }; 3111 3112 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 3113 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 3114 OuterLoopArgs.IncExpr = S.getInc(); 3115 OuterLoopArgs.Init = S.getInit(); 3116 OuterLoopArgs.Cond = S.getCond(); 3117 OuterLoopArgs.NextLB = S.getNextLowerBound(); 3118 OuterLoopArgs.NextUB = S.getNextUpperBound(); 3119 OuterLoopArgs.DKind = LoopArgs.DKind; 3120 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 3121 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 3122 if (DynamicOrOrdered) { 3123 RT.emitForDispatchDeinit(*this, S.getBeginLoc()); 3124 } 3125 } 3126 3127 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 3128 const unsigned IVSize, const bool IVSigned) {} 3129 3130 void CodeGenFunction::EmitOMPDistributeOuterLoop( 3131 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 3132 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 3133 const CodeGenLoopTy &CodeGenLoopContent) { 3134 3135 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3136 3137 // Emit outer loop. 3138 // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot be 3139 // dynamic. 3140 // 3141 3142 const Expr *IVExpr = S.getIterationVariable(); 3143 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3144 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3145 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3146 3147 CGOpenMPRuntime::StaticRTInput StaticInit( 3148 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, 3149 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); 3150 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); 3151 3152 // For combined 'distribute' and 'for', the increment expression of 3153 // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
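// Illustrative sketch (not emitted literally): for '#pragma omp distribute
// parallel for' the 'distribute' loop advances by a whole chunk at a time,
// roughly 'IV += ST' (DistInc), while the inner 'parallel for' consumes the
// iterations within [LB, UB]; for a standalone '#pragma omp distribute' the
// iteration variable simply steps by one, '++IV' (Inc).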
Expr *IncExpr; 3155 if (isOpenMPLoopBoundSharingDirective(EKind)) 3156 IncExpr = S.getDistInc(); 3157 else 3158 IncExpr = S.getInc(); 3159 3160 // This routine is shared by 'omp distribute parallel for' and 3161 // 'omp distribute': select the right EUB expression depending on the 3162 // directive. 3163 OMPLoopArguments OuterLoopArgs; 3164 OuterLoopArgs.LB = LoopArgs.LB; 3165 OuterLoopArgs.UB = LoopArgs.UB; 3166 OuterLoopArgs.ST = LoopArgs.ST; 3167 OuterLoopArgs.IL = LoopArgs.IL; 3168 OuterLoopArgs.Chunk = LoopArgs.Chunk; 3169 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(EKind) 3170 ? S.getCombinedEnsureUpperBound() 3171 : S.getEnsureUpperBound(); 3172 OuterLoopArgs.IncExpr = IncExpr; 3173 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(EKind) 3174 ? S.getCombinedInit() 3175 : S.getInit(); 3176 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(EKind) 3177 ? S.getCombinedCond() 3178 : S.getCond(); 3179 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(EKind) 3180 ? S.getCombinedNextLowerBound() 3181 : S.getNextLowerBound(); 3182 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(EKind) 3183 ? S.getCombinedNextUpperBound() 3184 : S.getNextUpperBound(); 3185 OuterLoopArgs.DKind = OMPD_distribute; 3186 3187 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 3188 LoopScope, OuterLoopArgs, CodeGenLoopContent, 3189 emitEmptyOrdered); 3190 } 3191 3192 static std::pair<LValue, LValue> 3193 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 3194 const OMPExecutableDirective &S) { 3195 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 3196 LValue LB = 3197 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 3198 LValue UB = 3199 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 3200 3201 // When composing 'distribute' with 'for' (e.g. as in 'distribute 3202 // parallel for') we need to use the 'distribute' 3203 // chunk lower and upper bounds rather than the whole loop iteration 3204 // space. These are parameters to the outlined function for 'parallel' 3205 // and we copy the bounds of the previous schedule into 3206 // the current ones. 3207 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 3208 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 3209 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( 3210 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); 3211 PrevLBVal = CGF.EmitScalarConversion( 3212 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 3213 LS.getIterationVariable()->getType(), 3214 LS.getPrevLowerBoundVariable()->getExprLoc()); 3215 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( 3216 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); 3217 PrevUBVal = CGF.EmitScalarConversion( 3218 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 3219 LS.getIterationVariable()->getType(), 3220 LS.getPrevUpperBoundVariable()->getExprLoc()); 3221 3222 CGF.EmitStoreOfScalar(PrevLBVal, LB); 3223 CGF.EmitStoreOfScalar(PrevUBVal, UB); 3224 3225 return {LB, UB}; 3226 } 3227 3228 /// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 3229 /// we need to use the LB and UB expressions generated by the worksharing 3230 /// code generation support, whereas in non-combined situations we would 3231 /// just emit 0 and the LastIteration expression. 3232 /// This function is necessary because the types of LB and UB differ 3233 /// between the RT emission routines for 'for_static_init' and 3234 /// 'for_dispatch_init'. 3235 static std::pair<llvm::Value *, llvm::Value *> 3236 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 3237 const OMPExecutableDirective &S, 3238 Address LB, Address UB) { 3239 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 3240 const Expr *IVExpr = LS.getIterationVariable(); 3241 // When implementing a dynamic schedule for a 'for' combined with a 3242 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop 3243 // is not normalized, as each team only executes its own assigned 3244 // distribute chunk. 3245 QualType IteratorTy = IVExpr->getType(); 3246 llvm::Value *LBVal = 3247 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); 3248 llvm::Value *UBVal = 3249 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); 3250 return {LBVal, UBVal}; 3251 } 3252 3253 static void emitDistributeParallelForDistributeInnerBoundParams( 3254 CodeGenFunction &CGF, const OMPExecutableDirective &S, 3255 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 3256 const auto &Dir = cast<OMPLoopDirective>(S); 3257 LValue LB = 3258 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 3259 llvm::Value *LBCast = CGF.Builder.CreateIntCast( 3260 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 3261 CapturedVars.push_back(LBCast); 3262 LValue UB = 3263 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 3264 3265 llvm::Value *UBCast = CGF.Builder.CreateIntCast( 3266 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 3267 CapturedVars.push_back(UBCast); 3268 } 3269 3270 static void 3271 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 3272 const OMPLoopDirective &S, 3273 CodeGenFunction::JumpDest LoopExit) { 3274 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3275 auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF, 3276 PrePostActionTy &Action) { 3277 Action.Enter(CGF); 3278 bool HasCancel = false; 3279 if (!isOpenMPSimdDirective(EKind)) { 3280 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) 3281 HasCancel = D->hasCancel(); 3282 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) 3283 HasCancel = D->hasCancel(); 3284 else if (const auto *D = 3285 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) 3286 HasCancel = D->hasCancel(); 3287 } 3288 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); 3289 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 3290 emitDistributeParallelForInnerBounds, 3291 emitDistributeParallelForDispatchBounds); 3292 }; 3293 3294 emitCommonOMPParallelDirective( 3295 CGF, S, isOpenMPSimdDirective(EKind) ?
OMPD_for_simd : OMPD_for, 3296 CGInlinedWorksharingLoop, 3297 emitDistributeParallelForDistributeInnerBoundParams); 3298 } 3299 3300 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 3301 const OMPDistributeParallelForDirective &S) { 3302 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3303 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 3304 S.getDistInc()); 3305 }; 3306 OMPLexicalScope Scope(*this, S, OMPD_parallel); 3307 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3308 } 3309 3310 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 3311 const OMPDistributeParallelForSimdDirective &S) { 3312 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3313 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 3314 S.getDistInc()); 3315 }; 3316 OMPLexicalScope Scope(*this, S, OMPD_parallel); 3317 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3318 } 3319 3320 void CodeGenFunction::EmitOMPDistributeSimdDirective( 3321 const OMPDistributeSimdDirective &S) { 3322 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3323 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3324 }; 3325 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3326 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 3327 } 3328 3329 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 3330 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { 3331 // Emit SPMD target parallel for region as a standalone region. 3332 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3333 emitOMPSimdRegion(CGF, S, Action); 3334 }; 3335 llvm::Function *Fn; 3336 llvm::Constant *Addr; 3337 // Emit target region as a standalone region. 3338 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 3339 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 3340 assert(Fn && Addr && "Target device function emission failed."); 3341 } 3342 3343 void CodeGenFunction::EmitOMPTargetSimdDirective( 3344 const OMPTargetSimdDirective &S) { 3345 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3346 emitOMPSimdRegion(CGF, S, Action); 3347 }; 3348 emitCommonOMPTargetDirective(*this, S, CodeGen); 3349 } 3350 3351 namespace { 3352 struct ScheduleKindModifiersTy { 3353 OpenMPScheduleClauseKind Kind; 3354 OpenMPScheduleClauseModifier M1; 3355 OpenMPScheduleClauseModifier M2; 3356 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 3357 OpenMPScheduleClauseModifier M1, 3358 OpenMPScheduleClauseModifier M2) 3359 : Kind(Kind), M1(M1), M2(M2) {} 3360 }; 3361 } // namespace 3362 3363 bool CodeGenFunction::EmitOMPWorksharingLoop( 3364 const OMPLoopDirective &S, Expr *EUB, 3365 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 3366 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 3367 // Emit the loop iteration variable. 3368 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3369 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3370 EmitVarDecl(*IVDecl); 3371 3372 // Emit the iterations count variable. 3373 // If it is not a variable, Sema decided to calculate iterations count on each 3374 // iteration (e.g., it is foldable into a constant). 3375 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3376 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3377 // Emit calculation of the iterations count. 
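// As a rough sketch, for a canonical loop 'for (int i = L; i < U; i += St)'
// the expression prebuilt by Sema computes something like:
// \code
// NumIterations = (U - L + St - 1) / St;
// \endcode
// (the exact form, including any type promotions and overflow checks, is
// decided by Sema, not here).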
3378 EmitIgnoredExpr(S.getCalcLastIteration()); 3379 } 3380 3381 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3382 3383 bool HasLastprivateClause; 3384 // Check pre-condition. 3385 { 3386 OMPLoopScope PreInitScope(*this, S); 3387 // Skip the entire loop if we don't meet the precondition. 3388 // If the condition constant folds and can be elided, avoid emitting the 3389 // whole loop. 3390 bool CondConstant; 3391 llvm::BasicBlock *ContBlock = nullptr; 3392 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3393 if (!CondConstant) 3394 return false; 3395 } else { 3396 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 3397 ContBlock = createBasicBlock("omp.precond.end"); 3398 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3399 getProfileCount(&S)); 3400 EmitBlock(ThenBlock); 3401 incrementProfileCounter(&S); 3402 } 3403 3404 RunCleanupsScope DoacrossCleanupScope(*this); 3405 bool Ordered = false; 3406 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 3407 if (OrderedClause->getNumForLoops()) 3408 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 3409 else 3410 Ordered = true; 3411 } 3412 3413 llvm::DenseSet<const Expr *> EmittedFinals; 3414 emitAlignedClause(*this, S); 3415 bool HasLinears = EmitOMPLinearClauseInit(S); 3416 // Emit helper vars inits. 3417 3418 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 3419 LValue LB = Bounds.first; 3420 LValue UB = Bounds.second; 3421 LValue ST = 3422 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3423 LValue IL = 3424 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3425 3426 // Emit 'then' code. 3427 { 3428 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3429 OMPPrivateScope LoopScope(*this); 3430 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 3431 // Emit implicit barrier to synchronize threads and avoid data races on 3432 // initialization of firstprivate variables and post-update of 3433 // lastprivate variables. 3434 CGM.getOpenMPRuntime().emitBarrierCall( 3435 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3436 /*ForceSimpleCall=*/true); 3437 } 3438 EmitOMPPrivateClause(S, LoopScope); 3439 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( 3440 *this, S, EmitLValue(S.getIterationVariable())); 3441 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3442 EmitOMPReductionClauseInit(S, LoopScope); 3443 EmitOMPPrivateLoopCounters(S, LoopScope); 3444 EmitOMPLinearClause(S, LoopScope); 3445 (void)LoopScope.Privatize(); 3446 if (isOpenMPTargetExecutionDirective(EKind)) 3447 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 3448 3449 // Detect the loop schedule kind and chunk. 3450 const Expr *ChunkExpr = nullptr; 3451 OpenMPScheduleTy ScheduleKind; 3452 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 3453 ScheduleKind.Schedule = C->getScheduleKind(); 3454 ScheduleKind.M1 = C->getFirstScheduleModifier(); 3455 ScheduleKind.M2 = C->getSecondScheduleModifier(); 3456 ChunkExpr = C->getChunkSize(); 3457 } else { 3458 // Default behaviour for schedule clause. 
3459 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 3460 *this, S, ScheduleKind.Schedule, ChunkExpr); 3461 } 3462 bool HasChunkSizeOne = false; 3463 llvm::Value *Chunk = nullptr; 3464 if (ChunkExpr) { 3465 Chunk = EmitScalarExpr(ChunkExpr); 3466 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 3467 S.getIterationVariable()->getType(), 3468 S.getBeginLoc()); 3469 Expr::EvalResult Result; 3470 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 3471 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 3472 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 3473 } 3474 } 3475 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3476 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3477 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 3478 // If the static schedule kind is specified or if the ordered clause is 3479 // specified, and if no monotonic modifier is specified, the effect will 3480 // be as if the monotonic modifier was specified. 3481 bool StaticChunkedOne = 3482 RT.isStaticChunked(ScheduleKind.Schedule, 3483 /* Chunked */ Chunk != nullptr) && 3484 HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(EKind); 3485 bool IsMonotonic = 3486 Ordered || 3487 (ScheduleKind.Schedule == OMPC_SCHEDULE_static && 3488 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || 3489 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || 3490 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 3491 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 3492 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 3493 /* Chunked */ Chunk != nullptr) || 3494 StaticChunkedOne) && 3495 !Ordered) { 3496 JumpDest LoopExit = 3497 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3498 emitCommonSimdLoop( 3499 *this, S, 3500 [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) { 3501 if (isOpenMPSimdDirective(EKind)) { 3502 CGF.EmitOMPSimdInit(S); 3503 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 3504 if (C->getKind() == OMPC_ORDER_concurrent) 3505 CGF.LoopStack.setParallel(/*Enable=*/true); 3506 } 3507 }, 3508 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, 3509 &S, ScheduleKind, LoopExit, EKind, 3510 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 3511 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 3512 // When no chunk_size is specified, the iteration space is divided 3513 // into chunks that are approximately equal in size, and at most 3514 // one chunk is distributed to each thread. Note that the size of 3515 // the chunks is unspecified in this case. 3516 CGOpenMPRuntime::StaticRTInput StaticInit( 3517 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 3518 UB.getAddress(), ST.getAddress(), 3519 StaticChunkedOne ? Chunk : nullptr); 3520 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 3521 CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit); 3522 // UB = min(UB, GlobalUB); 3523 if (!StaticChunkedOne) 3524 CGF.EmitIgnoredExpr(S.getEnsureUpperBound()); 3525 // IV = LB; 3526 CGF.EmitIgnoredExpr(S.getInit()); 3527 // For unchunked static schedule generate: 3528 // 3529 // while (idx <= UB) { 3530 // BODY; 3531 // ++idx; 3532 // } 3533 // 3534 // For static schedule with chunk one: 3535 // 3536 // while (IV <= PrevUB) { 3537 // BODY; 3538 // IV += ST; 3539 // } 3540 CGF.EmitOMPInnerLoop( 3541 S, LoopScope.requiresCleanups(), 3542 StaticChunkedOne ? S.getCombinedParForInDistCond() 3543 : S.getCond(), 3544 StaticChunkedOne ? 
S.getDistInc() : S.getInc(), 3545 [&S, LoopExit](CodeGenFunction &CGF) { 3546 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); 3547 }, 3548 [](CodeGenFunction &) {}); 3549 }); 3550 EmitBlock(LoopExit.getBlock()); 3551 // Tell the runtime we are done. 3552 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 3553 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3554 OMPD_for); 3555 }; 3556 OMPCancelStack.emitExit(*this, EKind, CodeGen); 3557 } else { 3558 // Emit the outer loop, which requests its work chunk [LB..UB] from 3559 // runtime and runs the inner loop to process it. 3560 OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 3561 ST.getAddress(), IL.getAddress(), Chunk, 3562 EUB); 3563 LoopArguments.DKind = OMPD_for; 3564 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 3565 LoopArguments, CGDispatchBounds); 3566 } 3567 if (isOpenMPSimdDirective(EKind)) { 3568 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 3569 return CGF.Builder.CreateIsNotNull( 3570 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3571 }); 3572 } 3573 EmitOMPReductionClauseFinal( 3574 S, /*ReductionKind=*/isOpenMPSimdDirective(EKind) 3575 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 3576 : /*Parallel only*/ OMPD_parallel); 3577 // Emit post-update of the reduction variables if IsLastIter != 0. 3578 emitPostUpdateForReductionClause( 3579 *this, S, [IL, &S](CodeGenFunction &CGF) { 3580 return CGF.Builder.CreateIsNotNull( 3581 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3582 }); 3583 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3584 if (HasLastprivateClause) 3585 EmitOMPLastprivateClauseFinal( 3586 S, isOpenMPSimdDirective(EKind), 3587 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 3588 LoopScope.restoreMap(); 3589 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 3590 return CGF.Builder.CreateIsNotNull( 3591 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3592 }); 3593 } 3594 DoacrossCleanupScope.ForceCleanup(); 3595 // We're now done with the loop, so jump to the continuation block. 3596 if (ContBlock) { 3597 EmitBranch(ContBlock); 3598 EmitBlock(ContBlock, /*IsFinished=*/true); 3599 } 3600 } 3601 return HasLastprivateClause; 3602 } 3603 3604 /// The following two functions generate expressions for the loop lower 3605 /// and upper bounds in case of static and dynamic (dispatch) schedule 3606 /// of the associated 'for' or 'distribute' loop. 3607 static std::pair<LValue, LValue> 3608 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 3609 const auto &LS = cast<OMPLoopDirective>(S); 3610 LValue LB = 3611 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 3612 LValue UB = 3613 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 3614 return {LB, UB}; 3615 } 3616 3617 /// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not 3618 /// consider the lower and upper bound expressions generated by the 3619 /// worksharing loop support, but we use 0 and the iteration space size as 3620 /// constants. 3621 static std::pair<llvm::Value *, llvm::Value *> 3622 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 3623 Address LB, Address UB) { 3624 const auto &LS = cast<OMPLoopDirective>(S); 3625 const Expr *IVExpr = LS.getIterationVariable(); 3626 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 3627 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 3628 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 3629 return {LBVal, UBVal}; 3630 } 3631 3632 /// Emits internal temp array declarations for the directive with inscan 3633 /// reductions. 3634 /// The code is the following: 3635 /// \code 3636 /// size num_iters = <num_iters>; 3637 /// <type> buffer[num_iters]; 3638 /// \endcode 3639 static void emitScanBasedDirectiveDecls( 3640 CodeGenFunction &CGF, const OMPLoopDirective &S, 3641 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { 3642 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( 3643 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); 3644 SmallVector<const Expr *, 4> Shareds; 3645 SmallVector<const Expr *, 4> Privates; 3646 SmallVector<const Expr *, 4> ReductionOps; 3647 SmallVector<const Expr *, 4> CopyArrayTemps; 3648 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 3649 assert(C->getModifier() == OMPC_REDUCTION_inscan && 3650 "Only inscan reductions are expected."); 3651 Shareds.append(C->varlist_begin(), C->varlist_end()); 3652 Privates.append(C->privates().begin(), C->privates().end()); 3653 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 3654 CopyArrayTemps.append(C->copy_array_temps().begin(), 3655 C->copy_array_temps().end()); 3656 } 3657 { 3658 // Emit a buffer for each reduction variable. 3659 // ReductionCodeGen is required to correctly emit the code for array 3660 // reductions. 3661 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); 3662 unsigned Count = 0; 3663 auto *ITA = CopyArrayTemps.begin(); 3664 for (const Expr *IRef : Privates) { 3665 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 3666 // Emit variably modified arrays, used for arrays/array sections 3667 // reductions. 3668 if (PrivateVD->getType()->isVariablyModifiedType()) { 3669 RedCG.emitSharedOrigLValue(CGF, Count); 3670 RedCG.emitAggregateType(CGF, Count); 3671 } 3672 CodeGenFunction::OpaqueValueMapping DimMapping( 3673 CGF, 3674 cast<OpaqueValueExpr>( 3675 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) 3676 ->getSizeExpr()), 3677 RValue::get(OMPScanNumIterations)); 3678 // Emit temp buffer. 3679 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); 3680 ++ITA; 3681 ++Count; 3682 } 3683 } 3684 } 3685 3686 /// Copies the final inscan reduction values to the original variables. 3687 /// The code is the following: 3688 /// \code 3689 /// <orig_var> = buffer[num_iters-1]; 3690 /// \endcode 3691 static void emitScanBasedDirectiveFinals( 3692 CodeGenFunction &CGF, const OMPLoopDirective &S, 3693 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { 3694 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( 3695 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); 3696 SmallVector<const Expr *, 4> Shareds; 3697 SmallVector<const Expr *, 4> LHSs; 3698 SmallVector<const Expr *, 4> RHSs; 3699 SmallVector<const Expr *, 4> Privates; 3700 SmallVector<const Expr *, 4> CopyOps; 3701 SmallVector<const Expr *, 4> CopyArrayElems; 3702 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 3703 assert(C->getModifier() == OMPC_REDUCTION_inscan && 3704 "Only inscan reductions are expected."); 3705 Shareds.append(C->varlist_begin(), C->varlist_end()); 3706 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 3707 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 3708 Privates.append(C->privates().begin(), C->privates().end()); 3709 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); 3710 CopyArrayElems.append(C->copy_array_elems().begin(), 3711 C->copy_array_elems().end()); 3712 } 3713 // Create temp var and copy LHS value to this temp value. 3714 // LHS = TMP[LastIter]; 3715 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub( 3716 OMPScanNumIterations, 3717 llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false)); 3718 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 3719 const Expr *PrivateExpr = Privates[I]; 3720 const Expr *OrigExpr = Shareds[I]; 3721 const Expr *CopyArrayElem = CopyArrayElems[I]; 3722 CodeGenFunction::OpaqueValueMapping IdxMapping( 3723 CGF, 3724 cast<OpaqueValueExpr>( 3725 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3726 RValue::get(OMPLast)); 3727 LValue DestLVal = CGF.EmitLValue(OrigExpr); 3728 LValue SrcLVal = CGF.EmitLValue(CopyArrayElem); 3729 CGF.EmitOMPCopy( 3730 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 3731 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 3732 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 3733 } 3734 } 3735 3736 /// Emits the code for the directive with inscan reductions. 3737 /// The code is the following: 3738 /// \code 3739 /// #pragma omp ... 3740 /// for (i: 0..<num_iters>) { 3741 /// <input phase>; 3742 /// buffer[i] = red; 3743 /// } 3744 /// #pragma omp master // in parallel region 3745 /// for (int k = 0; k != ceil(log2(num_iters)); ++k) 3746 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt) 3747 /// buffer[cnt] op= buffer[cnt-pow(2,k)]; 3748 /// #pragma omp barrier // in parallel region 3749 /// #pragma omp ... 3750 /// for (i: 0..<num_iters>) { 3751 /// red = InclusiveScan ?
buffer[i] : buffer[i-1]; 3752 /// <scan phase>; 3753 /// } 3754 /// \endcode 3755 static void emitScanBasedDirective( 3756 CodeGenFunction &CGF, const OMPLoopDirective &S, 3757 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, 3758 llvm::function_ref<void(CodeGenFunction &)> FirstGen, 3759 llvm::function_ref<void(CodeGenFunction &)> SecondGen) { 3760 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( 3761 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); 3762 SmallVector<const Expr *, 4> Privates; 3763 SmallVector<const Expr *, 4> ReductionOps; 3764 SmallVector<const Expr *, 4> LHSs; 3765 SmallVector<const Expr *, 4> RHSs; 3766 SmallVector<const Expr *, 4> CopyArrayElems; 3767 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 3768 assert(C->getModifier() == OMPC_REDUCTION_inscan && 3769 "Only inscan reductions are expected."); 3770 Privates.append(C->privates().begin(), C->privates().end()); 3771 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 3772 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 3773 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 3774 CopyArrayElems.append(C->copy_array_elems().begin(), 3775 C->copy_array_elems().end()); 3776 } 3777 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); 3778 { 3779 // Emit loop with input phase: 3780 // #pragma omp ... 3781 // for (i: 0..<num_iters>) { 3782 // <input phase>; 3783 // buffer[i] = red; 3784 // } 3785 CGF.OMPFirstScanLoop = true; 3786 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 3787 FirstGen(CGF); 3788 } 3789 // #pragma omp barrier // in parallel region 3790 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, 3791 &ReductionOps, 3792 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { 3793 Action.Enter(CGF); 3794 // Emit prefix reduction: 3795 // #pragma omp master // in parallel region 3796 // for (int k = 0; k <= ceil(log2(n)); ++k) 3797 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); 3798 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); 3799 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); 3800 llvm::Function *F = 3801 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); 3802 llvm::Value *Arg = 3803 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); 3804 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); 3805 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); 3806 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); 3807 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); 3808 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( 3809 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3810 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); 3811 CGF.EmitBlock(LoopBB); 3812 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); 3813 // size pow2k = 1; 3814 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); 3815 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); 3816 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); 3817 // for (size i = n - 1; i >= 2 ^ k; --i) 3818 // tmp[i] op= tmp[i-pow2k]; 3819 llvm::BasicBlock *InnerLoopBB = 3820 CGF.createBasicBlock("omp.inner.log.scan.body"); 3821 llvm::BasicBlock *InnerExitBB = 3822 CGF.createBasicBlock("omp.inner.log.scan.exit"); 3823 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); 3824 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3825 
CGF.EmitBlock(InnerLoopBB); 3826 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); 3827 IVal->addIncoming(NMin1, LoopBB); 3828 { 3829 CodeGenFunction::OMPPrivateScope PrivScope(CGF); 3830 auto *ILHS = LHSs.begin(); 3831 auto *IRHS = RHSs.begin(); 3832 for (const Expr *CopyArrayElem : CopyArrayElems) { 3833 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 3834 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 3835 Address LHSAddr = Address::invalid(); 3836 { 3837 CodeGenFunction::OpaqueValueMapping IdxMapping( 3838 CGF, 3839 cast<OpaqueValueExpr>( 3840 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3841 RValue::get(IVal)); 3842 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(); 3843 } 3844 PrivScope.addPrivate(LHSVD, LHSAddr); 3845 Address RHSAddr = Address::invalid(); 3846 { 3847 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); 3848 CodeGenFunction::OpaqueValueMapping IdxMapping( 3849 CGF, 3850 cast<OpaqueValueExpr>( 3851 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 3852 RValue::get(OffsetIVal)); 3853 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(); 3854 } 3855 PrivScope.addPrivate(RHSVD, RHSAddr); 3856 ++ILHS; 3857 ++IRHS; 3858 } 3859 PrivScope.Privatize(); 3860 CGF.CGM.getOpenMPRuntime().emitReduction( 3861 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 3862 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); 3863 } 3864 llvm::Value *NextIVal = 3865 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); 3866 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); 3867 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); 3868 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); 3869 CGF.EmitBlock(InnerExitBB); 3870 llvm::Value *Next = 3871 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); 3872 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); 3873 // pow2k <<= 1; 3874 llvm::Value *NextPow2K = 3875 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); 3876 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); 3877 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); 3878 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); 3879 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); 3880 CGF.EmitBlock(ExitBB); 3881 }; 3882 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3883 if (isOpenMPParallelDirective(EKind)) { 3884 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 3885 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 3886 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3887 /*ForceSimpleCall=*/true); 3888 } else { 3889 RegionCodeGenTy RCG(CodeGen); 3890 RCG(CGF); 3891 } 3892 3893 CGF.OMPFirstScanLoop = false; 3894 SecondGen(CGF); 3895 } 3896 3897 static bool emitWorksharingDirective(CodeGenFunction &CGF, 3898 const OMPLoopDirective &S, 3899 bool HasCancel) { 3900 bool HasLastprivates; 3901 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 3902 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), 3903 [](const OMPReductionClause *C) { 3904 return C->getModifier() == OMPC_REDUCTION_inscan; 3905 })) { 3906 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { 3907 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); 3908 OMPLoopScope LoopScope(CGF, S); 3909 return CGF.EmitScalarExpr(S.getNumIterations()); 3910 }; 3911 const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) { 3912 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, 
HasCancel); 3913 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3914 emitForLoopBounds, 3915 emitDispatchForLoopBounds); 3916 // Emit an implicit barrier at the end. 3917 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), 3918 OMPD_for); 3919 }; 3920 const auto &&SecondGen = [&S, HasCancel, EKind, 3921 &HasLastprivates](CodeGenFunction &CGF) { 3922 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); 3923 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3924 emitForLoopBounds, 3925 emitDispatchForLoopBounds); 3926 }; 3927 if (!isOpenMPParallelDirective(EKind)) 3928 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); 3929 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); 3930 if (!isOpenMPParallelDirective(EKind)) 3931 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); 3932 } else { 3933 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel); 3934 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3935 emitForLoopBounds, 3936 emitDispatchForLoopBounds); 3937 } 3938 return HasLastprivates; 3939 } 3940 3941 // Pass OMPLoopDirective (instead of OMPForDirective) to make this check 3942 // available for "loop bind(parallel)", which maps to "for". 3943 static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S, 3944 bool HasCancel) { 3945 if (HasCancel) 3946 return false; 3947 for (OMPClause *C : S.clauses()) { 3948 if (isa<OMPNowaitClause, OMPBindClause>(C)) 3949 continue; 3950 3951 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { 3952 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3953 return false; 3954 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) 3955 return false; 3956 switch (SC->getScheduleKind()) { 3957 case OMPC_SCHEDULE_auto: 3958 case OMPC_SCHEDULE_dynamic: 3959 case OMPC_SCHEDULE_runtime: 3960 case OMPC_SCHEDULE_guided: 3961 case OMPC_SCHEDULE_static: 3962 continue; 3963 case OMPC_SCHEDULE_unknown: 3964 return false; 3965 } 3966 } 3967 3968 return false; 3969 } 3970 3971 return true; 3972 } 3973 3974 static llvm::omp::ScheduleKind 3975 convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { 3976 switch (ScheduleClauseKind) { 3977 case OMPC_SCHEDULE_unknown: 3978 return llvm::omp::OMP_SCHEDULE_Default; 3979 case OMPC_SCHEDULE_auto: 3980 return llvm::omp::OMP_SCHEDULE_Auto; 3981 case OMPC_SCHEDULE_dynamic: 3982 return llvm::omp::OMP_SCHEDULE_Dynamic; 3983 case OMPC_SCHEDULE_guided: 3984 return llvm::omp::OMP_SCHEDULE_Guided; 3985 case OMPC_SCHEDULE_runtime: 3986 return llvm::omp::OMP_SCHEDULE_Runtime; 3987 case OMPC_SCHEDULE_static: 3988 return llvm::omp::OMP_SCHEDULE_Static; 3989 } 3990 llvm_unreachable("Unhandled schedule kind"); 3991 } 3992 3993 // Pass OMPLoopDirective (instead of OMPForDirective) to make this function 3994 // available for "loop bind(parallel)", which maps to "for". 3995 static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF, 3996 CodeGenModule &CGM, bool HasCancel) { 3997 bool HasLastprivates = false; 3998 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder && 3999 isForSupportedByOpenMPIRBuilder(S, HasCancel); 4000 auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates, 4001 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { 4002 // Use the OpenMPIRBuilder if enabled. 
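// (Sketch) A directive that can take the OpenMPIRBuilder path, i.e. one
// accepted by isForSupportedByOpenMPIRBuilder(), looks like:
// \code
// #pragma omp for nowait schedule(dynamic, 4)
// for (int i = 0; i < n; ++i)
//   body(i);
// \endcode
// Cancellation, schedule modifiers, or any clause other than 'nowait',
// 'bind', or 'schedule' routes codegen to the classic path further down.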
4003 if (UseOMPIRBuilder) { 4004 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); 4005 4006 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; 4007 llvm::Value *ChunkSize = nullptr; 4008 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { 4009 SchedKind = 4010 convertClauseKindToSchedKind(SchedClause->getScheduleKind()); 4011 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) 4012 ChunkSize = CGF.EmitScalarExpr(ChunkSizeExpr); 4013 } 4014 4015 // Emit the associated statement and get its loop representation. 4016 const Stmt *Inner = S.getRawStmt(); 4017 llvm::CanonicalLoopInfo *CLI = 4018 CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1); 4019 4020 llvm::OpenMPIRBuilder &OMPBuilder = 4021 CGM.getOpenMPRuntime().getOMPBuilder(); 4022 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 4023 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); 4024 cantFail(OMPBuilder.applyWorkshareLoop( 4025 CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, 4026 SchedKind, ChunkSize, /*HasSimdModifier=*/false, 4027 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, 4028 /*HasOrderedClause=*/false)); 4029 return; 4030 } 4031 4032 HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel); 4033 }; 4034 { 4035 auto LPCRegion = 4036 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); 4037 OMPLexicalScope Scope(CGF, S, OMPD_unknown); 4038 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_for, CodeGen, 4039 HasCancel); 4040 } 4041 4042 if (!UseOMPIRBuilder) { 4043 // Emit an implicit barrier at the end. 4044 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 4045 CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), OMPD_for); 4046 } 4047 // Check for outer lastprivate conditional update. 4048 checkForLastprivateConditionalUpdate(CGF, S); 4049 } 4050 4051 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 4052 return emitOMPForDirective(S, *this, CGM, S.hasCancel()); 4053 } 4054 4055 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 4056 bool HasLastprivates = false; 4057 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 4058 PrePostActionTy &) { 4059 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); 4060 }; 4061 { 4062 auto LPCRegion = 4063 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4064 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4065 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 4066 } 4067 4068 // Emit an implicit barrier at the end. 4069 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 4070 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 4071 // Check for outer lastprivate conditional update. 
4072 checkForLastprivateConditionalUpdate(*this, S); 4073 } 4074 4075 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 4076 const Twine &Name, 4077 llvm::Value *Init = nullptr) { 4078 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 4079 if (Init) 4080 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 4081 return LVal; 4082 } 4083 4084 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 4085 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 4086 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 4087 bool HasLastprivates = false; 4088 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 4089 auto &&CodeGen = [&S, CapturedStmt, CS, EKind, 4090 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 4091 const ASTContext &C = CGF.getContext(); 4092 QualType KmpInt32Ty = 4093 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4094 // Emit helper vars inits. 4095 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 4096 CGF.Builder.getInt32(0)); 4097 llvm::ConstantInt *GlobalUBVal = CS != nullptr 4098 ? CGF.Builder.getInt32(CS->size() - 1) 4099 : CGF.Builder.getInt32(0); 4100 LValue UB = 4101 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 4102 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 4103 CGF.Builder.getInt32(1)); 4104 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 4105 CGF.Builder.getInt32(0)); 4106 // Loop counter. 4107 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 4108 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 4109 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 4110 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 4111 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 4112 // Generate condition for loop. 4113 BinaryOperator *Cond = BinaryOperator::Create( 4114 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, 4115 S.getBeginLoc(), FPOptionsOverride()); 4116 // Increment for loop counter. 4117 UnaryOperator *Inc = UnaryOperator::Create( 4118 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, 4119 S.getBeginLoc(), true, FPOptionsOverride()); 4120 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 4121 // Iterate through all sections and emit a switch construct: 4122 // switch (IV) { 4123 // case 0: 4124 // <SectionStmt[0]>; 4125 // break; 4126 // ... 4127 // case <NumSection> - 1: 4128 // <SectionStmt[<NumSection> - 1]>; 4129 // break; 4130 // } 4131 // .omp.sections.exit: 4132 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 4133 llvm::SwitchInst *SwitchStmt = 4134 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 4135 ExitBB, CS == nullptr ? 
1 : CS->size()); 4136 if (CS) { 4137 unsigned CaseNumber = 0; 4138 for (const Stmt *SubStmt : CS->children()) { 4139 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 4140 CGF.EmitBlock(CaseBB); 4141 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 4142 CGF.EmitStmt(SubStmt); 4143 CGF.EmitBranch(ExitBB); 4144 ++CaseNumber; 4145 } 4146 } else { 4147 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 4148 CGF.EmitBlock(CaseBB); 4149 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 4150 CGF.EmitStmt(CapturedStmt); 4151 CGF.EmitBranch(ExitBB); 4152 } 4153 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 4154 }; 4155 4156 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 4157 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 4158 // Emit implicit barrier to synchronize threads and avoid data races on 4159 // initialization of firstprivate variables and post-update of lastprivate 4160 // variables. 4161 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 4162 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 4163 /*ForceSimpleCall=*/true); 4164 } 4165 CGF.EmitOMPPrivateClause(S, LoopScope); 4166 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); 4167 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 4168 CGF.EmitOMPReductionClauseInit(S, LoopScope); 4169 (void)LoopScope.Privatize(); 4170 if (isOpenMPTargetExecutionDirective(EKind)) 4171 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4172 4173 // Emit static non-chunked loop. 4174 OpenMPScheduleTy ScheduleKind; 4175 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 4176 CGOpenMPRuntime::StaticRTInput StaticInit( 4177 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 4178 LB.getAddress(), UB.getAddress(), ST.getAddress()); 4179 CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, S.getBeginLoc(), EKind, 4180 ScheduleKind, StaticInit); 4181 // UB = min(UB, GlobalUB); 4182 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); 4183 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( 4184 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 4185 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 4186 // IV = LB; 4187 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); 4188 // while (idx <= UB) { BODY; ++idx; } 4189 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, 4190 [](CodeGenFunction &) {}); 4191 // Tell the runtime we are done. 4192 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 4193 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 4194 OMPD_sections); 4195 }; 4196 CGF.OMPCancelStack.emitExit(CGF, EKind, CodeGen); 4197 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4198 // Emit post-update of the reduction variables if IsLastIter != 0. 4199 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { 4200 return CGF.Builder.CreateIsNotNull( 4201 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 4202 }); 4203 4204 // Emit final copy of the lastprivate variables if IsLastIter != 0. 
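// Conceptually (sketch): the one thread whose IL ("is-last") flag was set
// by the runtime copies its private values back to the originals:
// \code
// if (*IL != 0)
//   orig_var = lastprivate_copy;
// \endcode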
4205 if (HasLastprivates) 4206 CGF.EmitOMPLastprivateClauseFinal( 4207 S, /*NoFinals=*/false, 4208 CGF.Builder.CreateIsNotNull( 4209 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 4210 }; 4211 4212 bool HasCancel = false; 4213 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 4214 HasCancel = OSD->hasCancel(); 4215 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 4216 HasCancel = OPSD->hasCancel(); 4217 OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel); 4218 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 4219 HasCancel); 4220 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 4221 // clause. Otherwise the barrier will be generated by the codegen for the 4222 // directive. 4223 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 4224 // Emit implicit barrier to synchronize threads and avoid data races on 4225 // initialization of firstprivate variables. 4226 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4227 OMPD_unknown); 4228 } 4229 } 4230 4231 void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) { 4232 { 4233 // Emit code for 'scope' region 4234 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4235 Action.Enter(CGF); 4236 OMPPrivateScope PrivateScope(CGF); 4237 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4238 CGF.EmitOMPPrivateClause(S, PrivateScope); 4239 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4240 (void)PrivateScope.Privatize(); 4241 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4242 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4243 }; 4244 auto LPCRegion = 4245 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4246 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4247 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_scope, CodeGen); 4248 } 4249 // Emit an implicit barrier at the end. 4250 if (!S.getSingleClause<OMPNowaitClause>()) { 4251 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_scope); 4252 } 4253 // Check for outer lastprivate conditional update. 
4254 checkForLastprivateConditionalUpdate(*this, S); 4255 } 4256 4257 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 4258 if (CGM.getLangOpts().OpenMPIRBuilder) { 4259 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4260 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4261 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 4262 4263 auto FiniCB = [this](InsertPointTy IP) { 4264 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4265 return llvm::Error::success(); 4266 }; 4267 4268 const CapturedStmt *ICS = S.getInnermostCapturedStmt(); 4269 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 4270 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 4271 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 4272 if (CS) { 4273 for (const Stmt *SubStmt : CS->children()) { 4274 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, 4275 InsertPointTy CodeGenIP) { 4276 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4277 *this, SubStmt, AllocaIP, CodeGenIP, "section"); 4278 return llvm::Error::success(); 4279 }; 4280 SectionCBVector.push_back(SectionCB); 4281 } 4282 } else { 4283 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, 4284 InsertPointTy CodeGenIP) { 4285 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4286 *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); 4287 return llvm::Error::success(); 4288 }; 4289 SectionCBVector.push_back(SectionCB); 4290 } 4291 4292 // Privatization callback that performs appropriate action for 4293 // shared/private/firstprivate/lastprivate/copyin/... variables. 4294 // 4295 // TODO: This defaults to shared right now. 4296 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 4297 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 4298 // The next line is appropriate only for variables (Val) with the 4299 // data-sharing attribute "shared". 4300 ReplVal = &Val; 4301 4302 return CodeGenIP; 4303 }; 4304 4305 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); 4306 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 4307 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 4308 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); 4309 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4310 cantFail(OMPBuilder.createSections( 4311 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), 4312 S.getSingleClause<OMPNowaitClause>())); 4313 Builder.restoreIP(AfterIP); 4314 return; 4315 } 4316 { 4317 auto LPCRegion = 4318 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4319 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4320 EmitSections(S); 4321 } 4322 // Emit an implicit barrier at the end. 4323 if (!S.getSingleClause<OMPNowaitClause>()) { 4324 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 4325 OMPD_sections); 4326 } 4327 // Check for outer lastprivate conditional update. 
4328 checkForLastprivateConditionalUpdate(*this, S);
4329 }
4330
4331 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4332 if (CGM.getLangOpts().OpenMPIRBuilder) {
4333 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4334 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4335
4336 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4337 auto FiniCB = [this](InsertPointTy IP) {
4338 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4339 return llvm::Error::success();
4340 };
4341
4342 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4343 InsertPointTy CodeGenIP) {
4344 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4345 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4346 return llvm::Error::success();
4347 };
4348
4349 LexicalScope Scope(*this, S.getSourceRange());
4350 EmitStopPoint(&S);
4351 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4352 cantFail(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4353 Builder.restoreIP(AfterIP);
4354
4355 return;
4356 }
4357 LexicalScope Scope(*this, S.getSourceRange());
4358 EmitStopPoint(&S);
4359 EmitStmt(S.getAssociatedStmt());
4360 }
4361
4362 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4363 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4364 llvm::SmallVector<const Expr *, 8> DestExprs;
4365 llvm::SmallVector<const Expr *, 8> SrcExprs;
4366 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4367 // Check if there are any 'copyprivate' clauses associated with this
4368 // 'single' construct.
4369 // Build a list of copyprivate variables along with helper expressions
4370 // (<source>, <destination>, <destination>=<source> expressions).
4371 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4372 CopyprivateVars.append(C->varlist_begin(), C->varlist_end());
4373 DestExprs.append(C->destination_exprs().begin(),
4374 C->destination_exprs().end());
4375 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4376 AssignmentOps.append(C->assignment_ops().begin(),
4377 C->assignment_ops().end());
4378 }
4379 // Emit code for 'single' region along with 'copyprivate' clauses.
4380 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4381 Action.Enter(CGF);
4382 OMPPrivateScope SingleScope(CGF);
4383 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4384 CGF.EmitOMPPrivateClause(S, SingleScope);
4385 (void)SingleScope.Privatize();
4386 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4387 };
4388 {
4389 auto LPCRegion =
4390 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4391 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4392 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4393 CopyprivateVars, DestExprs,
4394 SrcExprs, AssignmentOps);
4395 }
4396 // Emit an implicit barrier at the end (to avoid data races on the
4397 // initialization of firstprivate variables), unless a 'nowait' clause was
4398 // specified or a 'copyprivate' clause is present (the 'copyprivate' codegen
4399 // already synchronizes the threads).
4400 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4401 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4402 OMPD_single);
4403 }
4404 // Check for outer lastprivate conditional update.
4404 checkForLastprivateConditionalUpdate(*this, S); 4405 } 4406 4407 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4408 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4409 Action.Enter(CGF); 4410 CGF.EmitStmt(S.getRawStmt()); 4411 }; 4412 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 4413 } 4414 4415 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 4416 if (CGM.getLangOpts().OpenMPIRBuilder) { 4417 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4418 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4419 4420 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); 4421 4422 auto FiniCB = [this](InsertPointTy IP) { 4423 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4424 return llvm::Error::success(); 4425 }; 4426 4427 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, 4428 InsertPointTy CodeGenIP) { 4429 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4430 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); 4431 return llvm::Error::success(); 4432 }; 4433 4434 LexicalScope Scope(*this, S.getSourceRange()); 4435 EmitStopPoint(&S); 4436 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4437 cantFail(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 4438 Builder.restoreIP(AfterIP); 4439 4440 return; 4441 } 4442 LexicalScope Scope(*this, S.getSourceRange()); 4443 EmitStopPoint(&S); 4444 emitMaster(*this, S); 4445 } 4446 4447 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 4448 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4449 Action.Enter(CGF); 4450 CGF.EmitStmt(S.getRawStmt()); 4451 }; 4452 Expr *Filter = nullptr; 4453 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4454 Filter = FilterClause->getThreadID(); 4455 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), 4456 Filter); 4457 } 4458 4459 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { 4460 if (CGM.getLangOpts().OpenMPIRBuilder) { 4461 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4462 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4463 4464 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); 4465 const Expr *Filter = nullptr; 4466 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) 4467 Filter = FilterClause->getThreadID(); 4468 llvm::Value *FilterVal = Filter 4469 ? 
EmitScalarExpr(Filter, CGM.Int32Ty) 4470 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 4471 4472 auto FiniCB = [this](InsertPointTy IP) { 4473 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4474 return llvm::Error::success(); 4475 }; 4476 4477 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, 4478 InsertPointTy CodeGenIP) { 4479 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4480 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); 4481 return llvm::Error::success(); 4482 }; 4483 4484 LexicalScope Scope(*this, S.getSourceRange()); 4485 EmitStopPoint(&S); 4486 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 4487 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); 4488 Builder.restoreIP(AfterIP); 4489 4490 return; 4491 } 4492 LexicalScope Scope(*this, S.getSourceRange()); 4493 EmitStopPoint(&S); 4494 emitMasked(*this, S); 4495 } 4496 4497 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 4498 if (CGM.getLangOpts().OpenMPIRBuilder) { 4499 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 4500 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4501 4502 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); 4503 const Expr *Hint = nullptr; 4504 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4505 Hint = HintClause->getHint(); 4506 4507 // TODO: This is slightly different from what's currently being done in 4508 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything 4509 // about typing is final. 4510 llvm::Value *HintInst = nullptr; 4511 if (Hint) 4512 HintInst = 4513 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); 4514 4515 auto FiniCB = [this](InsertPointTy IP) { 4516 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 4517 return llvm::Error::success(); 4518 }; 4519 4520 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, 4521 InsertPointTy CodeGenIP) { 4522 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 4523 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); 4524 return llvm::Error::success(); 4525 }; 4526 4527 LexicalScope Scope(*this, S.getSourceRange()); 4528 EmitStopPoint(&S); 4529 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 4530 cantFail(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, 4531 S.getDirectiveName().getAsString(), 4532 HintInst)); 4533 Builder.restoreIP(AfterIP); 4534 4535 return; 4536 } 4537 4538 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4539 Action.Enter(CGF); 4540 CGF.EmitStmt(S.getAssociatedStmt()); 4541 }; 4542 const Expr *Hint = nullptr; 4543 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4544 Hint = HintClause->getHint(); 4545 LexicalScope Scope(*this, S.getSourceRange()); 4546 EmitStopPoint(&S); 4547 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 4548 S.getDirectiveName().getAsString(), 4549 CodeGen, S.getBeginLoc(), Hint); 4550 } 4551 4552 void CodeGenFunction::EmitOMPParallelForDirective( 4553 const OMPParallelForDirective &S) { 4554 // Emit directive as a combined directive that consists of two implicit 4555 // directives: 'parallel' with 'for' directive. 
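// For instance (illustrative only):
//   #pragma omp parallel for
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];
// is lowered as an outlined 'parallel' region whose body runs the 'for'
// worksharing codegen via emitWorksharingDirective() below.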
4556 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4557 Action.Enter(CGF);
4558 emitOMPCopyinClause(CGF, S);
4559 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4560 };
4561 {
4562 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4563 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4564 CGCapturedStmtInfo CGSI(CR_OpenMP);
4565 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4566 OMPLoopScope LoopScope(CGF, S);
4567 return CGF.EmitScalarExpr(S.getNumIterations());
4568 };
4569 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4570 [](const OMPReductionClause *C) {
4571 return C->getModifier() == OMPC_REDUCTION_inscan;
4572 });
4573 if (IsInscan)
4574 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4575 auto LPCRegion =
4576 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4577 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4578 emitEmptyBoundParameters);
4579 if (IsInscan)
4580 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4581 }
4582 // Check for outer lastprivate conditional update.
4583 checkForLastprivateConditionalUpdate(*this, S);
4584 }
4585
4586 void CodeGenFunction::EmitOMPParallelForSimdDirective(
4587 const OMPParallelForSimdDirective &S) {
4588 // Emit directive as a combined directive that consists of two implicit
4589 // directives: 'parallel' with 'for simd' directive.
4590 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4591 Action.Enter(CGF);
4592 emitOMPCopyinClause(CGF, S);
4593 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4594 };
4595 {
4596 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4597 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4598 CGCapturedStmtInfo CGSI(CR_OpenMP);
4599 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4600 OMPLoopScope LoopScope(CGF, S);
4601 return CGF.EmitScalarExpr(S.getNumIterations());
4602 };
4603 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4604 [](const OMPReductionClause *C) {
4605 return C->getModifier() == OMPC_REDUCTION_inscan;
4606 });
4607 if (IsInscan)
4608 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4609 auto LPCRegion =
4610 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4611 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4612 emitEmptyBoundParameters);
4613 if (IsInscan)
4614 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4615 }
4616 // Check for outer lastprivate conditional update.
4617 checkForLastprivateConditionalUpdate(*this, S);
4618 }
4619
4620 void CodeGenFunction::EmitOMPParallelMasterDirective(
4621 const OMPParallelMasterDirective &S) {
4622 // Emit directive as a combined directive that consists of two implicit
4623 // directives: 'parallel' with 'master' directive.
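// For instance (illustrative only):
//   #pragma omp parallel master
//   { work(); }
// is lowered as an outlined 'parallel' region in which emitMaster() below
// restricts execution of the body to the primary thread of the team.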
4624 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4625 Action.Enter(CGF); 4626 OMPPrivateScope PrivateScope(CGF); 4627 emitOMPCopyinClause(CGF, S); 4628 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4629 CGF.EmitOMPPrivateClause(S, PrivateScope); 4630 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4631 (void)PrivateScope.Privatize(); 4632 emitMaster(CGF, S); 4633 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4634 }; 4635 { 4636 auto LPCRegion = 4637 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4638 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, 4639 emitEmptyBoundParameters); 4640 emitPostUpdateForReductionClause(*this, S, 4641 [](CodeGenFunction &) { return nullptr; }); 4642 } 4643 // Check for outer lastprivate conditional update. 4644 checkForLastprivateConditionalUpdate(*this, S); 4645 } 4646 4647 void CodeGenFunction::EmitOMPParallelMaskedDirective( 4648 const OMPParallelMaskedDirective &S) { 4649 // Emit directive as a combined directive that consists of two implicit 4650 // directives: 'parallel' with 'masked' directive. 4651 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4652 Action.Enter(CGF); 4653 OMPPrivateScope PrivateScope(CGF); 4654 emitOMPCopyinClause(CGF, S); 4655 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4656 CGF.EmitOMPPrivateClause(S, PrivateScope); 4657 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4658 (void)PrivateScope.Privatize(); 4659 emitMasked(CGF, S); 4660 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4661 }; 4662 { 4663 auto LPCRegion = 4664 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4665 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, 4666 emitEmptyBoundParameters); 4667 emitPostUpdateForReductionClause(*this, S, 4668 [](CodeGenFunction &) { return nullptr; }); 4669 } 4670 // Check for outer lastprivate conditional update. 4671 checkForLastprivateConditionalUpdate(*this, S); 4672 } 4673 4674 void CodeGenFunction::EmitOMPParallelSectionsDirective( 4675 const OMPParallelSectionsDirective &S) { 4676 // Emit directive as a combined directive that consists of two implicit 4677 // directives: 'parallel' with 'sections' directive. 4678 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4679 Action.Enter(CGF); 4680 emitOMPCopyinClause(CGF, S); 4681 CGF.EmitSections(S); 4682 }; 4683 { 4684 auto LPCRegion = 4685 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4686 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 4687 emitEmptyBoundParameters); 4688 } 4689 // Check for outer lastprivate conditional update. 4690 checkForLastprivateConditionalUpdate(*this, S); 4691 } 4692 4693 namespace { 4694 /// Get the list of variables declared in the context of the untied tasks. 4695 class CheckVarsEscapingUntiedTaskDeclContext final 4696 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { 4697 llvm::SmallVector<const VarDecl *, 4> PrivateDecls; 4698 4699 public: 4700 explicit CheckVarsEscapingUntiedTaskDeclContext() = default; 4701 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; 4702 void VisitDeclStmt(const DeclStmt *S) { 4703 if (!S) 4704 return; 4705 // Need to privatize only local vars, static locals can be processed as is. 
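// For example (illustrative only), given
//   #pragma omp task untied
//   { int A = 0; static int B = 0; /* ... */ }
// only 'A' is collected here; 'B' has static storage and does not need to
// be privatized across task scheduling points.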
4706 for (const Decl *D : S->decls()) {
4707 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4708 if (VD->hasLocalStorage())
4709 PrivateDecls.push_back(VD);
4710 }
4711 }
4712 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4713 void VisitCapturedStmt(const CapturedStmt *) {}
4714 void VisitLambdaExpr(const LambdaExpr *) {}
4715 void VisitBlockExpr(const BlockExpr *) {}
4716 void VisitStmt(const Stmt *S) {
4717 if (!S)
4718 return;
4719 for (const Stmt *Child : S->children())
4720 if (Child)
4721 Visit(Child);
4722 }
4723
4724 /// Returns the list of collected private variables.
4725 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4726 };
4727 } // anonymous namespace
4728
4729 static void buildDependences(const OMPExecutableDirective &S,
4730 OMPTaskDataTy &Data) {
4731
4732 // Look for 'omp_all_memory' and, if present, add it first.
4733 bool OmpAllMemory = false;
4734 if (llvm::any_of(
4735 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4736 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4737 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4738 })) {
4739 OmpAllMemory = true;
4740 // Since OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4741 // equivalent as far as the runtime is concerned, always use
4742 // OMPC_DEPEND_outallmemory to simplify.
4743 OMPTaskDataTy::DependData &DD =
4744 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4745 /*IteratorExpr=*/nullptr);
4746 // Add a nullptr Expr to simplify the codegen in emitDependData.
4747 DD.DepExprs.push_back(nullptr);
4748 }
4749 // Add the remaining dependences, skipping any 'out' or 'inout' entries that
4750 // are overridden by 'omp_all_memory'.
4751 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4752 OpenMPDependClauseKind Kind = C->getDependencyKind();
4753 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4754 continue;
4755 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4756 continue;
4757 OMPTaskDataTy::DependData &DD =
4758 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4759 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4760 }
4761 }
4762
4763 void CodeGenFunction::EmitOMPTaskBasedDirective(
4764 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4765 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4766 OMPTaskDataTy &Data) {
4767 // Emit outlined function for task construct.
4768 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4769 auto I = CS->getCapturedDecl()->param_begin();
4770 auto PartId = std::next(I);
4771 auto TaskT = std::next(I, 4);
4772 // Check if the task is 'final'.
4773 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4774 // If the condition constant folds and can be elided, try to avoid emitting
4775 // the condition and the dead arm of the if/else.
4776 const Expr *Cond = Clause->getCondition();
4777 bool CondConstant;
4778 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4779 Data.Final.setInt(CondConstant);
4780 else
4781 Data.Final.setPointer(EvaluateExprAsBool(Cond));
4782 } else {
4783 // By default the task is not final.
4784 Data.Final.setInt(/*IntVal=*/false);
4785 }
4786 // Check if the task has a 'priority' clause.
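// For example (illustrative only), for
//   #pragma omp task priority(2)
// the expression '2' is evaluated and converted to a signed 32-bit integer
// that is recorded in Data.Priority for the runtime's task allocation call.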
4787 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 4788 const Expr *Prio = Clause->getPriority(); 4789 Data.Priority.setInt(/*IntVal=*/true); 4790 Data.Priority.setPointer(EmitScalarConversion( 4791 EmitScalarExpr(Prio), Prio->getType(), 4792 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 4793 Prio->getExprLoc())); 4794 } 4795 // The first function argument for tasks is a thread id, the second one is a 4796 // part id (0 for tied tasks, >=0 for untied task). 4797 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 4798 // Get list of private variables. 4799 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 4800 auto IRef = C->varlist_begin(); 4801 for (const Expr *IInit : C->private_copies()) { 4802 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4803 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4804 Data.PrivateVars.push_back(*IRef); 4805 Data.PrivateCopies.push_back(IInit); 4806 } 4807 ++IRef; 4808 } 4809 } 4810 EmittedAsPrivate.clear(); 4811 // Get list of firstprivate variables. 4812 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 4813 auto IRef = C->varlist_begin(); 4814 auto IElemInitRef = C->inits().begin(); 4815 for (const Expr *IInit : C->private_copies()) { 4816 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4817 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4818 Data.FirstprivateVars.push_back(*IRef); 4819 Data.FirstprivateCopies.push_back(IInit); 4820 Data.FirstprivateInits.push_back(*IElemInitRef); 4821 } 4822 ++IRef; 4823 ++IElemInitRef; 4824 } 4825 } 4826 // Get list of lastprivate variables (for taskloops). 4827 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 4828 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 4829 auto IRef = C->varlist_begin(); 4830 auto ID = C->destination_exprs().begin(); 4831 for (const Expr *IInit : C->private_copies()) { 4832 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4833 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4834 Data.LastprivateVars.push_back(*IRef); 4835 Data.LastprivateCopies.push_back(IInit); 4836 } 4837 LastprivateDstsOrigs.insert( 4838 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 4839 cast<DeclRefExpr>(*IRef))); 4840 ++IRef; 4841 ++ID; 4842 } 4843 } 4844 SmallVector<const Expr *, 4> LHSs; 4845 SmallVector<const Expr *, 4> RHSs; 4846 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 4847 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 4848 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 4849 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 4850 Data.ReductionOps.append(C->reduction_ops().begin(), 4851 C->reduction_ops().end()); 4852 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 4853 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 4854 } 4855 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 4856 *this, S.getBeginLoc(), LHSs, RHSs, Data); 4857 // Build list of dependences. 4858 buildDependences(S, Data); 4859 // Get list of local vars for untied tasks. 
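// An untied task may resume on a different thread after a task scheduling
// point, so its local variables cannot live on the stack of the thread that
// started the task; they are collected here and later allocated as part of
// the task's private data instead.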
4860 if (!Data.Tied) {
4861 CheckVarsEscapingUntiedTaskDeclContext Checker;
4862 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4863 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4864 Checker.getPrivateDecls().end());
4865 }
4866 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4867 CapturedRegion](CodeGenFunction &CGF,
4868 PrePostActionTy &Action) {
4869 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4870 std::pair<Address, Address>>
4871 UntiedLocalVars;
4872 // Set proper addresses for generated private copies.
4873 OMPPrivateScope Scope(CGF);
4874 // Generate debug info for variables present in shared clause.
4875 if (auto *DI = CGF.getDebugInfo()) {
4876 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4877 CGF.CapturedStmtInfo->getCaptureFields();
4878 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4879 if (!CaptureFields.empty() && ContextValue) {
4880 unsigned CharWidth = CGF.getContext().getCharWidth();
4881 // The shared variables are packed together as members of a structure,
4882 // so the address of each shared variable can be computed by adding its
4883 // offset (within the record) to the base address of the record. For
4884 // each shared variable, the debug intrinsic llvm.dbg.declare is
4885 // generated with an appropriate expression (DIExpression).
4886 // Ex:
4887 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4888 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4889 // metadata !svar1,
4890 // metadata !DIExpression(DW_OP_deref))
4891 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4892 // metadata !svar2,
4893 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4894 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4895 const VarDecl *SharedVar = It->first;
4896 RecordDecl *CaptureRecord = It->second->getParent();
4897 const ASTRecordLayout &Layout =
4898 CGF.getContext().getASTRecordLayout(CaptureRecord);
4899 unsigned Offset =
4900 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4901 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4902 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4903 CGF.Builder, false);
4904 // Get the llvm.dbg.declare we just created and update its
4905 // DIExpression to add the offset to the base address.
4906 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
4907 unsigned Offset) {
4908 SmallVector<uint64_t, 8> Ops;
4909 // Add the offset to the base address if non-zero.
4910 if (Offset) {
4911 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4912 Ops.push_back(Offset);
4913 }
4914 Ops.push_back(llvm::dwarf::DW_OP_deref);
4915 Declare->setExpression(llvm::DIExpression::get(Ctx, Ops));
4916 };
4917 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4918 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last))
4919 UpdateExpr(DDI->getContext(), DDI, Offset);
4920 // If we're emitting using the new debug info format into a block
4921 // without a terminator, the record will be "trailing".
4922 assert(!Last.isTerminator() && "unexpected terminator"); 4923 if (auto *Marker = 4924 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) { 4925 for (llvm::DbgVariableRecord &DVR : llvm::reverse( 4926 llvm::filterDbgVars(Marker->getDbgRecordRange()))) { 4927 UpdateExpr(Last.getContext(), &DVR, Offset); 4928 break; 4929 } 4930 } 4931 } 4932 } 4933 } 4934 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; 4935 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 4936 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { 4937 enum { PrivatesParam = 2, CopyFnParam = 3 }; 4938 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 4939 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 4940 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 4941 CS->getCapturedDecl()->getParam(PrivatesParam))); 4942 // Map privates. 4943 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 4944 llvm::SmallVector<llvm::Value *, 16> CallArgs; 4945 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 4946 CallArgs.push_back(PrivatesPtr); 4947 ParamTypes.push_back(PrivatesPtr->getType()); 4948 for (const Expr *E : Data.PrivateVars) { 4949 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4950 RawAddress PrivatePtr = CGF.CreateMemTemp( 4951 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 4952 PrivatePtrs.emplace_back(VD, PrivatePtr); 4953 CallArgs.push_back(PrivatePtr.getPointer()); 4954 ParamTypes.push_back(PrivatePtr.getType()); 4955 } 4956 for (const Expr *E : Data.FirstprivateVars) { 4957 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4958 RawAddress PrivatePtr = 4959 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4960 ".firstpriv.ptr.addr"); 4961 PrivatePtrs.emplace_back(VD, PrivatePtr); 4962 FirstprivatePtrs.emplace_back(VD, PrivatePtr); 4963 CallArgs.push_back(PrivatePtr.getPointer()); 4964 ParamTypes.push_back(PrivatePtr.getType()); 4965 } 4966 for (const Expr *E : Data.LastprivateVars) { 4967 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4968 RawAddress PrivatePtr = 4969 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4970 ".lastpriv.ptr.addr"); 4971 PrivatePtrs.emplace_back(VD, PrivatePtr); 4972 CallArgs.push_back(PrivatePtr.getPointer()); 4973 ParamTypes.push_back(PrivatePtr.getType()); 4974 } 4975 for (const VarDecl *VD : Data.PrivateLocals) { 4976 QualType Ty = VD->getType().getNonReferenceType(); 4977 if (VD->getType()->isLValueReferenceType()) 4978 Ty = CGF.getContext().getPointerType(Ty); 4979 if (isAllocatableDecl(VD)) 4980 Ty = CGF.getContext().getPointerType(Ty); 4981 RawAddress PrivatePtr = CGF.CreateMemTemp( 4982 CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); 4983 auto Result = UntiedLocalVars.insert( 4984 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); 4985 // If key exists update in place. 
4986 if (!Result.second)
4987 *Result.first = std::make_pair(
4988 VD, std::make_pair(PrivatePtr, Address::invalid()));
4989 CallArgs.push_back(PrivatePtr.getPointer());
4990 ParamTypes.push_back(PrivatePtr.getType());
4991 }
4992 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4993 ParamTypes, /*isVarArg=*/false);
4994 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4995 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4996 for (const auto &Pair : LastprivateDstsOrigs) {
4997 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4998 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4999 /*RefersToEnclosingVariableOrCapture=*/
5000 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
5001 Pair.second->getType(), VK_LValue,
5002 Pair.second->getExprLoc());
5003 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
5004 }
5005 for (const auto &Pair : PrivatePtrs) {
5006 Address Replacement = Address(
5007 CGF.Builder.CreateLoad(Pair.second),
5008 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5009 CGF.getContext().getDeclAlign(Pair.first));
5010 Scope.addPrivate(Pair.first, Replacement);
5011 if (auto *DI = CGF.getDebugInfo())
5012 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5013 (void)DI->EmitDeclareOfAutoVariable(
5014 Pair.first, Pair.second.getBasePointer(), CGF.Builder,
5015 /*UsePointerValue*/ true);
5016 }
5017 // Adjust the mapping for internal locals by mapping the actual memory
5018 // instead of a pointer to that memory.
5019 for (auto &Pair : UntiedLocalVars) {
5020 QualType VDType = Pair.first->getType().getNonReferenceType();
5021 if (Pair.first->getType()->isLValueReferenceType())
5022 VDType = CGF.getContext().getPointerType(VDType);
5023 if (isAllocatableDecl(Pair.first)) {
5024 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
5025 Address Replacement(
5026 Ptr,
5027 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
5028 CGF.getPointerAlign());
5029 Pair.second.first = Replacement;
5030 Ptr = CGF.Builder.CreateLoad(Replacement);
5031 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
5032 CGF.getContext().getDeclAlign(Pair.first));
5033 Pair.second.second = Replacement;
5034 } else {
5035 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
5036 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
5037 CGF.getContext().getDeclAlign(Pair.first));
5038 Pair.second.first = Replacement;
5039 }
5040 }
5041 }
5042 if (Data.Reductions) {
5043 OMPPrivateScope FirstprivateScope(CGF);
5044 for (const auto &Pair : FirstprivatePtrs) {
5045 Address Replacement(
5046 CGF.Builder.CreateLoad(Pair.second),
5047 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5048 CGF.getContext().getDeclAlign(Pair.first));
5049 FirstprivateScope.addPrivate(Pair.first, Replacement);
5050 }
5051 (void)FirstprivateScope.Privatize();
5052 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5053 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5054 Data.ReductionCopies, Data.ReductionOps);
5055 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5056 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
5057 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5058 RedCG.emitSharedOrigLValue(CGF, Cnt);
5059 RedCG.emitAggregateType(CGF, Cnt);
5060 // FIXME: This must be removed once the runtime library is fixed.
5061 // Emit required threadprivate variables for
5062 // initializer/combiner/finalizer.
5063 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5064 RedCG, Cnt);
5065 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5066 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5067 Replacement = Address(
5068 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5069 CGF.getContext().VoidPtrTy,
5070 CGF.getContext().getPointerType(
5071 Data.ReductionCopies[Cnt]->getType()),
5072 Data.ReductionCopies[Cnt]->getExprLoc()),
5073 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5074 Replacement.getAlignment());
5075 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5076 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5077 }
5078 }
5079 // Privatize all private variables except for in_reduction items.
5080 (void)Scope.Privatize();
5081 SmallVector<const Expr *, 4> InRedVars;
5082 SmallVector<const Expr *, 4> InRedPrivs;
5083 SmallVector<const Expr *, 4> InRedOps;
5084 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5085 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5086 auto IPriv = C->privates().begin();
5087 auto IRed = C->reduction_ops().begin();
5088 auto ITD = C->taskgroup_descriptors().begin();
5089 for (const Expr *Ref : C->varlist()) {
5090 InRedVars.emplace_back(Ref);
5091 InRedPrivs.emplace_back(*IPriv);
5092 InRedOps.emplace_back(*IRed);
5093 TaskgroupDescriptors.emplace_back(*ITD);
5094 std::advance(IPriv, 1);
5095 std::advance(IRed, 1);
5096 std::advance(ITD, 1);
5097 }
5098 }
5099 // Privatize in_reduction items here, because taskgroup descriptors must be
5100 // privatized earlier.
5101 OMPPrivateScope InRedScope(CGF);
5102 if (!InRedVars.empty()) {
5103 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5104 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5105 RedCG.emitSharedOrigLValue(CGF, Cnt);
5106 RedCG.emitAggregateType(CGF, Cnt);
5107 // The taskgroup descriptor variable is always implicit firstprivate and
5108 // privatized already during processing of the firstprivates.
5109 // FIXME: This must be removed once the runtime library is fixed.
5110 // Emit required threadprivate variables for
5111 // initializer/combiner/finalizer.
5112 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5113 RedCG, Cnt); 5114 llvm::Value *ReductionsPtr; 5115 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5116 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), 5117 TRExpr->getExprLoc()); 5118 } else { 5119 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5120 } 5121 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5122 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5123 Replacement = Address( 5124 CGF.EmitScalarConversion( 5125 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy, 5126 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5127 InRedPrivs[Cnt]->getExprLoc()), 5128 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5129 Replacement.getAlignment()); 5130 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5131 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5132 } 5133 } 5134 (void)InRedScope.Privatize(); 5135 5136 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, 5137 UntiedLocalVars); 5138 Action.Enter(CGF); 5139 BodyGen(CGF); 5140 }; 5141 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 5142 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5143 S, *I, *PartId, *TaskT, EKind, CodeGen, Data.Tied, Data.NumberOfParts); 5144 OMPLexicalScope Scope(*this, S, std::nullopt, 5145 !isOpenMPParallelDirective(EKind) && 5146 !isOpenMPSimdDirective(EKind)); 5147 TaskGen(*this, OutlinedFn, Data); 5148 } 5149 5150 static ImplicitParamDecl * 5151 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 5152 QualType Ty, CapturedDecl *CD, 5153 SourceLocation Loc) { 5154 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 5155 ImplicitParamKind::Other); 5156 auto *OrigRef = DeclRefExpr::Create( 5157 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 5158 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5159 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 5160 ImplicitParamKind::Other); 5161 auto *PrivateRef = DeclRefExpr::Create( 5162 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 5163 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 5164 QualType ElemType = C.getBaseElementType(Ty); 5165 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 5166 ImplicitParamKind::Other); 5167 auto *InitRef = DeclRefExpr::Create( 5168 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 5169 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 5170 PrivateVD->setInitStyle(VarDecl::CInit); 5171 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 5172 InitRef, /*BasePath=*/nullptr, 5173 VK_PRValue, FPOptionsOverride())); 5174 Data.FirstprivateVars.emplace_back(OrigRef); 5175 Data.FirstprivateCopies.emplace_back(PrivateRef); 5176 Data.FirstprivateInits.emplace_back(InitRef); 5177 return OrigVD; 5178 } 5179 5180 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 5181 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 5182 OMPTargetDataInfo &InputInfo) { 5183 // Emit outlined function for task construct. 
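// A target task wraps a deferred target region, e.g. (illustrative only)
//   #pragma omp target nowait map(tofrom: a)
// The offload argument arrays prepared for the region (base pointers,
// pointers, sizes, and, if user-defined mappers are present, mappers) are
// handed to the task as implicit firstprivates below.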
5184 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5185 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5186 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5187 auto I = CS->getCapturedDecl()->param_begin(); 5188 auto PartId = std::next(I); 5189 auto TaskT = std::next(I, 4); 5190 OMPTaskDataTy Data; 5191 // The task is not final. 5192 Data.Final.setInt(/*IntVal=*/false); 5193 // Get list of firstprivate variables. 5194 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 5195 auto IRef = C->varlist_begin(); 5196 auto IElemInitRef = C->inits().begin(); 5197 for (auto *IInit : C->private_copies()) { 5198 Data.FirstprivateVars.push_back(*IRef); 5199 Data.FirstprivateCopies.push_back(IInit); 5200 Data.FirstprivateInits.push_back(*IElemInitRef); 5201 ++IRef; 5202 ++IElemInitRef; 5203 } 5204 } 5205 SmallVector<const Expr *, 4> LHSs; 5206 SmallVector<const Expr *, 4> RHSs; 5207 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 5208 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5209 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5210 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5211 Data.ReductionOps.append(C->reduction_ops().begin(), 5212 C->reduction_ops().end()); 5213 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5214 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5215 } 5216 OMPPrivateScope TargetScope(*this); 5217 VarDecl *BPVD = nullptr; 5218 VarDecl *PVD = nullptr; 5219 VarDecl *SVD = nullptr; 5220 VarDecl *MVD = nullptr; 5221 if (InputInfo.NumberOfTargetItems > 0) { 5222 auto *CD = CapturedDecl::Create( 5223 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 5224 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 5225 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( 5226 getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal, 5227 /*IndexTypeQuals=*/0); 5228 BPVD = createImplicitFirstprivateForType( 5229 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5230 PVD = createImplicitFirstprivateForType( 5231 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5232 QualType SizesType = getContext().getConstantArrayType( 5233 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 5234 ArrSize, nullptr, ArraySizeModifier::Normal, 5235 /*IndexTypeQuals=*/0); 5236 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 5237 S.getBeginLoc()); 5238 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray); 5239 TargetScope.addPrivate(PVD, InputInfo.PointersArray); 5240 TargetScope.addPrivate(SVD, InputInfo.SizesArray); 5241 // If there is no user-defined mapper, the mapper array will be nullptr. In 5242 // this case, we don't need to privatize it. 5243 if (!isa_and_nonnull<llvm::ConstantPointerNull>( 5244 InputInfo.MappersArray.emitRawPointer(*this))) { 5245 MVD = createImplicitFirstprivateForType( 5246 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); 5247 TargetScope.addPrivate(MVD, InputInfo.MappersArray); 5248 } 5249 } 5250 (void)TargetScope.Privatize(); 5251 buildDependences(S, Data); 5252 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S); 5253 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind, 5254 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 5255 // Set proper addresses for generated private copies. 
5256 OMPPrivateScope Scope(CGF); 5257 if (!Data.FirstprivateVars.empty()) { 5258 enum { PrivatesParam = 2, CopyFnParam = 3 }; 5259 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 5260 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 5261 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 5262 CS->getCapturedDecl()->getParam(PrivatesParam))); 5263 // Map privates. 5264 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 5265 llvm::SmallVector<llvm::Value *, 16> CallArgs; 5266 llvm::SmallVector<llvm::Type *, 4> ParamTypes; 5267 CallArgs.push_back(PrivatesPtr); 5268 ParamTypes.push_back(PrivatesPtr->getType()); 5269 for (const Expr *E : Data.FirstprivateVars) { 5270 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5271 RawAddress PrivatePtr = 5272 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 5273 ".firstpriv.ptr.addr"); 5274 PrivatePtrs.emplace_back(VD, PrivatePtr); 5275 CallArgs.push_back(PrivatePtr.getPointer()); 5276 ParamTypes.push_back(PrivatePtr.getType()); 5277 } 5278 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), 5279 ParamTypes, /*isVarArg=*/false); 5280 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 5281 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 5282 for (const auto &Pair : PrivatePtrs) { 5283 Address Replacement( 5284 CGF.Builder.CreateLoad(Pair.second), 5285 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), 5286 CGF.getContext().getDeclAlign(Pair.first)); 5287 Scope.addPrivate(Pair.first, Replacement); 5288 } 5289 } 5290 CGF.processInReduction(S, Data, CGF, CS, Scope); 5291 if (InputInfo.NumberOfTargetItems > 0) { 5292 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 5293 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 5294 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 5295 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 5296 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 5297 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 5298 // If MVD is nullptr, the mapper array is not privatized 5299 if (MVD) 5300 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( 5301 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); 5302 } 5303 5304 Action.Enter(CGF); 5305 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 5306 auto *TL = S.getSingleClause<OMPThreadLimitClause>(); 5307 if (CGF.CGM.getLangOpts().OpenMP >= 51 && 5308 needsTaskBasedThreadLimit(EKind) && TL) { 5309 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task 5310 // enclosing this target region. This will indirectly set the thread_limit 5311 // for every applicable construct within target region. 5312 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( 5313 CGF, TL->getThreadLimit().front(), S.getBeginLoc()); 5314 } 5315 BodyGen(CGF); 5316 }; 5317 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 5318 S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true, 5319 Data.NumberOfParts); 5320 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 
1 : 0);
5321 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5322 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5323 SourceLocation());
5324 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5325 SharedsTy, CapturedStruct, &IfCond, Data);
5326 }
5327
5328 void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5329 OMPTaskDataTy &Data,
5330 CodeGenFunction &CGF,
5331 const CapturedStmt *CS,
5332 OMPPrivateScope &Scope) {
5333 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5334 if (Data.Reductions) {
5335 OpenMPDirectiveKind CapturedRegion = EKind;
5336 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5337 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5338 Data.ReductionCopies, Data.ReductionOps);
5339 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5340 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5341 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5342 RedCG.emitSharedOrigLValue(CGF, Cnt);
5343 RedCG.emitAggregateType(CGF, Cnt);
5344 // FIXME: This must be removed once the runtime library is fixed.
5345 // Emit required threadprivate variables for
5346 // initializer/combiner/finalizer.
5347 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5348 RedCG, Cnt);
5349 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5350 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5351 Replacement = Address(
5352 CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
5353 CGF.getContext().VoidPtrTy,
5354 CGF.getContext().getPointerType(
5355 Data.ReductionCopies[Cnt]->getType()),
5356 Data.ReductionCopies[Cnt]->getExprLoc()),
5357 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5358 Replacement.getAlignment());
5359 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5360 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5361 }
5362 }
5363 (void)Scope.Privatize();
5364 SmallVector<const Expr *, 4> InRedVars;
5365 SmallVector<const Expr *, 4> InRedPrivs;
5366 SmallVector<const Expr *, 4> InRedOps;
5367 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5368 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5369 auto IPriv = C->privates().begin();
5370 auto IRed = C->reduction_ops().begin();
5371 auto ITD = C->taskgroup_descriptors().begin();
5372 for (const Expr *Ref : C->varlist()) {
5373 InRedVars.emplace_back(Ref);
5374 InRedPrivs.emplace_back(*IPriv);
5375 InRedOps.emplace_back(*IRed);
5376 TaskgroupDescriptors.emplace_back(*ITD);
5377 std::advance(IPriv, 1);
5378 std::advance(IRed, 1);
5379 std::advance(ITD, 1);
5380 }
5381 }
5382 OMPPrivateScope InRedScope(CGF);
5383 if (!InRedVars.empty()) {
5384 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5385 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5386 RedCG.emitSharedOrigLValue(CGF, Cnt);
5387 RedCG.emitAggregateType(CGF, Cnt);
5388 // FIXME: This must be removed once the runtime library is fixed.
5389 // Emit required threadprivate variables for
5390 // initializer/combiner/finalizer.
5391 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 5392 RedCG, Cnt); 5393 llvm::Value *ReductionsPtr; 5394 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { 5395 ReductionsPtr = 5396 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc()); 5397 } else { 5398 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5399 } 5400 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 5401 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 5402 Replacement = Address( 5403 CGF.EmitScalarConversion( 5404 Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy, 5405 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 5406 InRedPrivs[Cnt]->getExprLoc()), 5407 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), 5408 Replacement.getAlignment()); 5409 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 5410 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); 5411 } 5412 } 5413 (void)InRedScope.Privatize(); 5414 } 5415 5416 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 5417 // Emit outlined function for task construct. 5418 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 5419 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 5420 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 5421 const Expr *IfCond = nullptr; 5422 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 5423 if (C->getNameModifier() == OMPD_unknown || 5424 C->getNameModifier() == OMPD_task) { 5425 IfCond = C->getCondition(); 5426 break; 5427 } 5428 } 5429 5430 OMPTaskDataTy Data; 5431 // Check if we should emit tied or untied task. 5432 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 5433 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 5434 CGF.EmitStmt(CS->getCapturedStmt()); 5435 }; 5436 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 5437 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 5438 const OMPTaskDataTy &Data) { 5439 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 5440 SharedsTy, CapturedStruct, IfCond, 5441 Data); 5442 }; 5443 auto LPCRegion = 5444 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 5445 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 5446 } 5447 5448 void CodeGenFunction::EmitOMPTaskyieldDirective( 5449 const OMPTaskyieldDirective &S) { 5450 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 5451 } 5452 5453 void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { 5454 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); 5455 Expr *ME = MC ? 
MC->getMessageString() : nullptr; 5456 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); 5457 bool IsFatal = false; 5458 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) 5459 IsFatal = true; 5460 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal); 5461 } 5462 5463 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 5464 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); 5465 } 5466 5467 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 5468 OMPTaskDataTy Data; 5469 // Build list of dependences 5470 buildDependences(S, Data); 5471 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); 5472 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); 5473 } 5474 5475 static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { 5476 return T.clauses().empty(); 5477 } 5478 5479 void CodeGenFunction::EmitOMPTaskgroupDirective( 5480 const OMPTaskgroupDirective &S) { 5481 OMPLexicalScope Scope(*this, S, OMPD_unknown); 5482 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) { 5483 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 5484 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 5485 InsertPointTy AllocaIP(AllocaInsertPt->getParent(), 5486 AllocaInsertPt->getIterator()); 5487 5488 auto BodyGenCB = [&, this](InsertPointTy AllocaIP, 5489 InsertPointTy CodeGenIP) { 5490 Builder.restoreIP(CodeGenIP); 5491 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 5492 return llvm::Error::success(); 5493 }; 5494 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 5495 if (!CapturedStmtInfo) 5496 CapturedStmtInfo = &CapStmtInfo; 5497 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 5498 cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); 5499 Builder.restoreIP(AfterIP); 5500 return; 5501 } 5502 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5503 Action.Enter(CGF); 5504 if (const Expr *E = S.getReductionRef()) { 5505 SmallVector<const Expr *, 4> LHSs; 5506 SmallVector<const Expr *, 4> RHSs; 5507 OMPTaskDataTy Data; 5508 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 5509 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); 5510 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); 5511 Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); 5512 Data.ReductionOps.append(C->reduction_ops().begin(), 5513 C->reduction_ops().end()); 5514 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5515 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5516 } 5517 llvm::Value *ReductionDesc = 5518 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), 5519 LHSs, RHSs, Data); 5520 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5521 CGF.EmitVarDecl(*VD); 5522 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 5523 /*Volatile=*/false, E->getType()); 5524 } 5525 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 5526 }; 5527 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); 5528 } 5529 5530 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 5531 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() 5532 ? 
llvm::AtomicOrdering::NotAtomic 5533 : llvm::AtomicOrdering::AcquireRelease; 5534 CGM.getOpenMPRuntime().emitFlush( 5535 *this, 5536 [&S]() -> ArrayRef<const Expr *> { 5537 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) 5538 return llvm::ArrayRef(FlushClause->varlist_begin(), 5539 FlushClause->varlist_end()); 5540 return {}; 5541 }(), 5542 S.getBeginLoc(), AO); 5543 } 5544 5545 void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { 5546 const auto *DO = S.getSingleClause<OMPDepobjClause>(); 5547 LValue DOLVal = EmitLValue(DO->getDepobj()); 5548 if (const auto *DC = S.getSingleClause<OMPDependClause>()) { 5549 // Build list and emit dependences 5550 OMPTaskDataTy Data; 5551 buildDependences(S, Data); 5552 for (auto &Dep : Data.Dependences) { 5553 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( 5554 *this, Dep, DC->getBeginLoc()); 5555 EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal); 5556 } 5557 return; 5558 } 5559 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { 5560 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); 5561 return; 5562 } 5563 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { 5564 CGM.getOpenMPRuntime().emitUpdateClause( 5565 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); 5566 return; 5567 } 5568 } 5569 5570 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { 5571 if (!OMPParentLoopDirectiveForScan) 5572 return; 5573 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; 5574 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); 5575 SmallVector<const Expr *, 4> Shareds; 5576 SmallVector<const Expr *, 4> Privates; 5577 SmallVector<const Expr *, 4> LHSs; 5578 SmallVector<const Expr *, 4> RHSs; 5579 SmallVector<const Expr *, 4> ReductionOps; 5580 SmallVector<const Expr *, 4> CopyOps; 5581 SmallVector<const Expr *, 4> CopyArrayTemps; 5582 SmallVector<const Expr *, 4> CopyArrayElems; 5583 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { 5584 if (C->getModifier() != OMPC_REDUCTION_inscan) 5585 continue; 5586 Shareds.append(C->varlist_begin(), C->varlist_end()); 5587 Privates.append(C->privates().begin(), C->privates().end()); 5588 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 5589 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 5590 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 5591 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); 5592 CopyArrayTemps.append(C->copy_array_temps().begin(), 5593 C->copy_array_temps().end()); 5594 CopyArrayElems.append(C->copy_array_elems().begin(), 5595 C->copy_array_elems().end()); 5596 } 5597 if (ParentDir.getDirectiveKind() == OMPD_simd || 5598 (getLangOpts().OpenMPSimd && 5599 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { 5600 // For simd directive and simd-based directives in simd only mode, use the 5601 // following codegen: 5602 // int x = 0; 5603 // #pragma omp simd reduction(inscan, +: x) 5604 // for (..) { 5605 // <first part> 5606 // #pragma omp scan inclusive(x) 5607 // <second part> 5608 // } 5609 // is transformed to: 5610 // int x = 0; 5611 // for (..) { 5612 // int x_priv = 0; 5613 // <first part> 5614 // x = x_priv + x; 5615 // x_priv = x; 5616 // <second part> 5617 // } 5618 // and 5619 // int x = 0; 5620 // #pragma omp simd reduction(inscan, +: x) 5621 // for (..) 
{ 5622 // <first part> 5623 // #pragma omp scan exclusive(x) 5624 // <second part> 5625 // } 5626 // to 5627 // int x = 0; 5628 // for (..) { 5629 // int x_priv = 0; 5630 // <second part> 5631 // int temp = x; 5632 // x = x_priv + x; 5633 // x_priv = temp; 5634 // <first part> 5635 // } 5636 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); 5637 EmitBranch(IsInclusive 5638 ? OMPScanReduce 5639 : BreakContinueStack.back().ContinueBlock.getBlock()); 5640 EmitBlock(OMPScanDispatch); 5641 { 5642 // New scope for correct construction/destruction of temp variables for 5643 // exclusive scan. 5644 LexicalScope Scope(*this, S.getSourceRange()); 5645 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); 5646 EmitBlock(OMPScanReduce); 5647 if (!IsInclusive) { 5648 // Create temp var and copy LHS value to this temp value. 5649 // TMP = LHS; 5650 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5651 const Expr *PrivateExpr = Privates[I]; 5652 const Expr *TempExpr = CopyArrayTemps[I]; 5653 EmitAutoVarDecl( 5654 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); 5655 LValue DestLVal = EmitLValue(TempExpr); 5656 LValue SrcLVal = EmitLValue(LHSs[I]); 5657 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(), 5658 SrcLVal.getAddress(), 5659 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5660 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), 5661 CopyOps[I]); 5662 } 5663 } 5664 CGM.getOpenMPRuntime().emitReduction( 5665 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, 5666 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); 5667 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5668 const Expr *PrivateExpr = Privates[I]; 5669 LValue DestLVal; 5670 LValue SrcLVal; 5671 if (IsInclusive) { 5672 DestLVal = EmitLValue(RHSs[I]); 5673 SrcLVal = EmitLValue(LHSs[I]); 5674 } else { 5675 const Expr *TempExpr = CopyArrayTemps[I]; 5676 DestLVal = EmitLValue(RHSs[I]); 5677 SrcLVal = EmitLValue(TempExpr); 5678 } 5679 EmitOMPCopy( 5680 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 5681 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5682 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 5683 } 5684 } 5685 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); 5686 OMPScanExitBlock = IsInclusive 5687 ? BreakContinueStack.back().ContinueBlock.getBlock() 5688 : OMPScanReduce; 5689 EmitBlock(OMPAfterScanBlock); 5690 return; 5691 } 5692 if (!IsInclusive) { 5693 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5694 EmitBlock(OMPScanExitBlock); 5695 } 5696 if (OMPFirstScanLoop) { 5697 // Emit buffer[i] = red; at the end of the input phase. 
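// Illustrative sketch of the two-pass scheme: in the first (input) pass each
// iteration stores its private reduction value into the temporary copy array
// at index 'i'; once the buffer has been combined into prefix values, the
// second (scan) pass reloads 'red = buffer[i]' (or 'buffer[i-1]' for
// 'exclusive' scan) on entry to the scan phase.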
5698 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) 5699 .getIterationVariable() 5700 ->IgnoreParenImpCasts(); 5701 LValue IdxLVal = EmitLValue(IVExpr); 5702 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); 5703 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); 5704 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5705 const Expr *PrivateExpr = Privates[I]; 5706 const Expr *OrigExpr = Shareds[I]; 5707 const Expr *CopyArrayElem = CopyArrayElems[I]; 5708 OpaqueValueMapping IdxMapping( 5709 *this, 5710 cast<OpaqueValueExpr>( 5711 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 5712 RValue::get(IdxVal)); 5713 LValue DestLVal = EmitLValue(CopyArrayElem); 5714 LValue SrcLVal = EmitLValue(OrigExpr); 5715 EmitOMPCopy( 5716 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 5717 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5718 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 5719 } 5720 } 5721 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5722 if (IsInclusive) { 5723 EmitBlock(OMPScanExitBlock); 5724 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); 5725 } 5726 EmitBlock(OMPScanDispatch); 5727 if (!OMPFirstScanLoop) { 5728 // Emit red = buffer[i]; at the entrance to the scan phase. 5729 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) 5730 .getIterationVariable() 5731 ->IgnoreParenImpCasts(); 5732 LValue IdxLVal = EmitLValue(IVExpr); 5733 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); 5734 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); 5735 llvm::BasicBlock *ExclusiveExitBB = nullptr; 5736 if (!IsInclusive) { 5737 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); 5738 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); 5739 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); 5740 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); 5741 EmitBlock(ContBB); 5742 // Use idx - 1 iteration for exclusive scan. 5743 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); 5744 } 5745 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { 5746 const Expr *PrivateExpr = Privates[I]; 5747 const Expr *OrigExpr = Shareds[I]; 5748 const Expr *CopyArrayElem = CopyArrayElems[I]; 5749 OpaqueValueMapping IdxMapping( 5750 *this, 5751 cast<OpaqueValueExpr>( 5752 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), 5753 RValue::get(IdxVal)); 5754 LValue SrcLVal = EmitLValue(CopyArrayElem); 5755 LValue DestLVal = EmitLValue(OrigExpr); 5756 EmitOMPCopy( 5757 PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(), 5758 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), 5759 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]); 5760 } 5761 if (!IsInclusive) { 5762 EmitBlock(ExclusiveExitBB); 5763 } 5764 } 5765 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock 5766 : OMPAfterScanBlock); 5767 EmitBlock(OMPAfterScanBlock); 5768 } 5769 5770 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 5771 const CodeGenLoopTy &CodeGenLoop, 5772 Expr *IncExpr) { 5773 // Emit the loop iteration variable. 5774 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 5775 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 5776 EmitVarDecl(*IVDecl); 5777 5778 // Emit the iterations count variable. 
5779 // If it is not a variable, Sema decided to calculate iterations count on each 5780 // iteration (e.g., it is foldable into a constant). 5781 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 5782 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 5783 // Emit calculation of the iterations count. 5784 EmitIgnoredExpr(S.getCalcLastIteration()); 5785 } 5786 5787 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 5788 5789 bool HasLastprivateClause = false; 5790 // Check pre-condition. 5791 { 5792 OMPLoopScope PreInitScope(*this, S); 5793 // Skip the entire loop if we don't meet the precondition. 5794 // If the condition constant folds and can be elided, avoid emitting the 5795 // whole loop. 5796 bool CondConstant; 5797 llvm::BasicBlock *ContBlock = nullptr; 5798 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 5799 if (!CondConstant) 5800 return; 5801 } else { 5802 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 5803 ContBlock = createBasicBlock("omp.precond.end"); 5804 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 5805 getProfileCount(&S)); 5806 EmitBlock(ThenBlock); 5807 incrementProfileCounter(&S); 5808 } 5809 5810 emitAlignedClause(*this, S); 5811 // Emit 'then' code. 5812 { 5813 // Emit helper vars inits. 5814 5815 LValue LB = EmitOMPHelperVar( 5816 *this, cast<DeclRefExpr>( 5817 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 5818 ? S.getCombinedLowerBoundVariable() 5819 : S.getLowerBoundVariable()))); 5820 LValue UB = EmitOMPHelperVar( 5821 *this, cast<DeclRefExpr>( 5822 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 5823 ? S.getCombinedUpperBoundVariable() 5824 : S.getUpperBoundVariable()))); 5825 LValue ST = 5826 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 5827 LValue IL = 5828 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 5829 5830 OMPPrivateScope LoopScope(*this); 5831 if (EmitOMPFirstprivateClause(S, LoopScope)) { 5832 // Emit implicit barrier to synchronize threads and avoid data races 5833 // on initialization of firstprivate variables and post-update of 5834 // lastprivate variables. 5835 CGM.getOpenMPRuntime().emitBarrierCall( 5836 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 5837 /*ForceSimpleCall=*/true); 5838 } 5839 EmitOMPPrivateClause(S, LoopScope); 5840 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 5841 !isOpenMPParallelDirective(S.getDirectiveKind()) && 5842 !isOpenMPTeamsDirective(S.getDirectiveKind())) 5843 EmitOMPReductionClauseInit(S, LoopScope); 5844 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 5845 EmitOMPPrivateLoopCounters(S, LoopScope); 5846 (void)LoopScope.Privatize(); 5847 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 5848 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 5849 5850 // Detect the distribute schedule kind and chunk. 5851 llvm::Value *Chunk = nullptr; 5852 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 5853 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 5854 ScheduleKind = C->getDistScheduleKind(); 5855 if (const Expr *Ch = C->getChunkSize()) { 5856 Chunk = EmitScalarExpr(Ch); 5857 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 5858 S.getIterationVariable()->getType(), 5859 S.getBeginLoc()); 5860 } 5861 } else { 5862 // Default behaviour for dist_schedule clause. 
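// For illustration: with 'dist_schedule(static, 128)' the branch above
// evaluates the chunk expression and casts it to the iteration variable's
// type; without the clause, a target-specific default schedule (typically
// static with no chunk) is picked up here.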
5863 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5864 *this, S, ScheduleKind, Chunk);
5865 }
5866 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5867 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5868
5869 // OpenMP [2.10.8, distribute Construct, Description]
5870 // If dist_schedule is specified, kind must be static. If specified,
5871 // iterations are divided into chunks of size chunk_size, chunks are
5872 // assigned to the teams of the league in a round-robin fashion in the
5873 // order of the team number. When no chunk_size is specified, the
5874 // iteration space is divided into chunks that are approximately equal
5875 // in size, and at most one chunk is distributed to each team of the
5876 // league. The size of the chunks is unspecified in this case.
5877 bool StaticChunked =
5878 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5879 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5880 if (RT.isStaticNonchunked(ScheduleKind,
5881 /* Chunked */ Chunk != nullptr) ||
5882 StaticChunked) {
5883 CGOpenMPRuntime::StaticRTInput StaticInit(
5884 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
5885 LB.getAddress(), UB.getAddress(), ST.getAddress(),
5886 StaticChunked ? Chunk : nullptr);
5887 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5888 StaticInit);
5889 JumpDest LoopExit =
5890 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5891 // UB = min(UB, GlobalUB);
5892 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5893 ? S.getCombinedEnsureUpperBound()
5894 : S.getEnsureUpperBound());
5895 // IV = LB;
5896 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5897 ? S.getCombinedInit()
5898 : S.getInit());
5899
5900 const Expr *Cond =
5901 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5902 ? S.getCombinedCond()
5903 : S.getCond();
5904
5905 if (StaticChunked)
5906 Cond = S.getCombinedDistCond();
5907
5908 // For static unchunked schedules generate:
5909 //
5910 // 1. For distribute alone, codegen
5911 // while (idx <= UB) {
5912 // BODY;
5913 // ++idx;
5914 // }
5915 //
5916 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5917 // while (idx <= UB) {
5918 // <CodeGen rest of pragma>(LB, UB);
5919 // idx += ST;
5920 // }
5921 //
5922 // For static chunked schedules generate:
5923 //
5924 // while (IV <= GlobalUB) {
5925 // <CodeGen rest of pragma>(LB, UB);
5926 // LB += ST;
5927 // UB += ST;
5928 // UB = min(UB, GlobalUB);
5929 // IV = LB;
5930 // }
5931 //
5932 emitCommonSimdLoop(
5933 *this, S,
5934 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5935 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5936 CGF.EmitOMPSimdInit(S);
5937 },
5938 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5939 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5940 CGF.EmitOMPInnerLoop(
5941 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5942 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5943 CodeGenLoop(CGF, S, LoopExit);
5944 },
5945 [&S, StaticChunked](CodeGenFunction &CGF) {
5946 if (StaticChunked) {
5947 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5948 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5949 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5950 CGF.EmitIgnoredExpr(S.getCombinedInit());
5951 }
5952 });
5953 });
5954 EmitBlock(LoopExit.getBlock());
5955 // Tell the runtime we are done.
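// This typically lowers to a '__kmpc_distribute_static_fini' (or
// '__kmpc_for_static_fini') runtime call, pairing with the static-init
// call emitted above.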
5956 RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute); 5957 } else { 5958 // Emit the outer loop, which requests its work chunk [LB..UB] from 5959 // runtime and runs the inner loop to process it. 5960 const OMPLoopArguments LoopArguments = { 5961 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 5962 Chunk}; 5963 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 5964 CodeGenLoop); 5965 } 5966 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 5967 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 5968 return CGF.Builder.CreateIsNotNull( 5969 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 5970 }); 5971 } 5972 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 5973 !isOpenMPParallelDirective(S.getDirectiveKind()) && 5974 !isOpenMPTeamsDirective(S.getDirectiveKind())) { 5975 EmitOMPReductionClauseFinal(S, OMPD_simd); 5976 // Emit post-update of the reduction variables if IsLastIter != 0. 5977 emitPostUpdateForReductionClause( 5978 *this, S, [IL, &S](CodeGenFunction &CGF) { 5979 return CGF.Builder.CreateIsNotNull( 5980 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 5981 }); 5982 } 5983 // Emit final copy of the lastprivate variables if IsLastIter != 0. 5984 if (HasLastprivateClause) { 5985 EmitOMPLastprivateClauseFinal( 5986 S, /*NoFinals=*/false, 5987 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 5988 } 5989 } 5990 5991 // We're now done with the loop, so jump to the continuation block. 5992 if (ContBlock) { 5993 EmitBranch(ContBlock); 5994 EmitBlock(ContBlock, true); 5995 } 5996 } 5997 } 5998 5999 // Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this 6000 // function available for "loop bind(teams)", which maps to "distribute". 6001 static void emitOMPDistributeDirective(const OMPLoopDirective &S, 6002 CodeGenFunction &CGF, 6003 CodeGenModule &CGM) { 6004 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 6005 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 6006 }; 6007 OMPLexicalScope Scope(CGF, S, OMPD_unknown); 6008 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGen); 6009 } 6010 6011 void CodeGenFunction::EmitOMPDistributeDirective( 6012 const OMPDistributeDirective &S) { 6013 emitOMPDistributeDirective(S, *this, CGM); 6014 } 6015 6016 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 6017 const CapturedStmt *S, 6018 SourceLocation Loc) { 6019 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 6020 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 6021 CGF.CapturedStmtInfo = &CapStmtInfo; 6022 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); 6023 Fn->setDoesNotRecurse(); 6024 return Fn; 6025 } 6026 6027 template <typename T> 6028 static void emitRestoreIP(CodeGenFunction &CGF, const T *C, 6029 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6030 llvm::OpenMPIRBuilder &OMPBuilder) { 6031 6032 unsigned NumLoops = C->getNumLoops(); 6033 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( 6034 /*DestWidth=*/64, /*Signed=*/1); 6035 llvm::SmallVector<llvm::Value *> StoreValues; 6036 for (unsigned I = 0; I < NumLoops; I++) { 6037 const Expr *CounterVal = C->getLoopData(I); 6038 assert(CounterVal); 6039 llvm::Value *StoreValue = CGF.EmitScalarConversion( 6040 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 6041 CounterVal->getExprLoc()); 6042 StoreValues.emplace_back(StoreValue); 6043 } 6044 OMPDoacrossKind<T> ODK; 6045 bool IsDependSource = ODK.isSource(C); 6046 
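// For illustration: '#pragma omp ordered depend(source)' (or
// 'doacross(source:)') posts the current iteration vector, while
// 'depend(sink: i-1)' waits on the given vector; the builder stores the
// loop counters into the '.cnt.addr' temporary and emits the matching
// doacross runtime call.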
CGF.Builder.restoreIP( 6047 OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops, 6048 StoreValues, ".cnt.addr", IsDependSource)); 6049 } 6050 6051 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 6052 if (CGM.getLangOpts().OpenMPIRBuilder) { 6053 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 6054 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 6055 6056 if (S.hasClausesOfKind<OMPDependClause>() || 6057 S.hasClausesOfKind<OMPDoacrossClause>()) { 6058 // The ordered directive with depend clause. 6059 assert(!S.hasAssociatedStmt() && "No associated statement must be in " 6060 "ordered depend|doacross construct."); 6061 InsertPointTy AllocaIP(AllocaInsertPt->getParent(), 6062 AllocaInsertPt->getIterator()); 6063 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 6064 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); 6065 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) 6066 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); 6067 } else { 6068 // The ordered directive with threads or simd clause, or without clause. 6069 // Without clause, it behaves as if the threads clause is specified. 6070 const auto *C = S.getSingleClause<OMPSIMDClause>(); 6071 6072 auto FiniCB = [this](InsertPointTy IP) { 6073 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 6074 return llvm::Error::success(); 6075 }; 6076 6077 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, 6078 InsertPointTy CodeGenIP) { 6079 Builder.restoreIP(CodeGenIP); 6080 6081 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 6082 if (C) { 6083 llvm::BasicBlock *FiniBB = splitBBWithSuffix( 6084 Builder, /*CreateBranch=*/false, ".ordered.after"); 6085 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 6086 GenerateOpenMPCapturedVars(*CS, CapturedVars); 6087 llvm::Function *OutlinedFn = 6088 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); 6089 assert(S.getBeginLoc().isValid() && 6090 "Outlined function call location must be valid."); 6091 ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc()); 6092 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB, 6093 OutlinedFn, CapturedVars); 6094 } else { 6095 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( 6096 *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered"); 6097 } 6098 return llvm::Error::success(); 6099 }; 6100 6101 OMPLexicalScope Scope(*this, S, OMPD_unknown); 6102 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 6103 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C)); 6104 Builder.restoreIP(AfterIP); 6105 } 6106 return; 6107 } 6108 6109 if (S.hasClausesOfKind<OMPDependClause>()) { 6110 assert(!S.hasAssociatedStmt() && 6111 "No associated statement must be in ordered depend construct."); 6112 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 6113 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 6114 return; 6115 } 6116 if (S.hasClausesOfKind<OMPDoacrossClause>()) { 6117 assert(!S.hasAssociatedStmt() && 6118 "No associated statement must be in ordered doacross construct."); 6119 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) 6120 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 6121 return; 6122 } 6123 const auto *C = S.getSingleClause<OMPSIMDClause>(); 6124 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 6125 PrePostActionTy &Action) { 6126 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 6127 if (C) { 6128 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 6129 
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 6130 llvm::Function *OutlinedFn = 6131 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); 6132 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), 6133 OutlinedFn, CapturedVars); 6134 } else { 6135 Action.Enter(CGF); 6136 CGF.EmitStmt(CS->getCapturedStmt()); 6137 } 6138 }; 6139 OMPLexicalScope Scope(*this, S, OMPD_unknown); 6140 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C); 6141 } 6142 6143 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 6144 QualType SrcType, QualType DestType, 6145 SourceLocation Loc) { 6146 assert(CGF.hasScalarEvaluationKind(DestType) && 6147 "DestType must have scalar evaluation kind."); 6148 assert(!Val.isAggregate() && "Must be a scalar or complex."); 6149 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 6150 DestType, Loc) 6151 : CGF.EmitComplexToScalarConversion( 6152 Val.getComplexVal(), SrcType, DestType, Loc); 6153 } 6154 6155 static CodeGenFunction::ComplexPairTy 6156 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 6157 QualType DestType, SourceLocation Loc) { 6158 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 6159 "DestType must have complex evaluation kind."); 6160 CodeGenFunction::ComplexPairTy ComplexVal; 6161 if (Val.isScalar()) { 6162 // Convert the input element to the element type of the complex. 6163 QualType DestElementType = 6164 DestType->castAs<ComplexType>()->getElementType(); 6165 llvm::Value *ScalarVal = CGF.EmitScalarConversion( 6166 Val.getScalarVal(), SrcType, DestElementType, Loc); 6167 ComplexVal = CodeGenFunction::ComplexPairTy( 6168 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 6169 } else { 6170 assert(Val.isComplex() && "Must be a scalar or complex."); 6171 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 6172 QualType DestElementType = 6173 DestType->castAs<ComplexType>()->getElementType(); 6174 ComplexVal.first = CGF.EmitScalarConversion( 6175 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 6176 ComplexVal.second = CGF.EmitScalarConversion( 6177 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 6178 } 6179 return ComplexVal; 6180 } 6181 6182 static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, 6183 LValue LVal, RValue RVal) { 6184 if (LVal.isGlobalReg()) 6185 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 6186 else 6187 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); 6188 } 6189 6190 static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, 6191 llvm::AtomicOrdering AO, LValue LVal, 6192 SourceLocation Loc) { 6193 if (LVal.isGlobalReg()) 6194 return CGF.EmitLoadOfLValue(LVal, Loc); 6195 return CGF.EmitAtomicLoad( 6196 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), 6197 LVal.isVolatile()); 6198 } 6199 6200 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 6201 QualType RValTy, SourceLocation Loc) { 6202 switch (getEvaluationKind(LVal.getType())) { 6203 case TEK_Scalar: 6204 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 6205 *this, RVal, RValTy, LVal.getType(), Loc)), 6206 LVal); 6207 break; 6208 case TEK_Complex: 6209 EmitStoreOfComplex( 6210 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 6211 /*isInit=*/false); 6212 break; 6213 case TEK_Aggregate: 6214 llvm_unreachable("Must be a scalar or complex."); 6215 } 6216 } 6217 6218 static 
void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6219 const Expr *X, const Expr *V,
6220 SourceLocation Loc) {
6221 // v = x;
6222 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6223 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6224 LValue XLValue = CGF.EmitLValue(X);
6225 LValue VLValue = CGF.EmitLValue(V);
6226 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
6227 // OpenMP, 2.17.7, atomic Construct
6228 // If the read or capture clause is specified and the acquire, acq_rel, or
6229 // seq_cst clause is specified then the strong flush on exit from the atomic
6230 // operation is also an acquire flush.
6231 switch (AO) {
6232 case llvm::AtomicOrdering::Acquire:
6233 case llvm::AtomicOrdering::AcquireRelease:
6234 case llvm::AtomicOrdering::SequentiallyConsistent:
6235 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
6236 llvm::AtomicOrdering::Acquire);
6237 break;
6238 case llvm::AtomicOrdering::Monotonic:
6239 case llvm::AtomicOrdering::Release:
6240 break;
6241 case llvm::AtomicOrdering::NotAtomic:
6242 case llvm::AtomicOrdering::Unordered:
6243 llvm_unreachable("Unexpected ordering.");
6244 }
6245 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6246 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6247 }
6248
6249 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6250 llvm::AtomicOrdering AO, const Expr *X,
6251 const Expr *E, SourceLocation Loc) {
6252 // x = expr;
6253 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6254 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6255 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6256 // OpenMP, 2.17.7, atomic Construct
6257 // If the write, update, or capture clause is specified and the release,
6258 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6259 // the atomic operation is also a release flush.
6260 switch (AO) {
6261 case llvm::AtomicOrdering::Release:
6262 case llvm::AtomicOrdering::AcquireRelease:
6263 case llvm::AtomicOrdering::SequentiallyConsistent:
6264 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
6265 llvm::AtomicOrdering::Release);
6266 break;
6267 case llvm::AtomicOrdering::Acquire:
6268 case llvm::AtomicOrdering::Monotonic:
6269 break;
6270 case llvm::AtomicOrdering::NotAtomic:
6271 case llvm::AtomicOrdering::Unordered:
6272 llvm_unreachable("Unexpected ordering.");
6273 }
6274 }
6275
6276 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6277 RValue Update,
6278 BinaryOperatorKind BO,
6279 llvm::AtomicOrdering AO,
6280 bool IsXLHSInRHSPart) {
6281 ASTContext &Context = CGF.getContext();
6282 // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
6283 // for the 'x' expression is simple, and atomics are supported for the given
6284 // type on the target platform.
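// For example, '#pragma omp atomic update' on 'x += 1' with 'int x' can be
// emitted as a single 'atomicrmw add' instruction, whereas 'x *= 2' has no
// atomicrmw form and falls back to the compare-and-swap loop emitted by the
// caller (see EmitOMPAtomicSimpleUpdateExpr).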
6285 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() || 6286 (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 6287 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) || 6288 !Context.getTargetInfo().hasBuiltinAtomic( 6289 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 6290 return std::make_pair(false, RValue::get(nullptr)); 6291 6292 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) { 6293 if (T->isIntegerTy()) 6294 return true; 6295 6296 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub)) 6297 return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T)); 6298 6299 return false; 6300 }; 6301 6302 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) || 6303 !CheckAtomicSupport(X.getAddress().getElementType(), BO)) 6304 return std::make_pair(false, RValue::get(nullptr)); 6305 6306 bool IsInteger = X.getAddress().getElementType()->isIntegerTy(); 6307 llvm::AtomicRMWInst::BinOp RMWOp; 6308 switch (BO) { 6309 case BO_Add: 6310 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd; 6311 break; 6312 case BO_Sub: 6313 if (!IsXLHSInRHSPart) 6314 return std::make_pair(false, RValue::get(nullptr)); 6315 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub; 6316 break; 6317 case BO_And: 6318 RMWOp = llvm::AtomicRMWInst::And; 6319 break; 6320 case BO_Or: 6321 RMWOp = llvm::AtomicRMWInst::Or; 6322 break; 6323 case BO_Xor: 6324 RMWOp = llvm::AtomicRMWInst::Xor; 6325 break; 6326 case BO_LT: 6327 if (IsInteger) 6328 RMWOp = X.getType()->hasSignedIntegerRepresentation() 6329 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 6330 : llvm::AtomicRMWInst::Max) 6331 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 6332 : llvm::AtomicRMWInst::UMax); 6333 else 6334 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin 6335 : llvm::AtomicRMWInst::FMax; 6336 break; 6337 case BO_GT: 6338 if (IsInteger) 6339 RMWOp = X.getType()->hasSignedIntegerRepresentation() 6340 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 6341 : llvm::AtomicRMWInst::Min) 6342 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 6343 : llvm::AtomicRMWInst::UMin); 6344 else 6345 RMWOp = IsXLHSInRHSPart ? 
llvm::AtomicRMWInst::FMax
6346 : llvm::AtomicRMWInst::FMin;
6347 break;
6348 case BO_Assign:
6349 RMWOp = llvm::AtomicRMWInst::Xchg;
6350 break;
6351 case BO_Mul:
6352 case BO_Div:
6353 case BO_Rem:
6354 case BO_Shl:
6355 case BO_Shr:
6356 case BO_LAnd:
6357 case BO_LOr:
6358 return std::make_pair(false, RValue::get(nullptr));
6359 case BO_PtrMemD:
6360 case BO_PtrMemI:
6361 case BO_LE:
6362 case BO_GE:
6363 case BO_EQ:
6364 case BO_NE:
6365 case BO_Cmp:
6366 case BO_AddAssign:
6367 case BO_SubAssign:
6368 case BO_AndAssign:
6369 case BO_OrAssign:
6370 case BO_XorAssign:
6371 case BO_MulAssign:
6372 case BO_DivAssign:
6373 case BO_RemAssign:
6374 case BO_ShlAssign:
6375 case BO_ShrAssign:
6376 case BO_Comma:
6377 llvm_unreachable("Unsupported atomic update operation");
6378 }
6379 llvm::Value *UpdateVal = Update.getScalarVal();
6380 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
6381 if (IsInteger)
6382 UpdateVal = CGF.Builder.CreateIntCast(
6383 IC, X.getAddress().getElementType(),
6384 X.getType()->hasSignedIntegerRepresentation());
6385 else
6386 UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
6387 X.getAddress().getElementType());
6388 }
6389 llvm::AtomicRMWInst *Res =
6390 CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
6391 return std::make_pair(true, RValue::get(Res));
6392 }
6393
6394 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6395 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6396 llvm::AtomicOrdering AO, SourceLocation Loc,
6397 const llvm::function_ref<RValue(RValue)> CommonGen) {
6398 // Update expressions are allowed to have the following forms:
6399 // x binop= expr; -> xrval binop expr;
6400 // x++, ++x -> xrval + 1;
6401 // x--, --x -> xrval - 1;
6402 // x = x binop expr; -> xrval binop expr;
6403 // x = expr Op x; -> expr binop xrval;
6404 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6405 if (!Res.first) {
6406 if (X.isGlobalReg()) {
6407 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6408 // 'xrval'.
6409 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6410 } else {
6411 // Perform compare-and-swap procedure.
6412 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6413 }
6414 }
6415 return Res;
6416 }
6417
6418 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6419 llvm::AtomicOrdering AO, const Expr *X,
6420 const Expr *E, const Expr *UE,
6421 bool IsXLHSInRHSPart, SourceLocation Loc) {
6422 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6423 "Update expr in 'atomic update' must be a binary operator.");
6424 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6425 // Update expressions are allowed to have the following forms:
6426 // x binop= expr; -> xrval binop expr;
6427 // x++, ++x -> xrval + 1;
6428 // x--, --x -> xrval - 1;
6429 // x = x binop expr; -> xrval binop expr;
6430 // x = expr Op x; -> expr binop xrval;
6431 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6432 LValue XLValue = CGF.EmitLValue(X);
6433 RValue ExprRValue = CGF.EmitAnyExpr(E);
6434 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6435 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6436 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6437 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ?
RHS : LHS;
6438 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6439 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6440 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6441 return CGF.EmitAnyExpr(UE);
6442 };
6443 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6444 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6445 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6446 // OpenMP, 2.17.7, atomic Construct
6447 // If the write, update, or capture clause is specified and the release,
6448 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6449 // the atomic operation is also a release flush.
6450 switch (AO) {
6451 case llvm::AtomicOrdering::Release:
6452 case llvm::AtomicOrdering::AcquireRelease:
6453 case llvm::AtomicOrdering::SequentiallyConsistent:
6454 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc,
6455 llvm::AtomicOrdering::Release);
6456 break;
6457 case llvm::AtomicOrdering::Acquire:
6458 case llvm::AtomicOrdering::Monotonic:
6459 break;
6460 case llvm::AtomicOrdering::NotAtomic:
6461 case llvm::AtomicOrdering::Unordered:
6462 llvm_unreachable("Unexpected ordering.");
6463 }
6464 }
6465
6466 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6467 QualType SourceType, QualType ResType,
6468 SourceLocation Loc) {
6469 switch (CGF.getEvaluationKind(ResType)) {
6470 case TEK_Scalar:
6471 return RValue::get(
6472 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6473 case TEK_Complex: {
6474 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6475 return RValue::getComplex(Res.first, Res.second);
6476 }
6477 case TEK_Aggregate:
6478 break;
6479 }
6480 llvm_unreachable("Must be a scalar or complex.");
6481 }
6482
6483 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6484 llvm::AtomicOrdering AO,
6485 bool IsPostfixUpdate, const Expr *V,
6486 const Expr *X, const Expr *E,
6487 const Expr *UE, bool IsXLHSInRHSPart,
6488 SourceLocation Loc) {
6489 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6490 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6491 RValue NewVVal;
6492 LValue VLValue = CGF.EmitLValue(V);
6493 LValue XLValue = CGF.EmitLValue(X);
6494 RValue ExprRValue = CGF.EmitAnyExpr(E);
6495 QualType NewVValType;
6496 if (UE) {
6497 // 'x' is updated with some additional value.
6498 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6499 "Update expr in 'atomic capture' must be a binary operator.");
6500 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6501 // Update expressions are allowed to have the following forms:
6502 // x binop= expr; -> xrval binop expr;
6503 // x++, ++x -> xrval + 1;
6504 // x--, --x -> xrval - 1;
6505 // x = x binop expr; -> xrval binop expr;
6506 // x = expr Op x; -> expr binop xrval;
6507 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6508 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6509 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6510 NewVValType = XRValExpr->getType();
6511 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ?
RHS : LHS; 6512 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 6513 IsPostfixUpdate](RValue XRValue) { 6514 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 6515 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 6516 RValue Res = CGF.EmitAnyExpr(UE); 6517 NewVVal = IsPostfixUpdate ? XRValue : Res; 6518 return Res; 6519 }; 6520 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 6521 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 6522 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); 6523 if (Res.first) { 6524 // 'atomicrmw' instruction was generated. 6525 if (IsPostfixUpdate) { 6526 // Use old value from 'atomicrmw'. 6527 NewVVal = Res.second; 6528 } else { 6529 // 'atomicrmw' does not provide new value, so evaluate it using old 6530 // value of 'x'. 6531 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 6532 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 6533 NewVVal = CGF.EmitAnyExpr(UE); 6534 } 6535 } 6536 } else { 6537 // 'x' is simply rewritten with some 'expr'. 6538 NewVValType = X->getType().getNonReferenceType(); 6539 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 6540 X->getType().getNonReferenceType(), Loc); 6541 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { 6542 NewVVal = XRValue; 6543 return ExprRValue; 6544 }; 6545 // Try to perform atomicrmw xchg, otherwise simple exchange. 6546 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 6547 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 6548 Loc, Gen); 6549 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); 6550 if (Res.first) { 6551 // 'atomicrmw' instruction was generated. 6552 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 6553 } 6554 } 6555 // Emit post-update store to 'v' of old/new 'x' value. 6556 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 6557 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); 6558 // OpenMP 5.1 removes the required flush for capture clause. 6559 if (CGF.CGM.getLangOpts().OpenMP < 51) { 6560 // OpenMP, 2.17.7, atomic Construct 6561 // If the write, update, or capture clause is specified and the release, 6562 // acq_rel, or seq_cst clause is specified then the strong flush on entry to 6563 // the atomic operation is also a release flush. 6564 // If the read or capture clause is specified and the acquire, acq_rel, or 6565 // seq_cst clause is specified then the strong flush on exit from the atomic 6566 // operation is also an acquire flush. 
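// E.g., with seq_cst both rules apply, and they are satisfied below with a
// single flush using acquire-release ordering.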
6567 switch (AO) { 6568 case llvm::AtomicOrdering::Release: 6569 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc, 6570 llvm::AtomicOrdering::Release); 6571 break; 6572 case llvm::AtomicOrdering::Acquire: 6573 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, {}, Loc, 6574 llvm::AtomicOrdering::Acquire); 6575 break; 6576 case llvm::AtomicOrdering::AcquireRelease: 6577 case llvm::AtomicOrdering::SequentiallyConsistent: 6578 CGF.CGM.getOpenMPRuntime().emitFlush( 6579 CGF, {}, Loc, llvm::AtomicOrdering::AcquireRelease); 6580 break; 6581 case llvm::AtomicOrdering::Monotonic: 6582 break; 6583 case llvm::AtomicOrdering::NotAtomic: 6584 case llvm::AtomicOrdering::Unordered: 6585 llvm_unreachable("Unexpected ordering."); 6586 } 6587 } 6588 } 6589 6590 static void emitOMPAtomicCompareExpr( 6591 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, 6592 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D, 6593 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, 6594 SourceLocation Loc) { 6595 llvm::OpenMPIRBuilder &OMPBuilder = 6596 CGF.CGM.getOpenMPRuntime().getOMPBuilder(); 6597 6598 OMPAtomicCompareOp Op; 6599 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator"); 6600 switch (cast<BinaryOperator>(CE)->getOpcode()) { 6601 case BO_EQ: 6602 Op = OMPAtomicCompareOp::EQ; 6603 break; 6604 case BO_LT: 6605 Op = OMPAtomicCompareOp::MIN; 6606 break; 6607 case BO_GT: 6608 Op = OMPAtomicCompareOp::MAX; 6609 break; 6610 default: 6611 llvm_unreachable("unsupported atomic compare binary operator"); 6612 } 6613 6614 LValue XLVal = CGF.EmitLValue(X); 6615 Address XAddr = XLVal.getAddress(); 6616 6617 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) { 6618 if (X->getType() == E->getType()) 6619 return CGF.EmitScalarExpr(E); 6620 const Expr *NewE = E->IgnoreImplicitAsWritten(); 6621 llvm::Value *V = CGF.EmitScalarExpr(NewE); 6622 if (NewE->getType() == X->getType()) 6623 return V; 6624 return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc); 6625 }; 6626 6627 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E); 6628 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr; 6629 if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal)) 6630 EVal = CGF.Builder.CreateIntCast( 6631 CI, XLVal.getAddress().getElementType(), 6632 E->getType()->hasSignedIntegerRepresentation()); 6633 if (DVal) 6634 if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal)) 6635 DVal = CGF.Builder.CreateIntCast( 6636 CI, XLVal.getAddress().getElementType(), 6637 D->getType()->hasSignedIntegerRepresentation()); 6638 6639 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{ 6640 XAddr.emitRawPointer(CGF), XAddr.getElementType(), 6641 X->getType()->hasSignedIntegerRepresentation(), 6642 X->getType().isVolatileQualified()}; 6643 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal; 6644 if (V) { 6645 LValue LV = CGF.EmitLValue(V); 6646 Address Addr = LV.getAddress(); 6647 VOpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(), 6648 V->getType()->hasSignedIntegerRepresentation(), 6649 V->getType().isVolatileQualified()}; 6650 } 6651 if (R) { 6652 LValue LV = CGF.EmitLValue(R); 6653 Address Addr = LV.getAddress(); 6654 ROpVal = {Addr.emitRawPointer(CGF), Addr.getElementType(), 6655 R->getType()->hasSignedIntegerRepresentation(), 6656 R->getType().isVolatileQualified()}; 6657 } 6658 6659 if (FailAO == llvm::AtomicOrdering::NotAtomic) { 6660 // fail clause was not mentioned on the 6661 // "#pragma omp atomic compare" construct. 
CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6663 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6664 IsPostfixUpdate, IsFailOnly));
6665 } else
6666 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6667 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6668 IsPostfixUpdate, IsFailOnly, FailAO));
6669 }
6670
6671 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6672 llvm::AtomicOrdering AO,
6673 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6674 const Expr *X, const Expr *V, const Expr *R,
6675 const Expr *E, const Expr *UE, const Expr *D,
6676 const Expr *CE, bool IsXLHSInRHSPart,
6677 bool IsFailOnly, SourceLocation Loc) {
6678 switch (Kind) {
6679 case OMPC_read:
6680 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6681 break;
6682 case OMPC_write:
6683 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6684 break;
6685 case OMPC_unknown:
6686 case OMPC_update:
6687 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6688 break;
6689 case OMPC_capture:
6690 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6691 IsXLHSInRHSPart, Loc);
6692 break;
6693 case OMPC_compare: {
6694 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6695 IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6696 break;
6697 }
6698 default:
6699 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6700 }
6701 }
6702
6703 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6704 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6705 // Memory ordering for the 'fail' clause.
6706 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6707 bool MemOrderingSpecified = false;
6708 if (S.getSingleClause<OMPSeqCstClause>()) {
6709 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6710 MemOrderingSpecified = true;
6711 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6712 AO = llvm::AtomicOrdering::AcquireRelease;
6713 MemOrderingSpecified = true;
6714 } else if (S.getSingleClause<OMPAcquireClause>()) {
6715 AO = llvm::AtomicOrdering::Acquire;
6716 MemOrderingSpecified = true;
6717 } else if (S.getSingleClause<OMPReleaseClause>()) {
6718 AO = llvm::AtomicOrdering::Release;
6719 MemOrderingSpecified = true;
6720 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6721 AO = llvm::AtomicOrdering::Monotonic;
6722 MemOrderingSpecified = true;
6723 }
6724 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6725 OpenMPClauseKind Kind = OMPC_unknown;
6726 for (const OMPClause *C : S.clauses()) {
6727 // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
6728 // if it is first).
6729 OpenMPClauseKind K = C->getClauseKind();
6730 // TBD: codegen for the 'weak' clause is not implemented yet; emit nothing.
6731 if (K == OMPC_weak)
6732 return;
6733 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6734 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6735 continue;
6736 Kind = K;
6737 KindsEncountered.insert(K);
6738 }
6739 // We just need to correct Kind here. No need to set a bool saying it is
6740 // actually compare capture because we can tell from whether V and R are
6741 // nullptr.
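// E.g., '#pragma omp atomic compare capture' carries both clauses; Kind is
// folded to OMPC_compare below, and non-null 'v' (and 'r') expressions
// identify the capture form during emission.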
6742 if (KindsEncountered.contains(OMPC_compare) && 6743 KindsEncountered.contains(OMPC_capture)) 6744 Kind = OMPC_compare; 6745 if (!MemOrderingSpecified) { 6746 llvm::AtomicOrdering DefaultOrder = 6747 CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); 6748 if (DefaultOrder == llvm::AtomicOrdering::Monotonic || 6749 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || 6750 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && 6751 Kind == OMPC_capture)) { 6752 AO = DefaultOrder; 6753 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { 6754 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { 6755 AO = llvm::AtomicOrdering::Release; 6756 } else if (Kind == OMPC_read) { 6757 assert(Kind == OMPC_read && "Unexpected atomic kind."); 6758 AO = llvm::AtomicOrdering::Acquire; 6759 } 6760 } 6761 } 6762 6763 if (KindsEncountered.contains(OMPC_compare) && 6764 KindsEncountered.contains(OMPC_fail)) { 6765 Kind = OMPC_compare; 6766 const auto *FailClause = S.getSingleClause<OMPFailClause>(); 6767 if (FailClause) { 6768 OpenMPClauseKind FailParameter = FailClause->getFailParameter(); 6769 if (FailParameter == llvm::omp::OMPC_relaxed) 6770 FailAO = llvm::AtomicOrdering::Monotonic; 6771 else if (FailParameter == llvm::omp::OMPC_acquire) 6772 FailAO = llvm::AtomicOrdering::Acquire; 6773 else if (FailParameter == llvm::omp::OMPC_seq_cst) 6774 FailAO = llvm::AtomicOrdering::SequentiallyConsistent; 6775 } 6776 } 6777 6778 LexicalScope Scope(*this, S.getSourceRange()); 6779 EmitStopPoint(S.getAssociatedStmt()); 6780 emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(), 6781 S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(), 6782 S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(), 6783 S.isFailOnly(), S.getBeginLoc()); 6784 } 6785 6786 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 6787 const OMPExecutableDirective &S, 6788 const RegionCodeGenTy &CodeGen) { 6789 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 6790 CodeGenModule &CGM = CGF.CGM; 6791 6792 // On device emit this construct as inlined code. 6793 if (CGM.getLangOpts().OpenMPIsTargetDevice) { 6794 OMPLexicalScope Scope(CGF, S, OMPD_target); 6795 CGM.getOpenMPRuntime().emitInlinedDirective( 6796 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 6797 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 6798 }); 6799 return; 6800 } 6801 6802 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); 6803 llvm::Function *Fn = nullptr; 6804 llvm::Constant *FnID = nullptr; 6805 6806 const Expr *IfCond = nullptr; 6807 // Check for the at most one if clause associated with the target region. 6808 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 6809 if (C->getNameModifier() == OMPD_unknown || 6810 C->getNameModifier() == OMPD_target) { 6811 IfCond = C->getCondition(); 6812 break; 6813 } 6814 } 6815 6816 // Check if we have any device clause associated with the directive. 6817 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( 6818 nullptr, OMPC_DEVICE_unknown); 6819 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 6820 Device.setPointerAndInt(C->getDevice(), C->getModifier()); 6821 6822 // Check if we have an if clause whose conditional always evaluates to false 6823 // or if we do not have any targets specified. If so the target region is not 6824 // an offload entry point. 
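// For example, '#pragma omp target if(0)' constant-folds to false, so no
// offload entry is registered and the region only runs on the host.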
6825 bool IsOffloadEntry = true; 6826 if (IfCond) { 6827 bool Val; 6828 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 6829 IsOffloadEntry = false; 6830 } 6831 if (CGM.getLangOpts().OMPTargetTriples.empty()) 6832 IsOffloadEntry = false; 6833 6834 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { 6835 unsigned DiagID = CGM.getDiags().getCustomDiagID( 6836 DiagnosticsEngine::Error, 6837 "No offloading entry generated while offloading is mandatory."); 6838 CGM.getDiags().Report(DiagID); 6839 } 6840 6841 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 6842 StringRef ParentName; 6843 // In case we have Ctors/Dtors we use the complete type variant to produce 6844 // the mangling of the device outlined kernel. 6845 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 6846 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 6847 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 6848 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 6849 else 6850 ParentName = 6851 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 6852 6853 // Emit target region as a standalone region. 6854 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 6855 IsOffloadEntry, CodeGen); 6856 OMPLexicalScope Scope(CGF, S, OMPD_task); 6857 auto &&SizeEmitter = 6858 [IsOffloadEntry](CodeGenFunction &CGF, 6859 const OMPLoopDirective &D) -> llvm::Value * { 6860 if (IsOffloadEntry) { 6861 OMPLoopScope(CGF, D); 6862 // Emit calculation of the iterations count. 6863 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); 6864 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, 6865 /*isSigned=*/false); 6866 return NumIterations; 6867 } 6868 return nullptr; 6869 }; 6870 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 6871 SizeEmitter); 6872 } 6873 6874 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 6875 PrePostActionTy &Action) { 6876 Action.Enter(CGF); 6877 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6878 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 6879 CGF.EmitOMPPrivateClause(S, PrivateScope); 6880 (void)PrivateScope.Privatize(); 6881 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 6882 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 6883 6884 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 6885 CGF.EnsureInsertPoint(); 6886 } 6887 6888 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 6889 StringRef ParentName, 6890 const OMPTargetDirective &S) { 6891 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 6892 emitTargetRegion(CGF, S, Action); 6893 }; 6894 llvm::Function *Fn; 6895 llvm::Constant *Addr; 6896 // Emit target region as a standalone region. 
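// This entry point is used when compiling for the device: the outlined
// kernel is emitted and registered as an offload entry under a name derived
// from ParentName, so it matches the host-side emission.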
6897 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 6898 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 6899 assert(Fn && Addr && "Target device function emission failed."); 6900 } 6901 6902 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 6903 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 6904 emitTargetRegion(CGF, S, Action); 6905 }; 6906 emitCommonOMPTargetDirective(*this, S, CodeGen); 6907 } 6908 6909 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 6910 const OMPExecutableDirective &S, 6911 OpenMPDirectiveKind InnermostKind, 6912 const RegionCodeGenTy &CodeGen) { 6913 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 6914 llvm::Function *OutlinedFn = 6915 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 6916 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, 6917 CodeGen); 6918 6919 const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); 6920 const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); 6921 if (NT || TL) { 6922 const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr; 6923 const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr; 6924 6925 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 6926 S.getBeginLoc()); 6927 } 6928 6929 OMPTeamsScope Scope(CGF, S); 6930 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 6931 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 6932 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, 6933 CapturedVars); 6934 } 6935 6936 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 6937 // Emit teams region as a standalone region. 6938 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 6939 Action.Enter(CGF); 6940 OMPPrivateScope PrivateScope(CGF); 6941 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 6942 CGF.EmitOMPPrivateClause(S, PrivateScope); 6943 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 6944 (void)PrivateScope.Privatize(); 6945 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); 6946 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 6947 }; 6948 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 6949 emitPostUpdateForReductionClause(*this, S, 6950 [](CodeGenFunction &) { return nullptr; }); 6951 } 6952 6953 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 6954 const OMPTargetTeamsDirective &S) { 6955 auto *CS = S.getCapturedStmt(OMPD_teams); 6956 Action.Enter(CGF); 6957 // Emit teams region as a standalone region. 
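// The lambda below becomes the body of the teams outlined function;
// firstprivates, privates, and reductions are initialized inside it so
// that each team works on its own copies.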
6958 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 6959 Action.Enter(CGF); 6960 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6961 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 6962 CGF.EmitOMPPrivateClause(S, PrivateScope); 6963 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 6964 (void)PrivateScope.Privatize(); 6965 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 6966 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 6967 CGF.EmitStmt(CS->getCapturedStmt()); 6968 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 6969 }; 6970 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 6971 emitPostUpdateForReductionClause(CGF, S, 6972 [](CodeGenFunction &) { return nullptr; }); 6973 } 6974 6975 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 6976 CodeGenModule &CGM, StringRef ParentName, 6977 const OMPTargetTeamsDirective &S) { 6978 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 6979 emitTargetTeamsRegion(CGF, Action, S); 6980 }; 6981 llvm::Function *Fn; 6982 llvm::Constant *Addr; 6983 // Emit target region as a standalone region. 6984 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 6985 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 6986 assert(Fn && Addr && "Target device function emission failed."); 6987 } 6988 6989 void CodeGenFunction::EmitOMPTargetTeamsDirective( 6990 const OMPTargetTeamsDirective &S) { 6991 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 6992 emitTargetTeamsRegion(CGF, Action, S); 6993 }; 6994 emitCommonOMPTargetDirective(*this, S, CodeGen); 6995 } 6996 6997 static void 6998 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 6999 const OMPTargetTeamsDistributeDirective &S) { 7000 Action.Enter(CGF); 7001 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7002 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7003 }; 7004 7005 // Emit teams region as a standalone region. 7006 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7007 PrePostActionTy &Action) { 7008 Action.Enter(CGF); 7009 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7010 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7011 (void)PrivateScope.Privatize(); 7012 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7013 CodeGenDistribute); 7014 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7015 }; 7016 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); 7017 emitPostUpdateForReductionClause(CGF, S, 7018 [](CodeGenFunction &) { return nullptr; }); 7019 } 7020 7021 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 7022 CodeGenModule &CGM, StringRef ParentName, 7023 const OMPTargetTeamsDistributeDirective &S) { 7024 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7025 emitTargetTeamsDistributeRegion(CGF, Action, S); 7026 }; 7027 llvm::Function *Fn; 7028 llvm::Constant *Addr; 7029 // Emit target region as a standalone region. 
7030 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7031 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7032 assert(Fn && Addr && "Target device function emission failed."); 7033 } 7034 7035 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 7036 const OMPTargetTeamsDistributeDirective &S) { 7037 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7038 emitTargetTeamsDistributeRegion(CGF, Action, S); 7039 }; 7040 emitCommonOMPTargetDirective(*this, S, CodeGen); 7041 } 7042 7043 static void emitTargetTeamsDistributeSimdRegion( 7044 CodeGenFunction &CGF, PrePostActionTy &Action, 7045 const OMPTargetTeamsDistributeSimdDirective &S) { 7046 Action.Enter(CGF); 7047 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7048 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7049 }; 7050 7051 // Emit teams region as a standalone region. 7052 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7053 PrePostActionTy &Action) { 7054 Action.Enter(CGF); 7055 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7056 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7057 (void)PrivateScope.Privatize(); 7058 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7059 CodeGenDistribute); 7060 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7061 }; 7062 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); 7063 emitPostUpdateForReductionClause(CGF, S, 7064 [](CodeGenFunction &) { return nullptr; }); 7065 } 7066 7067 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 7068 CodeGenModule &CGM, StringRef ParentName, 7069 const OMPTargetTeamsDistributeSimdDirective &S) { 7070 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7071 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 7072 }; 7073 llvm::Function *Fn; 7074 llvm::Constant *Addr; 7075 // Emit target region as a standalone region. 7076 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7077 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7078 assert(Fn && Addr && "Target device function emission failed."); 7079 } 7080 7081 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 7082 const OMPTargetTeamsDistributeSimdDirective &S) { 7083 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7084 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 7085 }; 7086 emitCommonOMPTargetDirective(*this, S, CodeGen); 7087 } 7088 7089 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 7090 const OMPTeamsDistributeDirective &S) { 7091 7092 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7093 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7094 }; 7095 7096 // Emit teams region as a standalone region. 
7097 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7098 PrePostActionTy &Action) { 7099 Action.Enter(CGF); 7100 OMPPrivateScope PrivateScope(CGF); 7101 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7102 (void)PrivateScope.Privatize(); 7103 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7104 CodeGenDistribute); 7105 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7106 }; 7107 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 7108 emitPostUpdateForReductionClause(*this, S, 7109 [](CodeGenFunction &) { return nullptr; }); 7110 } 7111 7112 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 7113 const OMPTeamsDistributeSimdDirective &S) { 7114 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7115 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 7116 }; 7117 7118 // Emit teams region as a standalone region. 7119 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7120 PrePostActionTy &Action) { 7121 Action.Enter(CGF); 7122 OMPPrivateScope PrivateScope(CGF); 7123 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7124 (void)PrivateScope.Privatize(); 7125 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 7126 CodeGenDistribute); 7127 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7128 }; 7129 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); 7130 emitPostUpdateForReductionClause(*this, S, 7131 [](CodeGenFunction &) { return nullptr; }); 7132 } 7133 7134 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 7135 const OMPTeamsDistributeParallelForDirective &S) { 7136 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7137 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7138 S.getDistInc()); 7139 }; 7140 7141 // Emit teams region as a standalone region. 7142 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7143 PrePostActionTy &Action) { 7144 Action.Enter(CGF); 7145 OMPPrivateScope PrivateScope(CGF); 7146 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7147 (void)PrivateScope.Privatize(); 7148 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 7149 CodeGenDistribute); 7150 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7151 }; 7152 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 7153 emitPostUpdateForReductionClause(*this, S, 7154 [](CodeGenFunction &) { return nullptr; }); 7155 } 7156 7157 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 7158 const OMPTeamsDistributeParallelForSimdDirective &S) { 7159 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7160 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7161 S.getDistInc()); 7162 }; 7163 7164 // Emit teams region as a standalone region. 
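  // For illustration, a (hypothetical) source form lowered here:
  //   #pragma omp teams distribute parallel for simd
  //   for (int i = 0; i < n; ++i)
  //     a[i] += b[i];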
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
                              CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build the list of dependences and emit them.
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(*this);
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "Nowait clause must not be used without an init, destroy, or use "
         "clause in OMPInteropDirective.");

  auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
  if (!ItOMPInitClause.empty()) {
    // Look at the multiple init clauses.
    for (const OMPInitClause *C : ItOMPInitClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      llvm::omp::OMPInteropType InteropType =
          llvm::omp::OMPInteropType::Unknown;
      if (C->getIsTarget()) {
        InteropType = llvm::omp::OMPInteropType::Target;
      } else {
        assert(C->getIsTargetSync() &&
               "Expected interop-type target/targetsync");
        InteropType = llvm::omp::OMPInteropType::TargetSync;
      }
      OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
                                      Device, NumDependences, DependenceList,
                                      Data.HasNowaitClause);
    }
  }
  auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
  if (!ItOMPDestroyClause.empty()) {
    // Look at the multiple destroy clauses.
    for (const OMPDestroyClause *C : ItOMPDestroyClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                         NumDependences, DependenceList,
                                         Data.HasNowaitClause);
    }
  }
  auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
  if (!ItOMPUseClause.empty()) {
    // Look at the multiple use clauses.
    for (const OMPUseClause *C : ItOMPUseClause) {
      llvm::Value *InteropvarPtr =
          EmitLValue(C->getInteropVar()).getPointer(*this);
      OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                     NumDependences, DependenceList,
                                     Data.HasNowaitClause);
    }
  }
}

static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF,
const OMPTargetTeamsDistributeParallelForDirective &S, 7253 PrePostActionTy &Action) { 7254 Action.Enter(CGF); 7255 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7256 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7257 S.getDistInc()); 7258 }; 7259 7260 // Emit teams region as a standalone region. 7261 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7262 PrePostActionTy &Action) { 7263 Action.Enter(CGF); 7264 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7265 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7266 (void)PrivateScope.Privatize(); 7267 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7268 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7269 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7270 }; 7271 7272 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, 7273 CodeGenTeams); 7274 emitPostUpdateForReductionClause(CGF, S, 7275 [](CodeGenFunction &) { return nullptr; }); 7276 } 7277 7278 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 7279 CodeGenModule &CGM, StringRef ParentName, 7280 const OMPTargetTeamsDistributeParallelForDirective &S) { 7281 // Emit SPMD target teams distribute parallel for region as a standalone 7282 // region. 7283 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7284 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 7285 }; 7286 llvm::Function *Fn; 7287 llvm::Constant *Addr; 7288 // Emit target region as a standalone region. 7289 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7290 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7291 assert(Fn && Addr && "Target device function emission failed."); 7292 } 7293 7294 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 7295 const OMPTargetTeamsDistributeParallelForDirective &S) { 7296 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7297 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 7298 }; 7299 emitCommonOMPTargetDirective(*this, S, CodeGen); 7300 } 7301 7302 static void emitTargetTeamsDistributeParallelForSimdRegion( 7303 CodeGenFunction &CGF, 7304 const OMPTargetTeamsDistributeParallelForSimdDirective &S, 7305 PrePostActionTy &Action) { 7306 Action.Enter(CGF); 7307 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7308 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 7309 S.getDistInc()); 7310 }; 7311 7312 // Emit teams region as a standalone region. 7313 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 7314 PrePostActionTy &Action) { 7315 Action.Enter(CGF); 7316 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 7317 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 7318 (void)PrivateScope.Privatize(); 7319 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 7320 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 7321 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 7322 }; 7323 7324 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, 7325 CodeGenTeams); 7326 emitPostUpdateForReductionClause(CGF, S, 7327 [](CodeGenFunction &) { return nullptr; }); 7328 } 7329 7330 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 7331 CodeGenModule &CGM, StringRef ParentName, 7332 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 7333 // Emit SPMD target teams distribute parallel for simd region as a standalone 7334 // region. 
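  // For illustration, a (hypothetical) source form lowered here:
  //   #pragma omp target teams distribute parallel for simd
  //   for (int i = 0; i < n; ++i)
  //     a[i] = b[i] * c[i];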
7335 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7336 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 7337 }; 7338 llvm::Function *Fn; 7339 llvm::Constant *Addr; 7340 // Emit target region as a standalone region. 7341 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 7342 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 7343 assert(Fn && Addr && "Target device function emission failed."); 7344 } 7345 7346 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 7347 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 7348 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7349 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 7350 }; 7351 emitCommonOMPTargetDirective(*this, S, CodeGen); 7352 } 7353 7354 void CodeGenFunction::EmitOMPCancellationPointDirective( 7355 const OMPCancellationPointDirective &S) { 7356 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), 7357 S.getCancelRegion()); 7358 } 7359 7360 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 7361 const Expr *IfCond = nullptr; 7362 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 7363 if (C->getNameModifier() == OMPD_unknown || 7364 C->getNameModifier() == OMPD_cancel) { 7365 IfCond = C->getCondition(); 7366 break; 7367 } 7368 } 7369 if (CGM.getLangOpts().OpenMPIRBuilder) { 7370 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 7371 // TODO: This check is necessary as we only generate `omp parallel` through 7372 // the OpenMPIRBuilder for now. 7373 if (S.getCancelRegion() == OMPD_parallel || 7374 S.getCancelRegion() == OMPD_sections || 7375 S.getCancelRegion() == OMPD_section) { 7376 llvm::Value *IfCondition = nullptr; 7377 if (IfCond) 7378 IfCondition = EmitScalarExpr(IfCond, 7379 /*IgnoreResultAssign=*/true); 7380 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( 7381 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); 7382 return Builder.restoreIP(AfterIP); 7383 } 7384 } 7385 7386 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, 7387 S.getCancelRegion()); 7388 } 7389 7390 CodeGenFunction::JumpDest 7391 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 7392 if (Kind == OMPD_parallel || Kind == OMPD_task || 7393 Kind == OMPD_target_parallel || Kind == OMPD_taskloop || 7394 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) 7395 return ReturnBlock; 7396 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 7397 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 7398 Kind == OMPD_distribute_parallel_for || 7399 Kind == OMPD_target_parallel_for || 7400 Kind == OMPD_teams_distribute_parallel_for || 7401 Kind == OMPD_target_teams_distribute_parallel_for); 7402 return OMPCancelStack.getExitBlock(); 7403 } 7404 7405 void CodeGenFunction::EmitOMPUseDevicePtrClause( 7406 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, 7407 const llvm::DenseMap<const ValueDecl *, llvm::Value *> 7408 CaptureDeviceAddrMap) { 7409 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 7410 for (const Expr *OrigVarIt : C.varlist()) { 7411 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl()); 7412 if (!Processed.insert(OrigVD).second) 7413 continue; 7414 7415 // In order to identify the right initializer we need to match the 7416 // declaration used by the mapping logic. 
    // In some cases we may get an OMPCapturedExprDecl that refers to the
    // original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    // Register the device pointer as the address of the private variable.
    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD,
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}

static const VarDecl *getBaseDecl(const Expr *Ref) {
  const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<ArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
}

void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlist()) {
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get an
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable-length arrays we need to load the pointer to
    // get a correct mapping, since the pointer to the data was passed to the
    // runtime.
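    // E.g. (hypothetical), given 'use_device_addr(p)' with 'int *p', the
    // runtime provides the location holding the translated pointer, so the
    // pointer itself must be loaded here before it can be used.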
7491 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || 7492 MatchingVD->getType()->isArrayType()) { 7493 QualType PtrTy = getContext().getPointerType( 7494 OrigVD->getType().getNonReferenceType()); 7495 PrivAddr = 7496 EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)), 7497 PtrTy->castAs<PointerType>()); 7498 } 7499 7500 (void)PrivateScope.addPrivate(OrigVD, PrivAddr); 7501 } 7502 } 7503 7504 // Generate the instructions for '#pragma omp target data' directive. 7505 void CodeGenFunction::EmitOMPTargetDataDirective( 7506 const OMPTargetDataDirective &S) { 7507 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, 7508 /*SeparateBeginEndCalls=*/true); 7509 7510 // Create a pre/post action to signal the privatization of the device pointer. 7511 // This action can be replaced by the OpenMP runtime code generation to 7512 // deactivate privatization. 7513 bool PrivatizeDevicePointers = false; 7514 class DevicePointerPrivActionTy : public PrePostActionTy { 7515 bool &PrivatizeDevicePointers; 7516 7517 public: 7518 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 7519 : PrivatizeDevicePointers(PrivatizeDevicePointers) {} 7520 void Enter(CodeGenFunction &CGF) override { 7521 PrivatizeDevicePointers = true; 7522 } 7523 }; 7524 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 7525 7526 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { 7527 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7528 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 7529 }; 7530 7531 // Codegen that selects whether to generate the privatization code or not. 7532 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { 7533 RegionCodeGenTy RCG(InnermostCodeGen); 7534 PrivatizeDevicePointers = false; 7535 7536 // Call the pre-action to change the status of PrivatizeDevicePointers if 7537 // needed. 7538 Action.Enter(CGF); 7539 7540 if (PrivatizeDevicePointers) { 7541 OMPPrivateScope PrivateScope(CGF); 7542 // Emit all instances of the use_device_ptr clause. 7543 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 7544 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 7545 Info.CaptureDeviceAddrMap); 7546 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) 7547 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, 7548 Info.CaptureDeviceAddrMap); 7549 (void)PrivateScope.Privatize(); 7550 RCG(CGF); 7551 } else { 7552 // If we don't have target devices, don't bother emitting the data 7553 // mapping code. 7554 std::optional<OpenMPDirectiveKind> CaptureRegion; 7555 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 7556 // Emit helper decls of the use_device_ptr/use_device_addr clauses. 7557 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 7558 for (const Expr *E : C->varlist()) { 7559 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); 7560 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 7561 CGF.EmitVarDecl(*OED); 7562 } 7563 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) 7564 for (const Expr *E : C->varlist()) { 7565 const Decl *D = getBaseDecl(E); 7566 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 7567 CGF.EmitVarDecl(*OED); 7568 } 7569 } else { 7570 CaptureRegion = OMPD_unknown; 7571 } 7572 7573 OMPLexicalScope Scope(CGF, S, CaptureRegion); 7574 RCG(CGF); 7575 } 7576 }; 7577 7578 // Forward the provided action to the privatization codegen. 
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
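  // For illustration, a (hypothetical) source form lowered here:
  //   #pragma omp target parallel
  //   { work(); }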
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop helper variable (e.g., lower/upper bound) to the address of the
/// corresponding captured implicit parameter.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
}

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
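  // A tied task may be resumed only by the thread that first ran it, which is
  // the default; taskloop tasks are currently always emitted as tied.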
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
    Data.HasModifier = Clause->getModifier() == OMPC_GRAINSIZE_strict;
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
    Data.HasModifier = Clause->getModifier() == OMPC_NUMTASKS_strict;
  }

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iteration count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iteration count.
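      // E.g. (hypothetical), for 'for (i = 0; i < n; i += 2)' this computes
      // roughly (n + 1) / 2 and stores it in the variable emitted above.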
7890 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 7891 } 7892 7893 { 7894 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); 7895 emitCommonSimdLoop( 7896 CGF, S, 7897 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 7898 if (isOpenMPSimdDirective(S.getDirectiveKind())) 7899 CGF.EmitOMPSimdInit(S); 7900 }, 7901 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 7902 CGF.EmitOMPInnerLoop( 7903 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 7904 [&S](CodeGenFunction &CGF) { 7905 emitOMPLoopBodyWithStopPoint(CGF, S, 7906 CodeGenFunction::JumpDest()); 7907 }, 7908 [](CodeGenFunction &) {}); 7909 }); 7910 } 7911 // Emit: if (PreCond) - end. 7912 if (ContBlock) { 7913 CGF.EmitBranch(ContBlock); 7914 CGF.EmitBlock(ContBlock, true); 7915 } 7916 // Emit final copy of the lastprivate variables if IsLastIter != 0. 7917 if (HasLastprivateClause) { 7918 CGF.EmitOMPLastprivateClauseFinal( 7919 S, isOpenMPSimdDirective(S.getDirectiveKind()), 7920 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 7921 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 7922 (*LIP)->getType(), S.getBeginLoc()))); 7923 } 7924 LoopScope.restoreMap(); 7925 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { 7926 return CGF.Builder.CreateIsNotNull( 7927 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 7928 (*LIP)->getType(), S.getBeginLoc())); 7929 }); 7930 }; 7931 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 7932 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 7933 const OMPTaskDataTy &Data) { 7934 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, 7935 &Data](CodeGenFunction &CGF, PrePostActionTy &) { 7936 OMPLoopScope PreInitScope(CGF, S); 7937 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, 7938 OutlinedFn, SharedsTy, 7939 CapturedStruct, IfCond, Data); 7940 }; 7941 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 7942 CodeGen); 7943 }; 7944 if (Data.Nogroup) { 7945 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); 7946 } else { 7947 CGM.getOpenMPRuntime().emitTaskgroupRegion( 7948 *this, 7949 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 7950 PrePostActionTy &Action) { 7951 Action.Enter(CGF); 7952 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, 7953 Data); 7954 }, 7955 S.getBeginLoc()); 7956 } 7957 } 7958 7959 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 7960 auto LPCRegion = 7961 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7962 EmitOMPTaskLoopBasedDirective(S); 7963 } 7964 7965 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 7966 const OMPTaskLoopSimdDirective &S) { 7967 auto LPCRegion = 7968 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7969 OMPLexicalScope Scope(*this, S); 7970 EmitOMPTaskLoopBasedDirective(S); 7971 } 7972 7973 void CodeGenFunction::EmitOMPMasterTaskLoopDirective( 7974 const OMPMasterTaskLoopDirective &S) { 7975 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7976 Action.Enter(CGF); 7977 EmitOMPTaskLoopBasedDirective(S); 7978 }; 7979 auto LPCRegion = 7980 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7981 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); 7982 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 7983 } 7984 7985 void CodeGenFunction::EmitOMPMaskedTaskLoopDirective( 7986 const OMPMaskedTaskLoopDirective &S) { 7987 auto &&CodeGen = 
[this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 7988 Action.Enter(CGF); 7989 EmitOMPTaskLoopBasedDirective(S); 7990 }; 7991 auto LPCRegion = 7992 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 7993 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); 7994 CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc()); 7995 } 7996 7997 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( 7998 const OMPMasterTaskLoopSimdDirective &S) { 7999 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8000 Action.Enter(CGF); 8001 EmitOMPTaskLoopBasedDirective(S); 8002 }; 8003 auto LPCRegion = 8004 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8005 OMPLexicalScope Scope(*this, S); 8006 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 8007 } 8008 8009 void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective( 8010 const OMPMaskedTaskLoopSimdDirective &S) { 8011 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8012 Action.Enter(CGF); 8013 EmitOMPTaskLoopBasedDirective(S); 8014 }; 8015 auto LPCRegion = 8016 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8017 OMPLexicalScope Scope(*this, S); 8018 CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc()); 8019 } 8020 8021 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( 8022 const OMPParallelMasterTaskLoopDirective &S) { 8023 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8024 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 8025 PrePostActionTy &Action) { 8026 Action.Enter(CGF); 8027 CGF.EmitOMPTaskLoopBasedDirective(S); 8028 }; 8029 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 8030 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 8031 S.getBeginLoc()); 8032 }; 8033 auto LPCRegion = 8034 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8035 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, 8036 emitEmptyBoundParameters); 8037 } 8038 8039 void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective( 8040 const OMPParallelMaskedTaskLoopDirective &S) { 8041 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8042 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 8043 PrePostActionTy &Action) { 8044 Action.Enter(CGF); 8045 CGF.EmitOMPTaskLoopBasedDirective(S); 8046 }; 8047 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 8048 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen, 8049 S.getBeginLoc()); 8050 }; 8051 auto LPCRegion = 8052 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8053 emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen, 8054 emitEmptyBoundParameters); 8055 } 8056 8057 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( 8058 const OMPParallelMasterTaskLoopSimdDirective &S) { 8059 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8060 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 8061 PrePostActionTy &Action) { 8062 Action.Enter(CGF); 8063 CGF.EmitOMPTaskLoopBasedDirective(S); 8064 }; 8065 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); 8066 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 8067 S.getBeginLoc()); 8068 }; 8069 auto LPCRegion = 8070 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 8071 emitCommonOMPParallelDirective(*this, S, 
OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
    const OMPParallelMaskedTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Always expect a bind clause on the loop directive. If it wasn't
  // in the source, it should have been added in Sema.

  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
  if (const auto *C = S.getSingleClause<OMPBindClause>())
    BindKind = C->getBindKind();

  switch (BindKind) {
  case OMPC_BIND_parallel: // for
    return emitOMPForDirective(S, *this, CGM, /*HasCancel=*/false);
  case OMPC_BIND_teams: // distribute
    return emitOMPDistributeDirective(S, *this, CGM);
  case OMPC_BIND_thread: // simd
    return emitOMPSimdDirective(S, *this, CGM);
  case OMPC_BIND_unknown:
    break;
  }

  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the loop iteration variable.
    const Stmt *CS =
        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}

void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit the combined directive as if its constituent constructs are
  // 'parallel' and 'for'.
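  // For illustration, a (hypothetical) source form lowered here:
  //   #pragma omp parallel loop
  //   for (int i = 0; i < n; ++i)
  //     body(i);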
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and
  // 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

#ifndef NDEBUG
static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
                                             std::string StatusMsg,
                                             const OMPExecutableDirective &D) {
  bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
  if (IsDevice)
    StatusMsg += ": DEVICE";
  else
    StatusMsg += ": HOST";
  SourceLocation L = D.getBeginLoc();
  auto &SM = CGF.getContext().getSourceManager();
  PresumedLoc PLoc = SM.getPresumedLoc(L);
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
  unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
  // Guard against streaming a null file name.
  llvm::dbgs() << StatusMsg << ": " << (FileName ? FileName : "<unknown>")
               << ": " << LineNo << "\n";
}
#endif

static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
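  // For illustration, a (hypothetical) source form lowered along this path:
  //   #pragma omp target teams loop
  //   for (int i = 0; i < n; ++i)
  //     a[i] = f(i);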
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target teams loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
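  // As in the other device-function emitters, /*IsOffloadEntry=*/true
  // registers the outlined function as an offload entry.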
8300 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 8301 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 8302 assert(Fn && Addr && 8303 "Target device function emission failed for 'target teams loop'."); 8304 } 8305 8306 static void emitTargetParallelGenericLoopRegion( 8307 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, 8308 PrePostActionTy &Action) { 8309 Action.Enter(CGF); 8310 // Emit as 'parallel for'. 8311 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8312 Action.Enter(CGF); 8313 CodeGenFunction::OMPCancelStackRAII CancelRegion( 8314 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); 8315 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 8316 emitDispatchForLoopBounds); 8317 }; 8318 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 8319 emitEmptyBoundParameters); 8320 } 8321 8322 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 8323 CodeGenModule &CGM, StringRef ParentName, 8324 const OMPTargetParallelGenericLoopDirective &S) { 8325 // Emit target parallel loop region as a standalone region. 8326 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8327 emitTargetParallelGenericLoopRegion(CGF, S, Action); 8328 }; 8329 llvm::Function *Fn; 8330 llvm::Constant *Addr; 8331 // Emit target region as a standalone region. 8332 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 8333 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 8334 assert(Fn && Addr && "Target device function emission failed."); 8335 } 8336 8337 /// Emit combined directive 'target parallel loop' as if its constituent 8338 /// constructs are 'target', 'parallel', and 'for'. 8339 void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( 8340 const OMPTargetParallelGenericLoopDirective &S) { 8341 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 8342 emitTargetParallelGenericLoopRegion(CGF, S, Action); 8343 }; 8344 emitCommonOMPTargetDirective(*this, S, CodeGen); 8345 } 8346 8347 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 8348 const OMPExecutableDirective &D) { 8349 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { 8350 EmitOMPScanDirective(*SD); 8351 return; 8352 } 8353 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 8354 return; 8355 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 8356 OMPPrivateScope GlobalsScope(CGF); 8357 if (isOpenMPTaskingDirective(D.getDirectiveKind())) { 8358 // Capture global firstprivates to avoid crash. 
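      // Without this mapping, a global listed in 'firstprivate' would have no
      // entry in LocalDeclMap and later lookups during task codegen could
      // fail.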
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlist()) {
          // Use dyn_cast so non-DeclRefExpr list items are skipped rather
          // than asserting.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll ||
      D.getDirectiveKind() == OMPD_assume) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}

void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
  EmitStmt(S.getAssociatedStmt());
}