1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This contains code to emit OpenMP nodes as LLVM code. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCleanup.h" 14 #include "CGOpenMPRuntime.h" 15 #include "CodeGenFunction.h" 16 #include "CodeGenModule.h" 17 #include "TargetInfo.h" 18 #include "clang/AST/Stmt.h" 19 #include "clang/AST/StmtOpenMP.h" 20 #include "clang/AST/DeclOpenMP.h" 21 using namespace clang; 22 using namespace CodeGen; 23 24 namespace { 25 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 26 /// for captured expressions. 27 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 28 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 29 for (const auto *C : S.clauses()) { 30 if (const auto *CPI = OMPClauseWithPreInit::get(C)) { 31 if (const auto *PreInit = 32 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 33 for (const auto *I : PreInit->decls()) { 34 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 35 CGF.EmitVarDecl(cast<VarDecl>(*I)); 36 } else { 37 CodeGenFunction::AutoVarEmission Emission = 38 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 39 CGF.EmitAutoVarCleanups(Emission); 40 } 41 } 42 } 43 } 44 } 45 } 46 CodeGenFunction::OMPPrivateScope InlinedShareds; 47 48 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 49 return CGF.LambdaCaptureFields.lookup(VD) || 50 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 51 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); 52 } 53 54 public: 55 OMPLexicalScope( 56 CodeGenFunction &CGF, const OMPExecutableDirective &S, 57 const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None, 58 const bool EmitPreInitStmt = true) 59 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 60 InlinedShareds(CGF) { 61 if (EmitPreInitStmt) 62 emitPreInitStmt(CGF, S); 63 if (!CapturedRegion.hasValue()) 64 return; 65 assert(S.hasAssociatedStmt() && 66 "Expected associated statement for inlined directive."); 67 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); 68 for (const auto &C : CS->captures()) { 69 if (C.capturesVariable() || C.capturesVariableByCopy()) { 70 auto *VD = C.getCapturedVar(); 71 assert(VD == VD->getCanonicalDecl() && 72 "Canonical decl must be captured."); 73 DeclRefExpr DRE( 74 CGF.getContext(), const_cast<VarDecl *>(VD), 75 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && 76 InlinedShareds.isGlobalVarCaptured(VD)), 77 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); 78 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 79 return CGF.EmitLValue(&DRE).getAddress(); 80 }); 81 } 82 } 83 (void)InlinedShareds.Privatize(); 84 } 85 }; 86 87 /// Lexical scope for OpenMP parallel construct, that handles correct codegen 88 /// for captured expressions. 89 class OMPParallelScope final : public OMPLexicalScope { 90 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 91 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 92 return !(isOpenMPTargetExecutionDirective(Kind) || 93 isOpenMPLoopBoundSharingDirective(Kind)) && 94 isOpenMPParallelDirective(Kind); 95 } 96 97 public: 98 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 99 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, 100 EmitPreInitStmt(S)) {} 101 }; 102 103 /// Lexical scope for OpenMP teams construct, that handles correct codegen 104 /// for captured expressions. 105 class OMPTeamsScope final : public OMPLexicalScope { 106 bool EmitPreInitStmt(const OMPExecutableDirective &S) { 107 OpenMPDirectiveKind Kind = S.getDirectiveKind(); 108 return !isOpenMPTargetExecutionDirective(Kind) && 109 isOpenMPTeamsDirective(Kind); 110 } 111 112 public: 113 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 114 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None, 115 EmitPreInitStmt(S)) {} 116 }; 117 118 /// Private scope for OpenMP loop-based directives, that supports capturing 119 /// of used expression from loop statement. 120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 121 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 122 CodeGenFunction::OMPMapVars PreCondVars; 123 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 124 for (const auto *E : S.counters()) { 125 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 126 EmittedAsPrivate.insert(VD->getCanonicalDecl()); 127 (void)PreCondVars.setVarAddr( 128 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); 129 } 130 // Mark private vars as undefs. 131 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 132 for (const Expr *IRef : C->varlists()) { 133 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 134 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 135 (void)PreCondVars.setVarAddr( 136 CGF, OrigVD, 137 Address(llvm::UndefValue::get( 138 CGF.ConvertTypeForMem(CGF.getContext().getPointerType( 139 OrigVD->getType().getNonReferenceType()))), 140 CGF.getContext().getDeclAlign(OrigVD))); 141 } 142 } 143 } 144 (void)PreCondVars.apply(CGF); 145 // Emit init, __range and __end variables for C++ range loops. 146 const Stmt *Body = 147 S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 148 for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { 149 Body = Body->IgnoreContainers(); 150 if (auto *For = dyn_cast<ForStmt>(Body)) { 151 Body = For->getBody(); 152 } else { 153 assert(isa<CXXForRangeStmt>(Body) && 154 "Expected canonical for loop or range-based for loop."); 155 auto *CXXFor = cast<CXXForRangeStmt>(Body); 156 if (const Stmt *Init = CXXFor->getInit()) 157 CGF.EmitStmt(Init); 158 CGF.EmitStmt(CXXFor->getRangeStmt()); 159 CGF.EmitStmt(CXXFor->getEndStmt()); 160 Body = CXXFor->getBody(); 161 } 162 } 163 if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { 164 for (const auto *I : PreInits->decls()) 165 CGF.EmitVarDecl(cast<VarDecl>(*I)); 166 } 167 PreCondVars.restore(CGF); 168 } 169 170 public: 171 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 172 : CodeGenFunction::RunCleanupsScope(CGF) { 173 emitPreInitStmt(CGF, S); 174 } 175 }; 176 177 class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { 178 CodeGenFunction::OMPPrivateScope InlinedShareds; 179 180 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { 181 return CGF.LambdaCaptureFields.lookup(VD) || 182 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || 183 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && 184 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); 185 } 186 187 public: 188 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) 189 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), 190 InlinedShareds(CGF) { 191 for (const auto *C : S.clauses()) { 192 if (const auto *CPI = OMPClauseWithPreInit::get(C)) { 193 if (const auto *PreInit = 194 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { 195 for (const auto *I : PreInit->decls()) { 196 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 197 CGF.EmitVarDecl(cast<VarDecl>(*I)); 198 } else { 199 CodeGenFunction::AutoVarEmission Emission = 200 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 201 CGF.EmitAutoVarCleanups(Emission); 202 } 203 } 204 } 205 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) { 206 for (const Expr *E : UDP->varlists()) { 207 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); 208 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 209 CGF.EmitVarDecl(*OED); 210 } 211 } 212 } 213 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 214 CGF.EmitOMPPrivateClause(S, InlinedShareds); 215 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { 216 if (const Expr *E = TG->getReductionRef()) 217 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); 218 } 219 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); 220 while (CS) { 221 for (auto &C : CS->captures()) { 222 if (C.capturesVariable() || C.capturesVariableByCopy()) { 223 auto *VD = C.getCapturedVar(); 224 assert(VD == VD->getCanonicalDecl() && 225 "Canonical decl must be captured."); 226 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 227 isCapturedVar(CGF, VD) || 228 (CGF.CapturedStmtInfo && 229 InlinedShareds.isGlobalVarCaptured(VD)), 230 VD->getType().getNonReferenceType(), VK_LValue, 231 C.getLocation()); 232 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { 233 return CGF.EmitLValue(&DRE).getAddress(); 234 }); 235 } 236 } 237 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt()); 238 } 239 (void)InlinedShareds.Privatize(); 240 } 241 }; 242 243 } // namespace 244 245 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 246 const OMPExecutableDirective &S, 247 const RegionCodeGenTy &CodeGen); 248 249 LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { 250 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { 251 if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { 252 OrigVD = OrigVD->getCanonicalDecl(); 253 bool IsCaptured = 254 LambdaCaptureFields.lookup(OrigVD) || 255 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || 256 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); 257 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, 258 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); 259 return EmitLValue(&DRE); 260 } 261 } 262 return EmitLValue(E); 263 } 264 265 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { 266 ASTContext &C = getContext(); 267 llvm::Value *Size = nullptr; 268 auto SizeInChars = C.getTypeSizeInChars(Ty); 269 if (SizeInChars.isZero()) { 270 // getTypeSizeInChars() returns 0 for a VLA. 271 while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { 272 VlaSizePair VlaSize = getVLASize(VAT); 273 Ty = VlaSize.Type; 274 Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) 275 : VlaSize.NumElts; 276 } 277 SizeInChars = C.getTypeSizeInChars(Ty); 278 if (SizeInChars.isZero()) 279 return llvm::ConstantInt::get(SizeTy, /*V=*/0); 280 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 281 } 282 return CGM.getSize(SizeInChars); 283 } 284 285 void CodeGenFunction::GenerateOpenMPCapturedVars( 286 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 287 const RecordDecl *RD = S.getCapturedRecordDecl(); 288 auto CurField = RD->field_begin(); 289 auto CurCap = S.captures().begin(); 290 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), 291 E = S.capture_init_end(); 292 I != E; ++I, ++CurField, ++CurCap) { 293 if (CurField->hasCapturedVLAType()) { 294 const VariableArrayType *VAT = CurField->getCapturedVLAType(); 295 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; 296 CapturedVars.push_back(Val); 297 } else if (CurCap->capturesThis()) { 298 CapturedVars.push_back(CXXThisValue); 299 } else if (CurCap->capturesVariableByCopy()) { 300 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation()); 301 302 // If the field is not a pointer, we need to save the actual value 303 // and load it as a void pointer. 304 if (!CurField->getType()->isAnyPointerType()) { 305 ASTContext &Ctx = getContext(); 306 Address DstAddr = CreateMemTemp( 307 Ctx.getUIntPtrType(), 308 Twine(CurCap->getCapturedVar()->getName(), ".casted")); 309 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); 310 311 llvm::Value *SrcAddrVal = EmitScalarConversion( 312 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), 313 Ctx.getPointerType(CurField->getType()), CurCap->getLocation()); 314 LValue SrcLV = 315 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); 316 317 // Store the value using the source type pointer. 318 EmitStoreThroughLValue(RValue::get(CV), SrcLV); 319 320 // Load the value using the destination type pointer. 321 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation()); 322 } 323 CapturedVars.push_back(CV); 324 } else { 325 assert(CurCap->capturesVariable() && "Expected capture by reference."); 326 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); 327 } 328 } 329 } 330 331 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, 332 QualType DstType, StringRef Name, 333 LValue AddrLV) { 334 ASTContext &Ctx = CGF.getContext(); 335 336 llvm::Value *CastedPtr = CGF.EmitScalarConversion( 337 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(), 338 Ctx.getPointerType(DstType), Loc); 339 Address TmpAddr = 340 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) 341 .getAddress(); 342 return TmpAddr; 343 } 344 345 static QualType getCanonicalParamType(ASTContext &C, QualType T) { 346 if (T->isLValueReferenceType()) 347 return C.getLValueReferenceType( 348 getCanonicalParamType(C, T.getNonReferenceType()), 349 /*SpelledAsLValue=*/false); 350 if (T->isPointerType()) 351 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); 352 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { 353 if (const auto *VLA = dyn_cast<VariableArrayType>(A)) 354 return getCanonicalParamType(C, VLA->getElementType()); 355 if (!A->isVariablyModifiedType()) 356 return C.getCanonicalType(T); 357 } 358 return C.getCanonicalParamType(T); 359 } 360 361 namespace { 362 /// Contains required data for proper outlined function codegen. 363 struct FunctionOptions { 364 /// Captured statement for which the function is generated. 365 const CapturedStmt *S = nullptr; 366 /// true if cast to/from UIntPtr is required for variables captured by 367 /// value. 368 const bool UIntPtrCastRequired = true; 369 /// true if only casted arguments must be registered as local args or VLA 370 /// sizes. 371 const bool RegisterCastedArgsOnly = false; 372 /// Name of the generated function. 373 const StringRef FunctionName; 374 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, 375 bool RegisterCastedArgsOnly, 376 StringRef FunctionName) 377 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), 378 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), 379 FunctionName(FunctionName) {} 380 }; 381 } 382 383 static llvm::Function *emitOutlinedFunctionPrologue( 384 CodeGenFunction &CGF, FunctionArgList &Args, 385 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> 386 &LocalAddrs, 387 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> 388 &VLASizes, 389 llvm::Value *&CXXThisValue, const FunctionOptions &FO) { 390 const CapturedDecl *CD = FO.S->getCapturedDecl(); 391 const RecordDecl *RD = FO.S->getCapturedRecordDecl(); 392 assert(CD->hasBody() && "missing CapturedDecl body"); 393 394 CXXThisValue = nullptr; 395 // Build the argument list. 396 CodeGenModule &CGM = CGF.CGM; 397 ASTContext &Ctx = CGM.getContext(); 398 FunctionArgList TargetArgs; 399 Args.append(CD->param_begin(), 400 std::next(CD->param_begin(), CD->getContextParamPosition())); 401 TargetArgs.append( 402 CD->param_begin(), 403 std::next(CD->param_begin(), CD->getContextParamPosition())); 404 auto I = FO.S->captures().begin(); 405 FunctionDecl *DebugFunctionDecl = nullptr; 406 if (!FO.UIntPtrCastRequired) { 407 FunctionProtoType::ExtProtoInfo EPI; 408 QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); 409 DebugFunctionDecl = FunctionDecl::Create( 410 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), 411 SourceLocation(), DeclarationName(), FunctionTy, 412 Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, 413 /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); 414 } 415 for (const FieldDecl *FD : RD->fields()) { 416 QualType ArgType = FD->getType(); 417 IdentifierInfo *II = nullptr; 418 VarDecl *CapVar = nullptr; 419 420 // If this is a capture by copy and the type is not a pointer, the outlined 421 // function argument type should be uintptr and the value properly casted to 422 // uintptr. This is necessary given that the runtime library is only able to 423 // deal with pointers. We can pass in the same way the VLA type sizes to the 424 // outlined function. 425 if (FO.UIntPtrCastRequired && 426 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || 427 I->capturesVariableArrayType())) 428 ArgType = Ctx.getUIntPtrType(); 429 430 if (I->capturesVariable() || I->capturesVariableByCopy()) { 431 CapVar = I->getCapturedVar(); 432 II = CapVar->getIdentifier(); 433 } else if (I->capturesThis()) { 434 II = &Ctx.Idents.get("this"); 435 } else { 436 assert(I->capturesVariableArrayType()); 437 II = &Ctx.Idents.get("vla"); 438 } 439 if (ArgType->isVariablyModifiedType()) 440 ArgType = getCanonicalParamType(Ctx, ArgType); 441 VarDecl *Arg; 442 if (DebugFunctionDecl && (CapVar || I->capturesThis())) { 443 Arg = ParmVarDecl::Create( 444 Ctx, DebugFunctionDecl, 445 CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(), 446 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, 447 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); 448 } else { 449 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), 450 II, ArgType, ImplicitParamDecl::Other); 451 } 452 Args.emplace_back(Arg); 453 // Do not cast arguments if we emit function with non-original types. 454 TargetArgs.emplace_back( 455 FO.UIntPtrCastRequired 456 ? Arg 457 : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); 458 ++I; 459 } 460 Args.append( 461 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 462 CD->param_end()); 463 TargetArgs.append( 464 std::next(CD->param_begin(), CD->getContextParamPosition() + 1), 465 CD->param_end()); 466 467 // Create the function declaration. 468 const CGFunctionInfo &FuncInfo = 469 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); 470 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); 471 472 auto *F = 473 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, 474 FO.FunctionName, &CGM.getModule()); 475 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); 476 if (CD->isNothrow()) 477 F->setDoesNotThrow(); 478 F->setDoesNotRecurse(); 479 480 // Generate the function. 481 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, 482 FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); 483 unsigned Cnt = CD->getContextParamPosition(); 484 I = FO.S->captures().begin(); 485 for (const FieldDecl *FD : RD->fields()) { 486 // Do not map arguments if we emit function with non-original types. 487 Address LocalAddr(Address::invalid()); 488 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { 489 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], 490 TargetArgs[Cnt]); 491 } else { 492 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); 493 } 494 // If we are capturing a pointer by copy we don't need to do anything, just 495 // use the value that we get from the arguments. 496 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { 497 const VarDecl *CurVD = I->getCapturedVar(); 498 if (!FO.RegisterCastedArgsOnly) 499 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); 500 ++Cnt; 501 ++I; 502 continue; 503 } 504 505 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), 506 AlignmentSource::Decl); 507 if (FD->hasCapturedVLAType()) { 508 if (FO.UIntPtrCastRequired) { 509 ArgLVal = CGF.MakeAddrLValue( 510 castValueFromUintptr(CGF, I->getLocation(), FD->getType(), 511 Args[Cnt]->getName(), ArgLVal), 512 FD->getType(), AlignmentSource::Decl); 513 } 514 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); 515 const VariableArrayType *VAT = FD->getCapturedVLAType(); 516 VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg); 517 } else if (I->capturesVariable()) { 518 const VarDecl *Var = I->getCapturedVar(); 519 QualType VarTy = Var->getType(); 520 Address ArgAddr = ArgLVal.getAddress(); 521 if (ArgLVal.getType()->isLValueReferenceType()) { 522 ArgAddr = CGF.EmitLoadOfReference(ArgLVal); 523 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { 524 assert(ArgLVal.getType()->isPointerType()); 525 ArgAddr = CGF.EmitLoadOfPointer( 526 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); 527 } 528 if (!FO.RegisterCastedArgsOnly) { 529 LocalAddrs.insert( 530 {Args[Cnt], 531 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); 532 } 533 } else if (I->capturesVariableByCopy()) { 534 assert(!FD->getType()->isAnyPointerType() && 535 "Not expecting a captured pointer."); 536 const VarDecl *Var = I->getCapturedVar(); 537 LocalAddrs.insert({Args[Cnt], 538 {Var, FO.UIntPtrCastRequired 539 ? castValueFromUintptr( 540 CGF, I->getLocation(), FD->getType(), 541 Args[Cnt]->getName(), ArgLVal) 542 : ArgLVal.getAddress()}}); 543 } else { 544 // If 'this' is captured, load it into CXXThisValue. 545 assert(I->capturesThis()); 546 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); 547 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); 548 } 549 ++Cnt; 550 ++I; 551 } 552 553 return F; 554 } 555 556 llvm::Function * 557 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { 558 assert( 559 CapturedStmtInfo && 560 "CapturedStmtInfo should be set when generating the captured function"); 561 const CapturedDecl *CD = S.getCapturedDecl(); 562 // Build the argument list. 563 bool NeedWrapperFunction = 564 getDebugInfo() && 565 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; 566 FunctionArgList Args; 567 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; 568 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; 569 SmallString<256> Buffer; 570 llvm::raw_svector_ostream Out(Buffer); 571 Out << CapturedStmtInfo->getHelperName(); 572 if (NeedWrapperFunction) 573 Out << "_debug__"; 574 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, 575 Out.str()); 576 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, 577 VLASizes, CXXThisValue, FO); 578 CodeGenFunction::OMPPrivateScope LocalScope(*this); 579 for (const auto &LocalAddrPair : LocalAddrs) { 580 if (LocalAddrPair.second.first) { 581 LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() { 582 return LocalAddrPair.second.second; 583 }); 584 } 585 } 586 (void)LocalScope.Privatize(); 587 for (const auto &VLASizePair : VLASizes) 588 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; 589 PGO.assignRegionCounters(GlobalDecl(CD), F); 590 CapturedStmtInfo->EmitBody(*this, CD->getBody()); 591 (void)LocalScope.ForceCleanup(); 592 FinishFunction(CD->getBodyRBrace()); 593 if (!NeedWrapperFunction) 594 return F; 595 596 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, 597 /*RegisterCastedArgsOnly=*/true, 598 CapturedStmtInfo->getHelperName()); 599 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); 600 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; 601 Args.clear(); 602 LocalAddrs.clear(); 603 VLASizes.clear(); 604 llvm::Function *WrapperF = 605 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 606 WrapperCGF.CXXThisValue, WrapperFO); 607 llvm::SmallVector<llvm::Value *, 4> CallArgs; 608 for (const auto *Arg : Args) { 609 llvm::Value *CallArg; 610 auto I = LocalAddrs.find(Arg); 611 if (I != LocalAddrs.end()) { 612 LValue LV = WrapperCGF.MakeAddrLValue( 613 I->second.second, 614 I->second.first ? I->second.first->getType() : Arg->getType(), 615 AlignmentSource::Decl); 616 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); 617 } else { 618 auto EI = VLASizes.find(Arg); 619 if (EI != VLASizes.end()) { 620 CallArg = EI->second.second; 621 } else { 622 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), 623 Arg->getType(), 624 AlignmentSource::Decl); 625 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); 626 } 627 } 628 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); 629 } 630 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(), 631 F, CallArgs); 632 WrapperCGF.FinishFunction(); 633 return WrapperF; 634 } 635 636 //===----------------------------------------------------------------------===// 637 // OpenMP Directive Emission 638 //===----------------------------------------------------------------------===// 639 void CodeGenFunction::EmitOMPAggregateAssign( 640 Address DestAddr, Address SrcAddr, QualType OriginalType, 641 const llvm::function_ref<void(Address, Address)> CopyGen) { 642 // Perform element-by-element initialization. 643 QualType ElementTy; 644 645 // Drill down to the base element type on both arrays. 646 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); 647 llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr); 648 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 649 650 llvm::Value *SrcBegin = SrcAddr.getPointer(); 651 llvm::Value *DestBegin = DestAddr.getPointer(); 652 // Cast from pointer to array type to pointer to single element. 653 llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); 654 // The basic structure here is a while-do loop. 655 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); 656 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); 657 llvm::Value *IsEmpty = 658 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 659 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 660 661 // Enter the loop body, making that address the current address. 662 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); 663 EmitBlock(BodyBB); 664 665 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy); 666 667 llvm::PHINode *SrcElementPHI = 668 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 669 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 670 Address SrcElementCurrent = 671 Address(SrcElementPHI, 672 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 673 674 llvm::PHINode *DestElementPHI = 675 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 676 DestElementPHI->addIncoming(DestBegin, EntryBB); 677 Address DestElementCurrent = 678 Address(DestElementPHI, 679 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 680 681 // Emit copy. 682 CopyGen(DestElementCurrent, SrcElementCurrent); 683 684 // Shift the address forward by one element. 685 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( 686 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 687 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( 688 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 689 // Check whether we've reached the end. 690 llvm::Value *Done = 691 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 692 Builder.CreateCondBr(Done, DoneBB, BodyBB); 693 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 694 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock()); 695 696 // Done. 697 EmitBlock(DoneBB, /*IsFinished=*/true); 698 } 699 700 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, 701 Address SrcAddr, const VarDecl *DestVD, 702 const VarDecl *SrcVD, const Expr *Copy) { 703 if (OriginalType->isArrayType()) { 704 const auto *BO = dyn_cast<BinaryOperator>(Copy); 705 if (BO && BO->getOpcode() == BO_Assign) { 706 // Perform simple memcpy for simple copying. 707 LValue Dest = MakeAddrLValue(DestAddr, OriginalType); 708 LValue Src = MakeAddrLValue(SrcAddr, OriginalType); 709 EmitAggregateAssign(Dest, Src, OriginalType); 710 } else { 711 // For arrays with complex element types perform element by element 712 // copying. 713 EmitOMPAggregateAssign( 714 DestAddr, SrcAddr, OriginalType, 715 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { 716 // Working with the single array element, so have to remap 717 // destination and source variables to corresponding array 718 // elements. 719 CodeGenFunction::OMPPrivateScope Remap(*this); 720 Remap.addPrivate(DestVD, [DestElement]() { return DestElement; }); 721 Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; }); 722 (void)Remap.Privatize(); 723 EmitIgnoredExpr(Copy); 724 }); 725 } 726 } else { 727 // Remap pseudo source variable to private copy. 728 CodeGenFunction::OMPPrivateScope Remap(*this); 729 Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; }); 730 Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; }); 731 (void)Remap.Privatize(); 732 // Emit copying of the whole variable. 733 EmitIgnoredExpr(Copy); 734 } 735 } 736 737 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, 738 OMPPrivateScope &PrivateScope) { 739 if (!HaveInsertPoint()) 740 return false; 741 bool DeviceConstTarget = 742 getLangOpts().OpenMPIsDevice && 743 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 744 bool FirstprivateIsLastprivate = false; 745 llvm::DenseSet<const VarDecl *> Lastprivates; 746 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 747 for (const auto *D : C->varlists()) 748 Lastprivates.insert( 749 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); 750 } 751 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; 752 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 753 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 754 // Force emission of the firstprivate copy if the directive does not emit 755 // outlined function, like omp for, omp simd, omp distribute etc. 756 bool MustEmitFirstprivateCopy = 757 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; 758 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { 759 auto IRef = C->varlist_begin(); 760 auto InitsRef = C->inits().begin(); 761 for (const Expr *IInit : C->private_copies()) { 762 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 763 bool ThisFirstprivateIsLastprivate = 764 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; 765 const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); 766 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 767 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && 768 !FD->getType()->isReferenceType() && 769 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { 770 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); 771 ++IRef; 772 ++InitsRef; 773 continue; 774 } 775 // Do not emit copy for firstprivate constant variables in target regions, 776 // captured by reference. 777 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && 778 FD && FD->getType()->isReferenceType() && 779 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { 780 (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, 781 OrigVD); 782 ++IRef; 783 ++InitsRef; 784 continue; 785 } 786 FirstprivateIsLastprivate = 787 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; 788 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { 789 const auto *VDInit = 790 cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); 791 bool IsRegistered; 792 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 793 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, 794 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 795 LValue OriginalLVal; 796 if (!FD) { 797 // Check if the firstprivate variable is just a constant value. 798 ConstantEmission CE = tryEmitAsConstant(&DRE); 799 if (CE && !CE.isReference()) { 800 // Constant value, no need to create a copy. 801 ++IRef; 802 ++InitsRef; 803 continue; 804 } 805 if (CE && CE.isReference()) { 806 OriginalLVal = CE.getReferenceLValue(*this, &DRE); 807 } else { 808 assert(!CE && "Expected non-constant firstprivate."); 809 OriginalLVal = EmitLValue(&DRE); 810 } 811 } else { 812 OriginalLVal = EmitLValue(&DRE); 813 } 814 QualType Type = VD->getType(); 815 if (Type->isArrayType()) { 816 // Emit VarDecl with copy init for arrays. 817 // Get the address of the original variable captured in current 818 // captured region. 819 IsRegistered = PrivateScope.addPrivate( 820 OrigVD, [this, VD, Type, OriginalLVal, VDInit]() { 821 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 822 const Expr *Init = VD->getInit(); 823 if (!isa<CXXConstructExpr>(Init) || 824 isTrivialInitializer(Init)) { 825 // Perform simple memcpy. 826 LValue Dest = 827 MakeAddrLValue(Emission.getAllocatedAddress(), Type); 828 EmitAggregateAssign(Dest, OriginalLVal, Type); 829 } else { 830 EmitOMPAggregateAssign( 831 Emission.getAllocatedAddress(), OriginalLVal.getAddress(), 832 Type, 833 [this, VDInit, Init](Address DestElement, 834 Address SrcElement) { 835 // Clean up any temporaries needed by the 836 // initialization. 837 RunCleanupsScope InitScope(*this); 838 // Emit initialization for single element. 839 setAddrOfLocalVar(VDInit, SrcElement); 840 EmitAnyExprToMem(Init, DestElement, 841 Init->getType().getQualifiers(), 842 /*IsInitializer*/ false); 843 LocalDeclMap.erase(VDInit); 844 }); 845 } 846 EmitAutoVarCleanups(Emission); 847 return Emission.getAllocatedAddress(); 848 }); 849 } else { 850 Address OriginalAddr = OriginalLVal.getAddress(); 851 IsRegistered = PrivateScope.addPrivate( 852 OrigVD, [this, VDInit, OriginalAddr, VD]() { 853 // Emit private VarDecl with copy init. 854 // Remap temp VDInit variable to the address of the original 855 // variable (for proper handling of captured global variables). 856 setAddrOfLocalVar(VDInit, OriginalAddr); 857 EmitDecl(*VD); 858 LocalDeclMap.erase(VDInit); 859 return GetAddrOfLocalVar(VD); 860 }); 861 } 862 assert(IsRegistered && 863 "firstprivate var already registered as private"); 864 // Silence the warning about unused variable. 865 (void)IsRegistered; 866 } 867 ++IRef; 868 ++InitsRef; 869 } 870 } 871 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); 872 } 873 874 void CodeGenFunction::EmitOMPPrivateClause( 875 const OMPExecutableDirective &D, 876 CodeGenFunction::OMPPrivateScope &PrivateScope) { 877 if (!HaveInsertPoint()) 878 return; 879 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 880 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { 881 auto IRef = C->varlist_begin(); 882 for (const Expr *IInit : C->private_copies()) { 883 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 884 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 885 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 886 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { 887 // Emit private VarDecl with copy init. 888 EmitDecl(*VD); 889 return GetAddrOfLocalVar(VD); 890 }); 891 assert(IsRegistered && "private var already registered as private"); 892 // Silence the warning about unused variable. 893 (void)IsRegistered; 894 } 895 ++IRef; 896 } 897 } 898 } 899 900 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { 901 if (!HaveInsertPoint()) 902 return false; 903 // threadprivate_var1 = master_threadprivate_var1; 904 // operator=(threadprivate_var2, master_threadprivate_var2); 905 // ... 906 // __kmpc_barrier(&loc, global_tid); 907 llvm::DenseSet<const VarDecl *> CopiedVars; 908 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; 909 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { 910 auto IRef = C->varlist_begin(); 911 auto ISrcRef = C->source_exprs().begin(); 912 auto IDestRef = C->destination_exprs().begin(); 913 for (const Expr *AssignOp : C->assignment_ops()) { 914 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 915 QualType Type = VD->getType(); 916 if (CopiedVars.insert(VD->getCanonicalDecl()).second) { 917 // Get the address of the master variable. If we are emitting code with 918 // TLS support, the address is passed from the master as field in the 919 // captured declaration. 920 Address MasterAddr = Address::invalid(); 921 if (getLangOpts().OpenMPUseTLS && 922 getContext().getTargetInfo().isTLSSupported()) { 923 assert(CapturedStmtInfo->lookup(VD) && 924 "Copyin threadprivates should have been captured!"); 925 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, 926 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 927 MasterAddr = EmitLValue(&DRE).getAddress(); 928 LocalDeclMap.erase(VD); 929 } else { 930 MasterAddr = 931 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) 932 : CGM.GetAddrOfGlobal(VD), 933 getContext().getDeclAlign(VD)); 934 } 935 // Get the address of the threadprivate variable. 936 Address PrivateAddr = EmitLValue(*IRef).getAddress(); 937 if (CopiedVars.size() == 1) { 938 // At first check if current thread is a master thread. If it is, no 939 // need to copy data. 940 CopyBegin = createBasicBlock("copyin.not.master"); 941 CopyEnd = createBasicBlock("copyin.not.master.end"); 942 Builder.CreateCondBr( 943 Builder.CreateICmpNE( 944 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 945 Builder.CreatePtrToInt(PrivateAddr.getPointer(), 946 CGM.IntPtrTy)), 947 CopyBegin, CopyEnd); 948 EmitBlock(CopyBegin); 949 } 950 const auto *SrcVD = 951 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 952 const auto *DestVD = 953 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 954 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp); 955 } 956 ++IRef; 957 ++ISrcRef; 958 ++IDestRef; 959 } 960 } 961 if (CopyEnd) { 962 // Exit out of copying procedure for non-master thread. 963 EmitBlock(CopyEnd, /*IsFinished=*/true); 964 return true; 965 } 966 return false; 967 } 968 969 bool CodeGenFunction::EmitOMPLastprivateClauseInit( 970 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { 971 if (!HaveInsertPoint()) 972 return false; 973 bool HasAtLeastOneLastprivate = false; 974 llvm::DenseSet<const VarDecl *> SIMDLCVs; 975 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 976 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 977 for (const Expr *C : LoopDirective->counters()) { 978 SIMDLCVs.insert( 979 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 980 } 981 } 982 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 983 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 984 HasAtLeastOneLastprivate = true; 985 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 986 !getLangOpts().OpenMPSimd) 987 break; 988 auto IRef = C->varlist_begin(); 989 auto IDestRef = C->destination_exprs().begin(); 990 for (const Expr *IInit : C->private_copies()) { 991 // Keep the address of the original variable for future update at the end 992 // of the loop. 993 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 994 // Taskloops do not require additional initialization, it is done in 995 // runtime support library. 996 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { 997 const auto *DestVD = 998 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 999 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { 1000 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1001 /*RefersToEnclosingVariableOrCapture=*/ 1002 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1003 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); 1004 return EmitLValue(&DRE).getAddress(); 1005 }); 1006 // Check if the variable is also a firstprivate: in this case IInit is 1007 // not generated. Initialization of this variable will happen in codegen 1008 // for 'firstprivate' clause. 1009 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { 1010 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); 1011 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { 1012 // Emit private VarDecl with copy init. 1013 EmitDecl(*VD); 1014 return GetAddrOfLocalVar(VD); 1015 }); 1016 assert(IsRegistered && 1017 "lastprivate var already registered as private"); 1018 (void)IsRegistered; 1019 } 1020 } 1021 ++IRef; 1022 ++IDestRef; 1023 } 1024 } 1025 return HasAtLeastOneLastprivate; 1026 } 1027 1028 void CodeGenFunction::EmitOMPLastprivateClauseFinal( 1029 const OMPExecutableDirective &D, bool NoFinals, 1030 llvm::Value *IsLastIterCond) { 1031 if (!HaveInsertPoint()) 1032 return; 1033 // Emit following code: 1034 // if (<IsLastIterCond>) { 1035 // orig_var1 = private_orig_var1; 1036 // ... 1037 // orig_varn = private_orig_varn; 1038 // } 1039 llvm::BasicBlock *ThenBB = nullptr; 1040 llvm::BasicBlock *DoneBB = nullptr; 1041 if (IsLastIterCond) { 1042 ThenBB = createBasicBlock(".omp.lastprivate.then"); 1043 DoneBB = createBasicBlock(".omp.lastprivate.done"); 1044 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); 1045 EmitBlock(ThenBB); 1046 } 1047 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; 1048 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; 1049 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { 1050 auto IC = LoopDirective->counters().begin(); 1051 for (const Expr *F : LoopDirective->finals()) { 1052 const auto *D = 1053 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); 1054 if (NoFinals) 1055 AlreadyEmittedVars.insert(D); 1056 else 1057 LoopCountersAndUpdates[D] = F; 1058 ++IC; 1059 } 1060 } 1061 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { 1062 auto IRef = C->varlist_begin(); 1063 auto ISrcRef = C->source_exprs().begin(); 1064 auto IDestRef = C->destination_exprs().begin(); 1065 for (const Expr *AssignOp : C->assignment_ops()) { 1066 const auto *PrivateVD = 1067 cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 1068 QualType Type = PrivateVD->getType(); 1069 const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); 1070 if (AlreadyEmittedVars.insert(CanonicalVD).second) { 1071 // If lastprivate variable is a loop control variable for loop-based 1072 // directive, update its value before copyin back to original 1073 // variable. 1074 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) 1075 EmitIgnoredExpr(FinalExpr); 1076 const auto *SrcVD = 1077 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 1078 const auto *DestVD = 1079 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); 1080 // Get the address of the original variable. 1081 Address OriginalAddr = GetAddrOfLocalVar(DestVD); 1082 // Get the address of the private variable. 1083 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); 1084 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) 1085 PrivateAddr = 1086 Address(Builder.CreateLoad(PrivateAddr), 1087 getNaturalTypeAlignment(RefTy->getPointeeType())); 1088 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); 1089 } 1090 ++IRef; 1091 ++ISrcRef; 1092 ++IDestRef; 1093 } 1094 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 1095 EmitIgnoredExpr(PostUpdate); 1096 } 1097 if (IsLastIterCond) 1098 EmitBlock(DoneBB, /*IsFinished=*/true); 1099 } 1100 1101 void CodeGenFunction::EmitOMPReductionClauseInit( 1102 const OMPExecutableDirective &D, 1103 CodeGenFunction::OMPPrivateScope &PrivateScope) { 1104 if (!HaveInsertPoint()) 1105 return; 1106 SmallVector<const Expr *, 4> Shareds; 1107 SmallVector<const Expr *, 4> Privates; 1108 SmallVector<const Expr *, 4> ReductionOps; 1109 SmallVector<const Expr *, 4> LHSs; 1110 SmallVector<const Expr *, 4> RHSs; 1111 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1112 auto IPriv = C->privates().begin(); 1113 auto IRed = C->reduction_ops().begin(); 1114 auto ILHS = C->lhs_exprs().begin(); 1115 auto IRHS = C->rhs_exprs().begin(); 1116 for (const Expr *Ref : C->varlists()) { 1117 Shareds.emplace_back(Ref); 1118 Privates.emplace_back(*IPriv); 1119 ReductionOps.emplace_back(*IRed); 1120 LHSs.emplace_back(*ILHS); 1121 RHSs.emplace_back(*IRHS); 1122 std::advance(IPriv, 1); 1123 std::advance(IRed, 1); 1124 std::advance(ILHS, 1); 1125 std::advance(IRHS, 1); 1126 } 1127 } 1128 ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); 1129 unsigned Count = 0; 1130 auto ILHS = LHSs.begin(); 1131 auto IRHS = RHSs.begin(); 1132 auto IPriv = Privates.begin(); 1133 for (const Expr *IRef : Shareds) { 1134 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); 1135 // Emit private VarDecl with reduction init. 1136 RedCG.emitSharedLValue(*this, Count); 1137 RedCG.emitAggregateType(*this, Count); 1138 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); 1139 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), 1140 RedCG.getSharedLValue(Count), 1141 [&Emission](CodeGenFunction &CGF) { 1142 CGF.EmitAutoVarInit(Emission); 1143 return true; 1144 }); 1145 EmitAutoVarCleanups(Emission); 1146 Address BaseAddr = RedCG.adjustPrivateAddress( 1147 *this, Count, Emission.getAllocatedAddress()); 1148 bool IsRegistered = PrivateScope.addPrivate( 1149 RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; }); 1150 assert(IsRegistered && "private var already registered as private"); 1151 // Silence the warning about unused variable. 1152 (void)IsRegistered; 1153 1154 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 1155 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 1156 QualType Type = PrivateVD->getType(); 1157 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); 1158 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { 1159 // Store the address of the original variable associated with the LHS 1160 // implicit variable. 1161 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { 1162 return RedCG.getSharedLValue(Count).getAddress(); 1163 }); 1164 PrivateScope.addPrivate( 1165 RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); }); 1166 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || 1167 isa<ArraySubscriptExpr>(IRef)) { 1168 // Store the address of the original variable associated with the LHS 1169 // implicit variable. 1170 PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() { 1171 return RedCG.getSharedLValue(Count).getAddress(); 1172 }); 1173 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() { 1174 return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), 1175 ConvertTypeForMem(RHSVD->getType()), 1176 "rhs.begin"); 1177 }); 1178 } else { 1179 QualType Type = PrivateVD->getType(); 1180 bool IsArray = getContext().getAsArrayType(Type) != nullptr; 1181 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); 1182 // Store the address of the original variable associated with the LHS 1183 // implicit variable. 1184 if (IsArray) { 1185 OriginalAddr = Builder.CreateElementBitCast( 1186 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); 1187 } 1188 PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; }); 1189 PrivateScope.addPrivate( 1190 RHSVD, [this, PrivateVD, RHSVD, IsArray]() { 1191 return IsArray 1192 ? Builder.CreateElementBitCast( 1193 GetAddrOfLocalVar(PrivateVD), 1194 ConvertTypeForMem(RHSVD->getType()), "rhs.begin") 1195 : GetAddrOfLocalVar(PrivateVD); 1196 }); 1197 } 1198 ++ILHS; 1199 ++IRHS; 1200 ++IPriv; 1201 ++Count; 1202 } 1203 } 1204 1205 void CodeGenFunction::EmitOMPReductionClauseFinal( 1206 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { 1207 if (!HaveInsertPoint()) 1208 return; 1209 llvm::SmallVector<const Expr *, 8> Privates; 1210 llvm::SmallVector<const Expr *, 8> LHSExprs; 1211 llvm::SmallVector<const Expr *, 8> RHSExprs; 1212 llvm::SmallVector<const Expr *, 8> ReductionOps; 1213 bool HasAtLeastOneReduction = false; 1214 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1215 HasAtLeastOneReduction = true; 1216 Privates.append(C->privates().begin(), C->privates().end()); 1217 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1218 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1219 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1220 } 1221 if (HasAtLeastOneReduction) { 1222 bool WithNowait = D.getSingleClause<OMPNowaitClause>() || 1223 isOpenMPParallelDirective(D.getDirectiveKind()) || 1224 ReductionKind == OMPD_simd; 1225 bool SimpleReduction = ReductionKind == OMPD_simd; 1226 // Emit nowait reduction if nowait clause is present or directive is a 1227 // parallel directive (it always has implicit barrier). 1228 CGM.getOpenMPRuntime().emitReduction( 1229 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, 1230 {WithNowait, SimpleReduction, ReductionKind}); 1231 } 1232 } 1233 1234 static void emitPostUpdateForReductionClause( 1235 CodeGenFunction &CGF, const OMPExecutableDirective &D, 1236 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1237 if (!CGF.HaveInsertPoint()) 1238 return; 1239 llvm::BasicBlock *DoneBB = nullptr; 1240 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1241 if (const Expr *PostUpdate = C->getPostUpdateExpr()) { 1242 if (!DoneBB) { 1243 if (llvm::Value *Cond = CondGen(CGF)) { 1244 // If the first post-update expression is found, emit conditional 1245 // block if it was requested. 1246 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); 1247 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); 1248 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1249 CGF.EmitBlock(ThenBB); 1250 } 1251 } 1252 CGF.EmitIgnoredExpr(PostUpdate); 1253 } 1254 } 1255 if (DoneBB) 1256 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 1257 } 1258 1259 namespace { 1260 /// Codegen lambda for appending distribute lower and upper bounds to outlined 1261 /// parallel function. This is necessary for combined constructs such as 1262 /// 'distribute parallel for' 1263 typedef llvm::function_ref<void(CodeGenFunction &, 1264 const OMPExecutableDirective &, 1265 llvm::SmallVectorImpl<llvm::Value *> &)> 1266 CodeGenBoundParametersTy; 1267 } // anonymous namespace 1268 1269 static void emitCommonOMPParallelDirective( 1270 CodeGenFunction &CGF, const OMPExecutableDirective &S, 1271 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1272 const CodeGenBoundParametersTy &CodeGenBoundParameters) { 1273 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 1274 llvm::Function *OutlinedFn = 1275 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( 1276 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 1277 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { 1278 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 1279 llvm::Value *NumThreads = 1280 CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), 1281 /*IgnoreResultAssign=*/true); 1282 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( 1283 CGF, NumThreads, NumThreadsClause->getBeginLoc()); 1284 } 1285 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { 1286 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); 1287 CGF.CGM.getOpenMPRuntime().emitProcBindClause( 1288 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); 1289 } 1290 const Expr *IfCond = nullptr; 1291 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 1292 if (C->getNameModifier() == OMPD_unknown || 1293 C->getNameModifier() == OMPD_parallel) { 1294 IfCond = C->getCondition(); 1295 break; 1296 } 1297 } 1298 1299 OMPParallelScope Scope(CGF, S); 1300 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 1301 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1302 // lower and upper bounds with the pragma 'for' chunking mechanism. 1303 // The following lambda takes care of appending the lower and upper bound 1304 // parameters when necessary 1305 CodeGenBoundParameters(CGF, S, CapturedVars); 1306 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1307 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, 1308 CapturedVars, IfCond); 1309 } 1310 1311 static void emitEmptyBoundParameters(CodeGenFunction &, 1312 const OMPExecutableDirective &, 1313 llvm::SmallVectorImpl<llvm::Value *> &) {} 1314 1315 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1316 // Emit parallel region as a standalone region. 1317 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1318 Action.Enter(CGF); 1319 OMPPrivateScope PrivateScope(CGF); 1320 bool Copyins = CGF.EmitOMPCopyinClause(S); 1321 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 1322 if (Copyins) { 1323 // Emit implicit barrier to synchronize threads and avoid data races on 1324 // propagation master's thread values of threadprivate variables to local 1325 // instances of that variables of all other implicit threads. 1326 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 1327 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 1328 /*ForceSimpleCall=*/true); 1329 } 1330 CGF.EmitOMPPrivateClause(S, PrivateScope); 1331 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 1332 (void)PrivateScope.Privatize(); 1333 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); 1334 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 1335 }; 1336 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, 1337 emitEmptyBoundParameters); 1338 emitPostUpdateForReductionClause(*this, S, 1339 [](CodeGenFunction &) { return nullptr; }); 1340 } 1341 1342 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, 1343 JumpDest LoopExit) { 1344 RunCleanupsScope BodyScope(*this); 1345 // Update counters values on current iteration. 1346 for (const Expr *UE : D.updates()) 1347 EmitIgnoredExpr(UE); 1348 // Update the linear variables. 1349 // In distribute directives only loop counters may be marked as linear, no 1350 // need to generate the code for them. 1351 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { 1352 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1353 for (const Expr *UE : C->updates()) 1354 EmitIgnoredExpr(UE); 1355 } 1356 } 1357 1358 // On a continue in the body, jump to the end. 1359 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); 1360 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1361 for (const Expr *E : D.finals_conditions()) { 1362 if (!E) 1363 continue; 1364 // Check that loop counter in non-rectangular nest fits into the iteration 1365 // space. 1366 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); 1367 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), 1368 getProfileCount(D.getBody())); 1369 EmitBlock(NextBB); 1370 } 1371 // Emit loop variables for C++ range loops. 1372 const Stmt *Body = 1373 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1374 for (unsigned Cnt = 0; Cnt < D.getCollapsedNumber(); ++Cnt) { 1375 Body = Body->IgnoreContainers(); 1376 if (auto *For = dyn_cast<ForStmt>(Body)) { 1377 Body = For->getBody(); 1378 } else { 1379 assert(isa<CXXForRangeStmt>(Body) && 1380 "Expected canonical for loop or range-based for loop."); 1381 auto *CXXFor = cast<CXXForRangeStmt>(Body); 1382 EmitStmt(CXXFor->getLoopVarStmt()); 1383 Body = CXXFor->getBody(); 1384 } 1385 } 1386 // Emit loop body. 1387 EmitStmt(D.getBody()); 1388 // The end (updates/cleanups). 1389 EmitBlock(Continue.getBlock()); 1390 BreakContinueStack.pop_back(); 1391 } 1392 1393 void CodeGenFunction::EmitOMPInnerLoop( 1394 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 1395 const Expr *IncExpr, 1396 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 1397 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 1398 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 1399 1400 // Start the loop with a block that tests the condition. 1401 auto CondBlock = createBasicBlock("omp.inner.for.cond"); 1402 EmitBlock(CondBlock); 1403 const SourceRange R = S.getSourceRange(); 1404 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1405 SourceLocToDebugLoc(R.getEnd())); 1406 1407 // If there are any cleanups between here and the loop-exit scope, 1408 // create a block to stage a loop exit along. 1409 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 1410 if (RequiresCleanup) 1411 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); 1412 1413 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body"); 1414 1415 // Emit condition. 1416 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S)); 1417 if (ExitBlock != LoopExit.getBlock()) { 1418 EmitBlock(ExitBlock); 1419 EmitBranchThroughCleanup(LoopExit); 1420 } 1421 1422 EmitBlock(LoopBody); 1423 incrementProfileCounter(&S); 1424 1425 // Create a block for the increment. 1426 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); 1427 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1428 1429 BodyGen(*this); 1430 1431 // Emit "IV = IV + 1" and a back-edge to the condition block. 1432 EmitBlock(Continue.getBlock()); 1433 EmitIgnoredExpr(IncExpr); 1434 PostIncGen(*this); 1435 BreakContinueStack.pop_back(); 1436 EmitBranch(CondBlock); 1437 LoopStack.pop(); 1438 // Emit the fall-through block. 1439 EmitBlock(LoopExit.getBlock()); 1440 } 1441 1442 bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { 1443 if (!HaveInsertPoint()) 1444 return false; 1445 // Emit inits for the linear variables. 1446 bool HasLinears = false; 1447 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1448 for (const Expr *Init : C->inits()) { 1449 HasLinears = true; 1450 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); 1451 if (const auto *Ref = 1452 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { 1453 AutoVarEmission Emission = EmitAutoVarAlloca(*VD); 1454 const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); 1455 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1456 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1457 VD->getInit()->getType(), VK_LValue, 1458 VD->getInit()->getExprLoc()); 1459 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), 1460 VD->getType()), 1461 /*capturedByInit=*/false); 1462 EmitAutoVarCleanups(Emission); 1463 } else { 1464 EmitVarDecl(*VD); 1465 } 1466 } 1467 // Emit the linear steps for the linear clauses. 1468 // If a step is not constant, it is pre-calculated before the loop. 1469 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) 1470 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { 1471 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); 1472 // Emit calculation of the linear step. 1473 EmitIgnoredExpr(CS); 1474 } 1475 } 1476 return HasLinears; 1477 } 1478 1479 void CodeGenFunction::EmitOMPLinearClauseFinal( 1480 const OMPLoopDirective &D, 1481 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1482 if (!HaveInsertPoint()) 1483 return; 1484 llvm::BasicBlock *DoneBB = nullptr; 1485 // Emit the final values of the linear variables. 1486 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1487 auto IC = C->varlist_begin(); 1488 for (const Expr *F : C->finals()) { 1489 if (!DoneBB) { 1490 if (llvm::Value *Cond = CondGen(*this)) { 1491 // If the first post-update expression is found, emit conditional 1492 // block if it was requested. 1493 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu"); 1494 DoneBB = createBasicBlock(".omp.linear.pu.done"); 1495 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1496 EmitBlock(ThenBB); 1497 } 1498 } 1499 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); 1500 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), 1501 CapturedStmtInfo->lookup(OrigVD) != nullptr, 1502 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); 1503 Address OrigAddr = EmitLValue(&DRE).getAddress(); 1504 CodeGenFunction::OMPPrivateScope VarScope(*this); 1505 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 1506 (void)VarScope.Privatize(); 1507 EmitIgnoredExpr(F); 1508 ++IC; 1509 } 1510 if (const Expr *PostUpdate = C->getPostUpdateExpr()) 1511 EmitIgnoredExpr(PostUpdate); 1512 } 1513 if (DoneBB) 1514 EmitBlock(DoneBB, /*IsFinished=*/true); 1515 } 1516 1517 static void emitAlignedClause(CodeGenFunction &CGF, 1518 const OMPExecutableDirective &D) { 1519 if (!CGF.HaveInsertPoint()) 1520 return; 1521 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { 1522 llvm::APInt ClauseAlignment(64, 0); 1523 if (const Expr *AlignmentExpr = Clause->getAlignment()) { 1524 auto *AlignmentCI = 1525 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); 1526 ClauseAlignment = AlignmentCI->getValue(); 1527 } 1528 for (const Expr *E : Clause->varlists()) { 1529 llvm::APInt Alignment(ClauseAlignment); 1530 if (Alignment == 0) { 1531 // OpenMP [2.8.1, Description] 1532 // If no optional parameter is specified, implementation-defined default 1533 // alignments for SIMD instructions on the target platforms are assumed. 1534 Alignment = 1535 CGF.getContext() 1536 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( 1537 E->getType()->getPointeeType())) 1538 .getQuantity(); 1539 } 1540 assert((Alignment == 0 || Alignment.isPowerOf2()) && 1541 "alignment is not power of 2"); 1542 if (Alignment != 0) { 1543 llvm::Value *PtrValue = CGF.EmitScalarExpr(E); 1544 CGF.EmitAlignmentAssumption( 1545 PtrValue, E, /*No second loc needed*/ SourceLocation(), 1546 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); 1547 } 1548 } 1549 } 1550 } 1551 1552 void CodeGenFunction::EmitOMPPrivateLoopCounters( 1553 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { 1554 if (!HaveInsertPoint()) 1555 return; 1556 auto I = S.private_counters().begin(); 1557 for (const Expr *E : S.counters()) { 1558 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1559 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); 1560 // Emit var without initialization. 1561 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD); 1562 EmitAutoVarCleanups(VarEmission); 1563 LocalDeclMap.erase(PrivateVD); 1564 (void)LoopScope.addPrivate(VD, [&VarEmission]() { 1565 return VarEmission.getAllocatedAddress(); 1566 }); 1567 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || 1568 VD->hasGlobalStorage()) { 1569 (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { 1570 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), 1571 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), 1572 E->getType(), VK_LValue, E->getExprLoc()); 1573 return EmitLValue(&DRE).getAddress(); 1574 }); 1575 } else { 1576 (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() { 1577 return VarEmission.getAllocatedAddress(); 1578 }); 1579 } 1580 ++I; 1581 } 1582 // Privatize extra loop counters used in loops for ordered(n) clauses. 1583 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 1584 if (!C->getNumForLoops()) 1585 continue; 1586 for (unsigned I = S.getCollapsedNumber(), 1587 E = C->getLoopNumIterations().size(); 1588 I < E; ++I) { 1589 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 1590 const auto *VD = cast<VarDecl>(DRE->getDecl()); 1591 // Override only those variables that can be captured to avoid re-emission 1592 // of the variables declared within the loops. 1593 if (DRE->refersToEnclosingVariableOrCapture()) { 1594 (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { 1595 return CreateMemTemp(DRE->getType(), VD->getName()); 1596 }); 1597 } 1598 } 1599 } 1600 } 1601 1602 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, 1603 const Expr *Cond, llvm::BasicBlock *TrueBlock, 1604 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { 1605 if (!CGF.HaveInsertPoint()) 1606 return; 1607 { 1608 CodeGenFunction::OMPPrivateScope PreCondScope(CGF); 1609 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); 1610 (void)PreCondScope.Privatize(); 1611 // Get initial values of real counters. 1612 for (const Expr *I : S.inits()) { 1613 CGF.EmitIgnoredExpr(I); 1614 } 1615 } 1616 // Create temp loop control variables with their init values to support 1617 // non-rectangular loops. 1618 CodeGenFunction::OMPMapVars PreCondVars; 1619 for (const Expr * E: S.dependent_counters()) { 1620 if (!E) 1621 continue; 1622 assert(!E->getType().getNonReferenceType()->isRecordType() && 1623 "dependent counter must not be an iterator."); 1624 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1625 Address CounterAddr = 1626 CGF.CreateMemTemp(VD->getType().getNonReferenceType()); 1627 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); 1628 } 1629 (void)PreCondVars.apply(CGF); 1630 for (const Expr *E : S.dependent_inits()) { 1631 if (!E) 1632 continue; 1633 CGF.EmitIgnoredExpr(E); 1634 } 1635 // Check that loop is executed at least one time. 1636 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); 1637 PreCondVars.restore(CGF); 1638 } 1639 1640 void CodeGenFunction::EmitOMPLinearClause( 1641 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { 1642 if (!HaveInsertPoint()) 1643 return; 1644 llvm::DenseSet<const VarDecl *> SIMDLCVs; 1645 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 1646 const auto *LoopDirective = cast<OMPLoopDirective>(&D); 1647 for (const Expr *C : LoopDirective->counters()) { 1648 SIMDLCVs.insert( 1649 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); 1650 } 1651 } 1652 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { 1653 auto CurPrivate = C->privates().begin(); 1654 for (const Expr *E : C->varlists()) { 1655 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 1656 const auto *PrivateVD = 1657 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); 1658 if (!SIMDLCVs.count(VD->getCanonicalDecl())) { 1659 bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() { 1660 // Emit private VarDecl with copy init. 1661 EmitVarDecl(*PrivateVD); 1662 return GetAddrOfLocalVar(PrivateVD); 1663 }); 1664 assert(IsRegistered && "linear var already registered as private"); 1665 // Silence the warning about unused variable. 1666 (void)IsRegistered; 1667 } else { 1668 EmitVarDecl(*PrivateVD); 1669 } 1670 ++CurPrivate; 1671 } 1672 } 1673 } 1674 1675 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 1676 const OMPExecutableDirective &D, 1677 bool IsMonotonic) { 1678 if (!CGF.HaveInsertPoint()) 1679 return; 1680 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 1681 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 1682 /*ignoreResult=*/true); 1683 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1684 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1685 // In presence of finite 'safelen', it may be unsafe to mark all 1686 // the memory instructions parallel, because loop-carried 1687 // dependences of 'safelen' iterations are possible. 1688 if (!IsMonotonic) 1689 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); 1690 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 1691 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 1692 /*ignoreResult=*/true); 1693 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 1694 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 1695 // In presence of finite 'safelen', it may be unsafe to mark all 1696 // the memory instructions parallel, because loop-carried 1697 // dependences of 'safelen' iterations are possible. 1698 CGF.LoopStack.setParallel(/*Enable=*/false); 1699 } 1700 } 1701 1702 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 1703 bool IsMonotonic) { 1704 // Walk clauses and process safelen/lastprivate. 1705 LoopStack.setParallel(!IsMonotonic); 1706 LoopStack.setVectorizeEnable(); 1707 emitSimdlenSafelenClause(*this, D, IsMonotonic); 1708 } 1709 1710 void CodeGenFunction::EmitOMPSimdFinal( 1711 const OMPLoopDirective &D, 1712 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 1713 if (!HaveInsertPoint()) 1714 return; 1715 llvm::BasicBlock *DoneBB = nullptr; 1716 auto IC = D.counters().begin(); 1717 auto IPC = D.private_counters().begin(); 1718 for (const Expr *F : D.finals()) { 1719 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); 1720 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); 1721 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); 1722 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || 1723 OrigVD->hasGlobalStorage() || CED) { 1724 if (!DoneBB) { 1725 if (llvm::Value *Cond = CondGen(*this)) { 1726 // If the first post-update expression is found, emit conditional 1727 // block if it was requested. 1728 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then"); 1729 DoneBB = createBasicBlock(".omp.final.done"); 1730 Builder.CreateCondBr(Cond, ThenBB, DoneBB); 1731 EmitBlock(ThenBB); 1732 } 1733 } 1734 Address OrigAddr = Address::invalid(); 1735 if (CED) { 1736 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); 1737 } else { 1738 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), 1739 /*RefersToEnclosingVariableOrCapture=*/false, 1740 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); 1741 OrigAddr = EmitLValue(&DRE).getAddress(); 1742 } 1743 OMPPrivateScope VarScope(*this); 1744 VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; }); 1745 (void)VarScope.Privatize(); 1746 EmitIgnoredExpr(F); 1747 } 1748 ++IC; 1749 ++IPC; 1750 } 1751 if (DoneBB) 1752 EmitBlock(DoneBB, /*IsFinished=*/true); 1753 } 1754 1755 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, 1756 const OMPLoopDirective &S, 1757 CodeGenFunction::JumpDest LoopExit) { 1758 CGF.EmitOMPLoopBody(S, LoopExit); 1759 CGF.EmitStopPoint(&S); 1760 } 1761 1762 /// Emit a helper variable and return corresponding lvalue. 1763 static LValue EmitOMPHelperVar(CodeGenFunction &CGF, 1764 const DeclRefExpr *Helper) { 1765 auto VDecl = cast<VarDecl>(Helper->getDecl()); 1766 CGF.EmitVarDecl(*VDecl); 1767 return CGF.EmitLValue(Helper); 1768 } 1769 1770 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, 1771 PrePostActionTy &Action) { 1772 Action.Enter(CGF); 1773 assert(isOpenMPSimdDirective(S.getDirectiveKind()) && 1774 "Expected simd directive"); 1775 OMPLoopScope PreInitScope(CGF, S); 1776 // if (PreCond) { 1777 // for (IV in 0..LastIteration) BODY; 1778 // <Final counter/linear vars updates>; 1779 // } 1780 // 1781 if (isOpenMPDistributeDirective(S.getDirectiveKind()) || 1782 isOpenMPWorksharingDirective(S.getDirectiveKind()) || 1783 isOpenMPTaskLoopDirective(S.getDirectiveKind())) { 1784 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); 1785 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); 1786 } 1787 1788 // Emit: if (PreCond) - begin. 1789 // If the condition constant folds and can be elided, avoid emitting the 1790 // whole loop. 1791 bool CondConstant; 1792 llvm::BasicBlock *ContBlock = nullptr; 1793 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 1794 if (!CondConstant) 1795 return; 1796 } else { 1797 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then"); 1798 ContBlock = CGF.createBasicBlock("simd.if.end"); 1799 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 1800 CGF.getProfileCount(&S)); 1801 CGF.EmitBlock(ThenBlock); 1802 CGF.incrementProfileCounter(&S); 1803 } 1804 1805 // Emit the loop iteration variable. 1806 const Expr *IVExpr = S.getIterationVariable(); 1807 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 1808 CGF.EmitVarDecl(*IVDecl); 1809 CGF.EmitIgnoredExpr(S.getInit()); 1810 1811 // Emit the iterations count variable. 1812 // If it is not a variable, Sema decided to calculate iterations count on 1813 // each iteration (e.g., it is foldable into a constant). 1814 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 1815 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 1816 // Emit calculation of the iterations count. 1817 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 1818 } 1819 1820 CGF.EmitOMPSimdInit(S); 1821 1822 emitAlignedClause(CGF, S); 1823 (void)CGF.EmitOMPLinearClauseInit(S); 1824 { 1825 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 1826 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 1827 CGF.EmitOMPLinearClause(S, LoopScope); 1828 CGF.EmitOMPPrivateClause(S, LoopScope); 1829 CGF.EmitOMPReductionClauseInit(S, LoopScope); 1830 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 1831 (void)LoopScope.Privatize(); 1832 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 1833 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 1834 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 1835 S.getInc(), 1836 [&S](CodeGenFunction &CGF) { 1837 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 1838 CGF.EmitStopPoint(&S); 1839 }, 1840 [](CodeGenFunction &) {}); 1841 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 1842 // Emit final copy of the lastprivate variables at the end of loops. 1843 if (HasLastprivateClause) 1844 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); 1845 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); 1846 emitPostUpdateForReductionClause(CGF, S, 1847 [](CodeGenFunction &) { return nullptr; }); 1848 } 1849 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); 1850 // Emit: if (PreCond) - end. 1851 if (ContBlock) { 1852 CGF.EmitBranch(ContBlock); 1853 CGF.EmitBlock(ContBlock, true); 1854 } 1855 } 1856 1857 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 1858 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1859 emitOMPSimdRegion(CGF, S, Action); 1860 }; 1861 OMPLexicalScope Scope(*this, S, OMPD_unknown); 1862 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 1863 } 1864 1865 void CodeGenFunction::EmitOMPOuterLoop( 1866 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 1867 CodeGenFunction::OMPPrivateScope &LoopScope, 1868 const CodeGenFunction::OMPLoopArguments &LoopArgs, 1869 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 1870 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { 1871 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 1872 1873 const Expr *IVExpr = S.getIterationVariable(); 1874 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 1875 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 1876 1877 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); 1878 1879 // Start the loop with a block that tests the condition. 1880 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 1881 EmitBlock(CondBlock); 1882 const SourceRange R = S.getSourceRange(); 1883 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 1884 SourceLocToDebugLoc(R.getEnd())); 1885 1886 llvm::Value *BoolCondVal = nullptr; 1887 if (!DynamicOrOrdered) { 1888 // UB = min(UB, GlobalUB) or 1889 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. 1890 // 'distribute parallel for') 1891 EmitIgnoredExpr(LoopArgs.EUB); 1892 // IV = LB 1893 EmitIgnoredExpr(LoopArgs.Init); 1894 // IV < UB 1895 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); 1896 } else { 1897 BoolCondVal = 1898 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, 1899 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); 1900 } 1901 1902 // If there are any cleanups between here and the loop-exit scope, 1903 // create a block to stage a loop exit along. 1904 llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); 1905 if (LoopScope.requiresCleanups()) 1906 ExitBlock = createBasicBlock("omp.dispatch.cleanup"); 1907 1908 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body"); 1909 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); 1910 if (ExitBlock != LoopExit.getBlock()) { 1911 EmitBlock(ExitBlock); 1912 EmitBranchThroughCleanup(LoopExit); 1913 } 1914 EmitBlock(LoopBody); 1915 1916 // Emit "IV = LB" (in case of static schedule, we have already calculated new 1917 // LB for loop condition and emitted it above). 1918 if (DynamicOrOrdered) 1919 EmitIgnoredExpr(LoopArgs.Init); 1920 1921 // Create a block for the increment. 1922 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); 1923 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); 1924 1925 // Generate !llvm.loop.parallel metadata for loads and stores for loops 1926 // with dynamic/guided scheduling and without ordered clause. 1927 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 1928 LoopStack.setParallel(!IsMonotonic); 1929 else 1930 EmitOMPSimdInit(S, IsMonotonic); 1931 1932 SourceLocation Loc = S.getBeginLoc(); 1933 1934 // when 'distribute' is not combined with a 'for': 1935 // while (idx <= UB) { BODY; ++idx; } 1936 // when 'distribute' is combined with a 'for' 1937 // (e.g. 'distribute parallel for') 1938 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } 1939 EmitOMPInnerLoop( 1940 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, 1941 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 1942 CodeGenLoop(CGF, S, LoopExit); 1943 }, 1944 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { 1945 CodeGenOrdered(CGF, Loc, IVSize, IVSigned); 1946 }); 1947 1948 EmitBlock(Continue.getBlock()); 1949 BreakContinueStack.pop_back(); 1950 if (!DynamicOrOrdered) { 1951 // Emit "LB = LB + Stride", "UB = UB + Stride". 1952 EmitIgnoredExpr(LoopArgs.NextLB); 1953 EmitIgnoredExpr(LoopArgs.NextUB); 1954 } 1955 1956 EmitBranch(CondBlock); 1957 LoopStack.pop(); 1958 // Emit the fall-through block. 1959 EmitBlock(LoopExit.getBlock()); 1960 1961 // Tell the runtime we are done. 1962 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { 1963 if (!DynamicOrOrdered) 1964 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 1965 S.getDirectiveKind()); 1966 }; 1967 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 1968 } 1969 1970 void CodeGenFunction::EmitOMPForOuterLoop( 1971 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, 1972 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, 1973 const OMPLoopArguments &LoopArgs, 1974 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 1975 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 1976 1977 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 1978 const bool DynamicOrOrdered = 1979 Ordered || RT.isDynamic(ScheduleKind.Schedule); 1980 1981 assert((Ordered || 1982 !RT.isStaticNonchunked(ScheduleKind.Schedule, 1983 LoopArgs.Chunk != nullptr)) && 1984 "static non-chunked schedule does not need outer loop"); 1985 1986 // Emit outer loop. 1987 // 1988 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 1989 // When schedule(dynamic,chunk_size) is specified, the iterations are 1990 // distributed to threads in the team in chunks as the threads request them. 1991 // Each thread executes a chunk of iterations, then requests another chunk, 1992 // until no chunks remain to be distributed. Each chunk contains chunk_size 1993 // iterations, except for the last chunk to be distributed, which may have 1994 // fewer iterations. When no chunk_size is specified, it defaults to 1. 1995 // 1996 // When schedule(guided,chunk_size) is specified, the iterations are assigned 1997 // to threads in the team in chunks as the executing threads request them. 1998 // Each thread executes a chunk of iterations, then requests another chunk, 1999 // until no chunks remain to be assigned. For a chunk_size of 1, the size of 2000 // each chunk is proportional to the number of unassigned iterations divided 2001 // by the number of threads in the team, decreasing to 1. For a chunk_size 2002 // with value k (greater than 1), the size of each chunk is determined in the 2003 // same way, with the restriction that the chunks do not contain fewer than k 2004 // iterations (except for the last chunk to be assigned, which may have fewer 2005 // than k iterations). 2006 // 2007 // When schedule(auto) is specified, the decision regarding scheduling is 2008 // delegated to the compiler and/or runtime system. The programmer gives the 2009 // implementation the freedom to choose any possible mapping of iterations to 2010 // threads in the team. 2011 // 2012 // When schedule(runtime) is specified, the decision regarding scheduling is 2013 // deferred until run time, and the schedule and chunk size are taken from the 2014 // run-sched-var ICV. If the ICV is set to auto, the schedule is 2015 // implementation defined 2016 // 2017 // while(__kmpc_dispatch_next(&LB, &UB)) { 2018 // idx = LB; 2019 // while (idx <= UB) { BODY; ++idx; 2020 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. 2021 // } // inner loop 2022 // } 2023 // 2024 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2025 // When schedule(static, chunk_size) is specified, iterations are divided into 2026 // chunks of size chunk_size, and the chunks are assigned to the threads in 2027 // the team in a round-robin fashion in the order of the thread number. 2028 // 2029 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { 2030 // while (idx <= UB) { BODY; ++idx; } // inner loop 2031 // LB = LB + ST; 2032 // UB = UB + ST; 2033 // } 2034 // 2035 2036 const Expr *IVExpr = S.getIterationVariable(); 2037 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2038 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2039 2040 if (DynamicOrOrdered) { 2041 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = 2042 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); 2043 llvm::Value *LBVal = DispatchBounds.first; 2044 llvm::Value *UBVal = DispatchBounds.second; 2045 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, 2046 LoopArgs.Chunk}; 2047 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, 2048 IVSigned, Ordered, DipatchRTInputValues); 2049 } else { 2050 CGOpenMPRuntime::StaticRTInput StaticInit( 2051 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, 2052 LoopArgs.ST, LoopArgs.Chunk); 2053 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), 2054 ScheduleKind, StaticInit); 2055 } 2056 2057 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, 2058 const unsigned IVSize, 2059 const bool IVSigned) { 2060 if (Ordered) { 2061 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, 2062 IVSigned); 2063 } 2064 }; 2065 2066 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, 2067 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); 2068 OuterLoopArgs.IncExpr = S.getInc(); 2069 OuterLoopArgs.Init = S.getInit(); 2070 OuterLoopArgs.Cond = S.getCond(); 2071 OuterLoopArgs.NextLB = S.getNextLowerBound(); 2072 OuterLoopArgs.NextUB = S.getNextUpperBound(); 2073 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, 2074 emitOMPLoopBodyWithStopPoint, CodeGenOrdered); 2075 } 2076 2077 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, 2078 const unsigned IVSize, const bool IVSigned) {} 2079 2080 void CodeGenFunction::EmitOMPDistributeOuterLoop( 2081 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, 2082 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, 2083 const CodeGenLoopTy &CodeGenLoopContent) { 2084 2085 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2086 2087 // Emit outer loop. 2088 // Same behavior as a OMPForOuterLoop, except that schedule cannot be 2089 // dynamic 2090 // 2091 2092 const Expr *IVExpr = S.getIterationVariable(); 2093 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2094 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2095 2096 CGOpenMPRuntime::StaticRTInput StaticInit( 2097 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, 2098 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); 2099 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); 2100 2101 // for combined 'distribute' and 'for' the increment expression of distribute 2102 // is stored in DistInc. For 'distribute' alone, it is in Inc. 2103 Expr *IncExpr; 2104 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) 2105 IncExpr = S.getDistInc(); 2106 else 2107 IncExpr = S.getInc(); 2108 2109 // this routine is shared by 'omp distribute parallel for' and 2110 // 'omp distribute': select the right EUB expression depending on the 2111 // directive 2112 OMPLoopArguments OuterLoopArgs; 2113 OuterLoopArgs.LB = LoopArgs.LB; 2114 OuterLoopArgs.UB = LoopArgs.UB; 2115 OuterLoopArgs.ST = LoopArgs.ST; 2116 OuterLoopArgs.IL = LoopArgs.IL; 2117 OuterLoopArgs.Chunk = LoopArgs.Chunk; 2118 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2119 ? S.getCombinedEnsureUpperBound() 2120 : S.getEnsureUpperBound(); 2121 OuterLoopArgs.IncExpr = IncExpr; 2122 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2123 ? S.getCombinedInit() 2124 : S.getInit(); 2125 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2126 ? S.getCombinedCond() 2127 : S.getCond(); 2128 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2129 ? S.getCombinedNextLowerBound() 2130 : S.getNextLowerBound(); 2131 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 2132 ? S.getCombinedNextUpperBound() 2133 : S.getNextUpperBound(); 2134 2135 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, 2136 LoopScope, OuterLoopArgs, CodeGenLoopContent, 2137 emitEmptyOrdered); 2138 } 2139 2140 static std::pair<LValue, LValue> 2141 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, 2142 const OMPExecutableDirective &S) { 2143 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2144 LValue LB = 2145 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2146 LValue UB = 2147 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2148 2149 // When composing 'distribute' with 'for' (e.g. as in 'distribute 2150 // parallel for') we need to use the 'distribute' 2151 // chunk lower and upper bounds rather than the whole loop iteration 2152 // space. These are parameters to the outlined function for 'parallel' 2153 // and we copy the bounds of the previous schedule into the 2154 // the current ones. 2155 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); 2156 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); 2157 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( 2158 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc()); 2159 PrevLBVal = CGF.EmitScalarConversion( 2160 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), 2161 LS.getIterationVariable()->getType(), 2162 LS.getPrevLowerBoundVariable()->getExprLoc()); 2163 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( 2164 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc()); 2165 PrevUBVal = CGF.EmitScalarConversion( 2166 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), 2167 LS.getIterationVariable()->getType(), 2168 LS.getPrevUpperBoundVariable()->getExprLoc()); 2169 2170 CGF.EmitStoreOfScalar(PrevLBVal, LB); 2171 CGF.EmitStoreOfScalar(PrevUBVal, UB); 2172 2173 return {LB, UB}; 2174 } 2175 2176 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then 2177 /// we need to use the LB and UB expressions generated by the worksharing 2178 /// code generation support, whereas in non combined situations we would 2179 /// just emit 0 and the LastIteration expression 2180 /// This function is necessary due to the difference of the LB and UB 2181 /// types for the RT emission routines for 'for_static_init' and 2182 /// 'for_dispatch_init' 2183 static std::pair<llvm::Value *, llvm::Value *> 2184 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, 2185 const OMPExecutableDirective &S, 2186 Address LB, Address UB) { 2187 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); 2188 const Expr *IVExpr = LS.getIterationVariable(); 2189 // when implementing a dynamic schedule for a 'for' combined with a 2190 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop 2191 // is not normalized as each team only executes its own assigned 2192 // distribute chunk 2193 QualType IteratorTy = IVExpr->getType(); 2194 llvm::Value *LBVal = 2195 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); 2196 llvm::Value *UBVal = 2197 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); 2198 return {LBVal, UBVal}; 2199 } 2200 2201 static void emitDistributeParallelForDistributeInnerBoundParams( 2202 CodeGenFunction &CGF, const OMPExecutableDirective &S, 2203 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { 2204 const auto &Dir = cast<OMPLoopDirective>(S); 2205 LValue LB = 2206 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); 2207 llvm::Value *LBCast = CGF.Builder.CreateIntCast( 2208 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 2209 CapturedVars.push_back(LBCast); 2210 LValue UB = 2211 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); 2212 2213 llvm::Value *UBCast = CGF.Builder.CreateIntCast( 2214 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); 2215 CapturedVars.push_back(UBCast); 2216 } 2217 2218 static void 2219 emitInnerParallelForWhenCombined(CodeGenFunction &CGF, 2220 const OMPLoopDirective &S, 2221 CodeGenFunction::JumpDest LoopExit) { 2222 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, 2223 PrePostActionTy &Action) { 2224 Action.Enter(CGF); 2225 bool HasCancel = false; 2226 if (!isOpenMPSimdDirective(S.getDirectiveKind())) { 2227 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) 2228 HasCancel = D->hasCancel(); 2229 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) 2230 HasCancel = D->hasCancel(); 2231 else if (const auto *D = 2232 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) 2233 HasCancel = D->hasCancel(); 2234 } 2235 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), 2236 HasCancel); 2237 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), 2238 emitDistributeParallelForInnerBounds, 2239 emitDistributeParallelForDispatchBounds); 2240 }; 2241 2242 emitCommonOMPParallelDirective( 2243 CGF, S, 2244 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for, 2245 CGInlinedWorksharingLoop, 2246 emitDistributeParallelForDistributeInnerBoundParams); 2247 } 2248 2249 void CodeGenFunction::EmitOMPDistributeParallelForDirective( 2250 const OMPDistributeParallelForDirective &S) { 2251 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2252 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2253 S.getDistInc()); 2254 }; 2255 OMPLexicalScope Scope(*this, S, OMPD_parallel); 2256 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 2257 } 2258 2259 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( 2260 const OMPDistributeParallelForSimdDirective &S) { 2261 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2262 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 2263 S.getDistInc()); 2264 }; 2265 OMPLexicalScope Scope(*this, S, OMPD_parallel); 2266 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 2267 } 2268 2269 void CodeGenFunction::EmitOMPDistributeSimdDirective( 2270 const OMPDistributeSimdDirective &S) { 2271 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2272 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 2273 }; 2274 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2275 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2276 } 2277 2278 void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 2279 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { 2280 // Emit SPMD target parallel for region as a standalone region. 2281 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2282 emitOMPSimdRegion(CGF, S, Action); 2283 }; 2284 llvm::Function *Fn; 2285 llvm::Constant *Addr; 2286 // Emit target region as a standalone region. 2287 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 2288 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 2289 assert(Fn && Addr && "Target device function emission failed."); 2290 } 2291 2292 void CodeGenFunction::EmitOMPTargetSimdDirective( 2293 const OMPTargetSimdDirective &S) { 2294 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2295 emitOMPSimdRegion(CGF, S, Action); 2296 }; 2297 emitCommonOMPTargetDirective(*this, S, CodeGen); 2298 } 2299 2300 namespace { 2301 struct ScheduleKindModifiersTy { 2302 OpenMPScheduleClauseKind Kind; 2303 OpenMPScheduleClauseModifier M1; 2304 OpenMPScheduleClauseModifier M2; 2305 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, 2306 OpenMPScheduleClauseModifier M1, 2307 OpenMPScheduleClauseModifier M2) 2308 : Kind(Kind), M1(M1), M2(M2) {} 2309 }; 2310 } // namespace 2311 2312 bool CodeGenFunction::EmitOMPWorksharingLoop( 2313 const OMPLoopDirective &S, Expr *EUB, 2314 const CodeGenLoopBoundsTy &CodeGenLoopBounds, 2315 const CodeGenDispatchBoundsTy &CGDispatchBounds) { 2316 // Emit the loop iteration variable. 2317 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 2318 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 2319 EmitVarDecl(*IVDecl); 2320 2321 // Emit the iterations count variable. 2322 // If it is not a variable, Sema decided to calculate iterations count on each 2323 // iteration (e.g., it is foldable into a constant). 2324 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 2325 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 2326 // Emit calculation of the iterations count. 2327 EmitIgnoredExpr(S.getCalcLastIteration()); 2328 } 2329 2330 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 2331 2332 bool HasLastprivateClause; 2333 // Check pre-condition. 2334 { 2335 OMPLoopScope PreInitScope(*this, S); 2336 // Skip the entire loop if we don't meet the precondition. 2337 // If the condition constant folds and can be elided, avoid emitting the 2338 // whole loop. 2339 bool CondConstant; 2340 llvm::BasicBlock *ContBlock = nullptr; 2341 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 2342 if (!CondConstant) 2343 return false; 2344 } else { 2345 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 2346 ContBlock = createBasicBlock("omp.precond.end"); 2347 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 2348 getProfileCount(&S)); 2349 EmitBlock(ThenBlock); 2350 incrementProfileCounter(&S); 2351 } 2352 2353 RunCleanupsScope DoacrossCleanupScope(*this); 2354 bool Ordered = false; 2355 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { 2356 if (OrderedClause->getNumForLoops()) 2357 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); 2358 else 2359 Ordered = true; 2360 } 2361 2362 llvm::DenseSet<const Expr *> EmittedFinals; 2363 emitAlignedClause(*this, S); 2364 bool HasLinears = EmitOMPLinearClauseInit(S); 2365 // Emit helper vars inits. 2366 2367 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); 2368 LValue LB = Bounds.first; 2369 LValue UB = Bounds.second; 2370 LValue ST = 2371 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 2372 LValue IL = 2373 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 2374 2375 // Emit 'then' code. 2376 { 2377 OMPPrivateScope LoopScope(*this); 2378 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { 2379 // Emit implicit barrier to synchronize threads and avoid data races on 2380 // initialization of firstprivate variables and post-update of 2381 // lastprivate variables. 2382 CGM.getOpenMPRuntime().emitBarrierCall( 2383 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2384 /*ForceSimpleCall=*/true); 2385 } 2386 EmitOMPPrivateClause(S, LoopScope); 2387 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 2388 EmitOMPReductionClauseInit(S, LoopScope); 2389 EmitOMPPrivateLoopCounters(S, LoopScope); 2390 EmitOMPLinearClause(S, LoopScope); 2391 (void)LoopScope.Privatize(); 2392 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2393 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 2394 2395 // Detect the loop schedule kind and chunk. 2396 const Expr *ChunkExpr = nullptr; 2397 OpenMPScheduleTy ScheduleKind; 2398 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { 2399 ScheduleKind.Schedule = C->getScheduleKind(); 2400 ScheduleKind.M1 = C->getFirstScheduleModifier(); 2401 ScheduleKind.M2 = C->getSecondScheduleModifier(); 2402 ChunkExpr = C->getChunkSize(); 2403 } else { 2404 // Default behaviour for schedule clause. 2405 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( 2406 *this, S, ScheduleKind.Schedule, ChunkExpr); 2407 } 2408 bool HasChunkSizeOne = false; 2409 llvm::Value *Chunk = nullptr; 2410 if (ChunkExpr) { 2411 Chunk = EmitScalarExpr(ChunkExpr); 2412 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), 2413 S.getIterationVariable()->getType(), 2414 S.getBeginLoc()); 2415 Expr::EvalResult Result; 2416 if (ChunkExpr->EvaluateAsInt(Result, getContext())) { 2417 llvm::APSInt EvaluatedChunk = Result.Val.getInt(); 2418 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); 2419 } 2420 } 2421 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 2422 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 2423 // OpenMP 4.5, 2.7.1 Loop Construct, Description. 2424 // If the static schedule kind is specified or if the ordered clause is 2425 // specified, and if no monotonic modifier is specified, the effect will 2426 // be as if the monotonic modifier was specified. 2427 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 2428 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 2429 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 2430 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 2431 /* Chunked */ Chunk != nullptr) || 2432 StaticChunkedOne) && 2433 !Ordered) { 2434 if (isOpenMPSimdDirective(S.getDirectiveKind())) 2435 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 2436 // OpenMP [2.7.1, Loop Construct, Description, table 2-1] 2437 // When no chunk_size is specified, the iteration space is divided into 2438 // chunks that are approximately equal in size, and at most one chunk is 2439 // distributed to each thread. Note that the size of the chunks is 2440 // unspecified in this case. 2441 CGOpenMPRuntime::StaticRTInput StaticInit( 2442 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), 2443 UB.getAddress(), ST.getAddress(), 2444 StaticChunkedOne ? Chunk : nullptr); 2445 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), 2446 ScheduleKind, StaticInit); 2447 JumpDest LoopExit = 2448 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 2449 // UB = min(UB, GlobalUB); 2450 if (!StaticChunkedOne) 2451 EmitIgnoredExpr(S.getEnsureUpperBound()); 2452 // IV = LB; 2453 EmitIgnoredExpr(S.getInit()); 2454 // For unchunked static schedule generate: 2455 // 2456 // while (idx <= UB) { 2457 // BODY; 2458 // ++idx; 2459 // } 2460 // 2461 // For static schedule with chunk one: 2462 // 2463 // while (IV <= PrevUB) { 2464 // BODY; 2465 // IV += ST; 2466 // } 2467 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), 2468 StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(), 2469 StaticChunkedOne ? S.getDistInc() : S.getInc(), 2470 [&S, LoopExit](CodeGenFunction &CGF) { 2471 CGF.EmitOMPLoopBody(S, LoopExit); 2472 CGF.EmitStopPoint(&S); 2473 }, 2474 [](CodeGenFunction &) {}); 2475 EmitBlock(LoopExit.getBlock()); 2476 // Tell the runtime we are done. 2477 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2478 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2479 S.getDirectiveKind()); 2480 }; 2481 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 2482 } else { 2483 const bool IsMonotonic = 2484 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || 2485 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || 2486 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 2487 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 2488 // Emit the outer loop, which requests its work chunk [LB..UB] from 2489 // runtime and runs the inner loop to process it. 2490 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), 2491 ST.getAddress(), IL.getAddress(), 2492 Chunk, EUB); 2493 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, 2494 LoopArguments, CGDispatchBounds); 2495 } 2496 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 2497 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 2498 return CGF.Builder.CreateIsNotNull( 2499 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2500 }); 2501 } 2502 EmitOMPReductionClauseFinal( 2503 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) 2504 ? /*Parallel and Simd*/ OMPD_parallel_for_simd 2505 : /*Parallel only*/ OMPD_parallel); 2506 // Emit post-update of the reduction variables if IsLastIter != 0. 2507 emitPostUpdateForReductionClause( 2508 *this, S, [IL, &S](CodeGenFunction &CGF) { 2509 return CGF.Builder.CreateIsNotNull( 2510 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2511 }); 2512 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2513 if (HasLastprivateClause) 2514 EmitOMPLastprivateClauseFinal( 2515 S, isOpenMPSimdDirective(S.getDirectiveKind()), 2516 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 2517 } 2518 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { 2519 return CGF.Builder.CreateIsNotNull( 2520 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2521 }); 2522 DoacrossCleanupScope.ForceCleanup(); 2523 // We're now done with the loop, so jump to the continuation block. 2524 if (ContBlock) { 2525 EmitBranch(ContBlock); 2526 EmitBlock(ContBlock, /*IsFinished=*/true); 2527 } 2528 } 2529 return HasLastprivateClause; 2530 } 2531 2532 /// The following two functions generate expressions for the loop lower 2533 /// and upper bounds in case of static and dynamic (dispatch) schedule 2534 /// of the associated 'for' or 'distribute' loop. 2535 static std::pair<LValue, LValue> 2536 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 2537 const auto &LS = cast<OMPLoopDirective>(S); 2538 LValue LB = 2539 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); 2540 LValue UB = 2541 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); 2542 return {LB, UB}; 2543 } 2544 2545 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not 2546 /// consider the lower and upper bound expressions generated by the 2547 /// worksharing loop support, but we use 0 and the iteration space size as 2548 /// constants 2549 static std::pair<llvm::Value *, llvm::Value *> 2550 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, 2551 Address LB, Address UB) { 2552 const auto &LS = cast<OMPLoopDirective>(S); 2553 const Expr *IVExpr = LS.getIterationVariable(); 2554 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); 2555 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 2556 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 2557 return {LBVal, UBVal}; 2558 } 2559 2560 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 2561 bool HasLastprivates = false; 2562 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2563 PrePostActionTy &) { 2564 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 2565 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2566 emitForLoopBounds, 2567 emitDispatchForLoopBounds); 2568 }; 2569 { 2570 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2571 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 2572 S.hasCancel()); 2573 } 2574 2575 // Emit an implicit barrier at the end. 2576 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 2577 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 2578 } 2579 2580 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 2581 bool HasLastprivates = false; 2582 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 2583 PrePostActionTy &) { 2584 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 2585 emitForLoopBounds, 2586 emitDispatchForLoopBounds); 2587 }; 2588 { 2589 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2590 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2591 } 2592 2593 // Emit an implicit barrier at the end. 2594 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 2595 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 2596 } 2597 2598 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, 2599 const Twine &Name, 2600 llvm::Value *Init = nullptr) { 2601 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); 2602 if (Init) 2603 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); 2604 return LVal; 2605 } 2606 2607 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { 2608 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); 2609 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); 2610 bool HasLastprivates = false; 2611 auto &&CodeGen = [&S, CapturedStmt, CS, 2612 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { 2613 ASTContext &C = CGF.getContext(); 2614 QualType KmpInt32Ty = 2615 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2616 // Emit helper vars inits. 2617 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.", 2618 CGF.Builder.getInt32(0)); 2619 llvm::ConstantInt *GlobalUBVal = CS != nullptr 2620 ? CGF.Builder.getInt32(CS->size() - 1) 2621 : CGF.Builder.getInt32(0); 2622 LValue UB = 2623 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); 2624 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.", 2625 CGF.Builder.getInt32(1)); 2626 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.", 2627 CGF.Builder.getInt32(0)); 2628 // Loop counter. 2629 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); 2630 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2631 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 2632 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 2633 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 2634 // Generate condition for loop. 2635 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, 2636 OK_Ordinary, S.getBeginLoc(), FPOptions()); 2637 // Increment for loop counter. 2638 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 2639 S.getBeginLoc(), true); 2640 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 2641 // Iterate through all sections and emit a switch construct: 2642 // switch (IV) { 2643 // case 0: 2644 // <SectionStmt[0]>; 2645 // break; 2646 // ... 2647 // case <NumSection> - 1: 2648 // <SectionStmt[<NumSection> - 1]>; 2649 // break; 2650 // } 2651 // .omp.sections.exit: 2652 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); 2653 llvm::SwitchInst *SwitchStmt = 2654 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), 2655 ExitBB, CS == nullptr ? 1 : CS->size()); 2656 if (CS) { 2657 unsigned CaseNumber = 0; 2658 for (const Stmt *SubStmt : CS->children()) { 2659 auto CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2660 CGF.EmitBlock(CaseBB); 2661 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB); 2662 CGF.EmitStmt(SubStmt); 2663 CGF.EmitBranch(ExitBB); 2664 ++CaseNumber; 2665 } 2666 } else { 2667 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case"); 2668 CGF.EmitBlock(CaseBB); 2669 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB); 2670 CGF.EmitStmt(CapturedStmt); 2671 CGF.EmitBranch(ExitBB); 2672 } 2673 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 2674 }; 2675 2676 CodeGenFunction::OMPPrivateScope LoopScope(CGF); 2677 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { 2678 // Emit implicit barrier to synchronize threads and avoid data races on 2679 // initialization of firstprivate variables and post-update of lastprivate 2680 // variables. 2681 CGF.CGM.getOpenMPRuntime().emitBarrierCall( 2682 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 2683 /*ForceSimpleCall=*/true); 2684 } 2685 CGF.EmitOMPPrivateClause(S, LoopScope); 2686 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 2687 CGF.EmitOMPReductionClauseInit(S, LoopScope); 2688 (void)LoopScope.Privatize(); 2689 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 2690 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 2691 2692 // Emit static non-chunked loop. 2693 OpenMPScheduleTy ScheduleKind; 2694 ScheduleKind.Schedule = OMPC_SCHEDULE_static; 2695 CGOpenMPRuntime::StaticRTInput StaticInit( 2696 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), 2697 LB.getAddress(), UB.getAddress(), ST.getAddress()); 2698 CGF.CGM.getOpenMPRuntime().emitForStaticInit( 2699 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); 2700 // UB = min(UB, GlobalUB); 2701 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); 2702 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( 2703 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); 2704 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); 2705 // IV = LB; 2706 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); 2707 // while (idx <= UB) { BODY; ++idx; } 2708 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, 2709 [](CodeGenFunction &) {}); 2710 // Tell the runtime we are done. 2711 auto &&CodeGen = [&S](CodeGenFunction &CGF) { 2712 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 2713 S.getDirectiveKind()); 2714 }; 2715 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); 2716 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 2717 // Emit post-update of the reduction variables if IsLastIter != 0. 2718 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { 2719 return CGF.Builder.CreateIsNotNull( 2720 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 2721 }); 2722 2723 // Emit final copy of the lastprivate variables if IsLastIter != 0. 2724 if (HasLastprivates) 2725 CGF.EmitOMPLastprivateClauseFinal( 2726 S, /*NoFinals=*/false, 2727 CGF.Builder.CreateIsNotNull( 2728 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); 2729 }; 2730 2731 bool HasCancel = false; 2732 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S)) 2733 HasCancel = OSD->hasCancel(); 2734 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) 2735 HasCancel = OPSD->hasCancel(); 2736 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); 2737 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, 2738 HasCancel); 2739 // Emit barrier for lastprivates only if 'sections' directive has 'nowait' 2740 // clause. Otherwise the barrier will be generated by the codegen for the 2741 // directive. 2742 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { 2743 // Emit implicit barrier to synchronize threads and avoid data races on 2744 // initialization of firstprivate variables. 2745 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 2746 OMPD_unknown); 2747 } 2748 } 2749 2750 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 2751 { 2752 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2753 EmitSections(S); 2754 } 2755 // Emit an implicit barrier at the end. 2756 if (!S.getSingleClause<OMPNowaitClause>()) { 2757 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), 2758 OMPD_sections); 2759 } 2760 } 2761 2762 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 2763 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 2764 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2765 }; 2766 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2767 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 2768 S.hasCancel()); 2769 } 2770 2771 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 2772 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 2773 llvm::SmallVector<const Expr *, 8> DestExprs; 2774 llvm::SmallVector<const Expr *, 8> SrcExprs; 2775 llvm::SmallVector<const Expr *, 8> AssignmentOps; 2776 // Check if there are any 'copyprivate' clauses associated with this 2777 // 'single' construct. 2778 // Build a list of copyprivate variables along with helper expressions 2779 // (<source>, <destination>, <destination>=<source> expressions) 2780 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { 2781 CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); 2782 DestExprs.append(C->destination_exprs().begin(), 2783 C->destination_exprs().end()); 2784 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); 2785 AssignmentOps.append(C->assignment_ops().begin(), 2786 C->assignment_ops().end()); 2787 } 2788 // Emit code for 'single' region along with 'copyprivate' clauses 2789 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2790 Action.Enter(CGF); 2791 OMPPrivateScope SingleScope(CGF); 2792 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); 2793 CGF.EmitOMPPrivateClause(S, SingleScope); 2794 (void)SingleScope.Privatize(); 2795 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2796 }; 2797 { 2798 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2799 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), 2800 CopyprivateVars, DestExprs, 2801 SrcExprs, AssignmentOps); 2802 } 2803 // Emit an implicit barrier at the end (to avoid data race on firstprivate 2804 // init or if no 'nowait' clause was specified and no 'copyprivate' clause). 2805 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { 2806 CGM.getOpenMPRuntime().emitBarrierCall( 2807 *this, S.getBeginLoc(), 2808 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); 2809 } 2810 } 2811 2812 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 2813 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2814 Action.Enter(CGF); 2815 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2816 }; 2817 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2818 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 2819 } 2820 2821 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 2822 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2823 Action.Enter(CGF); 2824 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 2825 }; 2826 const Expr *Hint = nullptr; 2827 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 2828 Hint = HintClause->getHint(); 2829 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2830 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 2831 S.getDirectiveName().getAsString(), 2832 CodeGen, S.getBeginLoc(), Hint); 2833 } 2834 2835 void CodeGenFunction::EmitOMPParallelForDirective( 2836 const OMPParallelForDirective &S) { 2837 // Emit directive as a combined directive that consists of two implicit 2838 // directives: 'parallel' with 'for' directive. 2839 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2840 Action.Enter(CGF); 2841 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 2842 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2843 emitDispatchForLoopBounds); 2844 }; 2845 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 2846 emitEmptyBoundParameters); 2847 } 2848 2849 void CodeGenFunction::EmitOMPParallelForSimdDirective( 2850 const OMPParallelForSimdDirective &S) { 2851 // Emit directive as a combined directive that consists of two implicit 2852 // directives: 'parallel' with 'for' directive. 2853 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2854 Action.Enter(CGF); 2855 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 2856 emitDispatchForLoopBounds); 2857 }; 2858 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 2859 emitEmptyBoundParameters); 2860 } 2861 2862 void CodeGenFunction::EmitOMPParallelSectionsDirective( 2863 const OMPParallelSectionsDirective &S) { 2864 // Emit directive as a combined directive that consists of two implicit 2865 // directives: 'parallel' with 'sections' directive. 2866 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2867 Action.Enter(CGF); 2868 CGF.EmitSections(S); 2869 }; 2870 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, 2871 emitEmptyBoundParameters); 2872 } 2873 2874 void CodeGenFunction::EmitOMPTaskBasedDirective( 2875 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 2876 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 2877 OMPTaskDataTy &Data) { 2878 // Emit outlined function for task construct. 2879 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); 2880 auto I = CS->getCapturedDecl()->param_begin(); 2881 auto PartId = std::next(I); 2882 auto TaskT = std::next(I, 4); 2883 // Check if the task is final 2884 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { 2885 // If the condition constant folds and can be elided, try to avoid emitting 2886 // the condition and the dead arm of the if/else. 2887 const Expr *Cond = Clause->getCondition(); 2888 bool CondConstant; 2889 if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) 2890 Data.Final.setInt(CondConstant); 2891 else 2892 Data.Final.setPointer(EvaluateExprAsBool(Cond)); 2893 } else { 2894 // By default the task is not final. 2895 Data.Final.setInt(/*IntVal=*/false); 2896 } 2897 // Check if the task has 'priority' clause. 2898 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { 2899 const Expr *Prio = Clause->getPriority(); 2900 Data.Priority.setInt(/*IntVal=*/true); 2901 Data.Priority.setPointer(EmitScalarConversion( 2902 EmitScalarExpr(Prio), Prio->getType(), 2903 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), 2904 Prio->getExprLoc())); 2905 } 2906 // The first function argument for tasks is a thread id, the second one is a 2907 // part id (0 for tied tasks, >=0 for untied task). 2908 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 2909 // Get list of private variables. 2910 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 2911 auto IRef = C->varlist_begin(); 2912 for (const Expr *IInit : C->private_copies()) { 2913 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2914 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2915 Data.PrivateVars.push_back(*IRef); 2916 Data.PrivateCopies.push_back(IInit); 2917 } 2918 ++IRef; 2919 } 2920 } 2921 EmittedAsPrivate.clear(); 2922 // Get list of firstprivate variables. 2923 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 2924 auto IRef = C->varlist_begin(); 2925 auto IElemInitRef = C->inits().begin(); 2926 for (const Expr *IInit : C->private_copies()) { 2927 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2928 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2929 Data.FirstprivateVars.push_back(*IRef); 2930 Data.FirstprivateCopies.push_back(IInit); 2931 Data.FirstprivateInits.push_back(*IElemInitRef); 2932 } 2933 ++IRef; 2934 ++IElemInitRef; 2935 } 2936 } 2937 // Get list of lastprivate variables (for taskloops). 2938 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 2939 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 2940 auto IRef = C->varlist_begin(); 2941 auto ID = C->destination_exprs().begin(); 2942 for (const Expr *IInit : C->private_copies()) { 2943 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 2944 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 2945 Data.LastprivateVars.push_back(*IRef); 2946 Data.LastprivateCopies.push_back(IInit); 2947 } 2948 LastprivateDstsOrigs.insert( 2949 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 2950 cast<DeclRefExpr>(*IRef)}); 2951 ++IRef; 2952 ++ID; 2953 } 2954 } 2955 SmallVector<const Expr *, 4> LHSs; 2956 SmallVector<const Expr *, 4> RHSs; 2957 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 2958 auto IPriv = C->privates().begin(); 2959 auto IRed = C->reduction_ops().begin(); 2960 auto ILHS = C->lhs_exprs().begin(); 2961 auto IRHS = C->rhs_exprs().begin(); 2962 for (const Expr *Ref : C->varlists()) { 2963 Data.ReductionVars.emplace_back(Ref); 2964 Data.ReductionCopies.emplace_back(*IPriv); 2965 Data.ReductionOps.emplace_back(*IRed); 2966 LHSs.emplace_back(*ILHS); 2967 RHSs.emplace_back(*IRHS); 2968 std::advance(IPriv, 1); 2969 std::advance(IRed, 1); 2970 std::advance(ILHS, 1); 2971 std::advance(IRHS, 1); 2972 } 2973 } 2974 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( 2975 *this, S.getBeginLoc(), LHSs, RHSs, Data); 2976 // Build list of dependences. 2977 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 2978 for (const Expr *IRef : C->varlists()) 2979 Data.Dependences.emplace_back(C->getDependencyKind(), IRef); 2980 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 2981 CapturedRegion](CodeGenFunction &CGF, 2982 PrePostActionTy &Action) { 2983 // Set proper addresses for generated private copies. 2984 OMPPrivateScope Scope(CGF); 2985 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 2986 !Data.LastprivateVars.empty()) { 2987 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( 2988 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); 2989 enum { PrivatesParam = 2, CopyFnParam = 3 }; 2990 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 2991 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 2992 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 2993 CS->getCapturedDecl()->getParam(PrivatesParam))); 2994 // Map privates. 2995 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 2996 llvm::SmallVector<llvm::Value *, 16> CallArgs; 2997 CallArgs.push_back(PrivatesPtr); 2998 for (const Expr *E : Data.PrivateVars) { 2999 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3000 Address PrivatePtr = CGF.CreateMemTemp( 3001 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 3002 PrivatePtrs.emplace_back(VD, PrivatePtr); 3003 CallArgs.push_back(PrivatePtr.getPointer()); 3004 } 3005 for (const Expr *E : Data.FirstprivateVars) { 3006 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3007 Address PrivatePtr = 3008 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3009 ".firstpriv.ptr.addr"); 3010 PrivatePtrs.emplace_back(VD, PrivatePtr); 3011 CallArgs.push_back(PrivatePtr.getPointer()); 3012 } 3013 for (const Expr *E : Data.LastprivateVars) { 3014 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3015 Address PrivatePtr = 3016 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3017 ".lastpriv.ptr.addr"); 3018 PrivatePtrs.emplace_back(VD, PrivatePtr); 3019 CallArgs.push_back(PrivatePtr.getPointer()); 3020 } 3021 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 3022 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 3023 for (const auto &Pair : LastprivateDstsOrigs) { 3024 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 3025 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), 3026 /*RefersToEnclosingVariableOrCapture=*/ 3027 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, 3028 Pair.second->getType(), VK_LValue, 3029 Pair.second->getExprLoc()); 3030 Scope.addPrivate(Pair.first, [&CGF, &DRE]() { 3031 return CGF.EmitLValue(&DRE).getAddress(); 3032 }); 3033 } 3034 for (const auto &Pair : PrivatePtrs) { 3035 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3036 CGF.getContext().getDeclAlign(Pair.first)); 3037 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 3038 } 3039 } 3040 if (Data.Reductions) { 3041 OMPLexicalScope LexScope(CGF, S, CapturedRegion); 3042 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, 3043 Data.ReductionOps); 3044 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( 3045 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); 3046 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { 3047 RedCG.emitSharedLValue(CGF, Cnt); 3048 RedCG.emitAggregateType(CGF, Cnt); 3049 // FIXME: This must removed once the runtime library is fixed. 3050 // Emit required threadprivate variables for 3051 // initializer/combiner/finalizer. 3052 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 3053 RedCG, Cnt); 3054 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 3055 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 3056 Replacement = 3057 Address(CGF.EmitScalarConversion( 3058 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 3059 CGF.getContext().getPointerType( 3060 Data.ReductionCopies[Cnt]->getType()), 3061 Data.ReductionCopies[Cnt]->getExprLoc()), 3062 Replacement.getAlignment()); 3063 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 3064 Scope.addPrivate(RedCG.getBaseDecl(Cnt), 3065 [Replacement]() { return Replacement; }); 3066 } 3067 } 3068 // Privatize all private variables except for in_reduction items. 3069 (void)Scope.Privatize(); 3070 SmallVector<const Expr *, 4> InRedVars; 3071 SmallVector<const Expr *, 4> InRedPrivs; 3072 SmallVector<const Expr *, 4> InRedOps; 3073 SmallVector<const Expr *, 4> TaskgroupDescriptors; 3074 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { 3075 auto IPriv = C->privates().begin(); 3076 auto IRed = C->reduction_ops().begin(); 3077 auto ITD = C->taskgroup_descriptors().begin(); 3078 for (const Expr *Ref : C->varlists()) { 3079 InRedVars.emplace_back(Ref); 3080 InRedPrivs.emplace_back(*IPriv); 3081 InRedOps.emplace_back(*IRed); 3082 TaskgroupDescriptors.emplace_back(*ITD); 3083 std::advance(IPriv, 1); 3084 std::advance(IRed, 1); 3085 std::advance(ITD, 1); 3086 } 3087 } 3088 // Privatize in_reduction items here, because taskgroup descriptors must be 3089 // privatized earlier. 3090 OMPPrivateScope InRedScope(CGF); 3091 if (!InRedVars.empty()) { 3092 ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); 3093 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { 3094 RedCG.emitSharedLValue(CGF, Cnt); 3095 RedCG.emitAggregateType(CGF, Cnt); 3096 // The taskgroup descriptor variable is always implicit firstprivate and 3097 // privatized already during processing of the firstprivates. 3098 // FIXME: This must removed once the runtime library is fixed. 3099 // Emit required threadprivate variables for 3100 // initializer/combiner/finalizer. 3101 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), 3102 RedCG, Cnt); 3103 llvm::Value *ReductionsPtr = 3104 CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), 3105 TaskgroupDescriptors[Cnt]->getExprLoc()); 3106 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( 3107 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); 3108 Replacement = Address( 3109 CGF.EmitScalarConversion( 3110 Replacement.getPointer(), CGF.getContext().VoidPtrTy, 3111 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), 3112 InRedPrivs[Cnt]->getExprLoc()), 3113 Replacement.getAlignment()); 3114 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); 3115 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), 3116 [Replacement]() { return Replacement; }); 3117 } 3118 } 3119 (void)InRedScope.Privatize(); 3120 3121 Action.Enter(CGF); 3122 BodyGen(CGF); 3123 }; 3124 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3125 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 3126 Data.NumberOfParts); 3127 OMPLexicalScope Scope(*this, S, llvm::None, 3128 !isOpenMPParallelDirective(S.getDirectiveKind())); 3129 TaskGen(*this, OutlinedFn, Data); 3130 } 3131 3132 static ImplicitParamDecl * 3133 createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, 3134 QualType Ty, CapturedDecl *CD, 3135 SourceLocation Loc) { 3136 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 3137 ImplicitParamDecl::Other); 3138 auto *OrigRef = DeclRefExpr::Create( 3139 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, 3140 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 3141 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, 3142 ImplicitParamDecl::Other); 3143 auto *PrivateRef = DeclRefExpr::Create( 3144 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, 3145 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); 3146 QualType ElemType = C.getBaseElementType(Ty); 3147 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, 3148 ImplicitParamDecl::Other); 3149 auto *InitRef = DeclRefExpr::Create( 3150 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 3151 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 3152 PrivateVD->setInitStyle(VarDecl::CInit); 3153 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 3154 InitRef, /*BasePath=*/nullptr, 3155 VK_RValue)); 3156 Data.FirstprivateVars.emplace_back(OrigRef); 3157 Data.FirstprivateCopies.emplace_back(PrivateRef); 3158 Data.FirstprivateInits.emplace_back(InitRef); 3159 return OrigVD; 3160 } 3161 3162 void CodeGenFunction::EmitOMPTargetTaskBasedDirective( 3163 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, 3164 OMPTargetDataInfo &InputInfo) { 3165 // Emit outlined function for task construct. 3166 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3167 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 3168 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3169 auto I = CS->getCapturedDecl()->param_begin(); 3170 auto PartId = std::next(I); 3171 auto TaskT = std::next(I, 4); 3172 OMPTaskDataTy Data; 3173 // The task is not final. 3174 Data.Final.setInt(/*IntVal=*/false); 3175 // Get list of firstprivate variables. 3176 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 3177 auto IRef = C->varlist_begin(); 3178 auto IElemInitRef = C->inits().begin(); 3179 for (auto *IInit : C->private_copies()) { 3180 Data.FirstprivateVars.push_back(*IRef); 3181 Data.FirstprivateCopies.push_back(IInit); 3182 Data.FirstprivateInits.push_back(*IElemInitRef); 3183 ++IRef; 3184 ++IElemInitRef; 3185 } 3186 } 3187 OMPPrivateScope TargetScope(*this); 3188 VarDecl *BPVD = nullptr; 3189 VarDecl *PVD = nullptr; 3190 VarDecl *SVD = nullptr; 3191 if (InputInfo.NumberOfTargetItems > 0) { 3192 auto *CD = CapturedDecl::Create( 3193 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 3194 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 3195 QualType BaseAndPointersType = getContext().getConstantArrayType( 3196 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, 3197 /*IndexTypeQuals=*/0); 3198 BPVD = createImplicitFirstprivateForType( 3199 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 3200 PVD = createImplicitFirstprivateForType( 3201 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 3202 QualType SizesType = getContext().getConstantArrayType( 3203 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 3204 ArrSize, nullptr, ArrayType::Normal, 3205 /*IndexTypeQuals=*/0); 3206 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 3207 S.getBeginLoc()); 3208 TargetScope.addPrivate( 3209 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); 3210 TargetScope.addPrivate(PVD, 3211 [&InputInfo]() { return InputInfo.PointersArray; }); 3212 TargetScope.addPrivate(SVD, 3213 [&InputInfo]() { return InputInfo.SizesArray; }); 3214 } 3215 (void)TargetScope.Privatize(); 3216 // Build list of dependences. 3217 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) 3218 for (const Expr *IRef : C->varlists()) 3219 Data.Dependences.emplace_back(C->getDependencyKind(), IRef); 3220 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, 3221 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 3222 // Set proper addresses for generated private copies. 3223 OMPPrivateScope Scope(CGF); 3224 if (!Data.FirstprivateVars.empty()) { 3225 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( 3226 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); 3227 enum { PrivatesParam = 2, CopyFnParam = 3 }; 3228 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 3229 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 3230 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 3231 CS->getCapturedDecl()->getParam(PrivatesParam))); 3232 // Map privates. 3233 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 3234 llvm::SmallVector<llvm::Value *, 16> CallArgs; 3235 CallArgs.push_back(PrivatesPtr); 3236 for (const Expr *E : Data.FirstprivateVars) { 3237 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3238 Address PrivatePtr = 3239 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 3240 ".firstpriv.ptr.addr"); 3241 PrivatePtrs.emplace_back(VD, PrivatePtr); 3242 CallArgs.push_back(PrivatePtr.getPointer()); 3243 } 3244 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 3245 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 3246 for (const auto &Pair : PrivatePtrs) { 3247 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 3248 CGF.getContext().getDeclAlign(Pair.first)); 3249 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 3250 } 3251 } 3252 // Privatize all private variables except for in_reduction items. 3253 (void)Scope.Privatize(); 3254 if (InputInfo.NumberOfTargetItems > 0) { 3255 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( 3256 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 3257 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 3258 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 3259 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 3260 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 3261 } 3262 3263 Action.Enter(CGF); 3264 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 3265 BodyGen(CGF); 3266 }; 3267 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 3268 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, 3269 Data.NumberOfParts); 3270 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); 3271 IntegerLiteral IfCond(getContext(), TrueOrFalse, 3272 getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3273 SourceLocation()); 3274 3275 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, 3276 SharedsTy, CapturedStruct, &IfCond, Data); 3277 } 3278 3279 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 3280 // Emit outlined function for task construct. 3281 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); 3282 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 3283 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 3284 const Expr *IfCond = nullptr; 3285 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 3286 if (C->getNameModifier() == OMPD_unknown || 3287 C->getNameModifier() == OMPD_task) { 3288 IfCond = C->getCondition(); 3289 break; 3290 } 3291 } 3292 3293 OMPTaskDataTy Data; 3294 // Check if we should emit tied or untied task. 3295 Data.Tied = !S.getSingleClause<OMPUntiedClause>(); 3296 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { 3297 CGF.EmitStmt(CS->getCapturedStmt()); 3298 }; 3299 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 3300 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 3301 const OMPTaskDataTy &Data) { 3302 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, 3303 SharedsTy, CapturedStruct, IfCond, 3304 Data); 3305 }; 3306 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); 3307 } 3308 3309 void CodeGenFunction::EmitOMPTaskyieldDirective( 3310 const OMPTaskyieldDirective &S) { 3311 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); 3312 } 3313 3314 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { 3315 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); 3316 } 3317 3318 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { 3319 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); 3320 } 3321 3322 void CodeGenFunction::EmitOMPTaskgroupDirective( 3323 const OMPTaskgroupDirective &S) { 3324 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3325 Action.Enter(CGF); 3326 if (const Expr *E = S.getReductionRef()) { 3327 SmallVector<const Expr *, 4> LHSs; 3328 SmallVector<const Expr *, 4> RHSs; 3329 OMPTaskDataTy Data; 3330 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { 3331 auto IPriv = C->privates().begin(); 3332 auto IRed = C->reduction_ops().begin(); 3333 auto ILHS = C->lhs_exprs().begin(); 3334 auto IRHS = C->rhs_exprs().begin(); 3335 for (const Expr *Ref : C->varlists()) { 3336 Data.ReductionVars.emplace_back(Ref); 3337 Data.ReductionCopies.emplace_back(*IPriv); 3338 Data.ReductionOps.emplace_back(*IRed); 3339 LHSs.emplace_back(*ILHS); 3340 RHSs.emplace_back(*IRHS); 3341 std::advance(IPriv, 1); 3342 std::advance(IRed, 1); 3343 std::advance(ILHS, 1); 3344 std::advance(IRHS, 1); 3345 } 3346 } 3347 llvm::Value *ReductionDesc = 3348 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), 3349 LHSs, RHSs, Data); 3350 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3351 CGF.EmitVarDecl(*VD); 3352 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), 3353 /*Volatile=*/false, E->getType()); 3354 } 3355 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3356 }; 3357 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3358 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); 3359 } 3360 3361 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { 3362 CGM.getOpenMPRuntime().emitFlush( 3363 *this, 3364 [&S]() -> ArrayRef<const Expr *> { 3365 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) 3366 return llvm::makeArrayRef(FlushClause->varlist_begin(), 3367 FlushClause->varlist_end()); 3368 return llvm::None; 3369 }(), 3370 S.getBeginLoc()); 3371 } 3372 3373 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 3374 const CodeGenLoopTy &CodeGenLoop, 3375 Expr *IncExpr) { 3376 // Emit the loop iteration variable. 3377 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); 3378 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); 3379 EmitVarDecl(*IVDecl); 3380 3381 // Emit the iterations count variable. 3382 // If it is not a variable, Sema decided to calculate iterations count on each 3383 // iteration (e.g., it is foldable into a constant). 3384 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 3385 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 3386 // Emit calculation of the iterations count. 3387 EmitIgnoredExpr(S.getCalcLastIteration()); 3388 } 3389 3390 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); 3391 3392 bool HasLastprivateClause = false; 3393 // Check pre-condition. 3394 { 3395 OMPLoopScope PreInitScope(*this, S); 3396 // Skip the entire loop if we don't meet the precondition. 3397 // If the condition constant folds and can be elided, avoid emitting the 3398 // whole loop. 3399 bool CondConstant; 3400 llvm::BasicBlock *ContBlock = nullptr; 3401 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 3402 if (!CondConstant) 3403 return; 3404 } else { 3405 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then"); 3406 ContBlock = createBasicBlock("omp.precond.end"); 3407 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, 3408 getProfileCount(&S)); 3409 EmitBlock(ThenBlock); 3410 incrementProfileCounter(&S); 3411 } 3412 3413 emitAlignedClause(*this, S); 3414 // Emit 'then' code. 3415 { 3416 // Emit helper vars inits. 3417 3418 LValue LB = EmitOMPHelperVar( 3419 *this, cast<DeclRefExpr>( 3420 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3421 ? S.getCombinedLowerBoundVariable() 3422 : S.getLowerBoundVariable()))); 3423 LValue UB = EmitOMPHelperVar( 3424 *this, cast<DeclRefExpr>( 3425 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3426 ? S.getCombinedUpperBoundVariable() 3427 : S.getUpperBoundVariable()))); 3428 LValue ST = 3429 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); 3430 LValue IL = 3431 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); 3432 3433 OMPPrivateScope LoopScope(*this); 3434 if (EmitOMPFirstprivateClause(S, LoopScope)) { 3435 // Emit implicit barrier to synchronize threads and avoid data races 3436 // on initialization of firstprivate variables and post-update of 3437 // lastprivate variables. 3438 CGM.getOpenMPRuntime().emitBarrierCall( 3439 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, 3440 /*ForceSimpleCall=*/true); 3441 } 3442 EmitOMPPrivateClause(S, LoopScope); 3443 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 3444 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3445 !isOpenMPTeamsDirective(S.getDirectiveKind())) 3446 EmitOMPReductionClauseInit(S, LoopScope); 3447 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); 3448 EmitOMPPrivateLoopCounters(S, LoopScope); 3449 (void)LoopScope.Privatize(); 3450 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 3451 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); 3452 3453 // Detect the distribute schedule kind and chunk. 3454 llvm::Value *Chunk = nullptr; 3455 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; 3456 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { 3457 ScheduleKind = C->getDistScheduleKind(); 3458 if (const Expr *Ch = C->getChunkSize()) { 3459 Chunk = EmitScalarExpr(Ch); 3460 Chunk = EmitScalarConversion(Chunk, Ch->getType(), 3461 S.getIterationVariable()->getType(), 3462 S.getBeginLoc()); 3463 } 3464 } else { 3465 // Default behaviour for dist_schedule clause. 3466 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( 3467 *this, S, ScheduleKind, Chunk); 3468 } 3469 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); 3470 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); 3471 3472 // OpenMP [2.10.8, distribute Construct, Description] 3473 // If dist_schedule is specified, kind must be static. If specified, 3474 // iterations are divided into chunks of size chunk_size, chunks are 3475 // assigned to the teams of the league in a round-robin fashion in the 3476 // order of the team number. When no chunk_size is specified, the 3477 // iteration space is divided into chunks that are approximately equal 3478 // in size, and at most one chunk is distributed to each team of the 3479 // league. The size of the chunks is unspecified in this case. 3480 bool StaticChunked = RT.isStaticChunked( 3481 ScheduleKind, /* Chunked */ Chunk != nullptr) && 3482 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3483 if (RT.isStaticNonchunked(ScheduleKind, 3484 /* Chunked */ Chunk != nullptr) || 3485 StaticChunked) { 3486 if (isOpenMPSimdDirective(S.getDirectiveKind())) 3487 EmitOMPSimdInit(S, /*IsMonotonic=*/true); 3488 CGOpenMPRuntime::StaticRTInput StaticInit( 3489 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), 3490 LB.getAddress(), UB.getAddress(), ST.getAddress(), 3491 StaticChunked ? Chunk : nullptr); 3492 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, 3493 StaticInit); 3494 JumpDest LoopExit = 3495 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3496 // UB = min(UB, GlobalUB); 3497 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3498 ? S.getCombinedEnsureUpperBound() 3499 : S.getEnsureUpperBound()); 3500 // IV = LB; 3501 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3502 ? S.getCombinedInit() 3503 : S.getInit()); 3504 3505 const Expr *Cond = 3506 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) 3507 ? S.getCombinedCond() 3508 : S.getCond(); 3509 3510 if (StaticChunked) 3511 Cond = S.getCombinedDistCond(); 3512 3513 // For static unchunked schedules generate: 3514 // 3515 // 1. For distribute alone, codegen 3516 // while (idx <= UB) { 3517 // BODY; 3518 // ++idx; 3519 // } 3520 // 3521 // 2. When combined with 'for' (e.g. as in 'distribute parallel for') 3522 // while (idx <= UB) { 3523 // <CodeGen rest of pragma>(LB, UB); 3524 // idx += ST; 3525 // } 3526 // 3527 // For static chunk one schedule generate: 3528 // 3529 // while (IV <= GlobalUB) { 3530 // <CodeGen rest of pragma>(LB, UB); 3531 // LB += ST; 3532 // UB += ST; 3533 // UB = min(UB, GlobalUB); 3534 // IV = LB; 3535 // } 3536 // 3537 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, 3538 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { 3539 CodeGenLoop(CGF, S, LoopExit); 3540 }, 3541 [&S, StaticChunked](CodeGenFunction &CGF) { 3542 if (StaticChunked) { 3543 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); 3544 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); 3545 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); 3546 CGF.EmitIgnoredExpr(S.getCombinedInit()); 3547 } 3548 }); 3549 EmitBlock(LoopExit.getBlock()); 3550 // Tell the runtime we are done. 3551 RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); 3552 } else { 3553 // Emit the outer loop, which requests its work chunk [LB..UB] from 3554 // runtime and runs the inner loop to process it. 3555 const OMPLoopArguments LoopArguments = { 3556 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), 3557 Chunk}; 3558 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, 3559 CodeGenLoop); 3560 } 3561 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3562 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { 3563 return CGF.Builder.CreateIsNotNull( 3564 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3565 }); 3566 } 3567 if (isOpenMPSimdDirective(S.getDirectiveKind()) && 3568 !isOpenMPParallelDirective(S.getDirectiveKind()) && 3569 !isOpenMPTeamsDirective(S.getDirectiveKind())) { 3570 EmitOMPReductionClauseFinal(S, OMPD_simd); 3571 // Emit post-update of the reduction variables if IsLastIter != 0. 3572 emitPostUpdateForReductionClause( 3573 *this, S, [IL, &S](CodeGenFunction &CGF) { 3574 return CGF.Builder.CreateIsNotNull( 3575 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); 3576 }); 3577 } 3578 // Emit final copy of the lastprivate variables if IsLastIter != 0. 3579 if (HasLastprivateClause) { 3580 EmitOMPLastprivateClauseFinal( 3581 S, /*NoFinals=*/false, 3582 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); 3583 } 3584 } 3585 3586 // We're now done with the loop, so jump to the continuation block. 3587 if (ContBlock) { 3588 EmitBranch(ContBlock); 3589 EmitBlock(ContBlock, true); 3590 } 3591 } 3592 } 3593 3594 void CodeGenFunction::EmitOMPDistributeDirective( 3595 const OMPDistributeDirective &S) { 3596 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3597 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 3598 }; 3599 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3600 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); 3601 } 3602 3603 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, 3604 const CapturedStmt *S) { 3605 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); 3606 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; 3607 CGF.CapturedStmtInfo = &CapStmtInfo; 3608 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); 3609 Fn->setDoesNotRecurse(); 3610 return Fn; 3611 } 3612 3613 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 3614 if (S.hasClausesOfKind<OMPDependClause>()) { 3615 assert(!S.getAssociatedStmt() && 3616 "No associated statement must be in ordered depend construct."); 3617 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 3618 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 3619 return; 3620 } 3621 const auto *C = S.getSingleClause<OMPSIMDClause>(); 3622 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, 3623 PrePostActionTy &Action) { 3624 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 3625 if (C) { 3626 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 3627 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 3628 llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); 3629 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), 3630 OutlinedFn, CapturedVars); 3631 } else { 3632 Action.Enter(CGF); 3633 CGF.EmitStmt(CS->getCapturedStmt()); 3634 } 3635 }; 3636 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3637 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C); 3638 } 3639 3640 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, 3641 QualType SrcType, QualType DestType, 3642 SourceLocation Loc) { 3643 assert(CGF.hasScalarEvaluationKind(DestType) && 3644 "DestType must have scalar evaluation kind."); 3645 assert(!Val.isAggregate() && "Must be a scalar or complex."); 3646 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, 3647 DestType, Loc) 3648 : CGF.EmitComplexToScalarConversion( 3649 Val.getComplexVal(), SrcType, DestType, Loc); 3650 } 3651 3652 static CodeGenFunction::ComplexPairTy 3653 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, 3654 QualType DestType, SourceLocation Loc) { 3655 assert(CGF.getEvaluationKind(DestType) == TEK_Complex && 3656 "DestType must have complex evaluation kind."); 3657 CodeGenFunction::ComplexPairTy ComplexVal; 3658 if (Val.isScalar()) { 3659 // Convert the input element to the element type of the complex. 3660 QualType DestElementType = 3661 DestType->castAs<ComplexType>()->getElementType(); 3662 llvm::Value *ScalarVal = CGF.EmitScalarConversion( 3663 Val.getScalarVal(), SrcType, DestElementType, Loc); 3664 ComplexVal = CodeGenFunction::ComplexPairTy( 3665 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); 3666 } else { 3667 assert(Val.isComplex() && "Must be a scalar or complex."); 3668 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); 3669 QualType DestElementType = 3670 DestType->castAs<ComplexType>()->getElementType(); 3671 ComplexVal.first = CGF.EmitScalarConversion( 3672 Val.getComplexVal().first, SrcElementType, DestElementType, Loc); 3673 ComplexVal.second = CGF.EmitScalarConversion( 3674 Val.getComplexVal().second, SrcElementType, DestElementType, Loc); 3675 } 3676 return ComplexVal; 3677 } 3678 3679 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, 3680 LValue LVal, RValue RVal) { 3681 if (LVal.isGlobalReg()) { 3682 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); 3683 } else { 3684 CGF.EmitAtomicStore(RVal, LVal, 3685 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3686 : llvm::AtomicOrdering::Monotonic, 3687 LVal.isVolatile(), /*isInit=*/false); 3688 } 3689 } 3690 3691 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, 3692 QualType RValTy, SourceLocation Loc) { 3693 switch (getEvaluationKind(LVal.getType())) { 3694 case TEK_Scalar: 3695 EmitStoreThroughLValue(RValue::get(convertToScalarValue( 3696 *this, RVal, RValTy, LVal.getType(), Loc)), 3697 LVal); 3698 break; 3699 case TEK_Complex: 3700 EmitStoreOfComplex( 3701 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, 3702 /*isInit=*/false); 3703 break; 3704 case TEK_Aggregate: 3705 llvm_unreachable("Must be a scalar or complex."); 3706 } 3707 } 3708 3709 static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, 3710 const Expr *X, const Expr *V, 3711 SourceLocation Loc) { 3712 // v = x; 3713 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); 3714 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); 3715 LValue XLValue = CGF.EmitLValue(X); 3716 LValue VLValue = CGF.EmitLValue(V); 3717 RValue Res = XLValue.isGlobalReg() 3718 ? CGF.EmitLoadOfLValue(XLValue, Loc) 3719 : CGF.EmitAtomicLoad( 3720 XLValue, Loc, 3721 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent 3722 : llvm::AtomicOrdering::Monotonic, 3723 XLValue.isVolatile()); 3724 // OpenMP, 2.12.6, atomic Construct 3725 // Any atomic construct with a seq_cst clause forces the atomically 3726 // performed operation to include an implicit flush operation without a 3727 // list. 3728 if (IsSeqCst) 3729 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3730 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); 3731 } 3732 3733 static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, 3734 const Expr *X, const Expr *E, 3735 SourceLocation Loc) { 3736 // x = expr; 3737 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); 3738 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); 3739 // OpenMP, 2.12.6, atomic Construct 3740 // Any atomic construct with a seq_cst clause forces the atomically 3741 // performed operation to include an implicit flush operation without a 3742 // list. 3743 if (IsSeqCst) 3744 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3745 } 3746 3747 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, 3748 RValue Update, 3749 BinaryOperatorKind BO, 3750 llvm::AtomicOrdering AO, 3751 bool IsXLHSInRHSPart) { 3752 ASTContext &Context = CGF.getContext(); 3753 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' 3754 // expression is simple and atomic is allowed for the given type for the 3755 // target platform. 3756 if (BO == BO_Comma || !Update.isScalar() || 3757 !Update.getScalarVal()->getType()->isIntegerTy() || 3758 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) && 3759 (Update.getScalarVal()->getType() != 3760 X.getAddress().getElementType())) || 3761 !X.getAddress().getElementType()->isIntegerTy() || 3762 !Context.getTargetInfo().hasBuiltinAtomic( 3763 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) 3764 return std::make_pair(false, RValue::get(nullptr)); 3765 3766 llvm::AtomicRMWInst::BinOp RMWOp; 3767 switch (BO) { 3768 case BO_Add: 3769 RMWOp = llvm::AtomicRMWInst::Add; 3770 break; 3771 case BO_Sub: 3772 if (!IsXLHSInRHSPart) 3773 return std::make_pair(false, RValue::get(nullptr)); 3774 RMWOp = llvm::AtomicRMWInst::Sub; 3775 break; 3776 case BO_And: 3777 RMWOp = llvm::AtomicRMWInst::And; 3778 break; 3779 case BO_Or: 3780 RMWOp = llvm::AtomicRMWInst::Or; 3781 break; 3782 case BO_Xor: 3783 RMWOp = llvm::AtomicRMWInst::Xor; 3784 break; 3785 case BO_LT: 3786 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3787 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min 3788 : llvm::AtomicRMWInst::Max) 3789 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin 3790 : llvm::AtomicRMWInst::UMax); 3791 break; 3792 case BO_GT: 3793 RMWOp = X.getType()->hasSignedIntegerRepresentation() 3794 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max 3795 : llvm::AtomicRMWInst::Min) 3796 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax 3797 : llvm::AtomicRMWInst::UMin); 3798 break; 3799 case BO_Assign: 3800 RMWOp = llvm::AtomicRMWInst::Xchg; 3801 break; 3802 case BO_Mul: 3803 case BO_Div: 3804 case BO_Rem: 3805 case BO_Shl: 3806 case BO_Shr: 3807 case BO_LAnd: 3808 case BO_LOr: 3809 return std::make_pair(false, RValue::get(nullptr)); 3810 case BO_PtrMemD: 3811 case BO_PtrMemI: 3812 case BO_LE: 3813 case BO_GE: 3814 case BO_EQ: 3815 case BO_NE: 3816 case BO_Cmp: 3817 case BO_AddAssign: 3818 case BO_SubAssign: 3819 case BO_AndAssign: 3820 case BO_OrAssign: 3821 case BO_XorAssign: 3822 case BO_MulAssign: 3823 case BO_DivAssign: 3824 case BO_RemAssign: 3825 case BO_ShlAssign: 3826 case BO_ShrAssign: 3827 case BO_Comma: 3828 llvm_unreachable("Unsupported atomic update operation"); 3829 } 3830 llvm::Value *UpdateVal = Update.getScalarVal(); 3831 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { 3832 UpdateVal = CGF.Builder.CreateIntCast( 3833 IC, X.getAddress().getElementType(), 3834 X.getType()->hasSignedIntegerRepresentation()); 3835 } 3836 llvm::Value *Res = 3837 CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO); 3838 return std::make_pair(true, RValue::get(Res)); 3839 } 3840 3841 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( 3842 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, 3843 llvm::AtomicOrdering AO, SourceLocation Loc, 3844 const llvm::function_ref<RValue(RValue)> CommonGen) { 3845 // Update expressions are allowed to have the following forms: 3846 // x binop= expr; -> xrval + expr; 3847 // x++, ++x -> xrval + 1; 3848 // x--, --x -> xrval - 1; 3849 // x = x binop expr; -> xrval binop expr 3850 // x = expr Op x; - > expr binop xrval; 3851 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); 3852 if (!Res.first) { 3853 if (X.isGlobalReg()) { 3854 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop 3855 // 'xrval'. 3856 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); 3857 } else { 3858 // Perform compare-and-swap procedure. 3859 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); 3860 } 3861 } 3862 return Res; 3863 } 3864 3865 static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, 3866 const Expr *X, const Expr *E, 3867 const Expr *UE, bool IsXLHSInRHSPart, 3868 SourceLocation Loc) { 3869 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3870 "Update expr in 'atomic update' must be a binary operator."); 3871 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3872 // Update expressions are allowed to have the following forms: 3873 // x binop= expr; -> xrval + expr; 3874 // x++, ++x -> xrval + 1; 3875 // x--, --x -> xrval - 1; 3876 // x = x binop expr; -> xrval binop expr 3877 // x = expr Op x; - > expr binop xrval; 3878 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); 3879 LValue XLValue = CGF.EmitLValue(X); 3880 RValue ExprRValue = CGF.EmitAnyExpr(E); 3881 llvm::AtomicOrdering AO = IsSeqCst 3882 ? llvm::AtomicOrdering::SequentiallyConsistent 3883 : llvm::AtomicOrdering::Monotonic; 3884 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3885 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3886 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3887 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3888 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) { 3889 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3890 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3891 return CGF.EmitAnyExpr(UE); 3892 }; 3893 (void)CGF.EmitOMPAtomicSimpleUpdateExpr( 3894 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3895 // OpenMP, 2.12.6, atomic Construct 3896 // Any atomic construct with a seq_cst clause forces the atomically 3897 // performed operation to include an implicit flush operation without a 3898 // list. 3899 if (IsSeqCst) 3900 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 3901 } 3902 3903 static RValue convertToType(CodeGenFunction &CGF, RValue Value, 3904 QualType SourceType, QualType ResType, 3905 SourceLocation Loc) { 3906 switch (CGF.getEvaluationKind(ResType)) { 3907 case TEK_Scalar: 3908 return RValue::get( 3909 convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); 3910 case TEK_Complex: { 3911 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); 3912 return RValue::getComplex(Res.first, Res.second); 3913 } 3914 case TEK_Aggregate: 3915 break; 3916 } 3917 llvm_unreachable("Must be a scalar or complex."); 3918 } 3919 3920 static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, 3921 bool IsPostfixUpdate, const Expr *V, 3922 const Expr *X, const Expr *E, 3923 const Expr *UE, bool IsXLHSInRHSPart, 3924 SourceLocation Loc) { 3925 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue"); 3926 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue"); 3927 RValue NewVVal; 3928 LValue VLValue = CGF.EmitLValue(V); 3929 LValue XLValue = CGF.EmitLValue(X); 3930 RValue ExprRValue = CGF.EmitAnyExpr(E); 3931 llvm::AtomicOrdering AO = IsSeqCst 3932 ? llvm::AtomicOrdering::SequentiallyConsistent 3933 : llvm::AtomicOrdering::Monotonic; 3934 QualType NewVValType; 3935 if (UE) { 3936 // 'x' is updated with some additional value. 3937 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && 3938 "Update expr in 'atomic capture' must be a binary operator."); 3939 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); 3940 // Update expressions are allowed to have the following forms: 3941 // x binop= expr; -> xrval + expr; 3942 // x++, ++x -> xrval + 1; 3943 // x--, --x -> xrval - 1; 3944 // x = x binop expr; -> xrval binop expr 3945 // x = expr Op x; - > expr binop xrval; 3946 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); 3947 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); 3948 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; 3949 NewVValType = XRValExpr->getType(); 3950 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; 3951 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, 3952 IsPostfixUpdate](RValue XRValue) { 3953 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3954 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); 3955 RValue Res = CGF.EmitAnyExpr(UE); 3956 NewVVal = IsPostfixUpdate ? XRValue : Res; 3957 return Res; 3958 }; 3959 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3960 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); 3961 if (Res.first) { 3962 // 'atomicrmw' instruction was generated. 3963 if (IsPostfixUpdate) { 3964 // Use old value from 'atomicrmw'. 3965 NewVVal = Res.second; 3966 } else { 3967 // 'atomicrmw' does not provide new value, so evaluate it using old 3968 // value of 'x'. 3969 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); 3970 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); 3971 NewVVal = CGF.EmitAnyExpr(UE); 3972 } 3973 } 3974 } else { 3975 // 'x' is simply rewritten with some 'expr'. 3976 NewVValType = X->getType().getNonReferenceType(); 3977 ExprRValue = convertToType(CGF, ExprRValue, E->getType(), 3978 X->getType().getNonReferenceType(), Loc); 3979 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { 3980 NewVVal = XRValue; 3981 return ExprRValue; 3982 }; 3983 // Try to perform atomicrmw xchg, otherwise simple exchange. 3984 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( 3985 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, 3986 Loc, Gen); 3987 if (Res.first) { 3988 // 'atomicrmw' instruction was generated. 3989 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; 3990 } 3991 } 3992 // Emit post-update store to 'v' of old/new 'x' value. 3993 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); 3994 // OpenMP, 2.12.6, atomic Construct 3995 // Any atomic construct with a seq_cst clause forces the atomically 3996 // performed operation to include an implicit flush operation without a 3997 // list. 3998 if (IsSeqCst) 3999 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); 4000 } 4001 4002 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, 4003 bool IsSeqCst, bool IsPostfixUpdate, 4004 const Expr *X, const Expr *V, const Expr *E, 4005 const Expr *UE, bool IsXLHSInRHSPart, 4006 SourceLocation Loc) { 4007 switch (Kind) { 4008 case OMPC_read: 4009 emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); 4010 break; 4011 case OMPC_write: 4012 emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); 4013 break; 4014 case OMPC_unknown: 4015 case OMPC_update: 4016 emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); 4017 break; 4018 case OMPC_capture: 4019 emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, 4020 IsXLHSInRHSPart, Loc); 4021 break; 4022 case OMPC_if: 4023 case OMPC_final: 4024 case OMPC_num_threads: 4025 case OMPC_private: 4026 case OMPC_firstprivate: 4027 case OMPC_lastprivate: 4028 case OMPC_reduction: 4029 case OMPC_task_reduction: 4030 case OMPC_in_reduction: 4031 case OMPC_safelen: 4032 case OMPC_simdlen: 4033 case OMPC_allocator: 4034 case OMPC_allocate: 4035 case OMPC_collapse: 4036 case OMPC_default: 4037 case OMPC_seq_cst: 4038 case OMPC_shared: 4039 case OMPC_linear: 4040 case OMPC_aligned: 4041 case OMPC_copyin: 4042 case OMPC_copyprivate: 4043 case OMPC_flush: 4044 case OMPC_proc_bind: 4045 case OMPC_schedule: 4046 case OMPC_ordered: 4047 case OMPC_nowait: 4048 case OMPC_untied: 4049 case OMPC_threadprivate: 4050 case OMPC_depend: 4051 case OMPC_mergeable: 4052 case OMPC_device: 4053 case OMPC_threads: 4054 case OMPC_simd: 4055 case OMPC_map: 4056 case OMPC_num_teams: 4057 case OMPC_thread_limit: 4058 case OMPC_priority: 4059 case OMPC_grainsize: 4060 case OMPC_nogroup: 4061 case OMPC_num_tasks: 4062 case OMPC_hint: 4063 case OMPC_dist_schedule: 4064 case OMPC_defaultmap: 4065 case OMPC_uniform: 4066 case OMPC_to: 4067 case OMPC_from: 4068 case OMPC_use_device_ptr: 4069 case OMPC_is_device_ptr: 4070 case OMPC_unified_address: 4071 case OMPC_unified_shared_memory: 4072 case OMPC_reverse_offload: 4073 case OMPC_dynamic_allocators: 4074 case OMPC_atomic_default_mem_order: 4075 case OMPC_device_type: 4076 case OMPC_match: 4077 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 4078 } 4079 } 4080 4081 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 4082 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); 4083 OpenMPClauseKind Kind = OMPC_unknown; 4084 for (const OMPClause *C : S.clauses()) { 4085 // Find first clause (skip seq_cst clause, if it is first). 4086 if (C->getClauseKind() != OMPC_seq_cst) { 4087 Kind = C->getClauseKind(); 4088 break; 4089 } 4090 } 4091 4092 const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); 4093 if (const auto *FE = dyn_cast<FullExpr>(CS)) 4094 enterFullExpression(FE); 4095 // Processing for statements under 'atomic capture'. 4096 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 4097 for (const Stmt *C : Compound->body()) { 4098 if (const auto *FE = dyn_cast<FullExpr>(C)) 4099 enterFullExpression(FE); 4100 } 4101 } 4102 4103 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, 4104 PrePostActionTy &) { 4105 CGF.EmitStopPoint(CS); 4106 emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), 4107 S.getV(), S.getExpr(), S.getUpdateExpr(), 4108 S.isXLHSInRHSPart(), S.getBeginLoc()); 4109 }; 4110 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4111 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); 4112 } 4113 4114 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 4115 const OMPExecutableDirective &S, 4116 const RegionCodeGenTy &CodeGen) { 4117 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); 4118 CodeGenModule &CGM = CGF.CGM; 4119 4120 // On device emit this construct as inlined code. 4121 if (CGM.getLangOpts().OpenMPIsDevice) { 4122 OMPLexicalScope Scope(CGF, S, OMPD_target); 4123 CGM.getOpenMPRuntime().emitInlinedDirective( 4124 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4125 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4126 }); 4127 return; 4128 } 4129 4130 llvm::Function *Fn = nullptr; 4131 llvm::Constant *FnID = nullptr; 4132 4133 const Expr *IfCond = nullptr; 4134 // Check for the at most one if clause associated with the target region. 4135 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4136 if (C->getNameModifier() == OMPD_unknown || 4137 C->getNameModifier() == OMPD_target) { 4138 IfCond = C->getCondition(); 4139 break; 4140 } 4141 } 4142 4143 // Check if we have any device clause associated with the directive. 4144 const Expr *Device = nullptr; 4145 if (auto *C = S.getSingleClause<OMPDeviceClause>()) 4146 Device = C->getDevice(); 4147 4148 // Check if we have an if clause whose conditional always evaluates to false 4149 // or if we do not have any targets specified. If so the target region is not 4150 // an offload entry point. 4151 bool IsOffloadEntry = true; 4152 if (IfCond) { 4153 bool Val; 4154 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) 4155 IsOffloadEntry = false; 4156 } 4157 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4158 IsOffloadEntry = false; 4159 4160 assert(CGF.CurFuncDecl && "No parent declaration for target region!"); 4161 StringRef ParentName; 4162 // In case we have Ctors/Dtors we use the complete type variant to produce 4163 // the mangling of the device outlined kernel. 4164 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) 4165 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); 4166 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) 4167 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); 4168 else 4169 ParentName = 4170 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); 4171 4172 // Emit target region as a standalone region. 4173 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, 4174 IsOffloadEntry, CodeGen); 4175 OMPLexicalScope Scope(CGF, S, OMPD_task); 4176 auto &&SizeEmitter = 4177 [IsOffloadEntry](CodeGenFunction &CGF, 4178 const OMPLoopDirective &D) -> llvm::Value * { 4179 if (IsOffloadEntry) { 4180 OMPLoopScope(CGF, D); 4181 // Emit calculation of the iterations count. 4182 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); 4183 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, 4184 /*isSigned=*/false); 4185 return NumIterations; 4186 } 4187 return nullptr; 4188 }; 4189 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, 4190 SizeEmitter); 4191 } 4192 4193 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, 4194 PrePostActionTy &Action) { 4195 Action.Enter(CGF); 4196 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4197 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4198 CGF.EmitOMPPrivateClause(S, PrivateScope); 4199 (void)PrivateScope.Privatize(); 4200 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4201 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4202 4203 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 4204 } 4205 4206 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 4207 StringRef ParentName, 4208 const OMPTargetDirective &S) { 4209 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4210 emitTargetRegion(CGF, S, Action); 4211 }; 4212 llvm::Function *Fn; 4213 llvm::Constant *Addr; 4214 // Emit target region as a standalone region. 4215 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4216 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4217 assert(Fn && Addr && "Target device function emission failed."); 4218 } 4219 4220 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { 4221 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4222 emitTargetRegion(CGF, S, Action); 4223 }; 4224 emitCommonOMPTargetDirective(*this, S, CodeGen); 4225 } 4226 4227 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, 4228 const OMPExecutableDirective &S, 4229 OpenMPDirectiveKind InnermostKind, 4230 const RegionCodeGenTy &CodeGen) { 4231 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); 4232 llvm::Function *OutlinedFn = 4233 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( 4234 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); 4235 4236 const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); 4237 const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); 4238 if (NT || TL) { 4239 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; 4240 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; 4241 4242 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, 4243 S.getBeginLoc()); 4244 } 4245 4246 OMPTeamsScope Scope(CGF, S); 4247 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 4248 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 4249 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, 4250 CapturedVars); 4251 } 4252 4253 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 4254 // Emit teams region as a standalone region. 4255 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4256 Action.Enter(CGF); 4257 OMPPrivateScope PrivateScope(CGF); 4258 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4259 CGF.EmitOMPPrivateClause(S, PrivateScope); 4260 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4261 (void)PrivateScope.Privatize(); 4262 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); 4263 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4264 }; 4265 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4266 emitPostUpdateForReductionClause(*this, S, 4267 [](CodeGenFunction &) { return nullptr; }); 4268 } 4269 4270 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4271 const OMPTargetTeamsDirective &S) { 4272 auto *CS = S.getCapturedStmt(OMPD_teams); 4273 Action.Enter(CGF); 4274 // Emit teams region as a standalone region. 4275 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 4276 Action.Enter(CGF); 4277 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4278 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4279 CGF.EmitOMPPrivateClause(S, PrivateScope); 4280 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4281 (void)PrivateScope.Privatize(); 4282 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4283 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4284 CGF.EmitStmt(CS->getCapturedStmt()); 4285 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4286 }; 4287 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); 4288 emitPostUpdateForReductionClause(CGF, S, 4289 [](CodeGenFunction &) { return nullptr; }); 4290 } 4291 4292 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 4293 CodeGenModule &CGM, StringRef ParentName, 4294 const OMPTargetTeamsDirective &S) { 4295 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4296 emitTargetTeamsRegion(CGF, Action, S); 4297 }; 4298 llvm::Function *Fn; 4299 llvm::Constant *Addr; 4300 // Emit target region as a standalone region. 4301 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4302 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4303 assert(Fn && Addr && "Target device function emission failed."); 4304 } 4305 4306 void CodeGenFunction::EmitOMPTargetTeamsDirective( 4307 const OMPTargetTeamsDirective &S) { 4308 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4309 emitTargetTeamsRegion(CGF, Action, S); 4310 }; 4311 emitCommonOMPTargetDirective(*this, S, CodeGen); 4312 } 4313 4314 static void 4315 emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, 4316 const OMPTargetTeamsDistributeDirective &S) { 4317 Action.Enter(CGF); 4318 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4319 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4320 }; 4321 4322 // Emit teams region as a standalone region. 4323 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4324 PrePostActionTy &Action) { 4325 Action.Enter(CGF); 4326 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4327 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4328 (void)PrivateScope.Privatize(); 4329 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4330 CodeGenDistribute); 4331 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4332 }; 4333 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); 4334 emitPostUpdateForReductionClause(CGF, S, 4335 [](CodeGenFunction &) { return nullptr; }); 4336 } 4337 4338 void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 4339 CodeGenModule &CGM, StringRef ParentName, 4340 const OMPTargetTeamsDistributeDirective &S) { 4341 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4342 emitTargetTeamsDistributeRegion(CGF, Action, S); 4343 }; 4344 llvm::Function *Fn; 4345 llvm::Constant *Addr; 4346 // Emit target region as a standalone region. 4347 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4348 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4349 assert(Fn && Addr && "Target device function emission failed."); 4350 } 4351 4352 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( 4353 const OMPTargetTeamsDistributeDirective &S) { 4354 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4355 emitTargetTeamsDistributeRegion(CGF, Action, S); 4356 }; 4357 emitCommonOMPTargetDirective(*this, S, CodeGen); 4358 } 4359 4360 static void emitTargetTeamsDistributeSimdRegion( 4361 CodeGenFunction &CGF, PrePostActionTy &Action, 4362 const OMPTargetTeamsDistributeSimdDirective &S) { 4363 Action.Enter(CGF); 4364 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4365 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4366 }; 4367 4368 // Emit teams region as a standalone region. 4369 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4370 PrePostActionTy &Action) { 4371 Action.Enter(CGF); 4372 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4373 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4374 (void)PrivateScope.Privatize(); 4375 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4376 CodeGenDistribute); 4377 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4378 }; 4379 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); 4380 emitPostUpdateForReductionClause(CGF, S, 4381 [](CodeGenFunction &) { return nullptr; }); 4382 } 4383 4384 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 4385 CodeGenModule &CGM, StringRef ParentName, 4386 const OMPTargetTeamsDistributeSimdDirective &S) { 4387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4388 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4389 }; 4390 llvm::Function *Fn; 4391 llvm::Constant *Addr; 4392 // Emit target region as a standalone region. 4393 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4394 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4395 assert(Fn && Addr && "Target device function emission failed."); 4396 } 4397 4398 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( 4399 const OMPTargetTeamsDistributeSimdDirective &S) { 4400 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4401 emitTargetTeamsDistributeSimdRegion(CGF, Action, S); 4402 }; 4403 emitCommonOMPTargetDirective(*this, S, CodeGen); 4404 } 4405 4406 void CodeGenFunction::EmitOMPTeamsDistributeDirective( 4407 const OMPTeamsDistributeDirective &S) { 4408 4409 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4410 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4411 }; 4412 4413 // Emit teams region as a standalone region. 4414 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4415 PrePostActionTy &Action) { 4416 Action.Enter(CGF); 4417 OMPPrivateScope PrivateScope(CGF); 4418 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4419 (void)PrivateScope.Privatize(); 4420 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4421 CodeGenDistribute); 4422 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4423 }; 4424 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); 4425 emitPostUpdateForReductionClause(*this, S, 4426 [](CodeGenFunction &) { return nullptr; }); 4427 } 4428 4429 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( 4430 const OMPTeamsDistributeSimdDirective &S) { 4431 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4432 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); 4433 }; 4434 4435 // Emit teams region as a standalone region. 4436 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4437 PrePostActionTy &Action) { 4438 Action.Enter(CGF); 4439 OMPPrivateScope PrivateScope(CGF); 4440 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4441 (void)PrivateScope.Privatize(); 4442 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, 4443 CodeGenDistribute); 4444 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4445 }; 4446 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); 4447 emitPostUpdateForReductionClause(*this, S, 4448 [](CodeGenFunction &) { return nullptr; }); 4449 } 4450 4451 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( 4452 const OMPTeamsDistributeParallelForDirective &S) { 4453 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4454 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4455 S.getDistInc()); 4456 }; 4457 4458 // Emit teams region as a standalone region. 4459 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4460 PrePostActionTy &Action) { 4461 Action.Enter(CGF); 4462 OMPPrivateScope PrivateScope(CGF); 4463 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4464 (void)PrivateScope.Privatize(); 4465 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, 4466 CodeGenDistribute); 4467 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4468 }; 4469 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 4470 emitPostUpdateForReductionClause(*this, S, 4471 [](CodeGenFunction &) { return nullptr; }); 4472 } 4473 4474 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( 4475 const OMPTeamsDistributeParallelForSimdDirective &S) { 4476 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4477 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4478 S.getDistInc()); 4479 }; 4480 4481 // Emit teams region as a standalone region. 4482 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4483 PrePostActionTy &Action) { 4484 Action.Enter(CGF); 4485 OMPPrivateScope PrivateScope(CGF); 4486 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4487 (void)PrivateScope.Privatize(); 4488 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4489 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4490 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4491 }; 4492 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); 4493 emitPostUpdateForReductionClause(*this, S, 4494 [](CodeGenFunction &) { return nullptr; }); 4495 } 4496 4497 static void emitTargetTeamsDistributeParallelForRegion( 4498 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, 4499 PrePostActionTy &Action) { 4500 Action.Enter(CGF); 4501 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4502 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4503 S.getDistInc()); 4504 }; 4505 4506 // Emit teams region as a standalone region. 4507 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4508 PrePostActionTy &Action) { 4509 Action.Enter(CGF); 4510 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4511 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4512 (void)PrivateScope.Privatize(); 4513 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4514 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4515 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4516 }; 4517 4518 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, 4519 CodeGenTeams); 4520 emitPostUpdateForReductionClause(CGF, S, 4521 [](CodeGenFunction &) { return nullptr; }); 4522 } 4523 4524 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 4525 CodeGenModule &CGM, StringRef ParentName, 4526 const OMPTargetTeamsDistributeParallelForDirective &S) { 4527 // Emit SPMD target teams distribute parallel for region as a standalone 4528 // region. 4529 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4530 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 4531 }; 4532 llvm::Function *Fn; 4533 llvm::Constant *Addr; 4534 // Emit target region as a standalone region. 4535 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4536 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4537 assert(Fn && Addr && "Target device function emission failed."); 4538 } 4539 4540 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( 4541 const OMPTargetTeamsDistributeParallelForDirective &S) { 4542 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4543 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); 4544 }; 4545 emitCommonOMPTargetDirective(*this, S, CodeGen); 4546 } 4547 4548 static void emitTargetTeamsDistributeParallelForSimdRegion( 4549 CodeGenFunction &CGF, 4550 const OMPTargetTeamsDistributeParallelForSimdDirective &S, 4551 PrePostActionTy &Action) { 4552 Action.Enter(CGF); 4553 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4554 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, 4555 S.getDistInc()); 4556 }; 4557 4558 // Emit teams region as a standalone region. 4559 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, 4560 PrePostActionTy &Action) { 4561 Action.Enter(CGF); 4562 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4563 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4564 (void)PrivateScope.Privatize(); 4565 CGF.CGM.getOpenMPRuntime().emitInlinedDirective( 4566 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); 4567 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); 4568 }; 4569 4570 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, 4571 CodeGenTeams); 4572 emitPostUpdateForReductionClause(CGF, S, 4573 [](CodeGenFunction &) { return nullptr; }); 4574 } 4575 4576 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 4577 CodeGenModule &CGM, StringRef ParentName, 4578 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 4579 // Emit SPMD target teams distribute parallel for simd region as a standalone 4580 // region. 4581 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4582 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 4583 }; 4584 llvm::Function *Fn; 4585 llvm::Constant *Addr; 4586 // Emit target region as a standalone region. 4587 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4588 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4589 assert(Fn && Addr && "Target device function emission failed."); 4590 } 4591 4592 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( 4593 const OMPTargetTeamsDistributeParallelForSimdDirective &S) { 4594 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4595 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); 4596 }; 4597 emitCommonOMPTargetDirective(*this, S, CodeGen); 4598 } 4599 4600 void CodeGenFunction::EmitOMPCancellationPointDirective( 4601 const OMPCancellationPointDirective &S) { 4602 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), 4603 S.getCancelRegion()); 4604 } 4605 4606 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { 4607 const Expr *IfCond = nullptr; 4608 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4609 if (C->getNameModifier() == OMPD_unknown || 4610 C->getNameModifier() == OMPD_cancel) { 4611 IfCond = C->getCondition(); 4612 break; 4613 } 4614 } 4615 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, 4616 S.getCancelRegion()); 4617 } 4618 4619 CodeGenFunction::JumpDest 4620 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { 4621 if (Kind == OMPD_parallel || Kind == OMPD_task || 4622 Kind == OMPD_target_parallel) 4623 return ReturnBlock; 4624 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || 4625 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || 4626 Kind == OMPD_distribute_parallel_for || 4627 Kind == OMPD_target_parallel_for || 4628 Kind == OMPD_teams_distribute_parallel_for || 4629 Kind == OMPD_target_teams_distribute_parallel_for); 4630 return OMPCancelStack.getExitBlock(); 4631 } 4632 4633 void CodeGenFunction::EmitOMPUseDevicePtrClause( 4634 const OMPClause &NC, OMPPrivateScope &PrivateScope, 4635 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 4636 const auto &C = cast<OMPUseDevicePtrClause>(NC); 4637 auto OrigVarIt = C.varlist_begin(); 4638 auto InitIt = C.inits().begin(); 4639 for (const Expr *PvtVarIt : C.private_copies()) { 4640 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 4641 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 4642 const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl()); 4643 4644 // In order to identify the right initializer we need to match the 4645 // declaration used by the mapping logic. In some cases we may get 4646 // OMPCapturedExprDecl that refers to the original declaration. 4647 const ValueDecl *MatchingVD = OrigVD; 4648 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { 4649 // OMPCapturedExprDecl are used to privative fields of the current 4650 // structure. 4651 const auto *ME = cast<MemberExpr>(OED->getInit()); 4652 assert(isa<CXXThisExpr>(ME->getBase()) && 4653 "Base should be the current struct!"); 4654 MatchingVD = ME->getMemberDecl(); 4655 } 4656 4657 // If we don't have information about the current list item, move on to 4658 // the next one. 4659 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); 4660 if (InitAddrIt == CaptureDeviceAddrMap.end()) 4661 continue; 4662 4663 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD, 4664 InitAddrIt, InitVD, 4665 PvtVD]() { 4666 // Initialize the temporary initialization variable with the address we 4667 // get from the runtime library. We have to cast the source address 4668 // because it is always a void *. References are materialized in the 4669 // privatization scope, so the initialization here disregards the fact 4670 // the original variable is a reference. 4671 QualType AddrQTy = 4672 getContext().getPointerType(OrigVD->getType().getNonReferenceType()); 4673 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy); 4674 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy); 4675 setAddrOfLocalVar(InitVD, InitAddr); 4676 4677 // Emit private declaration, it will be initialized by the value we 4678 // declaration we just added to the local declarations map. 4679 EmitDecl(*PvtVD); 4680 4681 // The initialization variables reached its purpose in the emission 4682 // of the previous declaration, so we don't need it anymore. 4683 LocalDeclMap.erase(InitVD); 4684 4685 // Return the address of the private variable. 4686 return GetAddrOfLocalVar(PvtVD); 4687 }); 4688 assert(IsRegistered && "firstprivate var already registered as private"); 4689 // Silence the warning about unused variable. 4690 (void)IsRegistered; 4691 4692 ++OrigVarIt; 4693 ++InitIt; 4694 } 4695 } 4696 4697 // Generate the instructions for '#pragma omp target data' directive. 4698 void CodeGenFunction::EmitOMPTargetDataDirective( 4699 const OMPTargetDataDirective &S) { 4700 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 4701 4702 // Create a pre/post action to signal the privatization of the device pointer. 4703 // This action can be replaced by the OpenMP runtime code generation to 4704 // deactivate privatization. 4705 bool PrivatizeDevicePointers = false; 4706 class DevicePointerPrivActionTy : public PrePostActionTy { 4707 bool &PrivatizeDevicePointers; 4708 4709 public: 4710 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) 4711 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {} 4712 void Enter(CodeGenFunction &CGF) override { 4713 PrivatizeDevicePointers = true; 4714 } 4715 }; 4716 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); 4717 4718 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers]( 4719 CodeGenFunction &CGF, PrePostActionTy &Action) { 4720 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 4721 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4722 }; 4723 4724 // Codegen that selects whether to generate the privatization code or not. 4725 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers, 4726 &InnermostCodeGen](CodeGenFunction &CGF, 4727 PrePostActionTy &Action) { 4728 RegionCodeGenTy RCG(InnermostCodeGen); 4729 PrivatizeDevicePointers = false; 4730 4731 // Call the pre-action to change the status of PrivatizeDevicePointers if 4732 // needed. 4733 Action.Enter(CGF); 4734 4735 if (PrivatizeDevicePointers) { 4736 OMPPrivateScope PrivateScope(CGF); 4737 // Emit all instances of the use_device_ptr clause. 4738 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 4739 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 4740 Info.CaptureDeviceAddrMap); 4741 (void)PrivateScope.Privatize(); 4742 RCG(CGF); 4743 } else { 4744 RCG(CGF); 4745 } 4746 }; 4747 4748 // Forward the provided action to the privatization codegen. 4749 RegionCodeGenTy PrivRCG(PrivCodeGen); 4750 PrivRCG.setAction(Action); 4751 4752 // Notwithstanding the body of the region is emitted as inlined directive, 4753 // we don't use an inline scope as changes in the references inside the 4754 // region are expected to be visible outside, so we do not privative them. 4755 OMPLexicalScope Scope(CGF, S); 4756 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, 4757 PrivRCG); 4758 }; 4759 4760 RegionCodeGenTy RCG(CodeGen); 4761 4762 // If we don't have target devices, don't bother emitting the data mapping 4763 // code. 4764 if (CGM.getLangOpts().OMPTargetTriples.empty()) { 4765 RCG(*this); 4766 return; 4767 } 4768 4769 // Check if we have any if clause associated with the directive. 4770 const Expr *IfCond = nullptr; 4771 if (const auto *C = S.getSingleClause<OMPIfClause>()) 4772 IfCond = C->getCondition(); 4773 4774 // Check if we have any device clause associated with the directive. 4775 const Expr *Device = nullptr; 4776 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 4777 Device = C->getDevice(); 4778 4779 // Set the action to signal privatization of device pointers. 4780 RCG.setAction(PrivAction); 4781 4782 // Emit region code. 4783 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, 4784 Info); 4785 } 4786 4787 void CodeGenFunction::EmitOMPTargetEnterDataDirective( 4788 const OMPTargetEnterDataDirective &S) { 4789 // If we don't have target devices, don't bother emitting the data mapping 4790 // code. 4791 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4792 return; 4793 4794 // Check if we have any if clause associated with the directive. 4795 const Expr *IfCond = nullptr; 4796 if (const auto *C = S.getSingleClause<OMPIfClause>()) 4797 IfCond = C->getCondition(); 4798 4799 // Check if we have any device clause associated with the directive. 4800 const Expr *Device = nullptr; 4801 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 4802 Device = C->getDevice(); 4803 4804 OMPLexicalScope Scope(*this, S, OMPD_task); 4805 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4806 } 4807 4808 void CodeGenFunction::EmitOMPTargetExitDataDirective( 4809 const OMPTargetExitDataDirective &S) { 4810 // If we don't have target devices, don't bother emitting the data mapping 4811 // code. 4812 if (CGM.getLangOpts().OMPTargetTriples.empty()) 4813 return; 4814 4815 // Check if we have any if clause associated with the directive. 4816 const Expr *IfCond = nullptr; 4817 if (const auto *C = S.getSingleClause<OMPIfClause>()) 4818 IfCond = C->getCondition(); 4819 4820 // Check if we have any device clause associated with the directive. 4821 const Expr *Device = nullptr; 4822 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 4823 Device = C->getDevice(); 4824 4825 OMPLexicalScope Scope(*this, S, OMPD_task); 4826 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 4827 } 4828 4829 static void emitTargetParallelRegion(CodeGenFunction &CGF, 4830 const OMPTargetParallelDirective &S, 4831 PrePostActionTy &Action) { 4832 // Get the captured statement associated with the 'parallel' region. 4833 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); 4834 Action.Enter(CGF); 4835 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { 4836 Action.Enter(CGF); 4837 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 4838 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); 4839 CGF.EmitOMPPrivateClause(S, PrivateScope); 4840 CGF.EmitOMPReductionClauseInit(S, PrivateScope); 4841 (void)PrivateScope.Privatize(); 4842 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 4843 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 4844 // TODO: Add support for clauses. 4845 CGF.EmitStmt(CS->getCapturedStmt()); 4846 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); 4847 }; 4848 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, 4849 emitEmptyBoundParameters); 4850 emitPostUpdateForReductionClause(CGF, S, 4851 [](CodeGenFunction &) { return nullptr; }); 4852 } 4853 4854 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 4855 CodeGenModule &CGM, StringRef ParentName, 4856 const OMPTargetParallelDirective &S) { 4857 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4858 emitTargetParallelRegion(CGF, S, Action); 4859 }; 4860 llvm::Function *Fn; 4861 llvm::Constant *Addr; 4862 // Emit target region as a standalone region. 4863 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4864 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4865 assert(Fn && Addr && "Target device function emission failed."); 4866 } 4867 4868 void CodeGenFunction::EmitOMPTargetParallelDirective( 4869 const OMPTargetParallelDirective &S) { 4870 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4871 emitTargetParallelRegion(CGF, S, Action); 4872 }; 4873 emitCommonOMPTargetDirective(*this, S, CodeGen); 4874 } 4875 4876 static void emitTargetParallelForRegion(CodeGenFunction &CGF, 4877 const OMPTargetParallelForDirective &S, 4878 PrePostActionTy &Action) { 4879 Action.Enter(CGF); 4880 // Emit directive as a combined directive that consists of two implicit 4881 // directives: 'parallel' with 'for' directive. 4882 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4883 Action.Enter(CGF); 4884 CodeGenFunction::OMPCancelStackRAII CancelRegion( 4885 CGF, OMPD_target_parallel_for, S.hasCancel()); 4886 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4887 emitDispatchForLoopBounds); 4888 }; 4889 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, 4890 emitEmptyBoundParameters); 4891 } 4892 4893 void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 4894 CodeGenModule &CGM, StringRef ParentName, 4895 const OMPTargetParallelForDirective &S) { 4896 // Emit SPMD target parallel for region as a standalone region. 4897 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4898 emitTargetParallelForRegion(CGF, S, Action); 4899 }; 4900 llvm::Function *Fn; 4901 llvm::Constant *Addr; 4902 // Emit target region as a standalone region. 4903 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4904 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4905 assert(Fn && Addr && "Target device function emission failed."); 4906 } 4907 4908 void CodeGenFunction::EmitOMPTargetParallelForDirective( 4909 const OMPTargetParallelForDirective &S) { 4910 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4911 emitTargetParallelForRegion(CGF, S, Action); 4912 }; 4913 emitCommonOMPTargetDirective(*this, S, CodeGen); 4914 } 4915 4916 static void 4917 emitTargetParallelForSimdRegion(CodeGenFunction &CGF, 4918 const OMPTargetParallelForSimdDirective &S, 4919 PrePostActionTy &Action) { 4920 Action.Enter(CGF); 4921 // Emit directive as a combined directive that consists of two implicit 4922 // directives: 'parallel' with 'for' directive. 4923 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4924 Action.Enter(CGF); 4925 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4926 emitDispatchForLoopBounds); 4927 }; 4928 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, 4929 emitEmptyBoundParameters); 4930 } 4931 4932 void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 4933 CodeGenModule &CGM, StringRef ParentName, 4934 const OMPTargetParallelForSimdDirective &S) { 4935 // Emit SPMD target parallel for region as a standalone region. 4936 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4937 emitTargetParallelForSimdRegion(CGF, S, Action); 4938 }; 4939 llvm::Function *Fn; 4940 llvm::Constant *Addr; 4941 // Emit target region as a standalone region. 4942 CGM.getOpenMPRuntime().emitTargetOutlinedFunction( 4943 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); 4944 assert(Fn && Addr && "Target device function emission failed."); 4945 } 4946 4947 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( 4948 const OMPTargetParallelForSimdDirective &S) { 4949 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4950 emitTargetParallelForSimdRegion(CGF, S, Action); 4951 }; 4952 emitCommonOMPTargetDirective(*this, S, CodeGen); 4953 } 4954 4955 /// Emit a helper variable and return corresponding lvalue. 4956 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, 4957 const ImplicitParamDecl *PVD, 4958 CodeGenFunction::OMPPrivateScope &Privates) { 4959 const auto *VDecl = cast<VarDecl>(Helper->getDecl()); 4960 Privates.addPrivate(VDecl, 4961 [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); }); 4962 } 4963 4964 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { 4965 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); 4966 // Emit outlined function for task construct. 4967 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); 4968 Address CapturedStruct = GenerateCapturedStmtArgument(*CS); 4969 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); 4970 const Expr *IfCond = nullptr; 4971 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { 4972 if (C->getNameModifier() == OMPD_unknown || 4973 C->getNameModifier() == OMPD_taskloop) { 4974 IfCond = C->getCondition(); 4975 break; 4976 } 4977 } 4978 4979 OMPTaskDataTy Data; 4980 // Check if taskloop must be emitted without taskgroup. 4981 Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); 4982 // TODO: Check if we should emit tied or untied task. 4983 Data.Tied = true; 4984 // Set scheduling for taskloop 4985 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { 4986 // grainsize clause 4987 Data.Schedule.setInt(/*IntVal=*/false); 4988 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); 4989 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { 4990 // num_tasks clause 4991 Data.Schedule.setInt(/*IntVal=*/true); 4992 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); 4993 } 4994 4995 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { 4996 // if (PreCond) { 4997 // for (IV in 0..LastIteration) BODY; 4998 // <Final counter/linear vars updates>; 4999 // } 5000 // 5001 5002 // Emit: if (PreCond) - begin. 5003 // If the condition constant folds and can be elided, avoid emitting the 5004 // whole loop. 5005 bool CondConstant; 5006 llvm::BasicBlock *ContBlock = nullptr; 5007 OMPLoopScope PreInitScope(CGF, S); 5008 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { 5009 if (!CondConstant) 5010 return; 5011 } else { 5012 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); 5013 ContBlock = CGF.createBasicBlock("taskloop.if.end"); 5014 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, 5015 CGF.getProfileCount(&S)); 5016 CGF.EmitBlock(ThenBlock); 5017 CGF.incrementProfileCounter(&S); 5018 } 5019 5020 if (isOpenMPSimdDirective(S.getDirectiveKind())) 5021 CGF.EmitOMPSimdInit(S); 5022 5023 OMPPrivateScope LoopScope(CGF); 5024 // Emit helper vars inits. 5025 enum { LowerBound = 5, UpperBound, Stride, LastIter }; 5026 auto *I = CS->getCapturedDecl()->param_begin(); 5027 auto *LBP = std::next(I, LowerBound); 5028 auto *UBP = std::next(I, UpperBound); 5029 auto *STP = std::next(I, Stride); 5030 auto *LIP = std::next(I, LastIter); 5031 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, 5032 LoopScope); 5033 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, 5034 LoopScope); 5035 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); 5036 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, 5037 LoopScope); 5038 CGF.EmitOMPPrivateLoopCounters(S, LoopScope); 5039 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); 5040 (void)LoopScope.Privatize(); 5041 // Emit the loop iteration variable. 5042 const Expr *IVExpr = S.getIterationVariable(); 5043 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); 5044 CGF.EmitVarDecl(*IVDecl); 5045 CGF.EmitIgnoredExpr(S.getInit()); 5046 5047 // Emit the iterations count variable. 5048 // If it is not a variable, Sema decided to calculate iterations count on 5049 // each iteration (e.g., it is foldable into a constant). 5050 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { 5051 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); 5052 // Emit calculation of the iterations count. 5053 CGF.EmitIgnoredExpr(S.getCalcLastIteration()); 5054 } 5055 5056 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 5057 S.getInc(), 5058 [&S](CodeGenFunction &CGF) { 5059 CGF.EmitOMPLoopBody(S, JumpDest()); 5060 CGF.EmitStopPoint(&S); 5061 }, 5062 [](CodeGenFunction &) {}); 5063 // Emit: if (PreCond) - end. 5064 if (ContBlock) { 5065 CGF.EmitBranch(ContBlock); 5066 CGF.EmitBlock(ContBlock, true); 5067 } 5068 // Emit final copy of the lastprivate variables if IsLastIter != 0. 5069 if (HasLastprivateClause) { 5070 CGF.EmitOMPLastprivateClauseFinal( 5071 S, isOpenMPSimdDirective(S.getDirectiveKind()), 5072 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( 5073 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, 5074 (*LIP)->getType(), S.getBeginLoc()))); 5075 } 5076 }; 5077 auto &&TaskGen = [&S, SharedsTy, CapturedStruct, 5078 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, 5079 const OMPTaskDataTy &Data) { 5080 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, 5081 &Data](CodeGenFunction &CGF, PrePostActionTy &) { 5082 OMPLoopScope PreInitScope(CGF, S); 5083 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, 5084 OutlinedFn, SharedsTy, 5085 CapturedStruct, IfCond, Data); 5086 }; 5087 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, 5088 CodeGen); 5089 }; 5090 if (Data.Nogroup) { 5091 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); 5092 } else { 5093 CGM.getOpenMPRuntime().emitTaskgroupRegion( 5094 *this, 5095 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, 5096 PrePostActionTy &Action) { 5097 Action.Enter(CGF); 5098 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, 5099 Data); 5100 }, 5101 S.getBeginLoc()); 5102 } 5103 } 5104 5105 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { 5106 EmitOMPTaskLoopBasedDirective(S); 5107 } 5108 5109 void CodeGenFunction::EmitOMPTaskLoopSimdDirective( 5110 const OMPTaskLoopSimdDirective &S) { 5111 EmitOMPTaskLoopBasedDirective(S); 5112 } 5113 5114 void CodeGenFunction::EmitOMPMasterTaskLoopDirective( 5115 const OMPMasterTaskLoopDirective &S) { 5116 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5117 Action.Enter(CGF); 5118 EmitOMPTaskLoopBasedDirective(S); 5119 }; 5120 OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); 5121 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 5122 } 5123 5124 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( 5125 const OMPMasterTaskLoopSimdDirective &S) { 5126 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5127 Action.Enter(CGF); 5128 EmitOMPTaskLoopBasedDirective(S); 5129 }; 5130 OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); 5131 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); 5132 } 5133 5134 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( 5135 const OMPParallelMasterTaskLoopDirective &S) { 5136 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { 5137 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, 5138 PrePostActionTy &Action) { 5139 Action.Enter(CGF); 5140 CGF.EmitOMPTaskLoopBasedDirective(S); 5141 }; 5142 OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); 5143 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, 5144 S.getBeginLoc()); 5145 }; 5146 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, 5147 emitEmptyBoundParameters); 5148 } 5149 5150 // Generate the instructions for '#pragma omp target update' directive. 5151 void CodeGenFunction::EmitOMPTargetUpdateDirective( 5152 const OMPTargetUpdateDirective &S) { 5153 // If we don't have target devices, don't bother emitting the data mapping 5154 // code. 5155 if (CGM.getLangOpts().OMPTargetTriples.empty()) 5156 return; 5157 5158 // Check if we have any if clause associated with the directive. 5159 const Expr *IfCond = nullptr; 5160 if (const auto *C = S.getSingleClause<OMPIfClause>()) 5161 IfCond = C->getCondition(); 5162 5163 // Check if we have any device clause associated with the directive. 5164 const Expr *Device = nullptr; 5165 if (const auto *C = S.getSingleClause<OMPDeviceClause>()) 5166 Device = C->getDevice(); 5167 5168 OMPLexicalScope Scope(*this, S, OMPD_task); 5169 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 5170 } 5171 5172 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 5173 const OMPExecutableDirective &D) { 5174 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 5175 return; 5176 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 5177 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 5178 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); 5179 } else { 5180 OMPPrivateScope LoopGlobals(CGF); 5181 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { 5182 for (const Expr *E : LD->counters()) { 5183 const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5184 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { 5185 LValue GlobLVal = CGF.EmitLValue(E); 5186 LoopGlobals.addPrivate( 5187 VD, [&GlobLVal]() { return GlobLVal.getAddress(); }); 5188 } 5189 if (isa<OMPCapturedExprDecl>(VD)) { 5190 // Emit only those that were not explicitly referenced in clauses. 5191 if (!CGF.LocalDeclMap.count(VD)) 5192 CGF.EmitVarDecl(*VD); 5193 } 5194 } 5195 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { 5196 if (!C->getNumForLoops()) 5197 continue; 5198 for (unsigned I = LD->getCollapsedNumber(), 5199 E = C->getLoopNumIterations().size(); 5200 I < E; ++I) { 5201 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( 5202 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { 5203 // Emit only those that were not explicitly referenced in clauses. 5204 if (!CGF.LocalDeclMap.count(VD)) 5205 CGF.EmitVarDecl(*VD); 5206 } 5207 } 5208 } 5209 } 5210 LoopGlobals.Privatize(); 5211 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); 5212 } 5213 }; 5214 OMPSimdLexicalScope Scope(*this, D); 5215 CGM.getOpenMPRuntime().emitInlinedDirective( 5216 *this, 5217 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd 5218 : D.getDirectiveKind(), 5219 CodeGen); 5220 } 5221