1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Frontend/OpenMP/OMPConstants.h" 10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" 11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 12 #include "llvm/IR/BasicBlock.h" 13 #include "llvm/IR/DIBuilder.h" 14 #include "llvm/IR/Function.h" 15 #include "llvm/IR/InstIterator.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/IR/LLVMContext.h" 18 #include "llvm/IR/Module.h" 19 #include "llvm/IR/Verifier.h" 20 #include "llvm/Passes/PassBuilder.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 23 #include "gmock/gmock.h" 24 #include "gtest/gtest.h" 25 #include <optional> 26 27 using namespace llvm; 28 using namespace omp; 29 30 namespace { 31 32 /// Create an instruction that uses the values in \p Values. We use "printf" 33 /// just because it is often used for this purpose in test code, but it is never 34 /// executed here. 
35 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr, 36 ArrayRef<Value *> Values) { 37 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 38 39 GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M); 40 Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 41 Constant *Indices[] = {Zero, Zero}; 42 Constant *FormatStrConst = 43 ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices); 44 45 Function *PrintfDecl = M->getFunction("printf"); 46 if (!PrintfDecl) { 47 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 48 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true); 49 PrintfDecl = Function::Create(Ty, Linkage, "printf", M); 50 } 51 52 SmallVector<Value *, 4> Args; 53 Args.push_back(FormatStrConst); 54 Args.append(Values.begin(), Values.end()); 55 return Builder.CreateCall(PrintfDecl, Args); 56 } 57 58 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit 59 /// order the control flow of \p F. 60 /// 61 /// This is an easy way to verify the branching structure of the CFG without 62 /// checking every branch instruction individually. For the CFG of a 63 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering 64 /// the body, i.e. the DFS order corresponds to the execution order with one 65 /// loop iteration. 
66 static testing::AssertionResult 67 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 68 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 69 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 70 71 df_iterator_default_set<BasicBlock *, 16> Visited; 72 auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited); 73 74 BasicBlock *Prev = nullptr; 75 for (BasicBlock *BB : DFS) { 76 if (It != E && BB == *It) { 77 Prev = *It; 78 ++It; 79 } 80 } 81 82 if (It == E) 83 return testing::AssertionSuccess(); 84 if (!Prev) 85 return testing::AssertionFailure() 86 << "Did not find " << (*It)->getName() << " in control flow"; 87 return testing::AssertionFailure() 88 << "Expected " << Prev->getName() << " before " << (*It)->getName() 89 << " in control flow"; 90 } 91 92 /// Verify that blocks in \p RefOrder are in the same relative order in the 93 /// linked lists of blocks in \p F. The linked list may contain additional 94 /// blocks in-between. 95 /// 96 /// While the order in the linked list is not relevant for semantics, keeping 97 /// the order roughly in execution order makes its printout easier to read. 98 static testing::AssertionResult 99 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 100 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 101 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 102 103 BasicBlock *Prev = nullptr; 104 for (BasicBlock &BB : *F) { 105 if (It != E && &BB == *It) { 106 Prev = *It; 107 ++It; 108 } 109 } 110 111 if (It == E) 112 return testing::AssertionSuccess(); 113 if (!Prev) 114 return testing::AssertionFailure() << "Did not find " << (*It)->getName() 115 << " in function " << F->getName(); 116 return testing::AssertionFailure() 117 << "Expected " << Prev->getName() << " before " << (*It)->getName() 118 << " in function " << F->getName(); 119 } 120 121 /// Populate Calls with call instructions calling the function with the given 122 /// FnID from the given function F. 
123 static void findCalls(Function *F, omp::RuntimeFunction FnID, 124 OpenMPIRBuilder &OMPBuilder, 125 SmallVectorImpl<CallInst *> &Calls) { 126 Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); 127 for (BasicBlock &BB : *F) { 128 for (Instruction &I : BB) { 129 auto *Call = dyn_cast<CallInst>(&I); 130 if (Call && Call->getCalledFunction() == Fn) 131 Calls.push_back(Call); 132 } 133 } 134 } 135 136 /// Assuming \p F contains only one call to the function with the given \p FnID, 137 /// return that call. 138 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID, 139 OpenMPIRBuilder &OMPBuilder) { 140 SmallVector<CallInst *, 1> Calls; 141 findCalls(F, FnID, OMPBuilder, Calls); 142 EXPECT_EQ(1u, Calls.size()); 143 if (Calls.size() != 1) 144 return nullptr; 145 return Calls.front(); 146 } 147 148 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) { 149 switch (SchedType & ~omp::OMPScheduleType::ModifierMask) { 150 case omp::OMPScheduleType::BaseDynamicChunked: 151 return omp::OMP_SCHEDULE_Dynamic; 152 case omp::OMPScheduleType::BaseGuidedChunked: 153 return omp::OMP_SCHEDULE_Guided; 154 case omp::OMPScheduleType::BaseAuto: 155 return omp::OMP_SCHEDULE_Auto; 156 case omp::OMPScheduleType::BaseRuntime: 157 return omp::OMP_SCHEDULE_Runtime; 158 default: 159 llvm_unreachable("unknown type for this test"); 160 } 161 } 162 163 class OpenMPIRBuilderTest : public testing::Test { 164 protected: 165 void SetUp() override { 166 M.reset(new Module("MyModule", Ctx)); 167 FunctionType *FTy = 168 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 169 /*isVarArg=*/false); 170 F = Function::Create(FTy, Function::ExternalLinkage, "", M.get()); 171 BB = BasicBlock::Create(Ctx, "", F); 172 173 DIBuilder DIB(*M); 174 auto File = DIB.createFile("test.dbg", "/src", std::nullopt, 175 std::optional<StringRef>("/src/test.dbg")); 176 auto CU = 177 DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0); 178 auto Type = 
179 DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt)); 180 auto SP = DIB.createFunction( 181 CU, "foo", "", File, 1, Type, 1, DINode::FlagZero, 182 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); 183 F->setSubprogram(SP); 184 auto Scope = DIB.createLexicalBlockFile(SP, File, 0); 185 DIB.finalize(); 186 DL = DILocation::get(Ctx, 3, 7, Scope); 187 } 188 189 void TearDown() override { 190 BB = nullptr; 191 M.reset(); 192 } 193 194 /// Create a function with a simple loop that calls printf using the logical 195 /// loop counter for use with tests that need a CanonicalLoopInfo object. 196 CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL, 197 OpenMPIRBuilder &OMPBuilder, 198 int UseIVBits, 199 CallInst **Call = nullptr, 200 BasicBlock **BodyCode = nullptr) { 201 OMPBuilder.initialize(); 202 F->setName("func"); 203 204 IRBuilder<> Builder(BB); 205 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 206 Value *TripCount = F->getArg(0); 207 208 Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits); 209 Value *CastedTripCount = 210 Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount"); 211 212 auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, 213 llvm::Value *LC) { 214 Builder.restoreIP(CodeGenIP); 215 if (BodyCode) 216 *BodyCode = Builder.GetInsertBlock(); 217 218 // Add something that consumes the induction variable to the body. 219 CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC}); 220 if (Call) 221 *Call = CallInst; 222 }; 223 CanonicalLoopInfo *Loop = 224 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount); 225 226 // Finalize the function. 
227 Builder.restoreIP(Loop->getAfterIP()); 228 Builder.CreateRetVoid(); 229 230 return Loop; 231 } 232 233 LLVMContext Ctx; 234 std::unique_ptr<Module> M; 235 Function *F; 236 BasicBlock *BB; 237 DebugLoc DL; 238 }; 239 240 class OpenMPIRBuilderTestWithParams 241 : public OpenMPIRBuilderTest, 242 public ::testing::WithParamInterface<omp::OMPScheduleType> {}; 243 244 class OpenMPIRBuilderTestWithIVBits 245 : public OpenMPIRBuilderTest, 246 public ::testing::WithParamInterface<int> {}; 247 248 // Returns the value stored in the given allocation. Returns null if the given 249 // value is not a result of an InstTy instruction, if no value is stored or if 250 // there is more than one store. 251 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) { 252 Instruction *Inst = dyn_cast<InstTy>(AllocaValue); 253 if (!Inst) 254 return nullptr; 255 StoreInst *Store = nullptr; 256 for (Use &U : Inst->uses()) { 257 if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) { 258 EXPECT_EQ(Store, nullptr); 259 Store = CandidateStore; 260 } 261 } 262 if (!Store) 263 return nullptr; 264 return Store->getValueOperand(); 265 } 266 267 // Returns the value stored in the aggregate argument of an outlined function, 268 // or nullptr if it is not found. 269 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate, 270 unsigned Idx) { 271 GetElementPtrInst *GEPAtIdx = nullptr; 272 // Find GEP instruction at that index. 273 for (User *Usr : Aggregate->users()) { 274 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr); 275 if (!GEP) 276 continue; 277 278 if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx)) 279 continue; 280 281 EXPECT_EQ(GEPAtIdx, nullptr); 282 GEPAtIdx = GEP; 283 } 284 285 EXPECT_NE(GEPAtIdx, nullptr); 286 EXPECT_EQ(GEPAtIdx->getNumUses(), 1U); 287 288 // Find the value stored to the aggregate. 
289 StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin()); 290 Value *StoredAggValue = StoreToAgg->getValueOperand(); 291 292 Value *StoredValue = nullptr; 293 294 // Find the value stored to the value stored in the aggregate. 295 for (User *Usr : StoredAggValue->users()) { 296 StoreInst *Store = dyn_cast<StoreInst>(Usr); 297 if (!Store) 298 continue; 299 300 if (Store->getPointerOperand() != StoredAggValue) 301 continue; 302 303 EXPECT_EQ(StoredValue, nullptr); 304 StoredValue = Store->getValueOperand(); 305 } 306 307 return StoredValue; 308 } 309 310 // Returns the aggregate that the value is originating from. 311 static Value *findAggregateFromValue(Value *V) { 312 // Expects a load instruction that loads from the aggregate. 313 LoadInst *Load = dyn_cast<LoadInst>(V); 314 EXPECT_NE(Load, nullptr); 315 // Find the GEP instruction used in the load instruction. 316 GetElementPtrInst *GEP = 317 dyn_cast<GetElementPtrInst>(Load->getPointerOperand()); 318 EXPECT_NE(GEP, nullptr); 319 // Find the aggregate used in the GEP instruction. 
320 Value *Aggregate = GEP->getPointerOperand(); 321 322 return Aggregate; 323 } 324 325 TEST_F(OpenMPIRBuilderTest, CreateBarrier) { 326 OpenMPIRBuilder OMPBuilder(*M); 327 OMPBuilder.initialize(); 328 329 IRBuilder<> Builder(BB); 330 331 OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for); 332 EXPECT_TRUE(M->global_empty()); 333 EXPECT_EQ(M->size(), 1U); 334 EXPECT_EQ(F->size(), 1U); 335 EXPECT_EQ(BB->size(), 0U); 336 337 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 338 OMPBuilder.createBarrier(Loc, OMPD_for); 339 EXPECT_FALSE(M->global_empty()); 340 EXPECT_EQ(M->size(), 3U); 341 EXPECT_EQ(F->size(), 1U); 342 EXPECT_EQ(BB->size(), 2U); 343 344 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 345 EXPECT_NE(GTID, nullptr); 346 EXPECT_EQ(GTID->arg_size(), 1U); 347 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 348 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 349 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 350 351 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 352 EXPECT_NE(Barrier, nullptr); 353 EXPECT_EQ(Barrier->arg_size(), 2U); 354 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier"); 355 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 356 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 357 358 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 359 360 Builder.CreateUnreachable(); 361 EXPECT_FALSE(verifyModule(*M, &errs())); 362 } 363 364 TEST_F(OpenMPIRBuilderTest, CreateCancel) { 365 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 366 OpenMPIRBuilder OMPBuilder(*M); 367 OMPBuilder.initialize(); 368 369 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 370 new UnreachableInst(Ctx, CBB); 371 auto FiniCB = [&](InsertPointTy IP) { 372 ASSERT_NE(IP.getBlock(), nullptr); 373 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 374 BranchInst::Create(CBB, IP.getBlock()); 375 }; 376 
OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); 377 378 IRBuilder<> Builder(BB); 379 380 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 381 auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel); 382 Builder.restoreIP(NewIP); 383 EXPECT_FALSE(M->global_empty()); 384 EXPECT_EQ(M->size(), 4U); 385 EXPECT_EQ(F->size(), 4U); 386 EXPECT_EQ(BB->size(), 4U); 387 388 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 389 EXPECT_NE(GTID, nullptr); 390 EXPECT_EQ(GTID->arg_size(), 1U); 391 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 392 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 393 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 394 395 CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode()); 396 EXPECT_NE(Cancel, nullptr); 397 EXPECT_EQ(Cancel->arg_size(), 3U); 398 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 399 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 400 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 401 EXPECT_EQ(Cancel->getNumUses(), 1U); 402 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 403 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 404 EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); 405 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 406 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 407 EXPECT_NE(GTID1, nullptr); 408 EXPECT_EQ(GTID1->arg_size(), 1U); 409 EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 410 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 411 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 412 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 413 EXPECT_NE(Barrier, nullptr); 414 EXPECT_EQ(Barrier->arg_size(), 2U); 415 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 416 
EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 417 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 418 EXPECT_EQ(Barrier->getNumUses(), 0U); 419 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 420 1U); 421 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 422 423 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 424 425 OMPBuilder.popFinalizationCB(); 426 427 Builder.CreateUnreachable(); 428 EXPECT_FALSE(verifyModule(*M, &errs())); 429 } 430 431 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { 432 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 433 OpenMPIRBuilder OMPBuilder(*M); 434 OMPBuilder.initialize(); 435 436 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 437 new UnreachableInst(Ctx, CBB); 438 auto FiniCB = [&](InsertPointTy IP) { 439 ASSERT_NE(IP.getBlock(), nullptr); 440 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 441 BranchInst::Create(CBB, IP.getBlock()); 442 }; 443 OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); 444 445 IRBuilder<> Builder(BB); 446 447 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 448 auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel); 449 Builder.restoreIP(NewIP); 450 EXPECT_FALSE(M->global_empty()); 451 EXPECT_EQ(M->size(), 4U); 452 EXPECT_EQ(F->size(), 7U); 453 EXPECT_EQ(BB->size(), 1U); 454 ASSERT_TRUE(isa<BranchInst>(BB->getTerminator())); 455 ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); 456 BB = BB->getTerminator()->getSuccessor(0); 457 EXPECT_EQ(BB->size(), 4U); 458 459 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 460 EXPECT_NE(GTID, nullptr); 461 EXPECT_EQ(GTID->arg_size(), 1U); 462 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 463 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 464 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 465 466 CallInst *Cancel = 
dyn_cast<CallInst>(GTID->getNextNode()); 467 EXPECT_NE(Cancel, nullptr); 468 EXPECT_EQ(Cancel->arg_size(), 3U); 469 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 470 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 471 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 472 EXPECT_EQ(Cancel->getNumUses(), 1U); 473 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 474 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 475 EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); 476 EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), 477 NewIP.getBlock()); 478 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 479 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 480 EXPECT_NE(GTID1, nullptr); 481 EXPECT_EQ(GTID1->arg_size(), 1U); 482 EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 483 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 484 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 485 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 486 EXPECT_NE(Barrier, nullptr); 487 EXPECT_EQ(Barrier->arg_size(), 2U); 488 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 489 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 490 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 491 EXPECT_EQ(Barrier->getNumUses(), 0U); 492 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 493 1U); 494 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 495 496 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 497 498 OMPBuilder.popFinalizationCB(); 499 500 Builder.CreateUnreachable(); 501 EXPECT_FALSE(verifyModule(*M, &errs())); 502 } 503 504 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { 505 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 506 OpenMPIRBuilder OMPBuilder(*M); 507 
OMPBuilder.initialize(); 508 509 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 510 new UnreachableInst(Ctx, CBB); 511 auto FiniCB = [&](InsertPointTy IP) { 512 ASSERT_NE(IP.getBlock(), nullptr); 513 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 514 BranchInst::Create(CBB, IP.getBlock()); 515 }; 516 OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); 517 518 IRBuilder<> Builder(BB); 519 520 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 521 auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for); 522 Builder.restoreIP(NewIP); 523 EXPECT_FALSE(M->global_empty()); 524 EXPECT_EQ(M->size(), 3U); 525 EXPECT_EQ(F->size(), 4U); 526 EXPECT_EQ(BB->size(), 4U); 527 528 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 529 EXPECT_NE(GTID, nullptr); 530 EXPECT_EQ(GTID->arg_size(), 1U); 531 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 532 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 533 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 534 535 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 536 EXPECT_NE(Barrier, nullptr); 537 EXPECT_EQ(Barrier->arg_size(), 2U); 538 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 539 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 540 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 541 EXPECT_EQ(Barrier->getNumUses(), 1U); 542 Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); 543 EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); 544 EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); 545 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); 546 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 547 1U); 548 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), 549 CBB); 550 551 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 552 553 OMPBuilder.popFinalizationCB(); 554 555 Builder.CreateUnreachable(); 
556 EXPECT_FALSE(verifyModule(*M, &errs())); 557 } 558 559 TEST_F(OpenMPIRBuilderTest, DbgLoc) { 560 OpenMPIRBuilder OMPBuilder(*M); 561 OMPBuilder.initialize(); 562 F->setName("func"); 563 564 IRBuilder<> Builder(BB); 565 566 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 567 OMPBuilder.createBarrier(Loc, OMPD_for); 568 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 569 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 570 EXPECT_EQ(GTID->getDebugLoc(), DL); 571 EXPECT_EQ(Barrier->getDebugLoc(), DL); 572 EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0))); 573 if (!isa<GlobalVariable>(Barrier->getOperand(0))) 574 return; 575 GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0)); 576 EXPECT_TRUE(Ident->hasInitializer()); 577 if (!Ident->hasInitializer()) 578 return; 579 Constant *Initializer = Ident->getInitializer(); 580 EXPECT_TRUE( 581 isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts())); 582 GlobalVariable *SrcStrGlob = 583 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 584 if (!SrcStrGlob) 585 return; 586 EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer())); 587 ConstantDataArray *SrcSrc = 588 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 589 if (!SrcSrc) 590 return; 591 EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;"); 592 } 593 594 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { 595 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 596 std::string oldDLStr = M->getDataLayoutStr(); 597 M->setDataLayout( 598 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 599 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 600 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 601 OpenMPIRBuilder OMPBuilder(*M); 602 OMPBuilder.Config.IsTargetDevice = true; 603 OMPBuilder.initialize(); 604 F->setName("func"); 605 IRBuilder<> Builder(BB); 606 BasicBlock *EnterBB = 
BasicBlock::Create(Ctx, "parallel.enter", F); 607 Builder.CreateBr(EnterBB); 608 Builder.SetInsertPoint(EnterBB); 609 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 610 611 AllocaInst *PrivAI = nullptr; 612 613 unsigned NumBodiesGenerated = 0; 614 unsigned NumPrivatizedVars = 0; 615 unsigned NumFinalizationPoints = 0; 616 617 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 618 ++NumBodiesGenerated; 619 620 Builder.restoreIP(AllocaIP); 621 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 622 Builder.CreateStore(F->arg_begin(), PrivAI); 623 624 Builder.restoreIP(CodeGenIP); 625 Value *PrivLoad = 626 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 627 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 628 Instruction *ThenTerm, *ElseTerm; 629 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 630 &ThenTerm, &ElseTerm); 631 }; 632 633 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 634 Value &Orig, Value &Inner, 635 Value *&ReplacementValue) -> InsertPointTy { 636 ++NumPrivatizedVars; 637 638 if (!isa<AllocaInst>(Orig)) { 639 EXPECT_EQ(&Orig, F->arg_begin()); 640 ReplacementValue = &Inner; 641 return CodeGenIP; 642 } 643 644 // Since the original value is an allocation, it has a pointer type and 645 // therefore no additional wrapping should happen. 646 EXPECT_EQ(&Orig, &Inner); 647 648 // Trivial copy (=firstprivate). 
649 Builder.restoreIP(AllocaIP); 650 Type *VTy = ReplacementValue->getType(); 651 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 652 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 653 Builder.restoreIP(CodeGenIP); 654 Builder.CreateStore(V, ReplacementValue); 655 return CodeGenIP; 656 }; 657 658 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 659 660 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 661 F->getEntryBlock().getFirstInsertionPt()); 662 IRBuilder<>::InsertPoint AfterIP = 663 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 664 nullptr, nullptr, OMP_PROC_BIND_default, false); 665 666 EXPECT_EQ(NumBodiesGenerated, 1U); 667 EXPECT_EQ(NumPrivatizedVars, 1U); 668 EXPECT_EQ(NumFinalizationPoints, 1U); 669 670 Builder.restoreIP(AfterIP); 671 Builder.CreateRetVoid(); 672 673 OMPBuilder.finalize(); 674 Function *OutlinedFn = PrivAI->getFunction(); 675 EXPECT_FALSE(verifyModule(*M, &errs())); 676 EXPECT_NE(OutlinedFn, F); 677 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 678 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 679 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 680 681 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 682 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 683 // Make sure that arguments are pointers in 0 address address space 684 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), 685 PointerType::get(M->getContext(), 0)); 686 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), 687 PointerType::get(M->getContext(), 0)); 688 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 689 PointerType::get(M->getContext(), 0)); 690 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 691 EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 692 User *Usr = OutlinedFn->user_back(); 693 ASSERT_TRUE(isa<CallInst>(Usr)); 694 CallInst *Parallel51CI = dyn_cast<CallInst>(Usr); 695 ASSERT_NE(Parallel51CI, nullptr); 696 697 
EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51"); 698 EXPECT_EQ(Parallel51CI->arg_size(), 9U); 699 EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn); 700 EXPECT_TRUE( 701 isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts())); 702 EXPECT_EQ(Parallel51CI, Usr); 703 M->setDataLayout(oldDLStr); 704 } 705 706 TEST_F(OpenMPIRBuilderTest, ParallelSimple) { 707 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 708 OpenMPIRBuilder OMPBuilder(*M); 709 OMPBuilder.Config.IsTargetDevice = false; 710 OMPBuilder.initialize(); 711 F->setName("func"); 712 IRBuilder<> Builder(BB); 713 714 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 715 Builder.CreateBr(EnterBB); 716 Builder.SetInsertPoint(EnterBB); 717 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 718 719 AllocaInst *PrivAI = nullptr; 720 721 unsigned NumBodiesGenerated = 0; 722 unsigned NumPrivatizedVars = 0; 723 unsigned NumFinalizationPoints = 0; 724 725 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 726 ++NumBodiesGenerated; 727 728 Builder.restoreIP(AllocaIP); 729 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 730 Builder.CreateStore(F->arg_begin(), PrivAI); 731 732 Builder.restoreIP(CodeGenIP); 733 Value *PrivLoad = 734 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 735 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 736 Instruction *ThenTerm, *ElseTerm; 737 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 738 &ThenTerm, &ElseTerm); 739 }; 740 741 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 742 Value &Orig, Value &Inner, 743 Value *&ReplacementValue) -> InsertPointTy { 744 ++NumPrivatizedVars; 745 746 if (!isa<AllocaInst>(Orig)) { 747 EXPECT_EQ(&Orig, F->arg_begin()); 748 ReplacementValue = &Inner; 749 return CodeGenIP; 750 } 751 752 // Since the original value is an allocation, it has a pointer type and 753 // 
therefore no additional wrapping should happen. 754 EXPECT_EQ(&Orig, &Inner); 755 756 // Trivial copy (=firstprivate). 757 Builder.restoreIP(AllocaIP); 758 Type *VTy = ReplacementValue->getType(); 759 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 760 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 761 Builder.restoreIP(CodeGenIP); 762 Builder.CreateStore(V, ReplacementValue); 763 return CodeGenIP; 764 }; 765 766 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 767 768 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 769 F->getEntryBlock().getFirstInsertionPt()); 770 IRBuilder<>::InsertPoint AfterIP = 771 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 772 nullptr, nullptr, OMP_PROC_BIND_default, false); 773 EXPECT_EQ(NumBodiesGenerated, 1U); 774 EXPECT_EQ(NumPrivatizedVars, 1U); 775 EXPECT_EQ(NumFinalizationPoints, 1U); 776 777 Builder.restoreIP(AfterIP); 778 Builder.CreateRetVoid(); 779 780 OMPBuilder.finalize(); 781 782 EXPECT_NE(PrivAI, nullptr); 783 Function *OutlinedFn = PrivAI->getFunction(); 784 EXPECT_NE(F, OutlinedFn); 785 EXPECT_FALSE(verifyModule(*M, &errs())); 786 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 787 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 788 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 789 790 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 791 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 792 793 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 794 EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 795 User *Usr = OutlinedFn->user_back(); 796 ASSERT_TRUE(isa<CallInst>(Usr)); 797 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 798 ASSERT_NE(ForkCI, nullptr); 799 800 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 801 EXPECT_EQ(ForkCI->arg_size(), 4U); 802 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 803 EXPECT_EQ(ForkCI->getArgOperand(1), 804 
ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); 805 EXPECT_EQ(ForkCI, Usr); 806 Value *StoredValue = 807 findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0); 808 EXPECT_EQ(StoredValue, F->arg_begin()); 809 } 810 811 TEST_F(OpenMPIRBuilderTest, ParallelNested) { 812 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 813 OpenMPIRBuilder OMPBuilder(*M); 814 OMPBuilder.Config.IsTargetDevice = false; 815 OMPBuilder.initialize(); 816 F->setName("func"); 817 IRBuilder<> Builder(BB); 818 819 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 820 Builder.CreateBr(EnterBB); 821 Builder.SetInsertPoint(EnterBB); 822 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 823 824 unsigned NumInnerBodiesGenerated = 0; 825 unsigned NumOuterBodiesGenerated = 0; 826 unsigned NumFinalizationPoints = 0; 827 828 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 829 ++NumInnerBodiesGenerated; 830 }; 831 832 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 833 Value &Orig, Value &Inner, 834 Value *&ReplacementValue) -> InsertPointTy { 835 // Trivial copy (=firstprivate). 
836 Builder.restoreIP(AllocaIP); 837 Type *VTy = ReplacementValue->getType(); 838 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 839 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 840 Builder.restoreIP(CodeGenIP); 841 Builder.CreateStore(V, ReplacementValue); 842 return CodeGenIP; 843 }; 844 845 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 846 847 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 848 ++NumOuterBodiesGenerated; 849 Builder.restoreIP(CodeGenIP); 850 BasicBlock *CGBB = CodeGenIP.getBlock(); 851 BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 852 CGBB->getTerminator()->eraseFromParent(); 853 ; 854 855 IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel( 856 InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, 857 FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); 858 859 Builder.restoreIP(AfterIP); 860 Builder.CreateBr(NewBB); 861 }; 862 863 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 864 F->getEntryBlock().getFirstInsertionPt()); 865 IRBuilder<>::InsertPoint AfterIP = 866 OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, 867 nullptr, nullptr, OMP_PROC_BIND_default, false); 868 869 EXPECT_EQ(NumInnerBodiesGenerated, 1U); 870 EXPECT_EQ(NumOuterBodiesGenerated, 1U); 871 EXPECT_EQ(NumFinalizationPoints, 2U); 872 873 Builder.restoreIP(AfterIP); 874 Builder.CreateRetVoid(); 875 876 OMPBuilder.finalize(); 877 878 EXPECT_EQ(M->size(), 5U); 879 for (Function &OutlinedFn : *M) { 880 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 881 continue; 882 EXPECT_FALSE(verifyModule(*M, &errs())); 883 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 884 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 885 EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 886 887 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 888 
EXPECT_EQ(OutlinedFn.arg_size(), 2U); 889 890 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 891 User *Usr = OutlinedFn.user_back(); 892 ASSERT_TRUE(isa<CallInst>(Usr)); 893 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 894 ASSERT_NE(ForkCI, nullptr); 895 896 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 897 EXPECT_EQ(ForkCI->arg_size(), 3U); 898 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 899 EXPECT_EQ(ForkCI->getArgOperand(1), 900 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 901 EXPECT_EQ(ForkCI, Usr); 902 } 903 } 904 905 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { 906 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 907 OpenMPIRBuilder OMPBuilder(*M); 908 OMPBuilder.Config.IsTargetDevice = false; 909 OMPBuilder.initialize(); 910 F->setName("func"); 911 IRBuilder<> Builder(BB); 912 913 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 914 Builder.CreateBr(EnterBB); 915 Builder.SetInsertPoint(EnterBB); 916 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 917 918 unsigned NumInnerBodiesGenerated = 0; 919 unsigned NumOuterBodiesGenerated = 0; 920 unsigned NumFinalizationPoints = 0; 921 922 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 923 ++NumInnerBodiesGenerated; 924 }; 925 926 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 927 Value &Orig, Value &Inner, 928 Value *&ReplacementValue) -> InsertPointTy { 929 // Trivial copy (=firstprivate). 
930 Builder.restoreIP(AllocaIP); 931 Type *VTy = ReplacementValue->getType(); 932 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 933 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 934 Builder.restoreIP(CodeGenIP); 935 Builder.CreateStore(V, ReplacementValue); 936 return CodeGenIP; 937 }; 938 939 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 940 941 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 942 ++NumOuterBodiesGenerated; 943 Builder.restoreIP(CodeGenIP); 944 BasicBlock *CGBB = CodeGenIP.getBlock(); 945 BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 946 BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); 947 CGBB->getTerminator()->eraseFromParent(); 948 ; 949 NewBB1->getTerminator()->eraseFromParent(); 950 ; 951 952 IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel( 953 InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, 954 FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); 955 956 Builder.restoreIP(AfterIP1); 957 Builder.CreateBr(NewBB1); 958 959 IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel( 960 InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB, 961 FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); 962 963 Builder.restoreIP(AfterIP2); 964 Builder.CreateBr(NewBB2); 965 }; 966 967 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 968 F->getEntryBlock().getFirstInsertionPt()); 969 IRBuilder<>::InsertPoint AfterIP = 970 OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, 971 nullptr, nullptr, OMP_PROC_BIND_default, false); 972 973 EXPECT_EQ(NumInnerBodiesGenerated, 2U); 974 EXPECT_EQ(NumOuterBodiesGenerated, 1U); 975 EXPECT_EQ(NumFinalizationPoints, 3U); 976 977 Builder.restoreIP(AfterIP); 978 Builder.CreateRetVoid(); 979 980 OMPBuilder.finalize(); 981 982 EXPECT_EQ(M->size(), 6U); 983 for (Function &OutlinedFn : 
*M) { 984 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 985 continue; 986 EXPECT_FALSE(verifyModule(*M, &errs())); 987 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 988 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 989 EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 990 991 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 992 EXPECT_EQ(OutlinedFn.arg_size(), 2U); 993 994 unsigned NumAllocas = 0; 995 for (Instruction &I : instructions(OutlinedFn)) 996 NumAllocas += isa<AllocaInst>(I); 997 EXPECT_EQ(NumAllocas, 1U); 998 999 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 1000 User *Usr = OutlinedFn.user_back(); 1001 ASSERT_TRUE(isa<CallInst>(Usr)); 1002 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 1003 ASSERT_NE(ForkCI, nullptr); 1004 1005 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 1006 EXPECT_EQ(ForkCI->arg_size(), 3U); 1007 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1008 EXPECT_EQ(ForkCI->getArgOperand(1), 1009 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 1010 EXPECT_EQ(ForkCI, Usr); 1011 } 1012 } 1013 1014 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { 1015 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1016 OpenMPIRBuilder OMPBuilder(*M); 1017 OMPBuilder.Config.IsTargetDevice = false; 1018 OMPBuilder.initialize(); 1019 F->setName("func"); 1020 IRBuilder<> Builder(BB); 1021 1022 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1023 Builder.CreateBr(EnterBB); 1024 Builder.SetInsertPoint(EnterBB); 1025 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1026 1027 AllocaInst *PrivAI = nullptr; 1028 1029 unsigned NumBodiesGenerated = 0; 1030 unsigned NumPrivatizedVars = 0; 1031 unsigned NumFinalizationPoints = 0; 1032 1033 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1034 ++NumBodiesGenerated; 1035 1036 Builder.restoreIP(AllocaIP); 1037 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 1038 
Builder.CreateStore(F->arg_begin(), PrivAI); 1039 1040 Builder.restoreIP(CodeGenIP); 1041 Value *PrivLoad = 1042 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 1043 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 1044 Instruction *ThenTerm, *ElseTerm; 1045 SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm, 1046 &ElseTerm); 1047 }; 1048 1049 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1050 Value &Orig, Value &Inner, 1051 Value *&ReplacementValue) -> InsertPointTy { 1052 ++NumPrivatizedVars; 1053 1054 if (!isa<AllocaInst>(Orig)) { 1055 EXPECT_EQ(&Orig, F->arg_begin()); 1056 ReplacementValue = &Inner; 1057 return CodeGenIP; 1058 } 1059 1060 // Since the original value is an allocation, it has a pointer type and 1061 // therefore no additional wrapping should happen. 1062 EXPECT_EQ(&Orig, &Inner); 1063 1064 // Trivial copy (=firstprivate). 1065 Builder.restoreIP(AllocaIP); 1066 Type *VTy = ReplacementValue->getType(); 1067 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 1068 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 1069 Builder.restoreIP(CodeGenIP); 1070 Builder.CreateStore(V, ReplacementValue); 1071 return CodeGenIP; 1072 }; 1073 1074 auto FiniCB = [&](InsertPointTy CodeGenIP) { 1075 ++NumFinalizationPoints; 1076 // No destructors. 
1077 }; 1078 1079 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1080 F->getEntryBlock().getFirstInsertionPt()); 1081 IRBuilder<>::InsertPoint AfterIP = 1082 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1083 Builder.CreateIsNotNull(F->arg_begin()), 1084 nullptr, OMP_PROC_BIND_default, false); 1085 1086 EXPECT_EQ(NumBodiesGenerated, 1U); 1087 EXPECT_EQ(NumPrivatizedVars, 1U); 1088 EXPECT_EQ(NumFinalizationPoints, 1U); 1089 1090 Builder.restoreIP(AfterIP); 1091 Builder.CreateRetVoid(); 1092 OMPBuilder.finalize(); 1093 1094 EXPECT_NE(PrivAI, nullptr); 1095 Function *OutlinedFn = PrivAI->getFunction(); 1096 EXPECT_NE(F, OutlinedFn); 1097 EXPECT_FALSE(verifyModule(*M, &errs())); 1098 1099 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 1100 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 1101 1102 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 1103 ASSERT_EQ(OutlinedFn->getNumUses(), 1U); 1104 1105 CallInst *ForkCI = nullptr; 1106 for (User *Usr : OutlinedFn->users()) { 1107 ASSERT_TRUE(isa<CallInst>(Usr)); 1108 ForkCI = cast<CallInst>(Usr); 1109 } 1110 1111 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if"); 1112 EXPECT_EQ(ForkCI->arg_size(), 5U); 1113 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1114 EXPECT_EQ(ForkCI->getArgOperand(1), 1115 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 1116 EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx)); 1117 } 1118 1119 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { 1120 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1121 OpenMPIRBuilder OMPBuilder(*M); 1122 OMPBuilder.Config.IsTargetDevice = false; 1123 OMPBuilder.initialize(); 1124 F->setName("func"); 1125 IRBuilder<> Builder(BB); 1126 1127 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1128 Builder.CreateBr(EnterBB); 1129 Builder.SetInsertPoint(EnterBB); 1130 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1131 1132 unsigned 
NumBodiesGenerated = 0; 1133 unsigned NumPrivatizedVars = 0; 1134 unsigned NumFinalizationPoints = 0; 1135 1136 CallInst *CheckedBarrier = nullptr; 1137 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1138 ++NumBodiesGenerated; 1139 1140 Builder.restoreIP(CodeGenIP); 1141 1142 // Create three barriers, two cancel barriers but only one checked. 1143 Function *CBFn, *BFn; 1144 1145 Builder.restoreIP( 1146 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); 1147 1148 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1149 BFn = M->getFunction("__kmpc_barrier"); 1150 ASSERT_NE(CBFn, nullptr); 1151 ASSERT_EQ(BFn, nullptr); 1152 ASSERT_EQ(CBFn->getNumUses(), 1U); 1153 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1154 ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); 1155 CheckedBarrier = cast<CallInst>(CBFn->user_back()); 1156 1157 Builder.restoreIP( 1158 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); 1159 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1160 BFn = M->getFunction("__kmpc_barrier"); 1161 ASSERT_NE(CBFn, nullptr); 1162 ASSERT_NE(BFn, nullptr); 1163 ASSERT_EQ(CBFn->getNumUses(), 1U); 1164 ASSERT_EQ(BFn->getNumUses(), 1U); 1165 ASSERT_TRUE(isa<CallInst>(BFn->user_back())); 1166 ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); 1167 1168 Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, 1169 false, false)); 1170 ASSERT_EQ(CBFn->getNumUses(), 2U); 1171 ASSERT_EQ(BFn->getNumUses(), 1U); 1172 ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); 1173 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1174 ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); 1175 }; 1176 1177 auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, 1178 Value *&) -> InsertPointTy { 1179 ++NumPrivatizedVars; 1180 llvm_unreachable("No privatization callback call expected!"); 1181 }; 1182 1183 FunctionType *FakeDestructorTy = 1184 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 1185 
/*isVarArg=*/false); 1186 auto *FakeDestructor = Function::Create( 1187 FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); 1188 1189 auto FiniCB = [&](InsertPointTy IP) { 1190 ++NumFinalizationPoints; 1191 Builder.restoreIP(IP); 1192 Builder.CreateCall(FakeDestructor, 1193 {Builder.getInt32(NumFinalizationPoints)}); 1194 }; 1195 1196 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1197 F->getEntryBlock().getFirstInsertionPt()); 1198 IRBuilder<>::InsertPoint AfterIP = 1199 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1200 Builder.CreateIsNotNull(F->arg_begin()), 1201 nullptr, OMP_PROC_BIND_default, true); 1202 1203 EXPECT_EQ(NumBodiesGenerated, 1U); 1204 EXPECT_EQ(NumPrivatizedVars, 0U); 1205 EXPECT_EQ(NumFinalizationPoints, 2U); 1206 EXPECT_EQ(FakeDestructor->getNumUses(), 2U); 1207 1208 Builder.restoreIP(AfterIP); 1209 Builder.CreateRetVoid(); 1210 OMPBuilder.finalize(); 1211 1212 EXPECT_FALSE(verifyModule(*M, &errs())); 1213 1214 BasicBlock *ExitBB = nullptr; 1215 for (const User *Usr : FakeDestructor->users()) { 1216 const CallInst *CI = dyn_cast<CallInst>(Usr); 1217 ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); 1218 ASSERT_TRUE(isa<BranchInst>(CI->getNextNode())); 1219 ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); 1220 if (ExitBB) 1221 ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); 1222 else 1223 ExitBB = CI->getNextNode()->getSuccessor(0); 1224 ASSERT_EQ(ExitBB->size(), 1U); 1225 if (!isa<ReturnInst>(ExitBB->front())) { 1226 ASSERT_TRUE(isa<BranchInst>(ExitBB->front())); 1227 ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U); 1228 ASSERT_TRUE(isa<ReturnInst>( 1229 cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front())); 1230 } 1231 } 1232 } 1233 1234 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { 1235 OpenMPIRBuilder OMPBuilder(*M); 1236 OMPBuilder.Config.IsTargetDevice = false; 1237 OMPBuilder.initialize(); 1238 F->setName("func"); 1239 IRBuilder<> 
Builder(BB); 1240 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1241 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1242 1243 Type *I32Ty = Type::getInt32Ty(M->getContext()); 1244 Type *PtrTy = PointerType::get(M->getContext(), 0); 1245 Type *StructTy = StructType::get(I32Ty, PtrTy); 1246 Type *VoidTy = Type::getVoidTy(M->getContext()); 1247 FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty); 1248 FunctionCallee TakeI32Func = 1249 M->getOrInsertFunction("take_i32", VoidTy, I32Ty); 1250 FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy); 1251 FunctionCallee TakeI32PtrFunc = 1252 M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy); 1253 FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy); 1254 FunctionCallee TakeStructFunc = 1255 M->getOrInsertFunction("take_struct", VoidTy, StructTy); 1256 FunctionCallee RetStructPtrFunc = 1257 M->getOrInsertFunction("ret_structptr", PtrTy); 1258 FunctionCallee TakeStructPtrFunc = 1259 M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy); 1260 Value *I32Val = Builder.CreateCall(RetI32Func); 1261 Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc); 1262 Value *StructVal = Builder.CreateCall(RetStructFunc); 1263 Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); 1264 1265 Instruction *Internal; 1266 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1267 IRBuilder<>::InsertPointGuard Guard(Builder); 1268 Builder.restoreIP(CodeGenIP); 1269 Internal = Builder.CreateCall(TakeI32Func, I32Val); 1270 Builder.CreateCall(TakeI32PtrFunc, I32PtrVal); 1271 Builder.CreateCall(TakeStructFunc, StructVal); 1272 Builder.CreateCall(TakeStructPtrFunc, StructPtrVal); 1273 }; 1274 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, 1275 Value &Inner, Value *&ReplacementValue) { 1276 ReplacementValue = &Inner; 1277 return CodeGenIP; 1278 }; 1279 auto FiniCB = [](InsertPointTy) {}; 1280 1281 
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1282 F->getEntryBlock().getFirstInsertionPt()); 1283 IRBuilder<>::InsertPoint AfterIP = 1284 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1285 nullptr, nullptr, OMP_PROC_BIND_default, false); 1286 Builder.restoreIP(AfterIP); 1287 Builder.CreateRetVoid(); 1288 1289 OMPBuilder.finalize(); 1290 1291 EXPECT_FALSE(verifyModule(*M, &errs())); 1292 Function *OutlinedFn = Internal->getFunction(); 1293 1294 Type *Arg2Type = OutlinedFn->getArg(2)->getType(); 1295 EXPECT_TRUE(Arg2Type->isPointerTy()); 1296 } 1297 1298 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { 1299 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1300 OpenMPIRBuilder OMPBuilder(*M); 1301 OMPBuilder.initialize(); 1302 IRBuilder<> Builder(BB); 1303 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1304 Value *TripCount = F->getArg(0); 1305 1306 unsigned NumBodiesGenerated = 0; 1307 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { 1308 NumBodiesGenerated += 1; 1309 1310 Builder.restoreIP(CodeGenIP); 1311 1312 Value *Cmp = Builder.CreateICmpEQ(LC, TripCount); 1313 Instruction *ThenTerm, *ElseTerm; 1314 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 1315 &ThenTerm, &ElseTerm); 1316 }; 1317 1318 CanonicalLoopInfo *Loop = 1319 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount); 1320 1321 Builder.restoreIP(Loop->getAfterIP()); 1322 ReturnInst *RetInst = Builder.CreateRetVoid(); 1323 OMPBuilder.finalize(); 1324 1325 Loop->assertOK(); 1326 EXPECT_FALSE(verifyModule(*M, &errs())); 1327 1328 EXPECT_EQ(NumBodiesGenerated, 1U); 1329 1330 // Verify control flow structure (in addition to Loop->assertOK()). 
1331 EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock()); 1332 EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock()); 1333 1334 Instruction *IndVar = Loop->getIndVar(); 1335 EXPECT_TRUE(isa<PHINode>(IndVar)); 1336 EXPECT_EQ(IndVar->getType(), TripCount->getType()); 1337 EXPECT_EQ(IndVar->getParent(), Loop->getHeader()); 1338 1339 EXPECT_EQ(Loop->getTripCount(), TripCount); 1340 1341 BasicBlock *Body = Loop->getBody(); 1342 Instruction *CmpInst = &Body->front(); 1343 EXPECT_TRUE(isa<ICmpInst>(CmpInst)); 1344 EXPECT_EQ(CmpInst->getOperand(0), IndVar); 1345 1346 BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor(); 1347 EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) { 1348 return SuccBB->getSingleSuccessor() == LatchPred; 1349 })); 1350 1351 EXPECT_EQ(&Loop->getAfter()->front(), RetInst); 1352 } 1353 1354 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) { 1355 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1356 OpenMPIRBuilder OMPBuilder(*M); 1357 OMPBuilder.initialize(); 1358 IRBuilder<> Builder(BB); 1359 1360 // Check the trip count is computed correctly. We generate the canonical loop 1361 // but rely on the IRBuilder's constant folder to compute the final result 1362 // since all inputs are constant. To verify overflow situations, limit the 1363 // trip count / loop counter widths to 16 bits. 
1364 auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step, 1365 bool IsSigned, bool InclusiveStop) -> int64_t { 1366 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1367 Type *LCTy = Type::getInt16Ty(Ctx); 1368 Value *StartVal = ConstantInt::get(LCTy, Start); 1369 Value *StopVal = ConstantInt::get(LCTy, Stop); 1370 Value *StepVal = ConstantInt::get(LCTy, Step); 1371 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {}; 1372 CanonicalLoopInfo *Loop = 1373 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, 1374 StepVal, IsSigned, InclusiveStop); 1375 Loop->assertOK(); 1376 Builder.restoreIP(Loop->getAfterIP()); 1377 Value *TripCount = Loop->getTripCount(); 1378 return cast<ConstantInt>(TripCount)->getValue().getZExtValue(); 1379 }; 1380 1381 EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0); 1382 EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1); 1383 EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42); 1384 EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21); 1385 EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21); 1386 EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1); 1387 EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2); 1388 EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3); 1389 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF); 1390 EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0); 1391 EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1); 1392 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100); 1393 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1); 1394 1395 EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2); 1396 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2); 1397 EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1); 1398 EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1); 1399 EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF); 1400 EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000); 1401 1402 
EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0); 1403 EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0); 1404 EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3); 1405 EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4); 1406 EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1); 1407 EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1); 1408 EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2); 1409 1410 EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000); 1411 EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001); 1412 EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF); 1413 EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF); 1414 EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2); 1415 EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF); 1416 EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000); 1417 EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800); 1418 EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF); 1419 EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1); 1420 EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2); 1421 1422 // Finalize the function and verify it. 1423 Builder.CreateRetVoid(); 1424 OMPBuilder.finalize(); 1425 EXPECT_FALSE(verifyModule(*M, &errs())); 1426 } 1427 1428 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) { 1429 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1430 OpenMPIRBuilder OMPBuilder(*M); 1431 OMPBuilder.initialize(); 1432 F->setName("func"); 1433 1434 IRBuilder<> Builder(BB); 1435 1436 Type *LCTy = F->getArg(0)->getType(); 1437 Constant *One = ConstantInt::get(LCTy, 1); 1438 Constant *Two = ConstantInt::get(LCTy, 2); 1439 Value *OuterTripCount = 1440 Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer"); 1441 Value *InnerTripCount = 1442 Builder.CreateAdd(F->getArg(0), One, "tripcount.inner"); 1443 1444 // Fix an insertion point for ComputeIP. 
1445 BasicBlock *LoopNextEnter = 1446 BasicBlock::Create(M->getContext(), "loopnest.enter", F, 1447 Builder.GetInsertBlock()->getNextNode()); 1448 BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter); 1449 InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()}; 1450 1451 Builder.SetInsertPoint(LoopNextEnter); 1452 OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL); 1453 1454 CanonicalLoopInfo *InnerLoop = nullptr; 1455 CallInst *InbetweenLead = nullptr; 1456 CallInst *InbetweenTrail = nullptr; 1457 CallInst *Call = nullptr; 1458 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) { 1459 Builder.restoreIP(OuterCodeGenIP); 1460 InbetweenLead = 1461 createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC}); 1462 1463 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1464 Value *InnerLC) { 1465 Builder.restoreIP(InnerCodeGenIP); 1466 Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC}); 1467 }; 1468 InnerLoop = OMPBuilder.createCanonicalLoop( 1469 Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner"); 1470 1471 Builder.restoreIP(InnerLoop->getAfterIP()); 1472 InbetweenTrail = 1473 createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC}); 1474 }; 1475 CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop( 1476 OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer"); 1477 1478 // Finish the function. 1479 Builder.restoreIP(OuterLoop->getAfterIP()); 1480 Builder.CreateRetVoid(); 1481 1482 CanonicalLoopInfo *Collapsed = 1483 OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP); 1484 1485 OMPBuilder.finalize(); 1486 EXPECT_FALSE(verifyModule(*M, &errs())); 1487 1488 // Verify control flow and BB order. 
1489 BasicBlock *RefOrder[] = { 1490 Collapsed->getPreheader(), Collapsed->getHeader(), 1491 Collapsed->getCond(), Collapsed->getBody(), 1492 InbetweenLead->getParent(), Call->getParent(), 1493 InbetweenTrail->getParent(), Collapsed->getLatch(), 1494 Collapsed->getExit(), Collapsed->getAfter(), 1495 }; 1496 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1497 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1498 1499 // Verify the total trip count. 1500 auto *TripCount = cast<MulOperator>(Collapsed->getTripCount()); 1501 EXPECT_EQ(TripCount->getOperand(0), OuterTripCount); 1502 EXPECT_EQ(TripCount->getOperand(1), InnerTripCount); 1503 1504 // Verify the changed indvar. 1505 auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1)); 1506 EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv); 1507 EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody()); 1508 EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount); 1509 EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar()); 1510 1511 auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2)); 1512 EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem); 1513 EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody()); 1514 EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar()); 1515 EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount); 1516 1517 EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV); 1518 EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV); 1519 } 1520 1521 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) { 1522 OpenMPIRBuilder OMPBuilder(*M); 1523 CallInst *Call; 1524 BasicBlock *BodyCode; 1525 CanonicalLoopInfo *Loop = 1526 buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode); 1527 1528 Instruction *OrigIndVar = Loop->getIndVar(); 1529 EXPECT_EQ(Call->getOperand(1), OrigIndVar); 1530 1531 // Tile the loop. 
1532 Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7)); 1533 std::vector<CanonicalLoopInfo *> GenLoops = 1534 OMPBuilder.tileLoops(DL, {Loop}, {TileSize}); 1535 1536 OMPBuilder.finalize(); 1537 EXPECT_FALSE(verifyModule(*M, &errs())); 1538 1539 EXPECT_EQ(GenLoops.size(), 2u); 1540 CanonicalLoopInfo *Floor = GenLoops[0]; 1541 CanonicalLoopInfo *Tile = GenLoops[1]; 1542 1543 BasicBlock *RefOrder[] = { 1544 Floor->getPreheader(), Floor->getHeader(), Floor->getCond(), 1545 Floor->getBody(), Tile->getPreheader(), Tile->getHeader(), 1546 Tile->getCond(), Tile->getBody(), BodyCode, 1547 Tile->getLatch(), Tile->getExit(), Tile->getAfter(), 1548 Floor->getLatch(), Floor->getExit(), Floor->getAfter(), 1549 }; 1550 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1551 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1552 1553 // Check the induction variable. 1554 EXPECT_EQ(Call->getParent(), BodyCode); 1555 auto *Shift = cast<AddOperator>(Call->getOperand(1)); 1556 EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody()); 1557 EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar()); 1558 auto *Scale = cast<MulOperator>(Shift->getOperand(0)); 1559 EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody()); 1560 EXPECT_EQ(Scale->getOperand(0), TileSize); 1561 EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar()); 1562 } 1563 1564 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) { 1565 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1566 OpenMPIRBuilder OMPBuilder(*M); 1567 OMPBuilder.initialize(); 1568 F->setName("func"); 1569 1570 IRBuilder<> Builder(BB); 1571 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1572 Value *TripCount = F->getArg(0); 1573 Type *LCTy = TripCount->getType(); 1574 1575 BasicBlock *BodyCode = nullptr; 1576 CanonicalLoopInfo *InnerLoop = nullptr; 1577 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, 1578 llvm::Value *OuterLC) { 1579 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1580 
llvm::Value *InnerLC) { 1581 Builder.restoreIP(InnerCodeGenIP); 1582 BodyCode = Builder.GetInsertBlock(); 1583 1584 // Add something that consumes the induction variables to the body. 1585 createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); 1586 }; 1587 InnerLoop = OMPBuilder.createCanonicalLoop( 1588 OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner"); 1589 }; 1590 CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop( 1591 Loc, OuterLoopBodyGenCB, TripCount, "outer"); 1592 1593 // Finalize the function. 1594 Builder.restoreIP(OuterLoop->getAfterIP()); 1595 Builder.CreateRetVoid(); 1596 1597 // Tile to loop nest. 1598 Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11)); 1599 Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7)); 1600 std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops( 1601 DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize}); 1602 1603 OMPBuilder.finalize(); 1604 EXPECT_FALSE(verifyModule(*M, &errs())); 1605 1606 EXPECT_EQ(GenLoops.size(), 4u); 1607 CanonicalLoopInfo *Floor1 = GenLoops[0]; 1608 CanonicalLoopInfo *Floor2 = GenLoops[1]; 1609 CanonicalLoopInfo *Tile1 = GenLoops[2]; 1610 CanonicalLoopInfo *Tile2 = GenLoops[3]; 1611 1612 BasicBlock *RefOrder[] = { 1613 Floor1->getPreheader(), 1614 Floor1->getHeader(), 1615 Floor1->getCond(), 1616 Floor1->getBody(), 1617 Floor2->getPreheader(), 1618 Floor2->getHeader(), 1619 Floor2->getCond(), 1620 Floor2->getBody(), 1621 Tile1->getPreheader(), 1622 Tile1->getHeader(), 1623 Tile1->getCond(), 1624 Tile1->getBody(), 1625 Tile2->getPreheader(), 1626 Tile2->getHeader(), 1627 Tile2->getCond(), 1628 Tile2->getBody(), 1629 BodyCode, 1630 Tile2->getLatch(), 1631 Tile2->getExit(), 1632 Tile2->getAfter(), 1633 Tile1->getLatch(), 1634 Tile1->getExit(), 1635 Tile1->getAfter(), 1636 Floor2->getLatch(), 1637 Floor2->getExit(), 1638 Floor2->getAfter(), 1639 Floor1->getLatch(), 1640 Floor1->getExit(), 1641 Floor1->getAfter(), 1642 }; 1643 
EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));
}

/// Tile a two-deep loop nest whose loops have non-zero start values and
/// non-unit steps, then check the block order of the generated floor/tile
/// loops and the arithmetic feeding the printf call in the body.
TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);
  Value *TripCount = F->getArg(0);
  Type *LCTy = TripCount->getType();

  Value *OuterStartVal = ConstantInt::get(LCTy, 2);
  Value *OuterStopVal = TripCount;
  Value *OuterStep = ConstantInt::get(LCTy, 5);
  Value *InnerStartVal = ConstantInt::get(LCTy, 13);
  Value *InnerStopVal = TripCount;
  Value *InnerStep = ConstantInt::get(LCTy, 3);

  // Fix an insertion point for ComputeIP.
  BasicBlock *LoopNextEnter =
      BasicBlock::Create(M->getContext(), "loopnest.enter", F,
                         Builder.GetInsertBlock()->getNextNode());
  BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
  InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};

  InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
  OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});

  // Filled in by the body-generation callbacks below.
  BasicBlock *BodyCode = nullptr;
  CanonicalLoopInfo *InnerLoop = nullptr;
  CallInst *Call = nullptr;
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
                                llvm::Value *OuterLC) {
    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
                                  llvm::Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      BodyCode = Builder.GetInsertBlock();

      // Add something that consumes the induction variable to the body.
      Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
    };
    InnerLoop = OMPBuilder.createCanonicalLoop(
        OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
        InnerStep, false, false, ComputeIP, "inner");
  };
  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
      Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
      false, ComputeIP, "outer");

  // Finalize the function
  Builder.restoreIP(OuterLoop->getAfterIP());
  Builder.CreateRetVoid();

  // Tile the loop nest.
  Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
  Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops =
      OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Fatal on mismatch: the elements are indexed unconditionally below.
  ASSERT_EQ(GenLoops.size(), 4u);
  CanonicalLoopInfo *Floor0 = GenLoops[0];
  CanonicalLoopInfo *Floor1 = GenLoops[1];
  CanonicalLoopInfo *Tile0 = GenLoops[2];
  CanonicalLoopInfo *Tile1 = GenLoops[3];

  BasicBlock *RefOrder[] = {
      Floor0->getPreheader(),
      Floor0->getHeader(),
      Floor0->getCond(),
      Floor0->getBody(),
      Floor1->getPreheader(),
      Floor1->getHeader(),
      Floor1->getCond(),
      Floor1->getBody(),
      Tile0->getPreheader(),
      Tile0->getHeader(),
      Tile0->getCond(),
      Tile0->getBody(),
      Tile1->getPreheader(),
      Tile1->getHeader(),
      Tile1->getCond(),
      Tile1->getBody(),
      BodyCode,
      Tile1->getLatch(),
      Tile1->getExit(),
      Tile1->getAfter(),
      Tile0->getLatch(),
      Tile0->getExit(),
      Tile0->getAfter(),
      Floor1->getLatch(),
      Floor1->getExit(),
      Floor1->getAfter(),
      Floor0->getLatch(),
      Floor0->getExit(),
      Floor0->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));

  // Call is only set if the inner body callback ran; it is dereferenced below.
  ASSERT_NE(Call, nullptr);
  EXPECT_EQ(Call->getParent(), BodyCode);

  // First printf value operand: walk the add/mul chain back to the outer
  // floor and tile induction variables.
  auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
  EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
  auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
  EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
  auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
  auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
  EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());

  // Second printf value operand: same structure for the inner loop.
  auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
  EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
  EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
  auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
  EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
  auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
  auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
  EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
}

/// Tile single loops with all-constant bounds and check the constant-folded
/// trip count of the resulting floor loop for a range of start/stop/step/tile
/// size combinations.
TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);

  // Create a loop, tile it, and extract its trip count. All input values are
  // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
  // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
  // do the same for the tile loop.
  auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
                           bool IsSigned, bool InclusiveStop,
                           int64_t TileSize) -> uint64_t {
    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
    Type *LCTy = Type::getInt16Ty(Ctx);
    Value *StartVal = ConstantInt::get(LCTy, Start);
    Value *StopVal = ConstantInt::get(LCTy, Stop);
    Value *StepVal = ConstantInt::get(LCTy, Step);

    // Generate a loop.
    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
    CanonicalLoopInfo *Loop =
        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
                                       StepVal, IsSigned, InclusiveStop);
    InsertPointTy AfterIP = Loop->getAfterIP();

    // Tile the loop.
    Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
    std::vector<CanonicalLoopInfo *> GenLoops =
        OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});

    // Set the insertion pointer to after loop, where the next loop will be
    // emitted.
    Builder.restoreIP(AfterIP);

    // Extract the trip count.
    CanonicalLoopInfo *FloorLoop = GenLoops[0];
    Value *FloorTripCount = FloorLoop->getTripCount();
    return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
  };

  // Empty iteration domain.
  EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
  EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
  EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
  EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
  EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);

  // Only complete tiles.
  EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);

  // Only a partial tile.
  EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
  EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);

  // Complete and partial tiles.
  EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
  EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
  EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
  EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);

  // Close to 16-bit integer range.
  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
  EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
  EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
  EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);

  // Finalize the function.
  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// Apply simd without simdlen/safelen/if/aligned arguments and check the loop
/// metadata and the access-group metadata in the loop body.
TEST_F(OpenMPIRBuilderTest, ApplySimd) {
  OpenMPIRBuilder OMPBuilder(*M);
  MapVector<Value *, Value *> AlignedVars;
  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Simd-ize the loop.
  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
                       OrderKind::OMP_ORDER_unknown,
                       /* Simdlen */ nullptr,
                       /* Safelen */ nullptr);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));

  // Check for llvm.access.group metadata attached to the printf
  // function in the loop body.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));
}

/// Apply simd with one aligned variable and check, in addition to the loop
/// metadata, that the preheader holds one assume call per aligned variable
/// carrying the requested alignment.
TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
  OpenMPIRBuilder OMPBuilder(*M);
  IRBuilder<> Builder(BB);
  const int AlignmentValue = 32;
  AllocaInst *Alloc1 =
      Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1));
  LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
  MapVector<Value *, Value *> AlignedVars;
  AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Simd-ize the loop.
  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
                       OrderKind::OMP_ORDER_unknown,
                       /* Simdlen */ nullptr,
                       /* Safelen */ nullptr);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));

  // Check for llvm.access.group metadata attached to the printf
  // function in the loop body.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));

  // Check if number of assumption instructions is equal to number of aligned
  // variables
  BasicBlock *LoopPreheader = CLI->getPreheader();
  size_t NumAssumptionCallsInPreheader = count_if(
      *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); });
  EXPECT_EQ(NumAssumptionCallsInPreheader, AlignedVars.size());

  // Check if variables are correctly aligned
  for (Instruction &Instr : *LoopPreheader) {
    auto *AssumeInstruction = dyn_cast<AssumeInst>(&Instr);
    if (!AssumeInstruction)
      continue;
    if (AssumeInstruction->getNumTotalBundleOperands()) {
      auto Bundle = AssumeInstruction->getOperandBundleAt(0);
      if (Bundle.getTagName() == "align") {
        // Fatal on mismatch: the constant is dereferenced right after.
        auto *ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
        ASSERT_NE(ConstIntVal, nullptr);
        EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
      }
    }
  }
}

/// Apply simd with a simdlen of 3 and check that the vectorize width metadata
/// is 3 alongside the parallel-access metadata.
TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
  OpenMPIRBuilder OMPBuilder(*M);
  MapVector<Value *, Value *> AlignedVars;
  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Simd-ize the loop.
  OMPBuilder.applySimd(CLI, AlignedVars,
                       /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
                       ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                       /* Safelen */ nullptr);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
  EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);

  // Check for llvm.access.group metadata attached to the printf
  // function in the loop body.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));
}

/// Apply simd with a safelen of 3 together with order(concurrent) and check
/// that parallel-access metadata is still attached.
TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
  OpenMPIRBuilder OMPBuilder(*M);
  MapVector<Value *, Value *> AlignedVars;

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Simd-ize the loop.
  OMPBuilder.applySimd(
      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
      /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  // Parallel metadata should be attached because of presence of
  // the order(concurrent) OpenMP clause
  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
  EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);

  // Check for llvm.access.group metadata attached to the printf
  // function in the loop body.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));
}

/// Apply simd with only a safelen of 3 and check that the vectorize width is
/// 3 while no parallel-access or access-group metadata is present.
TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
  OpenMPIRBuilder OMPBuilder(*M);
  MapVector<Value *, Value *> AlignedVars;

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  OMPBuilder.applySimd(
      CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
      /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
  EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);

  // No instruction in the loop body is expected to carry
  // llvm.access.group metadata.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));
}

/// Apply simd with simdlen 2 and safelen 3 and check that the vectorize width
/// is 2 while no parallel-access or access-group metadata is present.
TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
  OpenMPIRBuilder OMPBuilder(*M);
  MapVector<Value *, Value *> AlignedVars;

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
                       OrderKind::OMP_ORDER_unknown,
                       ConstantInt::get(Type::getInt32Ty(Ctx), 2),
                       ConstantInt::get(Type::getInt32Ty(Ctx), 3));

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
  EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2);

  // No instruction in the loop body is expected to carry
  // llvm.access.group metadata.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));
}

/// Apply simd with an if condition and a simdlen of 3 and check that two
/// top-level loops result: one with vectorization metadata and one without.
TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
  OpenMPIRBuilder OMPBuilder(*M);
  IRBuilder<> Builder(BB);
  MapVector<Value *, Value *> AlignedVars;
  AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
  AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());

  // Generation of if condition
  Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1);
  Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2);
  LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
  LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2);

  Value *IfCmp = Builder.CreateICmpNE(Load1, Load2);

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Simd-ize the loop with if condition
  OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
                       ConstantInt::get(Type::getInt32Ty(Ctx), 3),
                       /* Safelen */ nullptr);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Check if there are two loops (one with enabled vectorization).
  // Fatal on mismatch: both elements are indexed unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 2u);

  Loop *L = TopLvl[0];
  EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
  EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);

  // The second loop should have disabled vectorization
  L = TopLvl[1];
  EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
  EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
  // Check for llvm.access.group metadata attached to the printf
  // function in the loop body.
  BasicBlock *LoopBody = CLI->getBody();
  EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
    return I.getMetadata("llvm.access.group") != nullptr;
  }));
}

/// Request full unrolling and check the unroll.enable and unroll.full loop
/// attributes.
TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
  OpenMPIRBuilder OMPBuilder(*M);

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Unroll the loop.
  OMPBuilder.unrollLoopFull(DL, CLI);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
}

/// Request partial unrolling by a factor of 5 and check that the returned
/// loop is the outer loop of a two-level nest whose inner loop carries the
/// unroll count.
TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
  OpenMPIRBuilder OMPBuilder(*M);
  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Unroll the loop.
  CanonicalLoopInfo *UnrolledLoop = nullptr;
  OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
  ASSERT_NE(UnrolledLoop, nullptr);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
  UnrolledLoop->assertOK();

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);
  Loop *Outer = TopLvl.front();
  EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
  EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
  EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
  EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());

  // Fatal on mismatch: front() is taken unconditionally below.
  ASSERT_EQ(Outer->getSubLoops().size(), 1u);
  Loop *Inner = Outer->getSubLoops().front();

  EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
  EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
}

/// Request heuristic unrolling and check the unroll.enable loop attribute.
TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
  OpenMPIRBuilder OMPBuilder(*M);

  CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);

  // Unroll the loop.
  OMPBuilder.unrollLoopHeuristic(DL, CLI);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);

  // Fatal on mismatch: front() is taken unconditionally below.
  const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
  ASSERT_EQ(TopLvl.size(), 1u);

  Loop *L = TopLvl.front();
  EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
}

/// Apply a static workshare loop with IsTargetDevice set and check the single
/// __kmpc_for_static_loop_4u call emitted in the preheader and its arguments.
TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  M->setDataLayout(
      "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
      "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
      "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = true;
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  InsertPointTy AllocaIP = Builder.saveIP();

  Type *LCTy = Type::getInt32Ty(Ctx);
  Value *StartVal = ConstantInt::get(LCTy, 10);
  Value *StopVal = ConstantInt::get(LCTy, 52);
  Value *StepVal = ConstantInt::get(LCTy, 2);
  auto LoopBodyGen = [&](InsertPointTy, Value *) {};

  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
      Loc, LoopBodyGen, StartVal, StopVal, StepVal, false, false);
  BasicBlock *Preheader = CLI->getPreheader();
  Value *TripCount = CLI->getTripCount();

  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.applyWorkshareLoop(
      DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, nullptr, false, false,
      false, false, WorksharingLoopType::ForStaticLoop);
  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Find direct calls to the workshare loop runtime function in the preheader.
  CallInst *WorkshareLoopRuntimeCall = nullptr;
  int WorkshareLoopRuntimeCallCnt = 0;
  for (Instruction &Inst : *Preheader) {
    auto *Call = dyn_cast<CallInst>(&Inst);
    if (!Call)
      continue;
    Function *Callee = Call->getCalledFunction();
    if (!Callee)
      continue;

    if (Callee->getName() == "__kmpc_for_static_loop_4u") {
      WorkshareLoopRuntimeCall = Call;
      WorkshareLoopRuntimeCallCnt++;
    }
  }
  // Fatal on mismatch: the call is dereferenced below.
  ASSERT_NE(WorkshareLoopRuntimeCall, nullptr);
  // Verify that there is only one call to workshare loop function
  EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1);
  // Check that pointer to loop body function is passed as second argument
  Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1);
  EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType());
  Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg);
  // Fatal on mismatch: the function is dereferenced below.
  ASSERT_NE(ArgFunction, nullptr);
  EXPECT_EQ(ArgFunction->arg_size(), 1u);
  EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType());
  // Check that no variables except for loop counter are used in loop body
  EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()),
            WorkshareLoopRuntimeCall->getArgOperand(2));
  // Check loop trip count argument
  EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3));
}

/// Apply a static workshare loop on the host and check the allocas, the
/// bound/stride stores in the preheader, the rewritten induction variable and
/// trip count, and the calls in the exit block.
TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  Type *LCTy = Type::getInt32Ty(Ctx);
  Value *StartVal = ConstantInt::get(LCTy, 10);
  Value *StopVal = ConstantInt::get(LCTy, 52);
  Value *StepVal = ConstantInt::get(LCTy, 2);
  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};

  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
      Loc, LoopBodyGen, StartVal, StopVal, StepVal,
      /*IsSigned=*/false, /*InclusiveStop=*/false);
  BasicBlock *Preheader = CLI->getPreheader();
  BasicBlock *Body = CLI->getBody();
  Value *IV = CLI->getIndVar();
  BasicBlock *ExitBlock = CLI->getExit();

  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
  InsertPointTy AllocaIP = Builder.saveIP();

  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
                                OMP_SCHEDULE_Static);

  // Fatal on mismatch: the condition block is dereferenced right after.
  BasicBlock *Cond = Body->getSinglePredecessor();
  ASSERT_NE(Cond, nullptr);
  Instruction *Cmp = &*Cond->begin();
  Value *TripCount = Cmp->getOperand(1);

  auto AllocaIter = BB->begin();
  ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
  AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
  AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
  AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
  AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
  EXPECT_NE(PLastIter, nullptr);
  EXPECT_NE(PLowerBound, nullptr);
  EXPECT_NE(PUpperBound, nullptr);
  EXPECT_NE(PStride, nullptr);

  auto PreheaderIter = Preheader->begin();
  ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
  StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
  StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
  StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
  ASSERT_NE(LowerBoundStore, nullptr);
  ASSERT_NE(UpperBoundStore, nullptr);
  ASSERT_NE(StrideStore, nullptr);

  auto *OrigLowerBound =
      dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
  auto *OrigUpperBound =
      dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
  auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
  ASSERT_NE(OrigLowerBound, nullptr);
  ASSERT_NE(OrigUpperBound, nullptr);
  ASSERT_NE(OrigStride, nullptr);
  EXPECT_EQ(OrigLowerBound->getValue(), 0);
  EXPECT_EQ(OrigUpperBound->getValue(), 20);
  EXPECT_EQ(OrigStride->getValue(), 1);

  // Check that the loop IV is updated to account for the lower bound returned
  // by the OpenMP runtime call.
  BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
  // Fatal on mismatch: the instruction is dereferenced right after.
  ASSERT_NE(Add, nullptr);
  EXPECT_EQ(Add->getOperand(0), IV);
  auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
  ASSERT_NE(LoadedLowerBound, nullptr);
  EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);

  // Check that the trip count is updated to account for the lower and upper
  // bounds returned by the OpenMP runtime call.
  auto *AddOne = dyn_cast<Instruction>(TripCount);
  ASSERT_NE(AddOne, nullptr);
  ASSERT_TRUE(AddOne->isBinaryOp());
  auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
  ASSERT_NE(One, nullptr);
  EXPECT_EQ(One->getValue(), 1);
  auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
  ASSERT_NE(Difference, nullptr);
  ASSERT_TRUE(Difference->isBinaryOp());
  EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
  auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
  ASSERT_NE(LoadedUpperBound, nullptr);
  EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);

  // The original loop iterator should only be used in the condition, in the
  // increment and in the statement that adds the lower bound to it.
  EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);

  // The exit block should contain the "fini" call and the barrier call,
  // plus the call to obtain the thread ID.
  size_t NumCallsInExitBlock =
      count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
  EXPECT_EQ(NumCallsInExitBlock, 3u);
}

/// Apply a static workshare loop with a chunk size of 5 for the parameterized
/// induction variable width and check the dispatch/chunk loop structure and
/// the init, fini and barrier runtime calls.
TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
  unsigned IVBits = GetParam();

  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;

  // Out-parameters of buildSingleLoopFunction.
  BasicBlock *Body = nullptr;
  CallInst *Call = nullptr;
  CanonicalLoopInfo *CLI =
      buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);

  Instruction *OrigIndVar = CLI->getIndVar();
  EXPECT_EQ(Call->getOperand(1), OrigIndVar);

  Type *LCTy = Type::getInt32Ty(Ctx);
  Value *ChunkSize = ConstantInt::get(LCTy, 5);
  InsertPointTy AllocaIP{&F->getEntryBlock(),
                         F->getEntryBlock().getFirstInsertionPt()};
  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
                                OMP_SCHEDULE_Static, ChunkSize);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  BasicBlock *Entry = &F->getEntryBlock();
  BasicBlock *Preheader = Entry->getSingleSuccessor();

  BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
  BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
  BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
  BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
  BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
  BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
  BasicBlock *Return = DispatchAfter->getSingleSuccessor();

  BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
  BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
  BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
  BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
  BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
  BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
  BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();

  BasicBlock *DispatchInc = ChunkAfter;

  EXPECT_EQ(ChunkBody, Body);
  EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
  EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);

  EXPECT_TRUE(isa<ReturnInst>(Return->front()));

  Value *NewIV = Call->getOperand(1);
  EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);

  CallInst *InitCall = findSingleCall(
      F,
      (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
                    : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
      OMPBuilder);
  EXPECT_EQ(InitCall->getParent(), Preheader);
  EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
  EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
  EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);

  CallInst *FiniCall = findSingleCall(
      F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
  EXPECT_EQ(FiniCall->getParent(), DispatchExit);

  CallInst *BarrierCall = findSingleCall(
      F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
  EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
}

INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
                         ::testing::Values(8, 16, 32, 64));

TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  omp::OMPScheduleType SchedType = GetParam();
  uint32_t ChunkSize = 1;
  switch (SchedType & ~OMPScheduleType::ModifierMask) {
  case omp::OMPScheduleType::BaseDynamicChunked:
case omp::OMPScheduleType::BaseGuidedChunked: 2487 ChunkSize = 7; 2488 break; 2489 case omp::OMPScheduleType::BaseAuto: 2490 case omp::OMPScheduleType::BaseRuntime: 2491 ChunkSize = 1; 2492 break; 2493 default: 2494 assert(0 && "unknown type for this test"); 2495 break; 2496 } 2497 2498 Type *LCTy = Type::getInt32Ty(Ctx); 2499 Value *StartVal = ConstantInt::get(LCTy, 10); 2500 Value *StopVal = ConstantInt::get(LCTy, 52); 2501 Value *StepVal = ConstantInt::get(LCTy, 2); 2502 Value *ChunkVal = 2503 (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize); 2504 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; 2505 2506 CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( 2507 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2508 /*IsSigned=*/false, /*InclusiveStop=*/false); 2509 2510 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2511 InsertPointTy AllocaIP = Builder.saveIP(); 2512 2513 // Collect all the info from CLI, as it isn't usable after the call to 2514 // createDynamicWorkshareLoop. 2515 InsertPointTy AfterIP = CLI->getAfterIP(); 2516 BasicBlock *Preheader = CLI->getPreheader(); 2517 BasicBlock *ExitBlock = CLI->getExit(); 2518 BasicBlock *LatchBlock = CLI->getLatch(); 2519 Value *IV = CLI->getIndVar(); 2520 2521 InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop( 2522 DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType), 2523 ChunkVal, /*Simd=*/false, 2524 (SchedType & omp::OMPScheduleType::ModifierMonotonic) == 2525 omp::OMPScheduleType::ModifierMonotonic, 2526 (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) == 2527 omp::OMPScheduleType::ModifierNonmonotonic, 2528 /*Ordered=*/false); 2529 2530 // The returned value should be the "after" point. 
2531 ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock()); 2532 ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint()); 2533 2534 auto AllocaIter = BB->begin(); 2535 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2536 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2537 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2538 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2539 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2540 EXPECT_NE(PLastIter, nullptr); 2541 EXPECT_NE(PLowerBound, nullptr); 2542 EXPECT_NE(PUpperBound, nullptr); 2543 EXPECT_NE(PStride, nullptr); 2544 2545 auto PreheaderIter = Preheader->begin(); 2546 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6); 2547 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2548 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2549 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2550 ASSERT_NE(LowerBoundStore, nullptr); 2551 ASSERT_NE(UpperBoundStore, nullptr); 2552 ASSERT_NE(StrideStore, nullptr); 2553 2554 CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++)); 2555 ASSERT_NE(ThreadIdCall, nullptr); 2556 EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(), 2557 "__kmpc_global_thread_num"); 2558 2559 CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter); 2560 2561 ASSERT_NE(InitCall, nullptr); 2562 EXPECT_EQ(InitCall->getCalledFunction()->getName(), 2563 "__kmpc_dispatch_init_4u"); 2564 EXPECT_EQ(InitCall->arg_size(), 7U); 2565 EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize)); 2566 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2567 if ((SchedType & OMPScheduleType::MonotonicityMask) == 2568 OMPScheduleType::None) { 2569 // Implementation is allowed to add default nonmonotonicity flag 2570 EXPECT_EQ( 2571 static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) | 2572 OMPScheduleType::ModifierNonmonotonic, 
2573 SchedType | OMPScheduleType::ModifierNonmonotonic); 2574 } else { 2575 EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()), 2576 SchedType); 2577 } 2578 2579 ConstantInt *OrigLowerBound = 2580 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2581 ConstantInt *OrigUpperBound = 2582 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2583 ConstantInt *OrigStride = 2584 dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2585 ASSERT_NE(OrigLowerBound, nullptr); 2586 ASSERT_NE(OrigUpperBound, nullptr); 2587 ASSERT_NE(OrigStride, nullptr); 2588 EXPECT_EQ(OrigLowerBound->getValue(), 1); 2589 EXPECT_EQ(OrigUpperBound->getValue(), 21); 2590 EXPECT_EQ(OrigStride->getValue(), 1); 2591 2592 CallInst *FiniCall = dyn_cast<CallInst>( 2593 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2594 EXPECT_EQ(FiniCall, nullptr); 2595 2596 // The original loop iterator should only be used in the condition, in the 2597 // increment and in the statement that adds the lower bound to it. 2598 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2599 2600 // The exit block should contain the barrier call, plus the call to obtain 2601 // the thread ID. 2602 size_t NumCallsInExitBlock = 2603 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2604 EXPECT_EQ(NumCallsInExitBlock, 2u); 2605 2606 // Add a termination to our block and check that it is internally consistent. 
2607 Builder.restoreIP(EndIP); 2608 Builder.CreateRetVoid(); 2609 OMPBuilder.finalize(); 2610 EXPECT_FALSE(verifyModule(*M, &errs())); 2611 } 2612 2613 INSTANTIATE_TEST_SUITE_P( 2614 OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, 2615 ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked, 2616 omp::OMPScheduleType::UnorderedGuidedChunked, 2617 omp::OMPScheduleType::UnorderedAuto, 2618 omp::OMPScheduleType::UnorderedRuntime, 2619 omp::OMPScheduleType::UnorderedDynamicChunked | 2620 omp::OMPScheduleType::ModifierMonotonic, 2621 omp::OMPScheduleType::UnorderedDynamicChunked | 2622 omp::OMPScheduleType::ModifierNonmonotonic, 2623 omp::OMPScheduleType::UnorderedGuidedChunked | 2624 omp::OMPScheduleType::ModifierMonotonic, 2625 omp::OMPScheduleType::UnorderedGuidedChunked | 2626 omp::OMPScheduleType::ModifierNonmonotonic, 2627 omp::OMPScheduleType::UnorderedAuto | 2628 omp::OMPScheduleType::ModifierMonotonic, 2629 omp::OMPScheduleType::UnorderedRuntime | 2630 omp::OMPScheduleType::ModifierMonotonic)); 2631 2632 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) { 2633 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2634 OpenMPIRBuilder OMPBuilder(*M); 2635 OMPBuilder.Config.IsTargetDevice = false; 2636 OMPBuilder.initialize(); 2637 IRBuilder<> Builder(BB); 2638 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2639 2640 uint32_t ChunkSize = 1; 2641 Type *LCTy = Type::getInt32Ty(Ctx); 2642 Value *StartVal = ConstantInt::get(LCTy, 10); 2643 Value *StopVal = ConstantInt::get(LCTy, 52); 2644 Value *StepVal = ConstantInt::get(LCTy, 2); 2645 Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize); 2646 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; 2647 2648 CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( 2649 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2650 /*IsSigned=*/false, /*InclusiveStop=*/false); 2651 2652 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2653 InsertPointTy AllocaIP = 
Builder.saveIP(); 2654 2655 // Collect all the info from CLI, as it isn't usable after the call to 2656 // createDynamicWorkshareLoop. 2657 BasicBlock *Preheader = CLI->getPreheader(); 2658 BasicBlock *ExitBlock = CLI->getExit(); 2659 BasicBlock *LatchBlock = CLI->getLatch(); 2660 Value *IV = CLI->getIndVar(); 2661 2662 InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop( 2663 DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal, 2664 /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false, 2665 /*HasNonmonotonicModifier=*/false, 2666 /*HasOrderedClause=*/true); 2667 2668 // Add a termination to our block and check that it is internally consistent. 2669 Builder.restoreIP(EndIP); 2670 Builder.CreateRetVoid(); 2671 OMPBuilder.finalize(); 2672 EXPECT_FALSE(verifyModule(*M, &errs())); 2673 2674 CallInst *InitCall = nullptr; 2675 for (Instruction &EI : *Preheader) { 2676 Instruction *Cur = &EI; 2677 if (isa<CallInst>(Cur)) { 2678 InitCall = cast<CallInst>(Cur); 2679 if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u") 2680 break; 2681 InitCall = nullptr; 2682 } 2683 } 2684 EXPECT_NE(InitCall, nullptr); 2685 EXPECT_EQ(InitCall->arg_size(), 7U); 2686 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2687 EXPECT_EQ(SchedVal->getValue(), 2688 static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked)); 2689 2690 CallInst *FiniCall = dyn_cast<CallInst>( 2691 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2692 ASSERT_NE(FiniCall, nullptr); 2693 EXPECT_EQ(FiniCall->getCalledFunction()->getName(), 2694 "__kmpc_dispatch_fini_4u"); 2695 EXPECT_EQ(FiniCall->arg_size(), 2U); 2696 EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0)); 2697 EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1)); 2698 2699 // The original loop iterator should only be used in the condition, in the 2700 // increment and in the statement that adds the lower bound to it. 
2701 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2702 2703 // The exit block should contain the barrier call, plus the call to obtain 2704 // the thread ID. 2705 size_t NumCallsInExitBlock = 2706 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2707 EXPECT_EQ(NumCallsInExitBlock, 2u); 2708 } 2709 2710 TEST_F(OpenMPIRBuilderTest, MasterDirective) { 2711 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2712 OpenMPIRBuilder OMPBuilder(*M); 2713 OMPBuilder.initialize(); 2714 F->setName("func"); 2715 IRBuilder<> Builder(BB); 2716 2717 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2718 2719 AllocaInst *PrivAI = nullptr; 2720 2721 BasicBlock *EntryBB = nullptr; 2722 BasicBlock *ThenBB = nullptr; 2723 2724 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2725 if (AllocaIP.isSet()) 2726 Builder.restoreIP(AllocaIP); 2727 else 2728 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2729 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2730 Builder.CreateStore(F->arg_begin(), PrivAI); 2731 2732 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2733 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2734 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2735 2736 Builder.restoreIP(CodeGenIP); 2737 2738 // collect some info for checks later 2739 ThenBB = Builder.GetInsertBlock(); 2740 EntryBB = ThenBB->getUniquePredecessor(); 2741 2742 // simple instructions for body 2743 Value *PrivLoad = 2744 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2745 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2746 }; 2747 2748 auto FiniCB = [&](InsertPointTy IP) { 2749 BasicBlock *IPBB = IP.getBlock(); 2750 EXPECT_NE(IPBB->end(), IP.getPoint()); 2751 }; 2752 2753 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 2754 Value *EntryBBTI = EntryBB->getTerminator(); 2755 EXPECT_NE(EntryBBTI, nullptr); 2756 
EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2757 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 2758 EXPECT_TRUE(EntryBr->isConditional()); 2759 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2760 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2761 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2762 2763 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 2764 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 2765 2766 CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0)); 2767 EXPECT_EQ(MasterEntryCI->arg_size(), 2U); 2768 EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master"); 2769 EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0))); 2770 2771 CallInst *MasterEndCI = nullptr; 2772 for (auto &FI : *ThenBB) { 2773 Instruction *cur = &FI; 2774 if (isa<CallInst>(cur)) { 2775 MasterEndCI = cast<CallInst>(cur); 2776 if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master") 2777 break; 2778 MasterEndCI = nullptr; 2779 } 2780 } 2781 EXPECT_NE(MasterEndCI, nullptr); 2782 EXPECT_EQ(MasterEndCI->arg_size(), 2U); 2783 EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0))); 2784 EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1)); 2785 } 2786 2787 TEST_F(OpenMPIRBuilderTest, MaskedDirective) { 2788 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2789 OpenMPIRBuilder OMPBuilder(*M); 2790 OMPBuilder.initialize(); 2791 F->setName("func"); 2792 IRBuilder<> Builder(BB); 2793 2794 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2795 2796 AllocaInst *PrivAI = nullptr; 2797 2798 BasicBlock *EntryBB = nullptr; 2799 BasicBlock *ThenBB = nullptr; 2800 2801 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2802 if (AllocaIP.isSet()) 2803 Builder.restoreIP(AllocaIP); 2804 else 2805 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2806 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2807 
Builder.CreateStore(F->arg_begin(), PrivAI); 2808 2809 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2810 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2811 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2812 2813 Builder.restoreIP(CodeGenIP); 2814 2815 // collect some info for checks later 2816 ThenBB = Builder.GetInsertBlock(); 2817 EntryBB = ThenBB->getUniquePredecessor(); 2818 2819 // simple instructions for body 2820 Value *PrivLoad = 2821 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2822 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2823 }; 2824 2825 auto FiniCB = [&](InsertPointTy IP) { 2826 BasicBlock *IPBB = IP.getBlock(); 2827 EXPECT_NE(IPBB->end(), IP.getPoint()); 2828 }; 2829 2830 Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 2831 Builder.restoreIP( 2832 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter)); 2833 Value *EntryBBTI = EntryBB->getTerminator(); 2834 EXPECT_NE(EntryBBTI, nullptr); 2835 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2836 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 2837 EXPECT_TRUE(EntryBr->isConditional()); 2838 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2839 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2840 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2841 2842 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 2843 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 2844 2845 CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0)); 2846 EXPECT_EQ(MaskedEntryCI->arg_size(), 3U); 2847 EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked"); 2848 EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0))); 2849 2850 CallInst *MaskedEndCI = nullptr; 2851 for (auto &FI : *ThenBB) { 2852 Instruction *cur = &FI; 2853 if (isa<CallInst>(cur)) { 2854 MaskedEndCI = cast<CallInst>(cur); 2855 if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked") 2856 break; 2857 
MaskedEndCI = nullptr; 2858 } 2859 } 2860 EXPECT_NE(MaskedEndCI, nullptr); 2861 EXPECT_EQ(MaskedEndCI->arg_size(), 2U); 2862 EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0))); 2863 EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1)); 2864 } 2865 2866 TEST_F(OpenMPIRBuilderTest, CriticalDirective) { 2867 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2868 OpenMPIRBuilder OMPBuilder(*M); 2869 OMPBuilder.initialize(); 2870 F->setName("func"); 2871 IRBuilder<> Builder(BB); 2872 2873 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2874 2875 AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2876 2877 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2878 // actual start for bodyCB 2879 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2880 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2881 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2882 2883 // body begin 2884 Builder.restoreIP(CodeGenIP); 2885 Builder.CreateStore(F->arg_begin(), PrivAI); 2886 Value *PrivLoad = 2887 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2888 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2889 }; 2890 2891 auto FiniCB = [&](InsertPointTy IP) { 2892 BasicBlock *IPBB = IP.getBlock(); 2893 EXPECT_NE(IPBB->end(), IP.getPoint()); 2894 }; 2895 BasicBlock *EntryBB = Builder.GetInsertBlock(); 2896 2897 Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, 2898 "testCRT", nullptr)); 2899 2900 CallInst *CriticalEntryCI = nullptr; 2901 for (auto &EI : *EntryBB) { 2902 Instruction *cur = &EI; 2903 if (isa<CallInst>(cur)) { 2904 CriticalEntryCI = cast<CallInst>(cur); 2905 if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical") 2906 break; 2907 CriticalEntryCI = nullptr; 2908 } 2909 } 2910 EXPECT_NE(CriticalEntryCI, nullptr); 2911 EXPECT_EQ(CriticalEntryCI->arg_size(), 3U); 2912 
EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical"); 2913 EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0))); 2914 2915 CallInst *CriticalEndCI = nullptr; 2916 for (auto &FI : *EntryBB) { 2917 Instruction *cur = &FI; 2918 if (isa<CallInst>(cur)) { 2919 CriticalEndCI = cast<CallInst>(cur); 2920 if (CriticalEndCI->getCalledFunction()->getName() == 2921 "__kmpc_end_critical") 2922 break; 2923 CriticalEndCI = nullptr; 2924 } 2925 } 2926 EXPECT_NE(CriticalEndCI, nullptr); 2927 EXPECT_EQ(CriticalEndCI->arg_size(), 3U); 2928 EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0))); 2929 EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1)); 2930 PointerType *CriticalNamePtrTy = 2931 PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8)); 2932 EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2)); 2933 GlobalVariable *GV = 2934 dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2)); 2935 ASSERT_NE(GV, nullptr); 2936 EXPECT_EQ(GV->getType(), CriticalNamePtrTy); 2937 const DataLayout &DL = M->getDataLayout(); 2938 const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy); 2939 const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace()); 2940 if (const llvm::MaybeAlign Alignment = GV->getAlign()) 2941 EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign)); 2942 } 2943 2944 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) { 2945 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2946 OpenMPIRBuilder OMPBuilder(*M); 2947 OMPBuilder.initialize(); 2948 F->setName("func"); 2949 IRBuilder<> Builder(BB); 2950 LLVMContext &Ctx = M->getContext(); 2951 2952 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2953 2954 InsertPointTy AllocaIP(&F->getEntryBlock(), 2955 F->getEntryBlock().getFirstInsertionPt()); 2956 2957 unsigned NumLoops = 2; 2958 SmallVector<Value *, 2> StoreValues; 2959 Type *LCTy = Type::getInt64Ty(Ctx); 
2960 StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 2961 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 2962 2963 // Test for "#omp ordered depend(source)" 2964 Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 2965 StoreValues, ".cnt.addr", 2966 /*IsDependSource=*/true)); 2967 2968 Builder.CreateRetVoid(); 2969 OMPBuilder.finalize(); 2970 EXPECT_FALSE(verifyModule(*M, &errs())); 2971 2972 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 2973 ASSERT_NE(AllocInst, nullptr); 2974 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 2975 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 2976 EXPECT_TRUE( 2977 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 2978 2979 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 2980 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 2981 GetElementPtrInst *DependAddrGEPIter = 2982 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 2983 ASSERT_NE(DependAddrGEPIter, nullptr); 2984 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 2985 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 2986 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 2987 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 2988 ASSERT_NE(FirstIdx, nullptr); 2989 ASSERT_NE(SecondIdx, nullptr); 2990 EXPECT_EQ(FirstIdx->getValue(), 0); 2991 EXPECT_EQ(SecondIdx->getValue(), Iter); 2992 StoreInst *StoreValue = 2993 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 2994 ASSERT_NE(StoreValue, nullptr); 2995 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 2996 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 2997 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 2998 IterInst = dyn_cast<Instruction>(StoreValue); 2999 } 3000 3001 GetElementPtrInst *DependBaseAddrGEP = 3002 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3003 ASSERT_NE(DependBaseAddrGEP, nullptr); 3004 
EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3005 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3006 auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3007 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3008 ASSERT_NE(FirstIdx, nullptr); 3009 ASSERT_NE(SecondIdx, nullptr); 3010 EXPECT_EQ(FirstIdx->getValue(), 0); 3011 EXPECT_EQ(SecondIdx->getValue(), 0); 3012 3013 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3014 ASSERT_NE(GTID, nullptr); 3015 EXPECT_EQ(GTID->arg_size(), 1U); 3016 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3017 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3018 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3019 3020 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3021 ASSERT_NE(Depend, nullptr); 3022 EXPECT_EQ(Depend->arg_size(), 3U); 3023 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post"); 3024 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3025 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3026 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3027 } 3028 3029 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) { 3030 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3031 OpenMPIRBuilder OMPBuilder(*M); 3032 OMPBuilder.initialize(); 3033 F->setName("func"); 3034 IRBuilder<> Builder(BB); 3035 LLVMContext &Ctx = M->getContext(); 3036 3037 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3038 3039 InsertPointTy AllocaIP(&F->getEntryBlock(), 3040 F->getEntryBlock().getFirstInsertionPt()); 3041 3042 unsigned NumLoops = 2; 3043 SmallVector<Value *, 2> StoreValues; 3044 Type *LCTy = Type::getInt64Ty(Ctx); 3045 StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 3046 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 3047 3048 // Test for "#omp ordered depend(sink: vec)" 3049 
Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 3050 StoreValues, ".cnt.addr", 3051 /*IsDependSource=*/false)); 3052 3053 Builder.CreateRetVoid(); 3054 OMPBuilder.finalize(); 3055 EXPECT_FALSE(verifyModule(*M, &errs())); 3056 3057 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 3058 ASSERT_NE(AllocInst, nullptr); 3059 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 3060 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 3061 EXPECT_TRUE( 3062 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 3063 3064 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 3065 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 3066 GetElementPtrInst *DependAddrGEPIter = 3067 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3068 ASSERT_NE(DependAddrGEPIter, nullptr); 3069 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 3070 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 3071 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 3072 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 3073 ASSERT_NE(FirstIdx, nullptr); 3074 ASSERT_NE(SecondIdx, nullptr); 3075 EXPECT_EQ(FirstIdx->getValue(), 0); 3076 EXPECT_EQ(SecondIdx->getValue(), Iter); 3077 StoreInst *StoreValue = 3078 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 3079 ASSERT_NE(StoreValue, nullptr); 3080 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 3081 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 3082 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 3083 IterInst = dyn_cast<Instruction>(StoreValue); 3084 } 3085 3086 GetElementPtrInst *DependBaseAddrGEP = 3087 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3088 ASSERT_NE(DependBaseAddrGEP, nullptr); 3089 EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3090 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3091 auto *FirstIdx = 
dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3092 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3093 ASSERT_NE(FirstIdx, nullptr); 3094 ASSERT_NE(SecondIdx, nullptr); 3095 EXPECT_EQ(FirstIdx->getValue(), 0); 3096 EXPECT_EQ(SecondIdx->getValue(), 0); 3097 3098 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3099 ASSERT_NE(GTID, nullptr); 3100 EXPECT_EQ(GTID->arg_size(), 1U); 3101 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3102 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3103 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3104 3105 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3106 ASSERT_NE(Depend, nullptr); 3107 EXPECT_EQ(Depend->arg_size(), 3U); 3108 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait"); 3109 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3110 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3111 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3112 } 3113 3114 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { 3115 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3116 OpenMPIRBuilder OMPBuilder(*M); 3117 OMPBuilder.initialize(); 3118 F->setName("func"); 3119 IRBuilder<> Builder(BB); 3120 3121 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3122 3123 AllocaInst *PrivAI = 3124 Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3125 3126 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3127 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3128 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3129 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3130 3131 Builder.restoreIP(CodeGenIP); 3132 Builder.CreateStore(F->arg_begin(), PrivAI); 3133 Value *PrivLoad = 3134 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3135 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 
3136 }; 3137 3138 auto FiniCB = [&](InsertPointTy IP) { 3139 BasicBlock *IPBB = IP.getBlock(); 3140 EXPECT_NE(IPBB->end(), IP.getPoint()); 3141 }; 3142 3143 // Test for "#omp ordered [threads]" 3144 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3145 Builder.restoreIP( 3146 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true)); 3147 3148 Builder.CreateRetVoid(); 3149 OMPBuilder.finalize(); 3150 EXPECT_FALSE(verifyModule(*M, &errs())); 3151 3152 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3153 3154 CallInst *OrderedEntryCI = nullptr; 3155 for (auto &EI : *EntryBB) { 3156 Instruction *Cur = &EI; 3157 if (isa<CallInst>(Cur)) { 3158 OrderedEntryCI = cast<CallInst>(Cur); 3159 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3160 break; 3161 OrderedEntryCI = nullptr; 3162 } 3163 } 3164 EXPECT_NE(OrderedEntryCI, nullptr); 3165 EXPECT_EQ(OrderedEntryCI->arg_size(), 2U); 3166 EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered"); 3167 EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0))); 3168 3169 CallInst *OrderedEndCI = nullptr; 3170 for (auto &FI : *EntryBB) { 3171 Instruction *Cur = &FI; 3172 if (isa<CallInst>(Cur)) { 3173 OrderedEndCI = cast<CallInst>(Cur); 3174 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3175 break; 3176 OrderedEndCI = nullptr; 3177 } 3178 } 3179 EXPECT_NE(OrderedEndCI, nullptr); 3180 EXPECT_EQ(OrderedEndCI->arg_size(), 2U); 3181 EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0))); 3182 EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1)); 3183 } 3184 3185 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { 3186 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3187 OpenMPIRBuilder OMPBuilder(*M); 3188 OMPBuilder.initialize(); 3189 F->setName("func"); 3190 IRBuilder<> Builder(BB); 3191 3192 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3193 3194 AllocaInst *PrivAI = 3195 
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3196 3197 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3198 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3199 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3200 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3201 3202 Builder.restoreIP(CodeGenIP); 3203 Builder.CreateStore(F->arg_begin(), PrivAI); 3204 Value *PrivLoad = 3205 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3206 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3207 }; 3208 3209 auto FiniCB = [&](InsertPointTy IP) { 3210 BasicBlock *IPBB = IP.getBlock(); 3211 EXPECT_NE(IPBB->end(), IP.getPoint()); 3212 }; 3213 3214 // Test for "#omp ordered simd" 3215 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3216 Builder.restoreIP( 3217 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false)); 3218 3219 Builder.CreateRetVoid(); 3220 OMPBuilder.finalize(); 3221 EXPECT_FALSE(verifyModule(*M, &errs())); 3222 3223 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3224 3225 CallInst *OrderedEntryCI = nullptr; 3226 for (auto &EI : *EntryBB) { 3227 Instruction *Cur = &EI; 3228 if (isa<CallInst>(Cur)) { 3229 OrderedEntryCI = cast<CallInst>(Cur); 3230 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3231 break; 3232 OrderedEntryCI = nullptr; 3233 } 3234 } 3235 EXPECT_EQ(OrderedEntryCI, nullptr); 3236 3237 CallInst *OrderedEndCI = nullptr; 3238 for (auto &FI : *EntryBB) { 3239 Instruction *Cur = &FI; 3240 if (isa<CallInst>(Cur)) { 3241 OrderedEndCI = cast<CallInst>(Cur); 3242 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3243 break; 3244 OrderedEndCI = nullptr; 3245 } 3246 } 3247 EXPECT_EQ(OrderedEndCI, nullptr); 3248 } 3249 3250 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { 3251 OpenMPIRBuilder OMPBuilder(*M); 3252 OMPBuilder.initialize(); 3253 F->setName("func"); 3254 IRBuilder<> Builder(BB); 3255 3256 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3257 3258 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3259 AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3260 AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3261 3262 BasicBlock *EntryBB = BB; 3263 3264 OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, 3265 PrivAddress, Int32, /*BranchtoEnd*/ true); 3266 3267 BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator()); 3268 3269 EXPECT_NE(EntryBr, nullptr); 3270 EXPECT_TRUE(EntryBr->isConditional()); 3271 3272 BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); 3273 BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); 3274 CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition()); 3275 3276 EXPECT_NE(CMP, nullptr); 3277 EXPECT_NE(NotMasterBB, nullptr); 3278 EXPECT_NE(CopyinEnd, nullptr); 3279 3280 BranchInst *NotMasterBr = 3281 dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator()); 3282 EXPECT_NE(NotMasterBr, nullptr); 3283 EXPECT_FALSE(NotMasterBr->isConditional()); 3284 EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); 3285 } 3286 3287 TEST_F(OpenMPIRBuilderTest, SingleDirective) { 3288 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3289 OpenMPIRBuilder OMPBuilder(*M); 3290 OMPBuilder.initialize(); 3291 F->setName("func"); 3292 IRBuilder<> Builder(BB); 3293 3294 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3295 3296 AllocaInst *PrivAI = nullptr; 3297 3298 BasicBlock *EntryBB = nullptr; 3299 BasicBlock *ThenBB = nullptr; 3300 3301 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3302 if (AllocaIP.isSet()) 3303 Builder.restoreIP(AllocaIP); 3304 else 3305 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3306 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3307 Builder.CreateStore(F->arg_begin(), PrivAI); 3308 3309 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 
3310 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3311 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3312 3313 Builder.restoreIP(CodeGenIP); 3314 3315 // collect some info for checks later 3316 ThenBB = Builder.GetInsertBlock(); 3317 EntryBB = ThenBB->getUniquePredecessor(); 3318 3319 // simple instructions for body 3320 Value *PrivLoad = 3321 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3322 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3323 }; 3324 3325 auto FiniCB = [&](InsertPointTy IP) { 3326 BasicBlock *IPBB = IP.getBlock(); 3327 EXPECT_NE(IPBB->end(), IP.getPoint()); 3328 }; 3329 3330 Builder.restoreIP( 3331 OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ false)); 3332 Value *EntryBBTI = EntryBB->getTerminator(); 3333 EXPECT_NE(EntryBBTI, nullptr); 3334 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3335 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3336 EXPECT_TRUE(EntryBr->isConditional()); 3337 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3338 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3339 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3340 3341 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3342 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3343 3344 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3345 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3346 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3347 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3348 3349 CallInst *SingleEndCI = nullptr; 3350 for (auto &FI : *ThenBB) { 3351 Instruction *cur = &FI; 3352 if (isa<CallInst>(cur)) { 3353 SingleEndCI = cast<CallInst>(cur); 3354 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3355 break; 3356 SingleEndCI = nullptr; 3357 } 3358 } 3359 EXPECT_NE(SingleEndCI, nullptr); 3360 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3361 
EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3362 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3363 3364 bool FoundBarrier = false; 3365 for (auto &FI : *ExitBB) { 3366 Instruction *cur = &FI; 3367 if (auto CI = dyn_cast<CallInst>(cur)) { 3368 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3369 FoundBarrier = true; 3370 break; 3371 } 3372 } 3373 } 3374 EXPECT_TRUE(FoundBarrier); 3375 } 3376 3377 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { 3378 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3379 OpenMPIRBuilder OMPBuilder(*M); 3380 OMPBuilder.initialize(); 3381 F->setName("func"); 3382 IRBuilder<> Builder(BB); 3383 3384 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3385 3386 AllocaInst *PrivAI = nullptr; 3387 3388 BasicBlock *EntryBB = nullptr; 3389 BasicBlock *ThenBB = nullptr; 3390 3391 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3392 if (AllocaIP.isSet()) 3393 Builder.restoreIP(AllocaIP); 3394 else 3395 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3396 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3397 Builder.CreateStore(F->arg_begin(), PrivAI); 3398 3399 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3400 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3401 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3402 3403 Builder.restoreIP(CodeGenIP); 3404 3405 // collect some info for checks later 3406 ThenBB = Builder.GetInsertBlock(); 3407 EntryBB = ThenBB->getUniquePredecessor(); 3408 3409 // simple instructions for body 3410 Value *PrivLoad = 3411 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3412 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3413 }; 3414 3415 auto FiniCB = [&](InsertPointTy IP) { 3416 BasicBlock *IPBB = IP.getBlock(); 3417 EXPECT_NE(IPBB->end(), IP.getPoint()); 3418 }; 3419 3420 Builder.restoreIP( 3421 
OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ true)); 3422 Value *EntryBBTI = EntryBB->getTerminator(); 3423 EXPECT_NE(EntryBBTI, nullptr); 3424 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3425 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3426 EXPECT_TRUE(EntryBr->isConditional()); 3427 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3428 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3429 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3430 3431 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3432 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3433 3434 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3435 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3436 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3437 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3438 3439 CallInst *SingleEndCI = nullptr; 3440 for (auto &FI : *ThenBB) { 3441 Instruction *cur = &FI; 3442 if (isa<CallInst>(cur)) { 3443 SingleEndCI = cast<CallInst>(cur); 3444 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3445 break; 3446 SingleEndCI = nullptr; 3447 } 3448 } 3449 EXPECT_NE(SingleEndCI, nullptr); 3450 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3451 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3452 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3453 3454 CallInst *ExitBarrier = nullptr; 3455 for (auto &FI : *ExitBB) { 3456 Instruction *cur = &FI; 3457 if (auto CI = dyn_cast<CallInst>(cur)) { 3458 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3459 ExitBarrier = CI; 3460 break; 3461 } 3462 } 3463 } 3464 EXPECT_EQ(ExitBarrier, nullptr); 3465 } 3466 3467 // Helper class to check each instruction of a BB. 
3468 class BBInstIter { 3469 BasicBlock *BB; 3470 BasicBlock::iterator BBI; 3471 3472 public: 3473 BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {} 3474 3475 bool hasNext() const { return BBI != BB->end(); } 3476 3477 template <typename InstTy> InstTy *next() { 3478 if (!hasNext()) 3479 return nullptr; 3480 Instruction *Cur = &*BBI++; 3481 if (!isa<InstTy>(Cur)) 3482 return nullptr; 3483 return cast<InstTy>(Cur); 3484 } 3485 }; 3486 3487 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { 3488 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3489 OpenMPIRBuilder OMPBuilder(*M); 3490 OMPBuilder.initialize(); 3491 F->setName("func"); 3492 IRBuilder<> Builder(BB); 3493 3494 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3495 3496 AllocaInst *PrivAI = nullptr; 3497 3498 BasicBlock *EntryBB = nullptr; 3499 BasicBlock *ThenBB = nullptr; 3500 3501 Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType()); 3502 Builder.CreateStore(F->arg_begin(), CPVar); 3503 3504 FunctionType *CopyFuncTy = FunctionType::get( 3505 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false); 3506 Function *CopyFunc = 3507 Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); 3508 3509 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3510 if (AllocaIP.isSet()) 3511 Builder.restoreIP(AllocaIP); 3512 else 3513 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3514 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3515 Builder.CreateStore(F->arg_begin(), PrivAI); 3516 3517 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3518 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3519 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3520 3521 Builder.restoreIP(CodeGenIP); 3522 3523 // collect some info for checks later 3524 ThenBB = Builder.GetInsertBlock(); 3525 EntryBB = ThenBB->getUniquePredecessor(); 3526 3527 // simple instructions for body 3528 Value 
*PrivLoad = 3529 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3530 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3531 }; 3532 3533 auto FiniCB = [&](InsertPointTy IP) { 3534 BasicBlock *IPBB = IP.getBlock(); 3535 // IP must be before the unconditional branch to ExitBB 3536 EXPECT_NE(IPBB->end(), IP.getPoint()); 3537 }; 3538 3539 Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, 3540 /*IsNowait*/ false, {CPVar}, 3541 {CopyFunc})); 3542 Value *EntryBBTI = EntryBB->getTerminator(); 3543 EXPECT_NE(EntryBBTI, nullptr); 3544 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3545 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3546 EXPECT_TRUE(EntryBr->isConditional()); 3547 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3548 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3549 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3550 3551 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3552 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3553 3554 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3555 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3556 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3557 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3558 3559 // check ThenBB 3560 BBInstIter ThenBBI(ThenBB); 3561 // load PrivAI 3562 auto *PrivLI = ThenBBI.next<LoadInst>(); 3563 EXPECT_NE(PrivLI, nullptr); 3564 EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI); 3565 // icmp 3566 EXPECT_TRUE(ThenBBI.next<ICmpInst>()); 3567 // store 1, DidIt 3568 auto *DidItSI = ThenBBI.next<StoreInst>(); 3569 EXPECT_NE(DidItSI, nullptr); 3570 EXPECT_EQ(DidItSI->getValueOperand(), 3571 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 3572 Value *DidIt = DidItSI->getPointerOperand(); 3573 // call __kmpc_end_single 3574 auto *SingleEndCI = ThenBBI.next<CallInst>(); 3575 EXPECT_NE(SingleEndCI, nullptr); 3576 EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), 
"__kmpc_end_single"); 3577 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3578 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3579 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3580 // br ExitBB 3581 auto *ExitBBBI = ThenBBI.next<BranchInst>(); 3582 EXPECT_NE(ExitBBBI, nullptr); 3583 EXPECT_TRUE(ExitBBBI->isUnconditional()); 3584 EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB); 3585 EXPECT_FALSE(ThenBBI.hasNext()); 3586 3587 // check ExitBB 3588 BBInstIter ExitBBI(ExitBB); 3589 // call __kmpc_global_thread_num 3590 auto *ThreadNumCI = ExitBBI.next<CallInst>(); 3591 EXPECT_NE(ThreadNumCI, nullptr); 3592 EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(), 3593 "__kmpc_global_thread_num"); 3594 // load DidIt 3595 auto *DidItLI = ExitBBI.next<LoadInst>(); 3596 EXPECT_NE(DidItLI, nullptr); 3597 EXPECT_EQ(DidItLI->getPointerOperand(), DidIt); 3598 // call __kmpc_copyprivate 3599 auto *CopyPrivateCI = ExitBBI.next<CallInst>(); 3600 EXPECT_NE(CopyPrivateCI, nullptr); 3601 EXPECT_EQ(CopyPrivateCI->arg_size(), 6U); 3602 EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3))); 3603 EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar); 3604 EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4))); 3605 EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc); 3606 EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5))); 3607 DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5)); 3608 EXPECT_EQ(DidItLI->getOperand(0), DidIt); 3609 EXPECT_FALSE(ExitBBI.hasNext()); 3610 } 3611 3612 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) { 3613 OpenMPIRBuilder OMPBuilder(*M); 3614 OMPBuilder.initialize(); 3615 F->setName("func"); 3616 IRBuilder<> Builder(BB); 3617 3618 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3619 3620 Type *Float32 = Type::getFloatTy(M->getContext()); 3621 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3622 XVal->setName("AtomicVar"); 3623 AllocaInst *VVal = Builder.CreateAlloca(Float32); 3624 
VVal->setName("AtomicRead"); 3625 AtomicOrdering AO = AtomicOrdering::Monotonic; 3626 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3627 OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false}; 3628 3629 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3630 3631 IntegerType *IntCastTy = 3632 IntegerType::get(M->getContext(), Float32->getScalarSizeInBits()); 3633 3634 LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode()); 3635 EXPECT_TRUE(AtomicLoad->isAtomic()); 3636 EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal); 3637 3638 BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode()); 3639 EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy); 3640 EXPECT_EQ(CastToFlt->getDestTy(), Float32); 3641 EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad); 3642 3643 StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode()); 3644 EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt); 3645 EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal); 3646 3647 Builder.CreateRetVoid(); 3648 OMPBuilder.finalize(); 3649 EXPECT_FALSE(verifyModule(*M, &errs())); 3650 } 3651 3652 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) { 3653 OpenMPIRBuilder OMPBuilder(*M); 3654 OMPBuilder.initialize(); 3655 F->setName("func"); 3656 IRBuilder<> Builder(BB); 3657 3658 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3659 3660 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3661 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3662 XVal->setName("AtomicVar"); 3663 AllocaInst *VVal = Builder.CreateAlloca(Int32); 3664 VVal->setName("AtomicRead"); 3665 AtomicOrdering AO = AtomicOrdering::Monotonic; 3666 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3667 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 3668 3669 BasicBlock *EntryBB = BB; 3670 3671 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3672 LoadInst *AtomicLoad = nullptr; 3673 StoreInst *StoreofAtomic = nullptr; 3674 
3675 for (Instruction &Cur : *EntryBB) { 3676 if (isa<LoadInst>(Cur)) { 3677 AtomicLoad = cast<LoadInst>(&Cur); 3678 if (AtomicLoad->getPointerOperand() == XVal) 3679 continue; 3680 AtomicLoad = nullptr; 3681 } else if (isa<StoreInst>(Cur)) { 3682 StoreofAtomic = cast<StoreInst>(&Cur); 3683 if (StoreofAtomic->getPointerOperand() == VVal) 3684 continue; 3685 StoreofAtomic = nullptr; 3686 } 3687 } 3688 3689 EXPECT_NE(AtomicLoad, nullptr); 3690 EXPECT_TRUE(AtomicLoad->isAtomic()); 3691 3692 EXPECT_NE(StoreofAtomic, nullptr); 3693 EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad); 3694 3695 Builder.CreateRetVoid(); 3696 OMPBuilder.finalize(); 3697 3698 EXPECT_FALSE(verifyModule(*M, &errs())); 3699 } 3700 3701 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) { 3702 OpenMPIRBuilder OMPBuilder(*M); 3703 OMPBuilder.initialize(); 3704 F->setName("func"); 3705 IRBuilder<> Builder(BB); 3706 3707 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3708 3709 LLVMContext &Ctx = M->getContext(); 3710 Type *Float32 = Type::getFloatTy(Ctx); 3711 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3712 XVal->setName("AtomicVar"); 3713 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3714 AtomicOrdering AO = AtomicOrdering::Monotonic; 3715 Constant *ValToWrite = ConstantFP::get(Float32, 1.0); 3716 3717 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3718 3719 IntegerType *IntCastTy = 3720 IntegerType::get(M->getContext(), Float32->getScalarSizeInBits()); 3721 3722 Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy); 3723 3724 StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode()); 3725 EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast); 3726 EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal); 3727 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3728 3729 Builder.CreateRetVoid(); 3730 OMPBuilder.finalize(); 3731 EXPECT_FALSE(verifyModule(*M, &errs())); 3732 } 3733 3734 TEST_F(OpenMPIRBuilderTest, 
OMPAtomicWriteInt) { 3735 OpenMPIRBuilder OMPBuilder(*M); 3736 OMPBuilder.initialize(); 3737 F->setName("func"); 3738 IRBuilder<> Builder(BB); 3739 3740 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3741 3742 LLVMContext &Ctx = M->getContext(); 3743 IntegerType *Int32 = Type::getInt32Ty(Ctx); 3744 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3745 XVal->setName("AtomicVar"); 3746 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3747 AtomicOrdering AO = AtomicOrdering::Monotonic; 3748 ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3749 3750 BasicBlock *EntryBB = BB; 3751 3752 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3753 3754 StoreInst *StoreofAtomic = nullptr; 3755 3756 for (Instruction &Cur : *EntryBB) { 3757 if (isa<StoreInst>(Cur)) { 3758 StoreofAtomic = cast<StoreInst>(&Cur); 3759 if (StoreofAtomic->getPointerOperand() == XVal) 3760 continue; 3761 StoreofAtomic = nullptr; 3762 } 3763 } 3764 3765 EXPECT_NE(StoreofAtomic, nullptr); 3766 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3767 EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite); 3768 3769 Builder.CreateRetVoid(); 3770 OMPBuilder.finalize(); 3771 EXPECT_FALSE(verifyModule(*M, &errs())); 3772 } 3773 3774 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) { 3775 OpenMPIRBuilder OMPBuilder(*M); 3776 OMPBuilder.initialize(); 3777 F->setName("func"); 3778 IRBuilder<> Builder(BB); 3779 3780 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3781 3782 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3783 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3784 XVal->setName("AtomicVar"); 3785 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 3786 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3787 AtomicOrdering AO = AtomicOrdering::Monotonic; 3788 ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3789 Value *Expr = nullptr; 3790 AtomicRMWInst::BinOp 
RMWOp = AtomicRMWInst::Sub; 3791 bool IsXLHSInRHSPart = false; 3792 3793 BasicBlock *EntryBB = BB; 3794 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 3795 EntryBB->getFirstInsertionPt()); 3796 Value *Sub = nullptr; 3797 3798 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 3799 Sub = IRB.CreateSub(ConstVal, Atomic); 3800 return Sub; 3801 }; 3802 Builder.restoreIP(OMPBuilder.createAtomicUpdate( 3803 Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart)); 3804 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 3805 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 3806 EXPECT_NE(ContTI, nullptr); 3807 BasicBlock *EndBB = ContTI->getSuccessor(0); 3808 EXPECT_TRUE(ContTI->isConditional()); 3809 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 3810 EXPECT_NE(EndBB, nullptr); 3811 3812 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 3813 EXPECT_NE(Phi, nullptr); 3814 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 3815 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 3816 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 3817 3818 EXPECT_EQ(Sub->getNumUses(), 1U); 3819 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 3820 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 3821 3822 ExtractValueInst *ExVI1 = 3823 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 3824 EXPECT_NE(ExVI1, nullptr); 3825 AtomicCmpXchgInst *CmpExchg = 3826 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 3827 EXPECT_NE(CmpExchg, nullptr); 3828 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 3829 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 3830 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 3831 3832 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 3833 EXPECT_NE(Ld, nullptr); 3834 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 3835 3836 Builder.CreateRetVoid(); 3837 OMPBuilder.finalize(); 3838 EXPECT_FALSE(verifyModule(*M, &errs())); 3839 } 3840 3841 
/// Check createAtomicUpdate on a float variable with a custom update callback
/// (1.0 - x): a loop block is expected whose PHI merges the entry value with
/// the cmpxchg result, and whose cmpxchg on X takes its new value from a load
/// of the alloca that the callback's result was stored to.
TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  Type *FloatTy = Type::getFloatTy(M->getContext());
  AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
  XVal->setName("AtomicVar");
  Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
  OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
  Value *Expr = nullptr;
  AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
  bool IsXLHSInRHSPart = false;

  BasicBlock *EntryBB = BB;
  OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
                                          EntryBB->getFirstInsertionPt());
  Value *Sub = nullptr;

  auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
    Sub = IRB.CreateFSub(ConstVal, Atomic);
    return Sub;
  };
  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
  BasicBlock *ContBB = EntryBB->getSingleSuccessor();
  ASSERT_NE(ContBB, nullptr);
  auto *ContTI = dyn_cast_or_null<BranchInst>(ContBB->getTerminator());
  ASSERT_NE(ContTI, nullptr);
  // getSuccessor(1) below is only valid on a conditional branch.
  ASSERT_TRUE(ContTI->isConditional());
  BasicBlock *EndBB = ContTI->getSuccessor(0);
  EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
  EXPECT_NE(EndBB, nullptr);

  PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
  ASSERT_NE(Phi, nullptr);
  // Incoming blocks 0 and 1 are read below.
  ASSERT_EQ(Phi->getNumIncomingValues(), 2U);
  EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
  EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);

  // Sub is only set if the update callback ran; user_back() needs a user.
  ASSERT_NE(Sub, nullptr);
  ASSERT_EQ(Sub->getNumUses(), 1U);
  StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
  ASSERT_NE(St, nullptr);
  AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());

  auto *ExVI1 =
      dyn_cast_or_null<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
  ASSERT_NE(ExVI1, nullptr);
  AtomicCmpXchgInst *CmpExchg =
      dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
  ASSERT_NE(CmpExchg, nullptr);
  EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
  EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
  EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);

  LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
  ASSERT_NE(Ld, nullptr);
  EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// Check createAtomicUpdate on an i32 variable when the requested RMW
/// operation is UMax and a custom update callback is supplied: the same
/// PHI / cmpxchg loop structure as in the other atomic-update tests is
/// expected.
TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  Type *IntTy = Type::getInt32Ty(M->getContext());
  AllocaInst *XVal = Builder.CreateAlloca(IntTy);
  XVal->setName("AtomicVar");
  Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
  OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
  bool IsXLHSInRHSPart = false;

  BasicBlock *EntryBB = BB;
  OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
                                          EntryBB->getFirstInsertionPt());
  Value *Sub = nullptr;

  auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
    Sub = IRB.CreateSub(ConstVal, Atomic);
    return Sub;
  };
  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
  BasicBlock *ContBB = EntryBB->getSingleSuccessor();
  ASSERT_NE(ContBB, nullptr);
  auto *ContTI = dyn_cast_or_null<BranchInst>(ContBB->getTerminator());
  ASSERT_NE(ContTI, nullptr);
  // getSuccessor(1) below is only valid on a conditional branch.
  ASSERT_TRUE(ContTI->isConditional());
  BasicBlock *EndBB = ContTI->getSuccessor(0);
  EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
  EXPECT_NE(EndBB, nullptr);

  PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
  ASSERT_NE(Phi, nullptr);
  // Incoming blocks 0 and 1 are read below.
  ASSERT_EQ(Phi->getNumIncomingValues(), 2U);
  EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
  EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);

  // Sub is only set if the update callback ran; user_back() needs a user.
  ASSERT_NE(Sub, nullptr);
  ASSERT_EQ(Sub->getNumUses(), 1U);
  StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
  ASSERT_NE(St, nullptr);
  AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());

  auto *ExVI1 =
      dyn_cast_or_null<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
  ASSERT_NE(ExVI1, nullptr);
  AtomicCmpXchgInst *CmpExchg =
      dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
  ASSERT_NE(CmpExchg, nullptr);
  EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
  EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
  EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);

  LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
  ASSERT_NE(Ld, nullptr);
  EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// Check createAtomicCapture for an i32 add: the function must stay a single
/// block, the instruction after the initialising store must be an atomicrmw
/// on X with the requested operation, and its result must be stored to V.
TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  LLVMContext &Ctx = M->getContext();
  IntegerType *Int32 = Type::getInt32Ty(Ctx);
  AllocaInst *XVal = Builder.CreateAlloca(Int32);
  XVal->setName("AtomicVar");
  AllocaInst *VVal = Builder.CreateAlloca(Int32);
  VVal->setName("AtomicCapTar");
  StoreInst *Init =
      Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);

  OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
  OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
  bool IsXLHSInRHSPart = true;
  bool IsPostfixUpdate = true;
  bool UpdateExpr = true;

  BasicBlock *EntryBB = BB;
  OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
                                          EntryBB->getFirstInsertionPt());

  // integer update - not used
  auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };

  Builder.restoreIP(OMPBuilder.createAtomicCapture(
      Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
      IsPostfixUpdate, IsXLHSInRHSPart));
  EXPECT_EQ(EntryBB->getParent()->size(), 1U);
  // getNextNode() returns null if nothing was emitted after the store.
  auto *ARWM = dyn_cast_or_null<AtomicRMWInst>(Init->getNextNode());
  ASSERT_NE(ARWM, nullptr);
  EXPECT_EQ(ARWM->getPointerOperand(), XVal);
  EXPECT_EQ(ARWM->getOperation(), RMWOp);
  // user_back() must not be called on an empty user list.
  ASSERT_FALSE(ARWM->user_empty());
  StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
  ASSERT_NE(St, nullptr);
  EXPECT_EQ(St->getPointerOperand(), VVal);

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// Check createAtomicCompare without capture. Three requests are issued on the
/// same i32 variable and must be emitted back to back in the entry block:
/// signed MAX (expected as atomicrmw min), unsigned MAX (expected as atomicrmw
/// umax) and EQ (expected as cmpxchg with Expr as compare and D as new value).
TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  LLVMContext &Ctx = M->getContext();
  IntegerType *Int32 = Type::getInt32Ty(Ctx);
  AllocaInst *XVal = Builder.CreateAlloca(Int32);
  XVal->setName("x");
  StoreInst *Init =
      Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);

  OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
  OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
  // V and R are not used in atomic compare
  OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
  OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
  OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;

  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));

  BasicBlock *EntryBB = BB;
  EXPECT_EQ(EntryBB->getParent()->size(), 1U);
  EXPECT_EQ(EntryBB->size(), 5U);

  // Each getNextNode() may return null if fewer instructions were emitted
  // than expected, so every step is null-tolerant and fatal on mismatch.
  auto *ARWM1 = dyn_cast_or_null<AtomicRMWInst>(Init->getNextNode());
  ASSERT_NE(ARWM1, nullptr);
  EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
  EXPECT_EQ(ARWM1->getValOperand(), Expr);
  EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);

  auto *ARWM2 = dyn_cast_or_null<AtomicRMWInst>(ARWM1->getNextNode());
  ASSERT_NE(ARWM2, nullptr);
  EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
  EXPECT_EQ(ARWM2->getValOperand(), Expr);
  EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);

  auto *AXCHG = dyn_cast_or_null<AtomicCmpXchgInst>(ARWM2->getNextNode());
  ASSERT_NE(AXCHG, nullptr);
  EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
  EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
  EXPECT_EQ(AXCHG->getNewValOperand(), D);

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  LLVMContext &Ctx = M->getContext();
  IntegerType *Int32 = Type::getInt32Ty(Ctx);
  AllocaInst *XVal = Builder.CreateAlloca(Int32);
  XVal->setName("x");
  AllocaInst *VVal = Builder.CreateAlloca(Int32);
  VVal->setName("v");
  AllocaInst *RVal = Builder.CreateAlloca(Int32);
  RVal->setName("r");

  StoreInst *Init =
      Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);

  OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
  OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
  OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
  OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
  OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};

  AtomicOrdering AO = AtomicOrdering::Monotonic;
  ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
  OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;

  // { cond-update-stmt v = x; }
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
      /* IsPostfixUpdate */ false,
      /* IsFailOnly */ false));
  // { v = x; cond-update-stmt }
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
      /* IsPostfixUpdate */ true,
      /* IsFailOnly */ false));
  // if(x == e) { x = d; } else { v = x; }
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, X, V, NoR, Expr,
D, AO, OpEQ, /* IsXBinopExpr */ true, 4129 /* IsPostfixUpdate */ false, 4130 /* IsFailOnly */ true)); 4131 // { r = x == e; if(r) { x = d; } } 4132 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4133 Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4134 /* IsPostfixUpdate */ false, 4135 /* IsFailOnly */ false)); 4136 // { r = x == e; if(r) { x = d; } else { v = x; } } 4137 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4138 Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4139 /* IsPostfixUpdate */ false, 4140 /* IsFailOnly */ true)); 4141 4142 // { v = x; cond-update-stmt } 4143 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4144 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true, 4145 /* IsPostfixUpdate */ true, 4146 /* IsFailOnly */ false)); 4147 // { cond-update-stmt v = x; } 4148 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4149 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false, 4150 /* IsPostfixUpdate */ false, 4151 /* IsFailOnly */ false)); 4152 4153 BasicBlock *EntryBB = BB; 4154 EXPECT_EQ(EntryBB->getParent()->size(), 5U); 4155 BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode()); 4156 EXPECT_NE(Cont1, nullptr); 4157 BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode()); 4158 EXPECT_NE(Exit1, nullptr); 4159 BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode()); 4160 EXPECT_NE(Cont2, nullptr); 4161 BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode()); 4162 EXPECT_NE(Exit2, nullptr); 4163 4164 AtomicCmpXchgInst *CmpXchg1 = 4165 dyn_cast<AtomicCmpXchgInst>(Init->getNextNode()); 4166 EXPECT_NE(CmpXchg1, nullptr); 4167 EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal); 4168 EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr); 4169 EXPECT_EQ(CmpXchg1->getNewValOperand(), D); 4170 ExtractValueInst *ExtVal1 = 4171 dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode()); 4172 EXPECT_NE(ExtVal1, nullptr); 4173 EXPECT_EQ(ExtVal1->getAggregateOperand(), 
CmpXchg1); 4174 EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U)); 4175 ExtractValueInst *ExtVal2 = 4176 dyn_cast<ExtractValueInst>(ExtVal1->getNextNode()); 4177 EXPECT_NE(ExtVal2, nullptr); 4178 EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1); 4179 EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U)); 4180 SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode()); 4181 EXPECT_NE(Sel1, nullptr); 4182 EXPECT_EQ(Sel1->getCondition(), ExtVal2); 4183 EXPECT_EQ(Sel1->getTrueValue(), Expr); 4184 EXPECT_EQ(Sel1->getFalseValue(), ExtVal1); 4185 StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode()); 4186 EXPECT_NE(Store1, nullptr); 4187 EXPECT_EQ(Store1->getPointerOperand(), VVal); 4188 EXPECT_EQ(Store1->getValueOperand(), Sel1); 4189 4190 AtomicCmpXchgInst *CmpXchg2 = 4191 dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode()); 4192 EXPECT_NE(CmpXchg2, nullptr); 4193 EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal); 4194 EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr); 4195 EXPECT_EQ(CmpXchg2->getNewValOperand(), D); 4196 ExtractValueInst *ExtVal3 = 4197 dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode()); 4198 EXPECT_NE(ExtVal3, nullptr); 4199 EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2); 4200 EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U)); 4201 StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode()); 4202 EXPECT_NE(Store2, nullptr); 4203 EXPECT_EQ(Store2->getPointerOperand(), VVal); 4204 EXPECT_EQ(Store2->getValueOperand(), ExtVal3); 4205 4206 AtomicCmpXchgInst *CmpXchg3 = 4207 dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode()); 4208 EXPECT_NE(CmpXchg3, nullptr); 4209 EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal); 4210 EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr); 4211 EXPECT_EQ(CmpXchg3->getNewValOperand(), D); 4212 ExtractValueInst *ExtVal4 = 4213 dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode()); 4214 EXPECT_NE(ExtVal4, nullptr); 4215 EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3); 4216 
EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U)); 4217 ExtractValueInst *ExtVal5 = 4218 dyn_cast<ExtractValueInst>(ExtVal4->getNextNode()); 4219 EXPECT_NE(ExtVal5, nullptr); 4220 EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3); 4221 EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U)); 4222 BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode()); 4223 EXPECT_NE(Br1, nullptr); 4224 EXPECT_EQ(Br1->isConditional(), true); 4225 EXPECT_EQ(Br1->getCondition(), ExtVal5); 4226 EXPECT_EQ(Br1->getSuccessor(0), Exit1); 4227 EXPECT_EQ(Br1->getSuccessor(1), Cont1); 4228 4229 StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front()); 4230 EXPECT_NE(Store3, nullptr); 4231 EXPECT_EQ(Store3->getPointerOperand(), VVal); 4232 EXPECT_EQ(Store3->getValueOperand(), ExtVal4); 4233 BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode()); 4234 EXPECT_NE(Br2, nullptr); 4235 EXPECT_EQ(Br2->isUnconditional(), true); 4236 EXPECT_EQ(Br2->getSuccessor(0), Exit1); 4237 4238 AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front()); 4239 EXPECT_NE(CmpXchg4, nullptr); 4240 EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal); 4241 EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr); 4242 EXPECT_EQ(CmpXchg4->getNewValOperand(), D); 4243 ExtractValueInst *ExtVal6 = 4244 dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode()); 4245 EXPECT_NE(ExtVal6, nullptr); 4246 EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4); 4247 EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U)); 4248 ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode()); 4249 EXPECT_NE(ZExt1, nullptr); 4250 EXPECT_EQ(ZExt1->getDestTy(), Int32); 4251 StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode()); 4252 EXPECT_NE(Store4, nullptr); 4253 EXPECT_EQ(Store4->getPointerOperand(), RVal); 4254 EXPECT_EQ(Store4->getValueOperand(), ZExt1); 4255 4256 AtomicCmpXchgInst *CmpXchg5 = 4257 dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode()); 4258 EXPECT_NE(CmpXchg5, nullptr); 4259 
EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal); 4260 EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr); 4261 EXPECT_EQ(CmpXchg5->getNewValOperand(), D); 4262 ExtractValueInst *ExtVal7 = 4263 dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode()); 4264 EXPECT_NE(ExtVal7, nullptr); 4265 EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5); 4266 EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U)); 4267 ExtractValueInst *ExtVal8 = 4268 dyn_cast<ExtractValueInst>(ExtVal7->getNextNode()); 4269 EXPECT_NE(ExtVal8, nullptr); 4270 EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5); 4271 EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U)); 4272 BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode()); 4273 EXPECT_NE(Br3, nullptr); 4274 EXPECT_EQ(Br3->isConditional(), true); 4275 EXPECT_EQ(Br3->getCondition(), ExtVal8); 4276 EXPECT_EQ(Br3->getSuccessor(0), Exit2); 4277 EXPECT_EQ(Br3->getSuccessor(1), Cont2); 4278 4279 StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front()); 4280 EXPECT_NE(Store5, nullptr); 4281 EXPECT_EQ(Store5->getPointerOperand(), VVal); 4282 EXPECT_EQ(Store5->getValueOperand(), ExtVal7); 4283 BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode()); 4284 EXPECT_NE(Br4, nullptr); 4285 EXPECT_EQ(Br4->isUnconditional(), true); 4286 EXPECT_EQ(Br4->getSuccessor(0), Exit2); 4287 4288 ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front()); 4289 EXPECT_NE(ExtVal9, nullptr); 4290 EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5); 4291 EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U)); 4292 ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode()); 4293 EXPECT_NE(ZExt2, nullptr); 4294 EXPECT_EQ(ZExt2->getDestTy(), Int32); 4295 StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode()); 4296 EXPECT_NE(Store6, nullptr); 4297 EXPECT_EQ(Store6->getPointerOperand(), RVal); 4298 EXPECT_EQ(Store6->getValueOperand(), ZExt2); 4299 4300 AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode()); 4301 
EXPECT_NE(ARWM1, nullptr); 4302 EXPECT_EQ(ARWM1->getPointerOperand(), XVal); 4303 EXPECT_EQ(ARWM1->getValOperand(), Expr); 4304 EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min); 4305 StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode()); 4306 EXPECT_NE(Store7, nullptr); 4307 EXPECT_EQ(Store7->getPointerOperand(), VVal); 4308 EXPECT_EQ(Store7->getValueOperand(), ARWM1); 4309 4310 AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode()); 4311 EXPECT_NE(ARWM2, nullptr); 4312 EXPECT_EQ(ARWM2->getPointerOperand(), XVal); 4313 EXPECT_EQ(ARWM2->getValOperand(), Expr); 4314 EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max); 4315 CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode()); 4316 EXPECT_NE(Cmp1, nullptr); 4317 EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT); 4318 EXPECT_EQ(Cmp1->getOperand(0), ARWM2); 4319 EXPECT_EQ(Cmp1->getOperand(1), Expr); 4320 SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode()); 4321 EXPECT_NE(Sel2, nullptr); 4322 EXPECT_EQ(Sel2->getCondition(), Cmp1); 4323 EXPECT_EQ(Sel2->getTrueValue(), Expr); 4324 EXPECT_EQ(Sel2->getFalseValue(), ARWM2); 4325 StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode()); 4326 EXPECT_NE(Store8, nullptr); 4327 EXPECT_EQ(Store8->getPointerOperand(), VVal); 4328 EXPECT_EQ(Store8->getValueOperand(), Sel2); 4329 4330 Builder.CreateRetVoid(); 4331 OMPBuilder.finalize(); 4332 EXPECT_FALSE(verifyModule(*M, &errs())); 4333 } 4334 4335 TEST_F(OpenMPIRBuilderTest, CreateTeams) { 4336 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4337 OpenMPIRBuilder OMPBuilder(*M); 4338 OMPBuilder.Config.IsTargetDevice = false; 4339 OMPBuilder.initialize(); 4340 F->setName("func"); 4341 IRBuilder<> Builder(BB); 4342 4343 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 4344 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 4345 Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load"); 4346 4347 auto BodyGenCB = [&](InsertPointTy 
AllocaIP, InsertPointTy CodeGenIP) { 4348 Builder.restoreIP(AllocaIP); 4349 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 4350 "bodygen.alloca128"); 4351 4352 Builder.restoreIP(CodeGenIP); 4353 // Loading and storing captured pointer and values 4354 Builder.CreateStore(Val128, Local128); 4355 Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 4356 "bodygen.load32"); 4357 4358 LoadInst *PrivLoad128 = Builder.CreateLoad( 4359 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 4360 Value *Cmp = Builder.CreateICmpNE( 4361 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 4362 Instruction *ThenTerm, *ElseTerm; 4363 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 4364 &ThenTerm, &ElseTerm); 4365 }; 4366 4367 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4368 Builder.restoreIP(OMPBuilder.createTeams( 4369 Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, 4370 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4371 4372 OMPBuilder.finalize(); 4373 Builder.CreateRetVoid(); 4374 4375 EXPECT_FALSE(verifyModule(*M, &errs())); 4376 4377 CallInst *TeamsForkCall = dyn_cast<CallInst>( 4378 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams) 4379 ->user_back()); 4380 4381 // Verify the Ident argument 4382 GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0)); 4383 ASSERT_NE(Ident, nullptr); 4384 EXPECT_TRUE(Ident->hasInitializer()); 4385 Constant *Initializer = Ident->getInitializer(); 4386 GlobalVariable *SrcStrGlob = 4387 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 4388 ASSERT_NE(SrcStrGlob, nullptr); 4389 ConstantDataArray *SrcSrc = 4390 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 4391 ASSERT_NE(SrcSrc, nullptr); 4392 4393 // Verify the outlined function signature. 
4394 Function *OutlinedFn = 4395 dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts()); 4396 ASSERT_NE(OutlinedFn, nullptr); 4397 EXPECT_FALSE(OutlinedFn->isDeclaration()); 4398 EXPECT_TRUE(OutlinedFn->arg_size() >= 3); 4399 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid 4400 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid 4401 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 4402 Builder.getPtrTy()); // captured args 4403 4404 // Check for TruncInst and ICmpInst in the outlined function. 4405 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4406 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 4407 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4408 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 4409 } 4410 4411 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { 4412 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4413 OpenMPIRBuilder OMPBuilder(*M); 4414 OMPBuilder.Config.IsTargetDevice = false; 4415 OMPBuilder.initialize(); 4416 F->setName("func"); 4417 IRBuilder<> &Builder = OMPBuilder.Builder; 4418 Builder.SetInsertPoint(BB); 4419 4420 Function *FakeFunction = 4421 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4422 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4423 4424 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4425 Builder.restoreIP(CodeGenIP); 4426 Builder.CreateCall(FakeFunction, {}); 4427 }; 4428 4429 // `F` has an argument - an integer, so we use that as the thread limit. 
Builder.restoreIP(OMPBuilder.createTeams(/*Loc=*/Builder, BodyGenCB,
                                           /*NumTeamsLower=*/nullptr,
                                           /*NumTeamsUpper=*/nullptr,
                                           /*ThreadLimit=*/F->arg_begin(),
                                           /*IfExpr=*/nullptr));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  ASSERT_FALSE(verifyModule(*M));

  CallInst *PushNumTeamsCallInst =
      findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
  ASSERT_NE(PushNumTeamsCallInst, nullptr);

  // Operands 2 and 3 (the two num_teams bounds) are expected to be the
  // constant 0; operand 4 is expected to be the thread limit passed above.
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0));
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0));
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin());

  // Verifying that the next instruction to execute is kmpc_fork_teams
  BranchInst *BrInst =
      dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
  ASSERT_NE(BrInst, nullptr);
  ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
  Instruction *NextInstruction =
      BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
  CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
  ASSERT_NE(ForkTeamsCI, nullptr);
  EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
            OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
}

/// Passes only the upper `num_teams` bound to createTeams and checks the
/// operands of the single __kmpc_push_num_teams_51 call in `F`, as well as that
/// the call is followed (through an unconditional branch) by the call to
/// __kmpc_fork_teams.
TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> &Builder = OMPBuilder.Builder;
  Builder.SetInsertPoint(BB);

  Function *FakeFunction =
      Function::Create(FunctionType::get(Builder.getVoidTy(), false),
                       GlobalValue::ExternalLinkage, "fakeFunction", M.get());

  // The region body is a single call to an external dummy function.
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    Builder.restoreIP(CodeGenIP);
    Builder.CreateCall(FakeFunction, {});
  };

  // `F` already has an integer argument, so we use that as upper bound to
  // `num_teams`
  Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB,
                                           /*NumTeamsLower=*/nullptr,
                                           /*NumTeamsUpper=*/F->arg_begin(),
                                           /*ThreadLimit=*/nullptr,
                                           /*IfExpr=*/nullptr));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  ASSERT_FALSE(verifyModule(*M));

  CallInst *PushNumTeamsCallInst =
      findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
  ASSERT_NE(PushNumTeamsCallInst, nullptr);

  // With no lower bound given, both bound operands are expected to be the
  // upper bound; the thread limit operand is expected to be the constant 0.
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin());
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin());
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));

  // Verifying that the next instruction to execute is kmpc_fork_teams
  BranchInst *BrInst =
      dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
  ASSERT_NE(BrInst, nullptr);
  ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
  Instruction *NextInstruction =
      BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
  CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
  ASSERT_NE(ForkTeamsCI, nullptr);
  EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
            OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
}

/// Passes both `num_teams` bounds to createTeams and checks that they reach the
/// __kmpc_push_num_teams_51 call unchanged, with a constant 0 thread limit.
TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> &Builder = OMPBuilder.Builder;
  Builder.SetInsertPoint(BB);

  Function *FakeFunction =
      Function::Create(FunctionType::get(Builder.getVoidTy(), false),
                       GlobalValue::ExternalLinkage, "fakeFunction", M.get());

  // Both bounds are computed from the first argument of `F`.
  Value *NumTeamsLower =
      Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
  Value *NumTeamsUpper =
      Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    Builder.restoreIP(CodeGenIP);
    Builder.CreateCall(FakeFunction, {});
  };

  // Pass the lower and upper bounds computed above; no thread limit and no
  // `if` expression are given.
  Builder.restoreIP(
      OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
                             /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  ASSERT_FALSE(verifyModule(*M));

  CallInst *PushNumTeamsCallInst =
      findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
  ASSERT_NE(PushNumTeamsCallInst, nullptr);

  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));

  // Verifying that the next instruction to execute is kmpc_fork_teams
  BranchInst *BrInst =
      dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
  ASSERT_NE(BrInst, nullptr);
  ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
  Instruction *NextInstruction =
      BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
  CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
  ASSERT_NE(ForkTeamsCI, nullptr);
  EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
            OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
}

/// Passes both `num_teams` bounds and a thread limit to createTeams and checks
/// that all three reach the __kmpc_push_num_teams_51 call unchanged.
TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> &Builder = OMPBuilder.Builder;
Builder.SetInsertPoint(BB);

  // Continue code generation in a fresh block split off the entry block.
  BasicBlock *CodegenBB = splitBB(Builder, true);
  Builder.SetInsertPoint(CodegenBB);

  // Generate values for `num_teams` and `thread_limit` using the first argument
  // of the testing function.
  Value *NumTeamsLower =
      Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
  Value *NumTeamsUpper =
      Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
  Value *ThreadLimit =
      Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit");

  Function *FakeFunction =
      Function::Create(FunctionType::get(Builder.getVoidTy(), false),
                       GlobalValue::ExternalLinkage, "fakeFunction", M.get());

  // The region body is a single call to an external dummy function.
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    Builder.restoreIP(CodeGenIP);
    Builder.CreateCall(FakeFunction, {});
  };

  // NOTE(review): `Loc` is not referenced again in this test; createTeams is
  // given `Builder` directly. Confirm whether it can be dropped.
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  Builder.restoreIP(OMPBuilder.createTeams(
      Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  ASSERT_FALSE(verifyModule(*M));

  CallInst *PushNumTeamsCallInst =
      findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
  ASSERT_NE(PushNumTeamsCallInst, nullptr);

  // All three clause values are expected to be forwarded as they were passed.
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
  EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit);

  // Verifying that the next instruction to execute is kmpc_fork_teams
  BranchInst *BrInst =
      dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
  ASSERT_NE(BrInst, nullptr);
  ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
  Instruction *NextInstruction =
      BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
  CallInst *ForkTeamsCI =
      dyn_cast_if_present<CallInst>(NextInstruction);
  ASSERT_NE(ForkTeamsCI, nullptr);
  EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
            OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
}

/// Passes only an `i1` `if` expression to createTeams and checks that both
/// `num_teams` operands of the __kmpc_push_num_teams_51 call are selects on
/// that expression (0 when true, 1 when false), with a constant 0 thread limit.
TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> &Builder = OMPBuilder.Builder;
  Builder.SetInsertPoint(BB);

  // The condition is a boolean loaded from a fresh stack slot, so it is not a
  // compile-time constant.
  Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(),
                                     Builder.CreateAlloca(Builder.getInt1Ty()));

  Function *FakeFunction =
      Function::Create(FunctionType::get(Builder.getVoidTy(), false),
                       GlobalValue::ExternalLinkage, "fakeFunction", M.get());

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    Builder.restoreIP(CodeGenIP);
    Builder.CreateCall(FakeFunction, {});
  };

  // Only the `if` expression is supplied; the `num_teams` bounds and the
  // thread limit are left null.
  Builder.restoreIP(OMPBuilder.createTeams(
      Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
      /*ThreadLimit=*/nullptr, IfExpr));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  ASSERT_FALSE(verifyModule(*M));

  CallInst *PushNumTeamsCallInst =
      findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
  ASSERT_NE(PushNumTeamsCallInst, nullptr);
  Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2);
  Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3);
  Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4);

  // Check the lower_bound
  ASSERT_NE(NumTeamsLower, nullptr);
  SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower);
  ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr);
  EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0));
  EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));

  // Check the upper_bound
  ASSERT_NE(NumTeamsUpper, nullptr);
  SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper);
  ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
  EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr);
  EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0));
  EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));

  // Check thread_limit
  EXPECT_EQ(ThreadLimit, Builder.getInt32(0));
}

/// Passes an `i32` `if` expression together with both `num_teams` bounds and a
/// thread limit to createTeams. The test expects the expression to be compared
/// against zero, each bound operand of the __kmpc_push_num_teams_51 call to be
/// a select between the given bound and 1 on that comparison, and the thread
/// limit to be forwarded unchanged.
TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> &Builder = OMPBuilder.Builder;
  Builder.SetInsertPoint(BB);

  // The condition is a 32-bit integer (not an i1) loaded from a fresh stack
  // slot; the clause values are computed from the first argument of `F`.
  Value *IfExpr = Builder.CreateLoad(
      Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty()));
  Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5));
  Value *NumTeamsUpper =
      Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10));
  Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20));

  Function *FakeFunction =
      Function::Create(FunctionType::get(Builder.getVoidTy(), false),
                       GlobalValue::ExternalLinkage, "fakeFunction", M.get());

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    Builder.restoreIP(CodeGenIP);
    Builder.CreateCall(FakeFunction, {});
  };

  // Pass every clause value computed above, including the `if` expression.
  Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
                                           NumTeamsUpper, ThreadLimit, IfExpr));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();

  ASSERT_FALSE(verifyModule(*M));

  CallInst *PushNumTeamsCallInst =
      findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
  ASSERT_NE(PushNumTeamsCallInst, nullptr);
  Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2);
  Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3);
  Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4);

  // Get the boolean conversion of if expression
  ASSERT_EQ(IfExpr->getNumUses(), 1U);
  User *IfExprInst = IfExpr->user_back();
  ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst);
  ASSERT_NE(IfExprCmpInst, nullptr);
  EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE);
  EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr);
  EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0));

  // Check the lower_bound
  ASSERT_NE(NumTeamsLowerArg, nullptr);
  SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg);
  ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
  EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst);
  EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower);
  EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));

  // Check the upper_bound
  ASSERT_NE(NumTeamsUpperArg, nullptr);
  SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg);
  ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
  EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst);
  EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper);
  EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));

  // Check thread_limit
  EXPECT_EQ(ThreadLimitArg, ThreadLimit);
}

/// Returns the single instruction of InstTy type in BB that uses the value V.
4760 /// If there is more than one such instruction, returns null. 4761 template <typename InstTy> 4762 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) { 4763 InstTy *Result = nullptr; 4764 for (User *U : V->users()) { 4765 auto *Inst = dyn_cast<InstTy>(U); 4766 if (!Inst || Inst->getParent() != BB) 4767 continue; 4768 if (Result) { 4769 if (auto *SI = dyn_cast<StoreInst>(Inst)) { 4770 if (V == SI->getValueOperand()) 4771 continue; 4772 } else { 4773 return nullptr; 4774 } 4775 } 4776 Result = Inst; 4777 } 4778 return Result; 4779 } 4780 4781 /// Returns true if BB contains a simple binary reduction that loads a value 4782 /// from Accum, performs some binary operation with it, and stores it back to 4783 /// Accum. 4784 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB, 4785 Instruction::BinaryOps *OpCode = nullptr) { 4786 StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB); 4787 if (!Store) 4788 return false; 4789 auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0)); 4790 if (!Stored) 4791 return false; 4792 if (OpCode && *OpCode != Stored->getOpcode()) 4793 return false; 4794 auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0)); 4795 return Load && Load->getOperand(0) == Accum; 4796 } 4797 4798 /// Returns true if BB contains a binary reduction that reduces V using a binary 4799 /// operator into an accumulator that is a function argument. 
4800 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) { 4801 auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB); 4802 if (!ReductionOp) 4803 return false; 4804 4805 auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0)); 4806 if (!GlobalLoad) 4807 return false; 4808 4809 auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB); 4810 if (!Store) 4811 return false; 4812 4813 return Store->getPointerOperand() == GlobalLoad->getPointerOperand() && 4814 isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand())); 4815 } 4816 4817 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and 4818 /// [0, 1], respectively, and assigns results of these instructions to Zero and 4819 /// One. Returns true on success, false on failure or if such instructions are 4820 /// not unique among the users of Ptr. 4821 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) { 4822 Zero = nullptr; 4823 One = nullptr; 4824 for (User *U : Ptr->users()) { 4825 if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { 4826 if (GEP->getNumIndices() != 2) 4827 continue; 4828 auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); 4829 auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2)); 4830 EXPECT_NE(FirstIdx, nullptr); 4831 EXPECT_NE(SecondIdx, nullptr); 4832 4833 EXPECT_TRUE(FirstIdx->isZero()); 4834 if (SecondIdx->isZero()) { 4835 if (Zero) 4836 return false; 4837 Zero = GEP; 4838 } else if (SecondIdx->isOne()) { 4839 if (One) 4840 return false; 4841 One = GEP; 4842 } else { 4843 return false; 4844 } 4845 } 4846 } 4847 return Zero != nullptr && One != nullptr; 4848 } 4849 4850 static OpenMPIRBuilder::InsertPointTy 4851 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 4852 Value *&Result) { 4853 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4854 Result = Builder.CreateFAdd(LHS, RHS, "red.add"); 4855 return Builder.saveIP(); 4856 } 4857 4858 static 
OpenMPIRBuilder::InsertPointTy 4859 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 4860 Value *RHS) { 4861 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4862 Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 4863 Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt, 4864 AtomicOrdering::Monotonic); 4865 return Builder.saveIP(); 4866 } 4867 4868 static OpenMPIRBuilder::InsertPointTy 4869 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 4870 Value *&Result) { 4871 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4872 Result = Builder.CreateXor(LHS, RHS, "red.xor"); 4873 return Builder.saveIP(); 4874 } 4875 4876 static OpenMPIRBuilder::InsertPointTy 4877 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 4878 Value *RHS) { 4879 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4880 Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 4881 Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt, 4882 AtomicOrdering::Monotonic); 4883 return Builder.saveIP(); 4884 } 4885 4886 TEST_F(OpenMPIRBuilderTest, CreateReductions) { 4887 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4888 OpenMPIRBuilder OMPBuilder(*M); 4889 OMPBuilder.Config.IsTargetDevice = false; 4890 OMPBuilder.initialize(); 4891 F->setName("func"); 4892 IRBuilder<> Builder(BB); 4893 4894 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 4895 Builder.CreateBr(EnterBB); 4896 Builder.SetInsertPoint(EnterBB); 4897 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4898 4899 // Create variables to be reduced. 
4900 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 4901 F->getEntryBlock().getFirstInsertionPt()); 4902 Type *SumType = Builder.getFloatTy(); 4903 Type *XorType = Builder.getInt32Ty(); 4904 Value *SumReduced; 4905 Value *XorReduced; 4906 { 4907 IRBuilderBase::InsertPointGuard Guard(Builder); 4908 Builder.restoreIP(OuterAllocaIP); 4909 SumReduced = Builder.CreateAlloca(SumType); 4910 XorReduced = Builder.CreateAlloca(XorType); 4911 } 4912 4913 // Store initial values of reductions into global variables. 4914 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 4915 Builder.CreateStore(Builder.getInt32(1), XorReduced); 4916 4917 // The loop body computes two reductions: 4918 // sum of (float) thread-id; 4919 // xor of thread-id; 4920 // and store the result in global variables. 4921 InsertPointTy BodyIP, BodyAllocaIP; 4922 auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { 4923 IRBuilderBase::InsertPointGuard Guard(Builder); 4924 Builder.restoreIP(CodeGenIP); 4925 4926 uint32_t StrSize; 4927 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 4928 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 4929 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 4930 Value *SumLocal = 4931 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 4932 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 4933 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 4934 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 4935 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 4936 Builder.CreateStore(Sum, SumReduced); 4937 Builder.CreateStore(Xor, XorReduced); 4938 4939 BodyIP = Builder.saveIP(); 4940 BodyAllocaIP = InnerAllocaIP; 4941 }; 4942 4943 // Privatization for reduction creates local copies of reduction variables and 4944 // initializes them to reduction-neutral values. 
4945 Value *SumPrivatized; 4946 Value *XorPrivatized; 4947 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 4948 Value &Original, Value &Inner, Value *&ReplVal) { 4949 IRBuilderBase::InsertPointGuard Guard(Builder); 4950 Builder.restoreIP(InnerAllocaIP); 4951 if (&Original == SumReduced) { 4952 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 4953 ReplVal = SumPrivatized; 4954 } else if (&Original == XorReduced) { 4955 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 4956 ReplVal = XorPrivatized; 4957 } else { 4958 ReplVal = &Inner; 4959 return CodeGenIP; 4960 } 4961 4962 Builder.restoreIP(CodeGenIP); 4963 if (&Original == SumReduced) 4964 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 4965 SumPrivatized); 4966 else if (&Original == XorReduced) 4967 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 4968 4969 return Builder.saveIP(); 4970 }; 4971 4972 // Do nothing in finalization. 4973 auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; 4974 4975 InsertPointTy AfterIP = 4976 OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, 4977 /* IfCondition */ nullptr, 4978 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 4979 /* IsCancellable */ false); 4980 Builder.restoreIP(AfterIP); 4981 4982 OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { 4983 {SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction}, 4984 {XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}}; 4985 4986 OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos); 4987 4988 Builder.restoreIP(AfterIP); 4989 Builder.CreateRetVoid(); 4990 4991 OMPBuilder.finalize(F); 4992 4993 // The IR must be valid. 4994 EXPECT_FALSE(verifyModule(*M)); 4995 4996 // Outlining must have happened. 
4997 SmallVector<CallInst *> ForkCalls; 4998 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 4999 ForkCalls); 5000 ASSERT_EQ(ForkCalls.size(), 1u); 5001 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5002 Function *Outlined = dyn_cast<Function>(CalleeVal); 5003 EXPECT_NE(Outlined, nullptr); 5004 5005 // Check that the lock variable was created with the expected name. 5006 GlobalVariable *LockVar = 5007 M->getGlobalVariable(".gomp_critical_user_.reduction.var"); 5008 EXPECT_NE(LockVar, nullptr); 5009 5010 // Find the allocation of a local array that will be used to call the runtime 5011 // reduciton function. 5012 BasicBlock &AllocBlock = Outlined->getEntryBlock(); 5013 Value *LocalArray = nullptr; 5014 for (Instruction &I : AllocBlock) { 5015 if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) { 5016 if (!Alloc->getAllocatedType()->isArrayTy() || 5017 !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy()) 5018 continue; 5019 LocalArray = Alloc; 5020 break; 5021 } 5022 } 5023 ASSERT_NE(LocalArray, nullptr); 5024 5025 // Find the call to the runtime reduction function. 5026 BasicBlock *BB = AllocBlock.getUniqueSuccessor(); 5027 Value *LocalArrayPtr = nullptr; 5028 Value *ReductionFnVal = nullptr; 5029 Value *SwitchArg = nullptr; 5030 for (Instruction &I : *BB) { 5031 if (CallInst *Call = dyn_cast<CallInst>(&I)) { 5032 if (Call->getCalledFunction() != 5033 OMPBuilder.getOrCreateRuntimeFunctionPtr( 5034 RuntimeFunction::OMPRTL___kmpc_reduce)) 5035 continue; 5036 LocalArrayPtr = Call->getOperand(4); 5037 ReductionFnVal = Call->getOperand(5); 5038 SwitchArg = Call; 5039 break; 5040 } 5041 } 5042 5043 // Check that the local array is passed to the function. 5044 ASSERT_NE(LocalArrayPtr, nullptr); 5045 EXPECT_EQ(LocalArrayPtr, LocalArray); 5046 5047 // Find the GEP instructions preceding stores to the local array. 
5048 Value *FirstArrayElemPtr = nullptr; 5049 Value *SecondArrayElemPtr = nullptr; 5050 EXPECT_EQ(LocalArray->getNumUses(), 3u); 5051 ASSERT_TRUE( 5052 findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr)); 5053 5054 // Check that the values stored into the local array are privatized reduction 5055 // variables. 5056 auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>( 5057 findStoredValue<GetElementPtrInst>(FirstArrayElemPtr)); 5058 auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>( 5059 findStoredValue<GetElementPtrInst>(SecondArrayElemPtr)); 5060 ASSERT_NE(FirstPrivatized, nullptr); 5061 ASSERT_NE(SecondPrivatized, nullptr); 5062 ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr)); 5063 EXPECT_TRUE(isSimpleBinaryReduction( 5064 FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5065 EXPECT_TRUE(isSimpleBinaryReduction( 5066 SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5067 5068 // Check that the result of the runtime reduction call is used for further 5069 // dispatch. 5070 ASSERT_EQ(SwitchArg->getNumUses(), 1u); 5071 SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin()); 5072 ASSERT_NE(Switch, nullptr); 5073 EXPECT_EQ(Switch->getNumSuccessors(), 3u); 5074 BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor(); 5075 BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor(); 5076 5077 // Non-atomic block contains reductions to the global reduction variable, 5078 // which is passed into the outlined function as an argument. 5079 Value *FirstLoad = 5080 findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB); 5081 Value *SecondLoad = 5082 findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB); 5083 EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB)); 5084 EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB)); 5085 5086 // Atomic block also constains reductions to the global reduction variable. 
5087 FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB); 5088 SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB); 5089 auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB); 5090 auto *SecondAtomic = 5091 findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB); 5092 ASSERT_NE(FirstAtomic, nullptr); 5093 Value *AtomicStorePointer = FirstAtomic->getPointerOperand(); 5094 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5095 ASSERT_NE(SecondAtomic, nullptr); 5096 AtomicStorePointer = SecondAtomic->getPointerOperand(); 5097 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5098 5099 // Check that the separate reduction function also performs (non-atomic) 5100 // reductions after extracting reduction variables from its arguments. 5101 Function *ReductionFn = cast<Function>(ReductionFnVal); 5102 BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock(); 5103 Value *FirstLHSPtr; 5104 Value *SecondLHSPtr; 5105 ASSERT_TRUE( 5106 findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr)); 5107 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5108 ASSERT_NE(Opaque, nullptr); 5109 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5110 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5111 ASSERT_NE(Opaque, nullptr); 5112 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5113 5114 Value *FirstRHS; 5115 Value *SecondRHS; 5116 EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS)); 5117 } 5118 5119 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { 5120 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5121 OpenMPIRBuilder OMPBuilder(*M); 5122 OMPBuilder.Config.IsTargetDevice = false; 5123 OMPBuilder.initialize(); 5124 F->setName("func"); 5125 IRBuilder<> Builder(BB); 5126 5127 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 5128 
Builder.CreateBr(EnterBB); 5129 Builder.SetInsertPoint(EnterBB); 5130 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5131 5132 // Create variables to be reduced. 5133 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 5134 F->getEntryBlock().getFirstInsertionPt()); 5135 Type *SumType = Builder.getFloatTy(); 5136 Type *XorType = Builder.getInt32Ty(); 5137 Value *SumReduced; 5138 Value *XorReduced; 5139 { 5140 IRBuilderBase::InsertPointGuard Guard(Builder); 5141 Builder.restoreIP(OuterAllocaIP); 5142 SumReduced = Builder.CreateAlloca(SumType); 5143 XorReduced = Builder.CreateAlloca(XorType); 5144 } 5145 5146 // Store initial values of reductions into global variables. 5147 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 5148 Builder.CreateStore(Builder.getInt32(1), XorReduced); 5149 5150 InsertPointTy FirstBodyIP, FirstBodyAllocaIP; 5151 auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5152 InsertPointTy CodeGenIP) { 5153 IRBuilderBase::InsertPointGuard Guard(Builder); 5154 Builder.restoreIP(CodeGenIP); 5155 5156 uint32_t StrSize; 5157 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5158 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5159 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5160 Value *SumLocal = 5161 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 5162 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 5163 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 5164 Builder.CreateStore(Sum, SumReduced); 5165 5166 FirstBodyIP = Builder.saveIP(); 5167 FirstBodyAllocaIP = InnerAllocaIP; 5168 }; 5169 5170 InsertPointTy SecondBodyIP, SecondBodyAllocaIP; 5171 auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5172 InsertPointTy CodeGenIP) { 5173 IRBuilderBase::InsertPointGuard Guard(Builder); 5174 Builder.restoreIP(CodeGenIP); 5175 5176 uint32_t StrSize; 5177 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, 
StrSize); 5178 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5179 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5180 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 5181 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 5182 Builder.CreateStore(Xor, XorReduced); 5183 5184 SecondBodyIP = Builder.saveIP(); 5185 SecondBodyAllocaIP = InnerAllocaIP; 5186 }; 5187 5188 // Privatization for reduction creates local copies of reduction variables and 5189 // initializes them to reduction-neutral values. The same privatization 5190 // callback is used for both loops, with dispatch based on the value being 5191 // privatized. 5192 Value *SumPrivatized; 5193 Value *XorPrivatized; 5194 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 5195 Value &Original, Value &Inner, Value *&ReplVal) { 5196 IRBuilderBase::InsertPointGuard Guard(Builder); 5197 Builder.restoreIP(InnerAllocaIP); 5198 if (&Original == SumReduced) { 5199 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 5200 ReplVal = SumPrivatized; 5201 } else if (&Original == XorReduced) { 5202 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 5203 ReplVal = XorPrivatized; 5204 } else { 5205 ReplVal = &Inner; 5206 return CodeGenIP; 5207 } 5208 5209 Builder.restoreIP(CodeGenIP); 5210 if (&Original == SumReduced) 5211 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 5212 SumPrivatized); 5213 else if (&Original == XorReduced) 5214 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 5215 5216 return Builder.saveIP(); 5217 }; 5218 5219 // Do nothing in finalization. 
5220 auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; 5221 5222 Builder.restoreIP( 5223 OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, 5224 FiniCB, /* IfCondition */ nullptr, 5225 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5226 /* IsCancellable */ false)); 5227 InsertPointTy AfterIP = OMPBuilder.createParallel( 5228 {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB, 5229 /* IfCondition */ nullptr, 5230 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5231 /* IsCancellable */ false); 5232 5233 OMPBuilder.createReductions( 5234 FirstBodyIP, FirstBodyAllocaIP, 5235 {{SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction}}); 5236 OMPBuilder.createReductions( 5237 SecondBodyIP, SecondBodyAllocaIP, 5238 {{XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}}); 5239 5240 Builder.restoreIP(AfterIP); 5241 Builder.CreateRetVoid(); 5242 5243 OMPBuilder.finalize(F); 5244 5245 // The IR must be valid. 5246 EXPECT_FALSE(verifyModule(*M)); 5247 5248 // Two different outlined functions must have been created. 5249 SmallVector<CallInst *> ForkCalls; 5250 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5251 ForkCalls); 5252 ASSERT_EQ(ForkCalls.size(), 2u); 5253 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5254 Function *FirstCallee = cast<Function>(CalleeVal); 5255 CalleeVal = ForkCalls[1]->getOperand(2); 5256 Function *SecondCallee = cast<Function>(CalleeVal); 5257 EXPECT_NE(FirstCallee, SecondCallee); 5258 5259 // Two different reduction functions must have been created. 
5260 SmallVector<CallInst *> ReduceCalls; 5261 findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder, 5262 ReduceCalls); 5263 ASSERT_EQ(ReduceCalls.size(), 1u); 5264 auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5265 ReduceCalls.clear(); 5266 findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, 5267 OMPBuilder, ReduceCalls); 5268 auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5269 EXPECT_NE(AddReduction, XorReduction); 5270 5271 // Each reduction function does its own kind of reduction. 5272 BasicBlock *FnReductionBB = &AddReduction->getEntryBlock(); 5273 Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5274 AddReduction->getArg(0), FnReductionBB); 5275 ASSERT_NE(FirstLHSPtr, nullptr); 5276 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5277 ASSERT_NE(Opaque, nullptr); 5278 Instruction::BinaryOps Opcode = Instruction::FAdd; 5279 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5280 5281 FnReductionBB = &XorReduction->getEntryBlock(); 5282 Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5283 XorReduction->getArg(0), FnReductionBB); 5284 ASSERT_NE(FirstLHSPtr, nullptr); 5285 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5286 ASSERT_NE(Opaque, nullptr); 5287 Opcode = Instruction::Xor; 5288 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5289 } 5290 5291 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { 5292 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5293 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5294 OpenMPIRBuilder OMPBuilder(*M); 5295 OMPBuilder.initialize(); 5296 F->setName("func"); 5297 IRBuilder<> Builder(BB); 5298 5299 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5300 Builder.CreateBr(EnterBB); 5301 Builder.SetInsertPoint(EnterBB); 5302 OpenMPIRBuilder::LocationDescription 
Loc({Builder.saveIP(), DL}); 5303 5304 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5305 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5306 5307 auto FiniCB = [&](InsertPointTy IP) {}; 5308 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 5309 SectionCBVector.push_back(SectionCB); 5310 5311 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5312 llvm::Value &, llvm::Value &Val, 5313 llvm::Value *&ReplVal) { return CodeGenIP; }; 5314 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5315 F->getEntryBlock().getFirstInsertionPt()); 5316 Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5317 PrivCB, FiniCB, false, false)); 5318 Builder.CreateRetVoid(); // Required at the end of the function 5319 EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); 5320 EXPECT_FALSE(verifyModule(*M, &errs())); 5321 } 5322 5323 TEST_F(OpenMPIRBuilderTest, CreateSections) { 5324 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5325 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5326 OpenMPIRBuilder OMPBuilder(*M); 5327 OMPBuilder.initialize(); 5328 F->setName("func"); 5329 IRBuilder<> Builder(BB); 5330 5331 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5332 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5333 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5334 5335 BasicBlock *SwitchBB = nullptr; 5336 AllocaInst *PrivAI = nullptr; 5337 SwitchInst *Switch = nullptr; 5338 5339 unsigned NumBodiesGenerated = 0; 5340 unsigned NumFiniCBCalls = 0; 5341 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 5342 5343 auto FiniCB = [&](InsertPointTy IP) { 5344 ++NumFiniCBCalls; 5345 BasicBlock *IPBB = IP.getBlock(); 5346 EXPECT_NE(IPBB->end(), IP.getPoint()); 5347 }; 5348 5349 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 5350 ++NumBodiesGenerated; 5351 CaseBBs.push_back(CodeGenIP.getBlock()); 5352 SwitchBB = 
CodeGenIP.getBlock()->getSinglePredecessor(); 5353 Builder.restoreIP(CodeGenIP); 5354 Builder.CreateStore(F->arg_begin(), PrivAI); 5355 Value *PrivLoad = 5356 Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca"); 5357 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 5358 }; 5359 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5360 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 5361 // TODO: Privatization not implemented yet 5362 return CodeGenIP; 5363 }; 5364 5365 SectionCBVector.push_back(SectionCB); 5366 SectionCBVector.push_back(SectionCB); 5367 5368 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5369 F->getEntryBlock().getFirstInsertionPt()); 5370 Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5371 PrivCB, FiniCB, false, false)); 5372 Builder.CreateRetVoid(); // Required at the end of the function 5373 5374 // Switch BB's predecessor is loop condition BB, whose successor at index 1 is 5375 // loop's exit BB 5376 BasicBlock *ForExitBB = 5377 SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1); 5378 EXPECT_NE(ForExitBB, nullptr); 5379 5380 EXPECT_NE(PrivAI, nullptr); 5381 Function *OutlinedFn = PrivAI->getFunction(); 5382 EXPECT_EQ(F, OutlinedFn); 5383 EXPECT_FALSE(verifyModule(*M, &errs())); 5384 EXPECT_EQ(OutlinedFn->arg_size(), 1U); 5385 5386 BasicBlock *LoopPreheaderBB = 5387 OutlinedFn->getEntryBlock().getSingleSuccessor(); 5388 // loop variables are 5 - lower bound, upper bound, stride, islastiter, and 5389 // iterator/counter 5390 bool FoundForInit = false; 5391 for (Instruction &Inst : *LoopPreheaderBB) { 5392 if (isa<CallInst>(Inst)) { 5393 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5394 "__kmpc_for_static_init_4u") { 5395 FoundForInit = true; 5396 } 5397 } 5398 } 5399 EXPECT_EQ(FoundForInit, true); 5400 5401 bool FoundForExit = false; 5402 bool FoundBarrier = false; 5403 for (Instruction &Inst : *ForExitBB) { 5404 if (isa<CallInst>(Inst)) { 
5405 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5406 "__kmpc_for_static_fini") { 5407 FoundForExit = true; 5408 } 5409 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5410 "__kmpc_barrier") { 5411 FoundBarrier = true; 5412 } 5413 if (FoundForExit && FoundBarrier) 5414 break; 5415 } 5416 } 5417 EXPECT_EQ(FoundForExit, true); 5418 EXPECT_EQ(FoundBarrier, true); 5419 5420 EXPECT_NE(SwitchBB, nullptr); 5421 EXPECT_NE(SwitchBB->getTerminator(), nullptr); 5422 EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true); 5423 Switch = cast<SwitchInst>(SwitchBB->getTerminator()); 5424 EXPECT_EQ(Switch->getNumCases(), 2U); 5425 5426 EXPECT_EQ(CaseBBs.size(), 2U); 5427 for (auto *&CaseBB : CaseBBs) { 5428 EXPECT_EQ(CaseBB->getParent(), OutlinedFn); 5429 } 5430 5431 ASSERT_EQ(NumBodiesGenerated, 2U); 5432 ASSERT_EQ(NumFiniCBCalls, 1U); 5433 EXPECT_FALSE(verifyModule(*M, &errs())); 5434 } 5435 5436 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { 5437 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5438 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5439 OpenMPIRBuilder OMPBuilder(*M); 5440 OMPBuilder.initialize(); 5441 F->setName("func"); 5442 IRBuilder<> Builder(BB); 5443 5444 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5445 Builder.CreateBr(EnterBB); 5446 Builder.SetInsertPoint(EnterBB); 5447 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5448 5449 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5450 F->getEntryBlock().getFirstInsertionPt()); 5451 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5452 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5453 llvm::Value &, llvm::Value &Val, 5454 llvm::Value *&ReplVal) { return CodeGenIP; }; 5455 auto FiniCB = [&](InsertPointTy IP) {}; 5456 5457 Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5458 PrivCB, FiniCB, false, true)); 5459 Builder.CreateRetVoid(); // 
Required at the end of the function 5460 for (auto &Inst : instructions(*F)) { 5461 EXPECT_FALSE(isa<CallInst>(Inst) && 5462 cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5463 "__kmpc_barrier" && 5464 "call to function __kmpc_barrier found with nowait"); 5465 } 5466 } 5467 5468 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) { 5469 OpenMPIRBuilder OMPBuilder(*M); 5470 OMPBuilder.initialize(); 5471 5472 IRBuilder<> Builder(BB); 5473 5474 SmallVector<uint64_t> Mappings = {0, 1}; 5475 GlobalVariable *OffloadMaptypesGlobal = 5476 OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes"); 5477 EXPECT_FALSE(M->global_empty()); 5478 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes"); 5479 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5480 EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5481 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5482 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5483 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5484 EXPECT_TRUE(isa<ConstantDataArray>(Initializer)); 5485 ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer); 5486 EXPECT_EQ(MappingInit->getNumElements(), Mappings.size()); 5487 EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64)); 5488 Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings); 5489 EXPECT_EQ(MappingInit, CA); 5490 } 5491 5492 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { 5493 OpenMPIRBuilder OMPBuilder(*M); 5494 OMPBuilder.initialize(); 5495 5496 IRBuilder<> Builder(BB); 5497 5498 uint32_t StrSize; 5499 Constant *Cst1 = 5500 OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5501 Constant *Cst2 = 5502 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5503 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5504 5505 GlobalVariable *OffloadMaptypesGlobal = 5506 OMPBuilder.createOffloadMapnames(Names, "offload_mapnames"); 5507 
EXPECT_FALSE(M->global_empty()); 5508 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames"); 5509 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5510 EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5511 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5512 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5513 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5514 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts())); 5515 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts())); 5516 5517 GlobalVariable *Name1Gbl = 5518 cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts()); 5519 EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer())); 5520 ConstantDataArray *Name1GblCA = 5521 dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer()); 5522 EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;"); 5523 5524 GlobalVariable *Name2Gbl = 5525 cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts()); 5526 EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer())); 5527 ConstantDataArray *Name2GblCA = 5528 dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer()); 5529 EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;"); 5530 5531 EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy()); 5532 EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size()); 5533 } 5534 5535 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) { 5536 OpenMPIRBuilder OMPBuilder(*M); 5537 OMPBuilder.initialize(); 5538 F->setName("func"); 5539 IRBuilder<> Builder(BB); 5540 5541 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5542 5543 unsigned TotalNbOperand = 2; 5544 5545 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5546 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5547 F->getEntryBlock().getFirstInsertionPt()); 5548 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5549 
EXPECT_NE(MapperAllocas.ArgsBase, nullptr); 5550 EXPECT_NE(MapperAllocas.Args, nullptr); 5551 EXPECT_NE(MapperAllocas.ArgSizes, nullptr); 5552 EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy()); 5553 ArrayType *ArrType = 5554 dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType()); 5555 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5556 EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType() 5557 ->getArrayElementType() 5558 ->isPointerTy()); 5559 5560 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy()); 5561 ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType()); 5562 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5563 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType() 5564 ->getArrayElementType() 5565 ->isPointerTy()); 5566 5567 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy()); 5568 ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType()); 5569 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5570 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType() 5571 ->getArrayElementType() 5572 ->isIntegerTy(64)); 5573 } 5574 5575 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { 5576 OpenMPIRBuilder OMPBuilder(*M); 5577 OMPBuilder.initialize(); 5578 F->setName("func"); 5579 IRBuilder<> Builder(BB); 5580 LLVMContext &Ctx = M->getContext(); 5581 5582 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5583 5584 unsigned TotalNbOperand = 2; 5585 5586 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5587 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5588 F->getEntryBlock().getFirstInsertionPt()); 5589 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5590 5591 auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr( 5592 omp::OMPRTL___tgt_target_data_begin_mapper); 5593 5594 SmallVector<uint64_t> Flags = {0, 2}; 5595 5596 uint32_t StrSize; 5597 Constant *SrcLocCst = 5598 OMPBuilder.getOrCreateSrcLocStr("", "file1", 
2, 5, StrSize); 5599 Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); 5600 5601 Constant *Cst1 = 5602 OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5603 Constant *Cst2 = 5604 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5605 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5606 5607 GlobalVariable *Maptypes = 5608 OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes"); 5609 Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32( 5610 ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes, 5611 /*Idx0=*/0, /*Idx1=*/0); 5612 5613 GlobalVariable *Mapnames = 5614 OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames"); 5615 Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32( 5616 ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames, 5617 /*Idx0=*/0, /*Idx1=*/0); 5618 5619 OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo, 5620 MaptypesArg, MapnamesArg, MapperAllocas, -1, 5621 TotalNbOperand); 5622 5623 CallInst *MapperCall = dyn_cast<CallInst>(&BB->back()); 5624 EXPECT_NE(MapperCall, nullptr); 5625 EXPECT_EQ(MapperCall->arg_size(), 9U); 5626 EXPECT_EQ(MapperCall->getCalledFunction()->getName(), 5627 "__tgt_target_data_begin_mapper"); 5628 EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo); 5629 EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64)); 5630 EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32)); 5631 5632 EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg); 5633 EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg); 5634 EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy()); 5635 } 5636 5637 TEST_F(OpenMPIRBuilderTest, TargetEnterData) { 5638 OpenMPIRBuilder OMPBuilder(*M); 5639 OMPBuilder.initialize(); 5640 F->setName("func"); 5641 IRBuilder<> Builder(BB); 5642 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5643 5644 int64_t DeviceID = 2; 5645 5646 AllocaInst *Val1 = 5647 
Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5648 ASSERT_NE(Val1, nullptr); 5649 5650 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5651 F->getEntryBlock().getFirstInsertionPt()); 5652 5653 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5654 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5655 auto GenMapInfoCB = 5656 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5657 // Get map clause information. 5658 Builder.restoreIP(codeGenIP); 5659 5660 CombinedInfo.BasePointers.emplace_back(Val1); 5661 CombinedInfo.Pointers.emplace_back(Val1); 5662 CombinedInfo.DevicePointers.emplace_back( 5663 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5664 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5665 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1)); 5666 uint32_t temp; 5667 CombinedInfo.Names.emplace_back( 5668 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5669 return CombinedInfo; 5670 }; 5671 5672 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5673 /*RequiresDevicePointerInfo=*/false, 5674 /*SeparateBeginEndCalls=*/true); 5675 5676 OMPBuilder.Config.setIsGPU(true); 5677 5678 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper; 5679 Builder.restoreIP(OMPBuilder.createTargetData( 5680 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5681 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5682 5683 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5684 EXPECT_NE(TargetDataCall, nullptr); 5685 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5686 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5687 "__tgt_target_data_begin_mapper"); 5688 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5689 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5690 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5691 5692 Builder.CreateRetVoid(); 5693 EXPECT_FALSE(verifyModule(*M, &errs())); 
5694 } 5695 5696 TEST_F(OpenMPIRBuilderTest, TargetExitData) { 5697 OpenMPIRBuilder OMPBuilder(*M); 5698 OMPBuilder.initialize(); 5699 F->setName("func"); 5700 IRBuilder<> Builder(BB); 5701 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5702 5703 int64_t DeviceID = 2; 5704 5705 AllocaInst *Val1 = 5706 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5707 ASSERT_NE(Val1, nullptr); 5708 5709 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5710 F->getEntryBlock().getFirstInsertionPt()); 5711 5712 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5713 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5714 auto GenMapInfoCB = 5715 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5716 // Get map clause information. 5717 Builder.restoreIP(codeGenIP); 5718 5719 CombinedInfo.BasePointers.emplace_back(Val1); 5720 CombinedInfo.Pointers.emplace_back(Val1); 5721 CombinedInfo.DevicePointers.emplace_back( 5722 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5723 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5724 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2)); 5725 uint32_t temp; 5726 CombinedInfo.Names.emplace_back( 5727 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5728 return CombinedInfo; 5729 }; 5730 5731 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5732 /*RequiresDevicePointerInfo=*/false, 5733 /*SeparateBeginEndCalls=*/true); 5734 5735 OMPBuilder.Config.setIsGPU(true); 5736 5737 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper; 5738 Builder.restoreIP(OMPBuilder.createTargetData( 5739 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5740 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5741 5742 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5743 EXPECT_NE(TargetDataCall, nullptr); 5744 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5745 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5746 
"__tgt_target_data_end_mapper"); 5747 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5748 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5749 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5750 5751 Builder.CreateRetVoid(); 5752 EXPECT_FALSE(verifyModule(*M, &errs())); 5753 } 5754 5755 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { 5756 OpenMPIRBuilder OMPBuilder(*M); 5757 OMPBuilder.initialize(); 5758 F->setName("func"); 5759 IRBuilder<> Builder(BB); 5760 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5761 5762 int64_t DeviceID = 2; 5763 5764 AllocaInst *Val1 = 5765 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5766 ASSERT_NE(Val1, nullptr); 5767 5768 AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy()); 5769 ASSERT_NE(Val2, nullptr); 5770 5771 AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy()); 5772 ASSERT_NE(Val3, nullptr); 5773 5774 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5775 F->getEntryBlock().getFirstInsertionPt()); 5776 5777 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; 5778 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5779 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5780 auto GenMapInfoCB = 5781 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5782 // Get map clause information. 
5783 Builder.restoreIP(codeGenIP); 5784 uint32_t temp; 5785 5786 CombinedInfo.BasePointers.emplace_back(Val1); 5787 CombinedInfo.Pointers.emplace_back(Val1); 5788 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None); 5789 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5790 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3)); 5791 CombinedInfo.Names.emplace_back( 5792 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5793 5794 CombinedInfo.BasePointers.emplace_back(Val2); 5795 CombinedInfo.Pointers.emplace_back(Val2); 5796 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer); 5797 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 5798 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 5799 CombinedInfo.Names.emplace_back( 5800 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5801 5802 CombinedInfo.BasePointers.emplace_back(Val3); 5803 CombinedInfo.Pointers.emplace_back(Val3); 5804 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address); 5805 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 5806 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 5807 CombinedInfo.Names.emplace_back( 5808 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5809 return CombinedInfo; 5810 }; 5811 5812 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5813 /*RequiresDevicePointerInfo=*/true, 5814 /*SeparateBeginEndCalls=*/true); 5815 5816 OMPBuilder.Config.setIsGPU(true); 5817 5818 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 5819 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 5820 if (BodyGenType == BodyGenTy::Priv) { 5821 EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u); 5822 Builder.restoreIP(CodeGenIP); 5823 CallInst *TargetDataCall = 5824 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 5825 EXPECT_NE(TargetDataCall, nullptr); 5826 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5827 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5828 
"__tgt_target_data_begin_mapper"); 5829 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5830 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5831 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5832 5833 LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode()); 5834 EXPECT_NE(LI, nullptr); 5835 StoreInst *SI = dyn_cast<StoreInst>(&BB->back()); 5836 EXPECT_NE(SI, nullptr); 5837 EXPECT_EQ(SI->getValueOperand(), LI); 5838 EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second); 5839 EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second)); 5840 EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second)); 5841 Builder.CreateStore(Builder.getInt32(99), Val1); 5842 } 5843 return Builder.saveIP(); 5844 }; 5845 5846 Builder.restoreIP(OMPBuilder.createTargetData( 5847 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5848 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB)); 5849 5850 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5851 EXPECT_NE(TargetDataCall, nullptr); 5852 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5853 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5854 "__tgt_target_data_end_mapper"); 5855 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5856 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5857 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5858 5859 Builder.CreateRetVoid(); 5860 EXPECT_FALSE(verifyModule(*M, &errs())); 5861 } 5862 5863 namespace { 5864 // Some basic handling of argument mapping for the moment 5865 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder, 5866 llvm::SmallVectorImpl<llvm::Value *> &Args, 5867 llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) { 5868 for (auto Arg : Args) { 5869 CombinedInfo.BasePointers.emplace_back(Arg); 5870 CombinedInfo.Pointers.emplace_back(Arg); 5871 uint32_t SrcLocStrSize; 5872 
CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr( 5873 "Unknown loc - stub implementation", SrcLocStrSize)); 5874 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags( 5875 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | 5876 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | 5877 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM)); 5878 CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64( 5879 OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType()))); 5880 } 5881 } 5882 } // namespace 5883 5884 TEST_F(OpenMPIRBuilderTest, TargetRegion) { 5885 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5886 OpenMPIRBuilder OMPBuilder(*M); 5887 OMPBuilder.initialize(); 5888 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 5889 OMPBuilder.setConfig(Config); 5890 F->setName("func"); 5891 IRBuilder<> Builder(BB); 5892 auto Int32Ty = Builder.getInt32Ty(); 5893 5894 AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr"); 5895 AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr"); 5896 AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr"); 5897 5898 Builder.CreateStore(Builder.getInt32(10), APtr); 5899 Builder.CreateStore(Builder.getInt32(20), BPtr); 5900 auto BodyGenCB = [&](InsertPointTy AllocaIP, 5901 InsertPointTy CodeGenIP) -> InsertPointTy { 5902 Builder.restoreIP(CodeGenIP); 5903 LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr); 5904 LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr); 5905 Value *Sum = Builder.CreateAdd(AVal, BVal); 5906 Builder.CreateStore(Sum, CPtr); 5907 return Builder.saveIP(); 5908 }; 5909 5910 llvm::SmallVector<llvm::Value *> Inputs; 5911 Inputs.push_back(APtr); 5912 Inputs.push_back(BPtr); 5913 Inputs.push_back(CPtr); 5914 5915 auto SimpleArgAccessorCB = 5916 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 5917 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 5918 llvm::OpenMPIRBuilder::InsertPointTy 
CodeGenIP) { 5919 if (!OMPBuilder.Config.isTargetDevice()) { 5920 RetVal = cast<llvm::Value>(&Arg); 5921 return CodeGenIP; 5922 } 5923 5924 Builder.restoreIP(AllocaIP); 5925 5926 llvm::Value *Addr = Builder.CreateAlloca( 5927 Arg.getType()->isPointerTy() 5928 ? Arg.getType() 5929 : Type::getInt64Ty(Builder.getContext()), 5930 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 5931 llvm::Value *AddrAscast = 5932 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 5933 Builder.CreateStore(&Arg, AddrAscast); 5934 5935 Builder.restoreIP(CodeGenIP); 5936 5937 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 5938 5939 return Builder.saveIP(); 5940 }; 5941 5942 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 5943 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 5944 -> llvm::OpenMPIRBuilder::MapInfosTy & { 5945 CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos); 5946 return CombinedInfos; 5947 }; 5948 5949 TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); 5950 OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); 5951 Builder.restoreIP(OMPBuilder.createTarget( 5952 OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs, 5953 GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); 5954 OMPBuilder.finalize(); 5955 Builder.CreateRetVoid(); 5956 5957 // Check the kernel launch sequence 5958 auto Iter = F->getEntryBlock().rbegin(); 5959 EXPECT_TRUE(isa<BranchInst>(&*(Iter))); 5960 BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter)); 5961 EXPECT_TRUE(isa<CmpInst>(&*(++Iter))); 5962 EXPECT_TRUE(isa<CallInst>(&*(++Iter))); 5963 CallInst *Call = dyn_cast<CallInst>(&*(Iter)); 5964 5965 // Check that the kernel launch function is called 5966 Function *KernelLaunchFunc = Call->getCalledFunction(); 5967 EXPECT_NE(KernelLaunchFunc, nullptr); 5968 StringRef FunctionName = KernelLaunchFunc->getName(); 5969 EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel")); 5970 5971 // Check the fallback call 5972 
BasicBlock *FallbackBlock = Branch->getSuccessor(0); 5973 Iter = FallbackBlock->rbegin(); 5974 CallInst *FCall = dyn_cast<CallInst>(&*(++Iter)); 5975 EXPECT_NE(FCall, nullptr); 5976 5977 // Check that the correct aguments are passed in 5978 for (auto ArgInput : zip(FCall->args(), Inputs)) { 5979 EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput)); 5980 } 5981 5982 // Check that the outlined function exists with the expected prefix 5983 Function *OutlinedFunc = FCall->getCalledFunction(); 5984 EXPECT_NE(OutlinedFunc, nullptr); 5985 StringRef FunctionName2 = OutlinedFunc->getName(); 5986 EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading")); 5987 5988 EXPECT_FALSE(verifyModule(*M, &errs())); 5989 } 5990 5991 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { 5992 OpenMPIRBuilder OMPBuilder(*M); 5993 OMPBuilder.setConfig( 5994 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 5995 OMPBuilder.initialize(); 5996 5997 F->setName("func"); 5998 IRBuilder<> Builder(BB); 5999 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6000 6001 LoadInst *Value = nullptr; 6002 StoreInst *TargetStore = nullptr; 6003 llvm::SmallVector<llvm::Value *, 2> CapturedArgs = { 6004 Constant::getNullValue(PointerType::get(Ctx, 0)), 6005 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6006 6007 auto SimpleArgAccessorCB = 6008 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6009 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6010 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6011 if (!OMPBuilder.Config.isTargetDevice()) { 6012 RetVal = cast<llvm::Value>(&Arg); 6013 return CodeGenIP; 6014 } 6015 6016 Builder.restoreIP(AllocaIP); 6017 6018 llvm::Value *Addr = Builder.CreateAlloca( 6019 Arg.getType()->isPointerTy() 6020 ? 
Arg.getType() 6021 : Type::getInt64Ty(Builder.getContext()), 6022 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6023 llvm::Value *AddrAscast = 6024 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6025 Builder.CreateStore(&Arg, AddrAscast); 6026 6027 Builder.restoreIP(CodeGenIP); 6028 6029 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6030 6031 return Builder.saveIP(); 6032 }; 6033 6034 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6035 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6036 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6037 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6038 return CombinedInfos; 6039 }; 6040 6041 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6042 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6043 -> OpenMPIRBuilder::InsertPointTy { 6044 Builder.restoreIP(CodeGenIP); 6045 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6046 TargetStore = Builder.CreateStore(Value, CapturedArgs[1]); 6047 return Builder.saveIP(); 6048 }; 6049 6050 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6051 F->getEntryBlock().getFirstInsertionPt()); 6052 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6053 /*Line=*/3, /*Count=*/0); 6054 6055 Builder.restoreIP( 6056 OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, 6057 /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, 6058 BodyGenCB, SimpleArgAccessorCB)); 6059 6060 Builder.CreateRetVoid(); 6061 OMPBuilder.finalize(); 6062 6063 // Check outlined function 6064 EXPECT_FALSE(verifyModule(*M, &errs())); 6065 EXPECT_NE(TargetStore, nullptr); 6066 Function *OutlinedFn = TargetStore->getFunction(); 6067 EXPECT_NE(F, OutlinedFn); 6068 6069 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6070 // Account for the "implicit" first argument. 
6071 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6072 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 6073 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6074 EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy()); 6075 6076 // Check entry block 6077 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6078 Instruction *Alloca1 = EntryBlock.getFirstNonPHI(); 6079 EXPECT_NE(Alloca1, nullptr); 6080 6081 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6082 auto *Store1 = Alloca1->getNextNode(); 6083 EXPECT_TRUE(isa<StoreInst>(Store1)); 6084 auto *Alloca2 = Store1->getNextNode(); 6085 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6086 auto *Store2 = Alloca2->getNextNode(); 6087 EXPECT_TRUE(isa<StoreInst>(Store2)); 6088 6089 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6090 EXPECT_NE(InitCall, nullptr); 6091 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6092 EXPECT_EQ(InitCall->arg_size(), 2U); 6093 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6094 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6095 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6096 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6097 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6098 auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6099 EXPECT_EQ(ConfigC->getAggregateElement(0U), 6100 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6101 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6102 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6103 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6104 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6105 6106 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6107 EXPECT_NE(EntryBlockBranch, nullptr); 6108 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6109 6110 // Check user code block 6111 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6112 
EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6113 auto *Load1 = UserCodeBlock->getFirstNonPHI(); 6114 EXPECT_TRUE(isa<LoadInst>(Load1)); 6115 auto *Load2 = Load1->getNextNode(); 6116 EXPECT_TRUE(isa<LoadInst>(Load2)); 6117 6118 auto *Value1 = Load2->getNextNode(); 6119 EXPECT_EQ(Value1, Value); 6120 EXPECT_EQ(Value1->getNextNode(), TargetStore); 6121 auto *Deinit = TargetStore->getNextNode(); 6122 EXPECT_NE(Deinit, nullptr); 6123 6124 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6125 EXPECT_NE(DeinitCall, nullptr); 6126 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6127 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6128 6129 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6130 6131 // Check exit block 6132 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6133 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6134 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI())); 6135 } 6136 6137 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { 6138 OpenMPIRBuilder OMPBuilder(*M); 6139 OMPBuilder.setConfig( 6140 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6141 OMPBuilder.initialize(); 6142 6143 F->setName("func"); 6144 IRBuilder<> Builder(BB); 6145 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6146 6147 LoadInst *Value = nullptr; 6148 StoreInst *TargetStore = nullptr; 6149 llvm::SmallVector<llvm::Value *, 1> CapturedArgs = { 6150 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6151 6152 auto SimpleArgAccessorCB = 6153 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6154 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6155 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6156 if (!OMPBuilder.Config.isTargetDevice()) { 6157 RetVal = cast<llvm::Value>(&Arg); 6158 return CodeGenIP; 6159 } 6160 6161 Builder.restoreIP(AllocaIP); 6162 6163 llvm::Value *Addr = Builder.CreateAlloca( 6164 Arg.getType()->isPointerTy() 6165 ? 
Arg.getType() 6166 : Type::getInt64Ty(Builder.getContext()), 6167 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6168 llvm::Value *AddrAscast = 6169 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6170 Builder.CreateStore(&Arg, AddrAscast); 6171 6172 Builder.restoreIP(CodeGenIP); 6173 6174 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6175 6176 return Builder.saveIP(); 6177 }; 6178 6179 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6180 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6181 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6182 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6183 return CombinedInfos; 6184 }; 6185 6186 llvm::Value *RaiseAlloca = nullptr; 6187 6188 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6189 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6190 -> OpenMPIRBuilder::InsertPointTy { 6191 Builder.restoreIP(CodeGenIP); 6192 RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty()); 6193 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6194 TargetStore = Builder.CreateStore(Value, RaiseAlloca); 6195 return Builder.saveIP(); 6196 }; 6197 6198 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6199 F->getEntryBlock().getFirstInsertionPt()); 6200 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6201 /*Line=*/3, /*Count=*/0); 6202 6203 Builder.restoreIP( 6204 OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, 6205 /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, 6206 BodyGenCB, SimpleArgAccessorCB)); 6207 6208 Builder.CreateRetVoid(); 6209 OMPBuilder.finalize(); 6210 6211 // Check outlined function 6212 EXPECT_FALSE(verifyModule(*M, &errs())); 6213 EXPECT_NE(TargetStore, nullptr); 6214 Function *OutlinedFn = TargetStore->getFunction(); 6215 EXPECT_NE(F, OutlinedFn); 6216 6217 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6218 // Account for the "implicit" first argument. 
6219 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6220 EXPECT_EQ(OutlinedFn->arg_size(), 2U); 6221 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6222 6223 // Check entry block, to see if we have raised our alloca 6224 // from the body to the entry block. 6225 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6226 6227 // Check that we have moved our alloca created in the 6228 // BodyGenCB function, to the top of the function. 6229 Instruction *Alloca1 = EntryBlock.getFirstNonPHI(); 6230 EXPECT_NE(Alloca1, nullptr); 6231 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6232 EXPECT_EQ(Alloca1, RaiseAlloca); 6233 6234 // Verify we have not altered the rest of the function 6235 // inappropriately with our alloca movement. 6236 auto *Alloca2 = Alloca1->getNextNode(); 6237 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6238 auto *Store2 = Alloca2->getNextNode(); 6239 EXPECT_TRUE(isa<StoreInst>(Store2)); 6240 6241 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6242 EXPECT_NE(InitCall, nullptr); 6243 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6244 EXPECT_EQ(InitCall->arg_size(), 2U); 6245 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6246 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6247 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6248 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6249 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6250 auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6251 EXPECT_EQ(ConfigC->getAggregateElement(0U), 6252 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6253 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6254 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6255 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6256 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6257 6258 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6259 
EXPECT_NE(EntryBlockBranch, nullptr); 6260 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6261 6262 // Check user code block 6263 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6264 EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6265 auto *Load1 = UserCodeBlock->getFirstNonPHI(); 6266 EXPECT_TRUE(isa<LoadInst>(Load1)); 6267 auto *Load2 = Load1->getNextNode(); 6268 EXPECT_TRUE(isa<LoadInst>(Load2)); 6269 EXPECT_EQ(Load2, Value); 6270 EXPECT_EQ(Load2->getNextNode(), TargetStore); 6271 auto *Deinit = TargetStore->getNextNode(); 6272 EXPECT_NE(Deinit, nullptr); 6273 6274 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6275 EXPECT_NE(DeinitCall, nullptr); 6276 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6277 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6278 6279 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6280 6281 // Check exit block 6282 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6283 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6284 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI())); 6285 } 6286 6287 TEST_F(OpenMPIRBuilderTest, CreateTask) { 6288 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6289 OpenMPIRBuilder OMPBuilder(*M); 6290 OMPBuilder.Config.IsTargetDevice = false; 6291 OMPBuilder.initialize(); 6292 F->setName("func"); 6293 IRBuilder<> Builder(BB); 6294 6295 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 6296 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 6297 Value *Val128 = 6298 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 6299 6300 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6301 Builder.restoreIP(AllocaIP); 6302 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 6303 "bodygen.alloca128"); 6304 6305 Builder.restoreIP(CodeGenIP); 6306 // Loading and storing captured pointer and values 6307 Builder.CreateStore(Val128, Local128); 6308 Value *Val32 
= Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 6309 "bodygen.load32"); 6310 6311 LoadInst *PrivLoad128 = Builder.CreateLoad( 6312 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 6313 Value *Cmp = Builder.CreateICmpNE( 6314 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 6315 Instruction *ThenTerm, *ElseTerm; 6316 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 6317 &ThenTerm, &ElseTerm); 6318 }; 6319 6320 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6321 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6322 OpenMPIRBuilder::LocationDescription Loc( 6323 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6324 Builder.restoreIP(OMPBuilder.createTask( 6325 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6326 BodyGenCB)); 6327 OMPBuilder.finalize(); 6328 Builder.CreateRetVoid(); 6329 6330 EXPECT_FALSE(verifyModule(*M, &errs())); 6331 6332 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6333 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6334 ->user_back()); 6335 6336 // Verify the Ident argument 6337 GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0)); 6338 ASSERT_NE(Ident, nullptr); 6339 EXPECT_TRUE(Ident->hasInitializer()); 6340 Constant *Initializer = Ident->getInitializer(); 6341 GlobalVariable *SrcStrGlob = 6342 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 6343 ASSERT_NE(SrcStrGlob, nullptr); 6344 ConstantDataArray *SrcSrc = 6345 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 6346 ASSERT_NE(SrcSrc, nullptr); 6347 6348 // Verify the num_threads argument. 6349 CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1)); 6350 ASSERT_NE(GTID, nullptr); 6351 EXPECT_EQ(GTID->arg_size(), 1U); 6352 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 6353 6354 // Verify the flags 6355 // TODO: Check for others flags. 
Currently testing only for tiedness. 6356 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 6357 ASSERT_NE(Flags, nullptr); 6358 EXPECT_EQ(Flags->getSExtValue(), 1); 6359 6360 // Verify the data size 6361 ConstantInt *DataSize = 6362 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 6363 ASSERT_NE(DataSize, nullptr); 6364 EXPECT_EQ(DataSize->getSExtValue(), 40); 6365 6366 ConstantInt *SharedsSize = 6367 dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4)); 6368 EXPECT_EQ(SharedsSize->getSExtValue(), 6369 24); // 64-bit pointer + 128-bit integer 6370 6371 // Verify Wrapper function 6372 Function *OutlinedFn = 6373 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 6374 ASSERT_NE(OutlinedFn, nullptr); 6375 6376 LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin()); 6377 ASSERT_NE(SharedsLoad, nullptr); 6378 EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1)); 6379 6380 EXPECT_FALSE(OutlinedFn->isDeclaration()); 6381 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty()); 6382 6383 // Verify that the data argument is used only once, and that too in the load 6384 // instruction that is then used for accessing shared data. 
6385 Value *DataPtr = OutlinedFn->getArg(1); 6386 EXPECT_EQ(DataPtr->getNumUses(), 1U); 6387 EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser())); 6388 Value *Data = DataPtr->uses().begin()->getUser(); 6389 EXPECT_TRUE(all_of(Data->uses(), [](Use &U) { 6390 return isa<GetElementPtrInst>(U.getUser()); 6391 })); 6392 6393 // Verify the presence of `trunc` and `icmp` instructions in Outlined function 6394 EXPECT_TRUE(any_of(instructions(OutlinedFn), 6395 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 6396 EXPECT_TRUE(any_of(instructions(OutlinedFn), 6397 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 6398 6399 // Verify the execution of the task 6400 CallInst *TaskCall = dyn_cast<CallInst>( 6401 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 6402 ->user_back()); 6403 ASSERT_NE(TaskCall, nullptr); 6404 EXPECT_EQ(TaskCall->getArgOperand(0), Ident); 6405 EXPECT_EQ(TaskCall->getArgOperand(1), GTID); 6406 EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall); 6407 6408 // Verify that the argument data has been copied 6409 for (User *in : TaskAllocCall->users()) { 6410 if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) { 6411 EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall); 6412 } 6413 } 6414 } 6415 6416 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { 6417 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6418 OpenMPIRBuilder OMPBuilder(*M); 6419 OMPBuilder.Config.IsTargetDevice = false; 6420 OMPBuilder.initialize(); 6421 F->setName("func"); 6422 IRBuilder<> Builder(BB); 6423 6424 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6425 6426 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6427 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6428 OpenMPIRBuilder::LocationDescription Loc( 6429 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6430 Builder.restoreIP(OMPBuilder.createTask( 6431 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 
6432 BodyGenCB)); 6433 OMPBuilder.finalize(); 6434 Builder.CreateRetVoid(); 6435 6436 EXPECT_FALSE(verifyModule(*M, &errs())); 6437 6438 // Check that the outlined function has only one argument. 6439 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6440 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6441 ->user_back()); 6442 Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5)); 6443 ASSERT_NE(OutlinedFn, nullptr); 6444 ASSERT_EQ(OutlinedFn->arg_size(), 1U); 6445 } 6446 6447 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { 6448 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6449 OpenMPIRBuilder OMPBuilder(*M); 6450 OMPBuilder.Config.IsTargetDevice = false; 6451 OMPBuilder.initialize(); 6452 F->setName("func"); 6453 IRBuilder<> Builder(BB); 6454 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6455 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6456 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6457 OpenMPIRBuilder::LocationDescription Loc( 6458 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6459 Builder.restoreIP(OMPBuilder.createTask( 6460 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, 6461 /*Tied=*/false)); 6462 OMPBuilder.finalize(); 6463 Builder.CreateRetVoid(); 6464 6465 // Check for the `Tied` argument 6466 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6467 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6468 ->user_back()); 6469 ASSERT_NE(TaskAllocCall, nullptr); 6470 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 6471 ASSERT_NE(Flags, nullptr); 6472 EXPECT_EQ(Flags->getZExtValue() & 1U, 0U); 6473 6474 EXPECT_FALSE(verifyModule(*M, &errs())); 6475 } 6476 6477 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { 6478 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6479 OpenMPIRBuilder OMPBuilder(*M); 6480 OMPBuilder.Config.IsTargetDevice = false; 6481 
OMPBuilder.initialize(); 6482 F->setName("func"); 6483 IRBuilder<> Builder(BB); 6484 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6485 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6486 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6487 OpenMPIRBuilder::LocationDescription Loc( 6488 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6489 AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext())); 6490 SmallVector<OpenMPIRBuilder::DependData> DDS; 6491 { 6492 OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn, 6493 Type::getInt32Ty(M->getContext()), InDep); 6494 DDS.push_back(DDIn); 6495 } 6496 Builder.restoreIP(OMPBuilder.createTask( 6497 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, 6498 /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); 6499 OMPBuilder.finalize(); 6500 Builder.CreateRetVoid(); 6501 6502 // Check for the `NumDeps` argument 6503 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6504 OMPBuilder 6505 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps) 6506 ->user_back()); 6507 ASSERT_NE(TaskAllocCall, nullptr); 6508 ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 6509 ASSERT_NE(NumDeps, nullptr); 6510 EXPECT_EQ(NumDeps->getZExtValue(), 1U); 6511 6512 // Check for the `DepInfo` array argument 6513 AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4)); 6514 ASSERT_NE(DepArray, nullptr); 6515 Value::user_iterator DepArrayI = DepArray->user_begin(); 6516 ++DepArrayI; 6517 Value::user_iterator DepInfoI = DepArrayI->user_begin(); 6518 // Check for the `DependKind` flag in the `DepInfo` array 6519 Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI); 6520 ASSERT_NE(Flag, nullptr); 6521 ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag); 6522 ASSERT_NE(FlagInt, nullptr); 6523 EXPECT_EQ(FlagInt->getZExtValue(), 6524 static_cast<unsigned 
int>(RTLDependenceKindTy::DepIn)); 6525 ++DepInfoI; 6526 // Check for the size in the `DepInfo` array 6527 Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI); 6528 ASSERT_NE(Size, nullptr); 6529 ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size); 6530 ASSERT_NE(SizeInt, nullptr); 6531 EXPECT_EQ(SizeInt->getZExtValue(), 4U); 6532 ++DepInfoI; 6533 // Check for the variable address in the `DepInfo` array 6534 Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI); 6535 ASSERT_NE(AddrStored, nullptr); 6536 PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored); 6537 ASSERT_NE(AddrInt, nullptr); 6538 Value *Addr = AddrInt->getPointerOperand(); 6539 EXPECT_EQ(Addr, InDep); 6540 6541 ConstantInt *NumDepsNoAlias = 6542 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5)); 6543 ASSERT_NE(NumDepsNoAlias, nullptr); 6544 EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U); 6545 EXPECT_EQ(TaskAllocCall->getOperand(6), 6546 ConstantPointerNull::get(PointerType::getUnqual(M->getContext()))); 6547 6548 EXPECT_FALSE(verifyModule(*M, &errs())); 6549 } 6550 6551 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { 6552 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6553 OpenMPIRBuilder OMPBuilder(*M); 6554 OMPBuilder.Config.IsTargetDevice = false; 6555 OMPBuilder.initialize(); 6556 F->setName("func"); 6557 IRBuilder<> Builder(BB); 6558 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6559 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6560 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 6561 Builder.SetInsertPoint(BodyBB); 6562 Value *Final = Builder.CreateICmp( 6563 CmpInst::Predicate::ICMP_EQ, F->getArg(0), 6564 ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); 6565 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 6566 Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, 6567 /*Tied=*/false, Final)); 6568 OMPBuilder.finalize(); 6569 Builder.CreateRetVoid(); 6570 
6571 // Check for the `Tied` argument 6572 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6573 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6574 ->user_back()); 6575 ASSERT_NE(TaskAllocCall, nullptr); 6576 BinaryOperator *OrInst = 6577 dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2)); 6578 ASSERT_NE(OrInst, nullptr); 6579 EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or); 6580 6581 // One of the arguments to `or` instruction is the tied flag, which is equal 6582 // to zero. 6583 EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) { 6584 if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op)) 6585 return TiedValue->getSExtValue() == 0; 6586 return false; 6587 })); 6588 6589 // One of the arguments to `or` instruction is the final condition. 6590 EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) { 6591 if (SelectInst *Select = dyn_cast<SelectInst>(op)) { 6592 ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue()); 6593 ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue()); 6594 if (!TrueValue || !FalseValue) 6595 return false; 6596 return Select->getCondition() == Final && 6597 TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0; 6598 } 6599 return false; 6600 })); 6601 6602 EXPECT_FALSE(verifyModule(*M, &errs())); 6603 } 6604 6605 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { 6606 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6607 OpenMPIRBuilder OMPBuilder(*M); 6608 OMPBuilder.Config.IsTargetDevice = false; 6609 OMPBuilder.initialize(); 6610 F->setName("func"); 6611 IRBuilder<> Builder(BB); 6612 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6613 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6614 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 6615 Builder.SetInsertPoint(BodyBB); 6616 Value *IfCondition = Builder.CreateICmp( 6617 CmpInst::Predicate::ICMP_EQ, F->getArg(0), 6618 
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); 6619 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 6620 Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, 6621 /*Tied=*/false, /*Final=*/nullptr, 6622 IfCondition)); 6623 OMPBuilder.finalize(); 6624 Builder.CreateRetVoid(); 6625 6626 EXPECT_FALSE(verifyModule(*M, &errs())); 6627 6628 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6629 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6630 ->user_back()); 6631 ASSERT_NE(TaskAllocCall, nullptr); 6632 6633 // Check the branching is based on the if condition argument. 6634 BranchInst *IfConditionBranchInst = 6635 dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator()); 6636 ASSERT_NE(IfConditionBranchInst, nullptr); 6637 ASSERT_TRUE(IfConditionBranchInst->isConditional()); 6638 EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition); 6639 6640 // Check that the `__kmpc_omp_task` executes only in the then branch. 6641 CallInst *TaskCall = dyn_cast<CallInst>( 6642 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 6643 ->user_back()); 6644 ASSERT_NE(TaskCall, nullptr); 6645 EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0)); 6646 6647 // Check that the OpenMP Runtime Functions specific to `if` clause execute 6648 // only in the else branch. Also check that the function call is between the 6649 // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls. 
6650 CallInst *TaskBeginIfCall = dyn_cast<CallInst>( 6651 OMPBuilder 6652 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0) 6653 ->user_back()); 6654 CallInst *TaskCompleteCall = dyn_cast<CallInst>( 6655 OMPBuilder 6656 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0) 6657 ->user_back()); 6658 ASSERT_NE(TaskBeginIfCall, nullptr); 6659 ASSERT_NE(TaskCompleteCall, nullptr); 6660 Function *OulinedFn = 6661 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 6662 ASSERT_NE(OulinedFn, nullptr); 6663 CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back()); 6664 ASSERT_NE(OulinedFnCall, nullptr); 6665 EXPECT_EQ(TaskBeginIfCall->getParent(), 6666 IfConditionBranchInst->getSuccessor(1)); 6667 6668 EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); 6669 EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); 6670 } 6671 6672 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { 6673 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6674 OpenMPIRBuilder OMPBuilder(*M); 6675 OMPBuilder.initialize(); 6676 F->setName("func"); 6677 IRBuilder<> Builder(BB); 6678 6679 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 6680 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 6681 Value *Val128 = 6682 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 6683 Instruction *ThenTerm, *ElseTerm; 6684 6685 Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp; 6686 6687 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6688 Builder.restoreIP(AllocaIP); 6689 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 6690 "bodygen.alloca128"); 6691 6692 Builder.restoreIP(CodeGenIP); 6693 // Loading and storing captured pointer and values 6694 InternalStoreInst = Builder.CreateStore(Val128, Local128); 6695 InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), 
ValPtr32, 6696 "bodygen.load32"); 6697 6698 InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128, 6699 "bodygen.local.load128"); 6700 InternalIfCmp = Builder.CreateICmpNE( 6701 InternalLoad32, 6702 Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType())); 6703 SplitBlockAndInsertIfThenElse(InternalIfCmp, 6704 CodeGenIP.getBlock()->getTerminator(), 6705 &ThenTerm, &ElseTerm); 6706 }; 6707 6708 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6709 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6710 OpenMPIRBuilder::LocationDescription Loc( 6711 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6712 Builder.restoreIP(OMPBuilder.createTaskgroup( 6713 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6714 BodyGenCB)); 6715 OMPBuilder.finalize(); 6716 Builder.CreateRetVoid(); 6717 6718 EXPECT_FALSE(verifyModule(*M, &errs())); 6719 6720 CallInst *TaskgroupCall = dyn_cast<CallInst>( 6721 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 6722 ->user_back()); 6723 ASSERT_NE(TaskgroupCall, nullptr); 6724 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 6725 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 6726 ->user_back()); 6727 ASSERT_NE(EndTaskgroupCall, nullptr); 6728 6729 // Verify the Ident argument 6730 GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0)); 6731 ASSERT_NE(Ident, nullptr); 6732 EXPECT_TRUE(Ident->hasInitializer()); 6733 Constant *Initializer = Ident->getInitializer(); 6734 GlobalVariable *SrcStrGlob = 6735 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 6736 ASSERT_NE(SrcStrGlob, nullptr); 6737 ConstantDataArray *SrcSrc = 6738 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 6739 ASSERT_NE(SrcSrc, nullptr); 6740 6741 // Verify the num_threads argument. 
6742 CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1)); 6743 ASSERT_NE(GTID, nullptr); 6744 EXPECT_EQ(GTID->arg_size(), 1U); 6745 EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr( 6746 OMPRTL___kmpc_global_thread_num)); 6747 6748 // Checking the general structure of the IR generated is same as expected. 6749 Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction(); 6750 EXPECT_EQ(GeneratedStoreInst, InternalStoreInst); 6751 Instruction *GeneratedLoad32 = 6752 GeneratedStoreInst->getNextNonDebugInstruction(); 6753 EXPECT_EQ(GeneratedLoad32, InternalLoad32); 6754 Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction(); 6755 EXPECT_EQ(GeneratedLoad128, InternalLoad128); 6756 6757 // Checking the ordering because of the if statements and that 6758 // `__kmp_end_taskgroup` call is after the if branching. 6759 BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(), 6760 ThenTerm->getSuccessor(0), 6761 EndTaskgroupCall->getParent(), 6762 ElseTerm->getParent()}; 6763 verifyDFSOrder(F, RefOrder); 6764 } 6765 6766 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { 6767 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6768 OpenMPIRBuilder OMPBuilder(*M); 6769 OMPBuilder.Config.IsTargetDevice = false; 6770 OMPBuilder.initialize(); 6771 F->setName("func"); 6772 IRBuilder<> Builder(BB); 6773 6774 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6775 Builder.restoreIP(AllocaIP); 6776 AllocaInst *Alloca32 = 6777 Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32"); 6778 AllocaInst *Alloca64 = 6779 Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64"); 6780 Builder.restoreIP(CodeGenIP); 6781 auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6782 Builder.restoreIP(CodeGenIP); 6783 LoadInst *LoadValue = 6784 Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); 6785 Value 
*AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64)); 6786 Builder.CreateStore(AddInst, Alloca64); 6787 }; 6788 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 6789 Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); 6790 6791 auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6792 Builder.restoreIP(CodeGenIP); 6793 LoadInst *LoadValue = 6794 Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); 6795 Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32)); 6796 Builder.CreateStore(AddInst, Alloca32); 6797 }; 6798 OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); 6799 Builder.restoreIP(OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); 6800 }; 6801 6802 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6803 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6804 OpenMPIRBuilder::LocationDescription Loc( 6805 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6806 Builder.restoreIP(OMPBuilder.createTaskgroup( 6807 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6808 BodyGenCB)); 6809 OMPBuilder.finalize(); 6810 Builder.CreateRetVoid(); 6811 6812 EXPECT_FALSE(verifyModule(*M, &errs())); 6813 6814 CallInst *TaskgroupCall = dyn_cast<CallInst>( 6815 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 6816 ->user_back()); 6817 ASSERT_NE(TaskgroupCall, nullptr); 6818 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 6819 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 6820 ->user_back()); 6821 ASSERT_NE(EndTaskgroupCall, nullptr); 6822 6823 Function *TaskAllocFn = 6824 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); 6825 ASSERT_EQ(TaskAllocFn->getNumUses(), 2u); 6826 6827 CallInst *FirstTaskAllocCall = 6828 dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()); 6829 CallInst *SecondTaskAllocCall = 6830 
dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++); 6831 ASSERT_NE(FirstTaskAllocCall, nullptr); 6832 ASSERT_NE(SecondTaskAllocCall, nullptr); 6833 6834 // Verify that the tasks have been generated in order and inside taskgroup 6835 // construct. 6836 BasicBlock *RefOrder[] = { 6837 TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(), 6838 SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()}; 6839 verifyDFSOrder(F, RefOrder); 6840 } 6841 6842 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { 6843 OpenMPIRBuilder OMPBuilder(*M); 6844 OMPBuilder.initialize(); 6845 6846 IRBuilder<> Builder(BB); 6847 6848 OpenMPIRBuilder::TargetDataRTArgs RTArgs; 6849 OpenMPIRBuilder::TargetDataInfo Info(true, false); 6850 6851 auto VoidPtrTy = PointerType::getUnqual(Builder.getContext()); 6852 auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext()); 6853 auto Int64Ty = Type::getInt64Ty(Builder.getContext()); 6854 auto Int64PtrTy = PointerType::getUnqual(Builder.getContext()); 6855 auto Array4VoidPtrTy = ArrayType::get(VoidPtrTy, 4); 6856 auto Array4Int64PtrTy = ArrayType::get(Int64Ty, 4); 6857 6858 Info.RTArgs.BasePointersArray = 6859 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo(0)); 6860 Info.RTArgs.PointersArray = 6861 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); 6862 Info.RTArgs.SizesArray = 6863 ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo()); 6864 Info.RTArgs.MapTypesArray = 6865 ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo()); 6866 Info.RTArgs.MapNamesArray = 6867 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); 6868 Info.RTArgs.MappersArray = 6869 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); 6870 Info.NumberOfPtrs = 4; 6871 6872 OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false); 6873 6874 EXPECT_NE(RTArgs.BasePointersArray, nullptr); 6875 EXPECT_NE(RTArgs.PointersArray, nullptr); 6876 EXPECT_NE(RTArgs.SizesArray, nullptr); 6877 
EXPECT_NE(RTArgs.MapTypesArray, nullptr); 6878 EXPECT_NE(RTArgs.MappersArray, nullptr); 6879 EXPECT_NE(RTArgs.MapNamesArray, nullptr); 6880 EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr); 6881 6882 EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy); 6883 EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy); 6884 EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy); 6885 EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy); 6886 EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy); 6887 EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy); 6888 } 6889 6890 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { 6891 OpenMPIRBuilder OMPBuilder(*M); 6892 OMPBuilder.setConfig( 6893 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6894 OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager; 6895 TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); 6896 InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); 6897 EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo)); 6898 InfoManager.initializeDeviceGlobalVarEntryInfo( 6899 "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0); 6900 InfoManager.registerTargetRegionEntryInfo( 6901 EntryInfo, nullptr, nullptr, 6902 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion); 6903 InfoManager.registerDeviceGlobalVarEntryInfo( 6904 "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 6905 GlobalValue::WeakAnyLinkage); 6906 EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar")); 6907 } 6908 6909 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they 6910 // call each other (recursively in some cases). 
The test case test these 6911 // functions by utilising them for host code generation for declare target 6912 // global variables 6913 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) { 6914 OpenMPIRBuilder OMPBuilder(*M); 6915 OMPBuilder.initialize(); 6916 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 6917 OMPBuilder.setConfig(Config); 6918 6919 std::vector<llvm::Triple> TargetTriple; 6920 TargetTriple.emplace_back("amdgcn-amd-amdhsa"); 6921 6922 TargetRegionEntryInfo EntryInfo("", 42, 4711, 17); 6923 std::vector<GlobalVariable *> RefsGathered; 6924 6925 std::vector<Constant *> Globals; 6926 auto *IntTy = Type::getInt32Ty(Ctx); 6927 for (int I = 0; I < 2; ++I) { 6928 Globals.push_back(M->getOrInsertGlobal( 6929 "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * { 6930 return new GlobalVariable( 6931 *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage, 6932 ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I)); 6933 })); 6934 } 6935 6936 OMPBuilder.registerTargetGlobalVariable( 6937 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 6938 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 6939 EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple, 6940 nullptr, nullptr, Globals[0]->getType(), Globals[0]); 6941 6942 OMPBuilder.registerTargetGlobalVariable( 6943 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink, 6944 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 6945 EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple, 6946 nullptr, nullptr, Globals[1]->getType(), Globals[1]); 6947 6948 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn = 6949 [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, 6950 const llvm::TargetRegionEntryInfo &EntryInfo) -> void { 6951 // If this is invoked, then we want to emit an error, even if it is not 6952 // neccesarily the most readable, as something has went 
wrong. The 6953 // test-suite unfortunately eats up all error output 6954 ASSERT_EQ(Kind, Kind); 6955 }; 6956 6957 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn); 6958 6959 // Clauses for data_int_0 with To + Any clauses for the host 6960 std::vector<GlobalVariable *> OffloadEntries; 6961 OffloadEntries.push_back(M->getNamedGlobal(".omp_offloading.entry_name")); 6962 OffloadEntries.push_back( 6963 M->getNamedGlobal(".omp_offloading.entry.test_data_int_0")); 6964 6965 // Clauses for data_int_1 with Link + Any clauses for the host 6966 OffloadEntries.push_back( 6967 M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr")); 6968 OffloadEntries.push_back(M->getNamedGlobal(".omp_offloading.entry_name.1")); 6969 OffloadEntries.push_back(M->getNamedGlobal( 6970 ".omp_offloading.entry.test_data_int_1_decl_tgt_ref_ptr")); 6971 6972 for (unsigned I = 0; I < OffloadEntries.size(); ++I) 6973 EXPECT_NE(OffloadEntries[I], nullptr); 6974 6975 // Metadata generated for the host offload module 6976 NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info"); 6977 ASSERT_THAT(OffloadMetadata, testing::NotNull()); 6978 StringRef Nodes[2] = { 6979 cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1)) 6980 ->getString(), 6981 cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1)) 6982 ->getString()}; 6983 EXPECT_THAT( 6984 Nodes, testing::UnorderedElementsAre("test_data_int_0", 6985 "test_data_int_1_decl_tgt_ref_ptr")); 6986 } 6987 6988 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) { 6989 OpenMPIRBuilder OMPBuilder(*M); 6990 OMPBuilder.initialize(); 6991 OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true, 6992 /* IsGPU = */ true, 6993 /* OpenMPOffloadMandatory = */ false, 6994 /* HasRequiresReverseOffload = */ false, 6995 /* HasRequiresUnifiedAddress = */ false, 6996 /* HasRequiresUnifiedSharedMemory = */ false, 6997 /* HasRequiresDynamicAllocators = */ false); 6998 OMPBuilder.setConfig(Config); 6999 7000 FunctionCallee FnTypeAndCallee 
= 7001 M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx)); 7002 7003 auto *Fn = cast<Function>(FnTypeAndCallee.getCallee()); 7004 OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn, 7005 /* Size = */ 0, 7006 /* Flags = */ 0, GlobalValue::WeakAnyLinkage); 7007 7008 // Check nvvm.annotations only created for GPU kernels 7009 NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations"); 7010 EXPECT_NE(MD, nullptr); 7011 EXPECT_EQ(MD->getNumOperands(), 1u); 7012 7013 MDNode *Annotations = MD->getOperand(0); 7014 EXPECT_EQ(Annotations->getNumOperands(), 3u); 7015 7016 Constant *ConstVal = 7017 dyn_cast<ConstantAsMetadata>(Annotations->getOperand(0))->getValue(); 7018 EXPECT_TRUE(isa<Function>(Fn)); 7019 EXPECT_EQ(ConstVal, cast<Function>(Fn)); 7020 7021 EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel")); 7022 7023 EXPECT_TRUE(mdconst::hasa<ConstantInt>(Annotations->getOperand(2))); 7024 APInt IntVal = 7025 mdconst::extract<ConstantInt>(Annotations->getOperand(2))->getValue(); 7026 EXPECT_EQ(IntVal, 1); 7027 7028 // Check kernel attributes 7029 EXPECT_TRUE(Fn->hasFnAttribute("kernel")); 7030 EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress)); 7031 } 7032 7033 } // namespace 7034