1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Frontend/OpenMP/OMPConstants.h" 10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" 11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 12 #include "llvm/IR/BasicBlock.h" 13 #include "llvm/IR/DIBuilder.h" 14 #include "llvm/IR/Function.h" 15 #include "llvm/IR/InstIterator.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/IR/LLVMContext.h" 18 #include "llvm/IR/Module.h" 19 #include "llvm/IR/Verifier.h" 20 #include "llvm/Passes/PassBuilder.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 23 #include "gmock/gmock.h" 24 #include "gtest/gtest.h" 25 #include <optional> 26 27 using namespace llvm; 28 using namespace omp; 29 30 namespace { 31 32 /// Create an instruction that uses the values in \p Values. We use "printf" 33 /// just because it is often used for this purpose in test code, but it is never 34 /// executed here. 
35 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr, 36 ArrayRef<Value *> Values) { 37 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 38 39 GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M); 40 Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 41 Constant *Indices[] = {Zero, Zero}; 42 Constant *FormatStrConst = 43 ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices); 44 45 Function *PrintfDecl = M->getFunction("printf"); 46 if (!PrintfDecl) { 47 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 48 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true); 49 PrintfDecl = Function::Create(Ty, Linkage, "printf", M); 50 } 51 52 SmallVector<Value *, 4> Args; 53 Args.push_back(FormatStrConst); 54 Args.append(Values.begin(), Values.end()); 55 return Builder.CreateCall(PrintfDecl, Args); 56 } 57 58 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit 59 /// order the control flow of \p F. 60 /// 61 /// This is an easy way to verify the branching structure of the CFG without 62 /// checking every branch instruction individually. For the CFG of a 63 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering 64 /// the body, i.e. the DFS order corresponds to the execution order with one 65 /// loop iteration. 
66 static testing::AssertionResult 67 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 68 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 69 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 70 71 df_iterator_default_set<BasicBlock *, 16> Visited; 72 auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited); 73 74 BasicBlock *Prev = nullptr; 75 for (BasicBlock *BB : DFS) { 76 if (It != E && BB == *It) { 77 Prev = *It; 78 ++It; 79 } 80 } 81 82 if (It == E) 83 return testing::AssertionSuccess(); 84 if (!Prev) 85 return testing::AssertionFailure() 86 << "Did not find " << (*It)->getName() << " in control flow"; 87 return testing::AssertionFailure() 88 << "Expected " << Prev->getName() << " before " << (*It)->getName() 89 << " in control flow"; 90 } 91 92 /// Verify that blocks in \p RefOrder are in the same relative order in the 93 /// linked lists of blocks in \p F. The linked list may contain additional 94 /// blocks in-between. 95 /// 96 /// While the order in the linked list is not relevant for semantics, keeping 97 /// the order roughly in execution order makes its printout easier to read. 98 static testing::AssertionResult 99 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 100 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 101 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 102 103 BasicBlock *Prev = nullptr; 104 for (BasicBlock &BB : *F) { 105 if (It != E && &BB == *It) { 106 Prev = *It; 107 ++It; 108 } 109 } 110 111 if (It == E) 112 return testing::AssertionSuccess(); 113 if (!Prev) 114 return testing::AssertionFailure() << "Did not find " << (*It)->getName() 115 << " in function " << F->getName(); 116 return testing::AssertionFailure() 117 << "Expected " << Prev->getName() << " before " << (*It)->getName() 118 << " in function " << F->getName(); 119 } 120 121 /// Populate Calls with call instructions calling the function with the given 122 /// FnID from the given function F. 
123 static void findCalls(Function *F, omp::RuntimeFunction FnID, 124 OpenMPIRBuilder &OMPBuilder, 125 SmallVectorImpl<CallInst *> &Calls) { 126 Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); 127 for (BasicBlock &BB : *F) { 128 for (Instruction &I : BB) { 129 auto *Call = dyn_cast<CallInst>(&I); 130 if (Call && Call->getCalledFunction() == Fn) 131 Calls.push_back(Call); 132 } 133 } 134 } 135 136 /// Assuming \p F contains only one call to the function with the given \p FnID, 137 /// return that call. 138 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID, 139 OpenMPIRBuilder &OMPBuilder) { 140 SmallVector<CallInst *, 1> Calls; 141 findCalls(F, FnID, OMPBuilder, Calls); 142 EXPECT_EQ(1u, Calls.size()); 143 if (Calls.size() != 1) 144 return nullptr; 145 return Calls.front(); 146 } 147 148 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) { 149 switch (SchedType & ~omp::OMPScheduleType::ModifierMask) { 150 case omp::OMPScheduleType::BaseDynamicChunked: 151 return omp::OMP_SCHEDULE_Dynamic; 152 case omp::OMPScheduleType::BaseGuidedChunked: 153 return omp::OMP_SCHEDULE_Guided; 154 case omp::OMPScheduleType::BaseAuto: 155 return omp::OMP_SCHEDULE_Auto; 156 case omp::OMPScheduleType::BaseRuntime: 157 return omp::OMP_SCHEDULE_Runtime; 158 default: 159 llvm_unreachable("unknown type for this test"); 160 } 161 } 162 163 class OpenMPIRBuilderTest : public testing::Test { 164 protected: 165 void SetUp() override { 166 M.reset(new Module("MyModule", Ctx)); 167 FunctionType *FTy = 168 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 169 /*isVarArg=*/false); 170 F = Function::Create(FTy, Function::ExternalLinkage, "", M.get()); 171 BB = BasicBlock::Create(Ctx, "", F); 172 173 DIBuilder DIB(*M); 174 auto File = DIB.createFile("test.dbg", "/src", std::nullopt, 175 std::optional<StringRef>("/src/test.dbg")); 176 auto CU = 177 DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0); 178 auto Type = 
DIB.createSubroutineType(DIB.getOrCreateTypeArray({})); 179 auto SP = DIB.createFunction( 180 CU, "foo", "", File, 1, Type, 1, DINode::FlagZero, 181 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); 182 F->setSubprogram(SP); 183 auto Scope = DIB.createLexicalBlockFile(SP, File, 0); 184 DIB.finalize(); 185 DL = DILocation::get(Ctx, 3, 7, Scope); 186 } 187 188 void TearDown() override { 189 BB = nullptr; 190 M.reset(); 191 } 192 193 /// Create a function with a simple loop that calls printf using the logical 194 /// loop counter for use with tests that need a CanonicalLoopInfo object. 195 CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL, 196 OpenMPIRBuilder &OMPBuilder, 197 int UseIVBits, 198 CallInst **Call = nullptr, 199 BasicBlock **BodyCode = nullptr) { 200 OMPBuilder.initialize(); 201 F->setName("func"); 202 203 IRBuilder<> Builder(BB); 204 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 205 Value *TripCount = F->getArg(0); 206 207 Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits); 208 Value *CastedTripCount = 209 Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount"); 210 211 auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, 212 llvm::Value *LC) { 213 Builder.restoreIP(CodeGenIP); 214 if (BodyCode) 215 *BodyCode = Builder.GetInsertBlock(); 216 217 // Add something that consumes the induction variable to the body. 218 CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC}); 219 if (Call) 220 *Call = CallInst; 221 }; 222 CanonicalLoopInfo *Loop = 223 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount); 224 225 // Finalize the function. 
226 Builder.restoreIP(Loop->getAfterIP()); 227 Builder.CreateRetVoid(); 228 229 return Loop; 230 } 231 232 LLVMContext Ctx; 233 std::unique_ptr<Module> M; 234 Function *F; 235 BasicBlock *BB; 236 DebugLoc DL; 237 }; 238 239 class OpenMPIRBuilderTestWithParams 240 : public OpenMPIRBuilderTest, 241 public ::testing::WithParamInterface<omp::OMPScheduleType> {}; 242 243 class OpenMPIRBuilderTestWithIVBits 244 : public OpenMPIRBuilderTest, 245 public ::testing::WithParamInterface<int> {}; 246 247 // Returns the value stored in the given allocation. Returns null if the given 248 // value is not a result of an InstTy instruction, if no value is stored or if 249 // there is more than one store. 250 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) { 251 Instruction *Inst = dyn_cast<InstTy>(AllocaValue); 252 if (!Inst) 253 return nullptr; 254 StoreInst *Store = nullptr; 255 for (Use &U : Inst->uses()) { 256 if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) { 257 EXPECT_EQ(Store, nullptr); 258 Store = CandidateStore; 259 } 260 } 261 if (!Store) 262 return nullptr; 263 return Store->getValueOperand(); 264 } 265 266 // Returns the value stored in the aggregate argument of an outlined function, 267 // or nullptr if it is not found. 268 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate, 269 unsigned Idx) { 270 GetElementPtrInst *GEPAtIdx = nullptr; 271 // Find GEP instruction at that index. 272 for (User *Usr : Aggregate->users()) { 273 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr); 274 if (!GEP) 275 continue; 276 277 if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx)) 278 continue; 279 280 EXPECT_EQ(GEPAtIdx, nullptr); 281 GEPAtIdx = GEP; 282 } 283 284 EXPECT_NE(GEPAtIdx, nullptr); 285 EXPECT_EQ(GEPAtIdx->getNumUses(), 1U); 286 287 // Find the value stored to the aggregate. 
288 StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin()); 289 Value *StoredAggValue = StoreToAgg->getValueOperand(); 290 291 Value *StoredValue = nullptr; 292 293 // Find the value stored to the value stored in the aggregate. 294 for (User *Usr : StoredAggValue->users()) { 295 StoreInst *Store = dyn_cast<StoreInst>(Usr); 296 if (!Store) 297 continue; 298 299 if (Store->getPointerOperand() != StoredAggValue) 300 continue; 301 302 EXPECT_EQ(StoredValue, nullptr); 303 StoredValue = Store->getValueOperand(); 304 } 305 306 return StoredValue; 307 } 308 309 // Returns the aggregate that the value is originating from. 310 static Value *findAggregateFromValue(Value *V) { 311 // Expects a load instruction that loads from the aggregate. 312 LoadInst *Load = dyn_cast<LoadInst>(V); 313 EXPECT_NE(Load, nullptr); 314 // Find the GEP instruction used in the load instruction. 315 GetElementPtrInst *GEP = 316 dyn_cast<GetElementPtrInst>(Load->getPointerOperand()); 317 EXPECT_NE(GEP, nullptr); 318 // Find the aggregate used in the GEP instruction. 
319 Value *Aggregate = GEP->getPointerOperand(); 320 321 return Aggregate; 322 } 323 324 TEST_F(OpenMPIRBuilderTest, CreateBarrier) { 325 OpenMPIRBuilder OMPBuilder(*M); 326 OMPBuilder.initialize(); 327 328 IRBuilder<> Builder(BB); 329 330 OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for); 331 EXPECT_TRUE(M->global_empty()); 332 EXPECT_EQ(M->size(), 1U); 333 EXPECT_EQ(F->size(), 1U); 334 EXPECT_EQ(BB->size(), 0U); 335 336 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 337 OMPBuilder.createBarrier(Loc, OMPD_for); 338 EXPECT_FALSE(M->global_empty()); 339 EXPECT_EQ(M->size(), 3U); 340 EXPECT_EQ(F->size(), 1U); 341 EXPECT_EQ(BB->size(), 2U); 342 343 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 344 EXPECT_NE(GTID, nullptr); 345 EXPECT_EQ(GTID->arg_size(), 1U); 346 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 347 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 348 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 349 350 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 351 EXPECT_NE(Barrier, nullptr); 352 EXPECT_EQ(Barrier->arg_size(), 2U); 353 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier"); 354 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 355 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 356 357 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 358 359 Builder.CreateUnreachable(); 360 EXPECT_FALSE(verifyModule(*M, &errs())); 361 } 362 363 TEST_F(OpenMPIRBuilderTest, CreateCancel) { 364 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 365 OpenMPIRBuilder OMPBuilder(*M); 366 OMPBuilder.initialize(); 367 368 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 369 new UnreachableInst(Ctx, CBB); 370 auto FiniCB = [&](InsertPointTy IP) { 371 ASSERT_NE(IP.getBlock(), nullptr); 372 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 373 BranchInst::Create(CBB, IP.getBlock()); 374 }; 375 
OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); 376 377 IRBuilder<> Builder(BB); 378 379 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 380 auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel); 381 Builder.restoreIP(NewIP); 382 EXPECT_FALSE(M->global_empty()); 383 EXPECT_EQ(M->size(), 4U); 384 EXPECT_EQ(F->size(), 4U); 385 EXPECT_EQ(BB->size(), 4U); 386 387 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 388 EXPECT_NE(GTID, nullptr); 389 EXPECT_EQ(GTID->arg_size(), 1U); 390 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 391 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 392 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 393 394 CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode()); 395 EXPECT_NE(Cancel, nullptr); 396 EXPECT_EQ(Cancel->arg_size(), 3U); 397 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 398 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 399 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 400 EXPECT_EQ(Cancel->getNumUses(), 1U); 401 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 402 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 403 EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); 404 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 405 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 406 EXPECT_NE(GTID1, nullptr); 407 EXPECT_EQ(GTID1->arg_size(), 1U); 408 EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 409 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 410 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 411 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 412 EXPECT_NE(Barrier, nullptr); 413 EXPECT_EQ(Barrier->arg_size(), 2U); 414 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 415 
EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 416 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 417 EXPECT_EQ(Barrier->getNumUses(), 0U); 418 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 419 1U); 420 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 421 422 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 423 424 OMPBuilder.popFinalizationCB(); 425 426 Builder.CreateUnreachable(); 427 EXPECT_FALSE(verifyModule(*M, &errs())); 428 } 429 430 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { 431 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 432 OpenMPIRBuilder OMPBuilder(*M); 433 OMPBuilder.initialize(); 434 435 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 436 new UnreachableInst(Ctx, CBB); 437 auto FiniCB = [&](InsertPointTy IP) { 438 ASSERT_NE(IP.getBlock(), nullptr); 439 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 440 BranchInst::Create(CBB, IP.getBlock()); 441 }; 442 OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); 443 444 IRBuilder<> Builder(BB); 445 446 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 447 auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel); 448 Builder.restoreIP(NewIP); 449 EXPECT_FALSE(M->global_empty()); 450 EXPECT_EQ(M->size(), 4U); 451 EXPECT_EQ(F->size(), 7U); 452 EXPECT_EQ(BB->size(), 1U); 453 ASSERT_TRUE(isa<BranchInst>(BB->getTerminator())); 454 ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); 455 BB = BB->getTerminator()->getSuccessor(0); 456 EXPECT_EQ(BB->size(), 4U); 457 458 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 459 EXPECT_NE(GTID, nullptr); 460 EXPECT_EQ(GTID->arg_size(), 1U); 461 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 462 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 463 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 464 465 CallInst *Cancel = 
dyn_cast<CallInst>(GTID->getNextNode()); 466 EXPECT_NE(Cancel, nullptr); 467 EXPECT_EQ(Cancel->arg_size(), 3U); 468 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 469 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 470 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 471 EXPECT_EQ(Cancel->getNumUses(), 1U); 472 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 473 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 474 EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); 475 EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), 476 NewIP.getBlock()); 477 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 478 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 479 EXPECT_NE(GTID1, nullptr); 480 EXPECT_EQ(GTID1->arg_size(), 1U); 481 EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 482 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 483 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 484 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 485 EXPECT_NE(Barrier, nullptr); 486 EXPECT_EQ(Barrier->arg_size(), 2U); 487 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 488 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 489 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 490 EXPECT_EQ(Barrier->getNumUses(), 0U); 491 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 492 1U); 493 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 494 495 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 496 497 OMPBuilder.popFinalizationCB(); 498 499 Builder.CreateUnreachable(); 500 EXPECT_FALSE(verifyModule(*M, &errs())); 501 } 502 503 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { 504 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 505 OpenMPIRBuilder OMPBuilder(*M); 506 
OMPBuilder.initialize(); 507 508 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 509 new UnreachableInst(Ctx, CBB); 510 auto FiniCB = [&](InsertPointTy IP) { 511 ASSERT_NE(IP.getBlock(), nullptr); 512 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 513 BranchInst::Create(CBB, IP.getBlock()); 514 }; 515 OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); 516 517 IRBuilder<> Builder(BB); 518 519 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 520 auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for); 521 Builder.restoreIP(NewIP); 522 EXPECT_FALSE(M->global_empty()); 523 EXPECT_EQ(M->size(), 3U); 524 EXPECT_EQ(F->size(), 4U); 525 EXPECT_EQ(BB->size(), 4U); 526 527 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 528 EXPECT_NE(GTID, nullptr); 529 EXPECT_EQ(GTID->arg_size(), 1U); 530 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 531 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 532 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 533 534 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 535 EXPECT_NE(Barrier, nullptr); 536 EXPECT_EQ(Barrier->arg_size(), 2U); 537 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 538 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 539 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 540 EXPECT_EQ(Barrier->getNumUses(), 1U); 541 Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); 542 EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); 543 EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); 544 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); 545 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 546 1U); 547 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), 548 CBB); 549 550 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 551 552 OMPBuilder.popFinalizationCB(); 553 554 Builder.CreateUnreachable(); 
555 EXPECT_FALSE(verifyModule(*M, &errs())); 556 } 557 558 TEST_F(OpenMPIRBuilderTest, DbgLoc) { 559 OpenMPIRBuilder OMPBuilder(*M); 560 OMPBuilder.initialize(); 561 F->setName("func"); 562 563 IRBuilder<> Builder(BB); 564 565 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 566 OMPBuilder.createBarrier(Loc, OMPD_for); 567 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 568 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 569 EXPECT_EQ(GTID->getDebugLoc(), DL); 570 EXPECT_EQ(Barrier->getDebugLoc(), DL); 571 EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0))); 572 if (!isa<GlobalVariable>(Barrier->getOperand(0))) 573 return; 574 GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0)); 575 EXPECT_TRUE(Ident->hasInitializer()); 576 if (!Ident->hasInitializer()) 577 return; 578 Constant *Initializer = Ident->getInitializer(); 579 EXPECT_TRUE( 580 isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts())); 581 GlobalVariable *SrcStrGlob = 582 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 583 if (!SrcStrGlob) 584 return; 585 EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer())); 586 ConstantDataArray *SrcSrc = 587 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 588 if (!SrcSrc) 589 return; 590 EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;"); 591 } 592 593 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { 594 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 595 std::string oldDLStr = M->getDataLayoutStr(); 596 M->setDataLayout( 597 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 598 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 599 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 600 OpenMPIRBuilder OMPBuilder(*M); 601 OMPBuilder.Config.IsTargetDevice = true; 602 OMPBuilder.initialize(); 603 F->setName("func"); 604 IRBuilder<> Builder(BB); 605 BasicBlock *EnterBB = 
BasicBlock::Create(Ctx, "parallel.enter", F); 606 Builder.CreateBr(EnterBB); 607 Builder.SetInsertPoint(EnterBB); 608 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 609 610 AllocaInst *PrivAI = nullptr; 611 612 unsigned NumBodiesGenerated = 0; 613 unsigned NumPrivatizedVars = 0; 614 unsigned NumFinalizationPoints = 0; 615 616 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 617 ++NumBodiesGenerated; 618 619 Builder.restoreIP(AllocaIP); 620 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 621 Builder.CreateStore(F->arg_begin(), PrivAI); 622 623 Builder.restoreIP(CodeGenIP); 624 Value *PrivLoad = 625 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 626 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 627 Instruction *ThenTerm, *ElseTerm; 628 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 629 &ThenTerm, &ElseTerm); 630 }; 631 632 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 633 Value &Orig, Value &Inner, 634 Value *&ReplacementValue) -> InsertPointTy { 635 ++NumPrivatizedVars; 636 637 if (!isa<AllocaInst>(Orig)) { 638 EXPECT_EQ(&Orig, F->arg_begin()); 639 ReplacementValue = &Inner; 640 return CodeGenIP; 641 } 642 643 // Since the original value is an allocation, it has a pointer type and 644 // therefore no additional wrapping should happen. 645 EXPECT_EQ(&Orig, &Inner); 646 647 // Trivial copy (=firstprivate). 
648 Builder.restoreIP(AllocaIP); 649 Type *VTy = ReplacementValue->getType(); 650 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 651 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 652 Builder.restoreIP(CodeGenIP); 653 Builder.CreateStore(V, ReplacementValue); 654 return CodeGenIP; 655 }; 656 657 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 658 659 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 660 F->getEntryBlock().getFirstInsertionPt()); 661 IRBuilder<>::InsertPoint AfterIP = 662 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 663 nullptr, nullptr, OMP_PROC_BIND_default, false); 664 665 EXPECT_EQ(NumBodiesGenerated, 1U); 666 EXPECT_EQ(NumPrivatizedVars, 1U); 667 EXPECT_EQ(NumFinalizationPoints, 1U); 668 669 Builder.restoreIP(AfterIP); 670 Builder.CreateRetVoid(); 671 672 OMPBuilder.finalize(); 673 Function *OutlinedFn = PrivAI->getFunction(); 674 EXPECT_FALSE(verifyModule(*M, &errs())); 675 EXPECT_NE(OutlinedFn, F); 676 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 677 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 678 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 679 680 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 681 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 682 // Make sure that arguments are pointers in 0 address address space 683 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), 684 PointerType::get(M->getContext(), 0)); 685 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), 686 PointerType::get(M->getContext(), 0)); 687 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 688 PointerType::get(M->getContext(), 0)); 689 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 690 EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 691 User *Usr = OutlinedFn->user_back(); 692 ASSERT_TRUE(isa<CallInst>(Usr)); 693 CallInst *Parallel51CI = dyn_cast<CallInst>(Usr); 694 ASSERT_NE(Parallel51CI, nullptr); 695 696 
EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51"); 697 EXPECT_EQ(Parallel51CI->arg_size(), 9U); 698 EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn); 699 EXPECT_TRUE( 700 isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts())); 701 EXPECT_EQ(Parallel51CI, Usr); 702 M->setDataLayout(oldDLStr); 703 } 704 705 TEST_F(OpenMPIRBuilderTest, ParallelSimple) { 706 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 707 OpenMPIRBuilder OMPBuilder(*M); 708 OMPBuilder.Config.IsTargetDevice = false; 709 OMPBuilder.initialize(); 710 F->setName("func"); 711 IRBuilder<> Builder(BB); 712 713 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 714 Builder.CreateBr(EnterBB); 715 Builder.SetInsertPoint(EnterBB); 716 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 717 718 AllocaInst *PrivAI = nullptr; 719 720 unsigned NumBodiesGenerated = 0; 721 unsigned NumPrivatizedVars = 0; 722 unsigned NumFinalizationPoints = 0; 723 724 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 725 ++NumBodiesGenerated; 726 727 Builder.restoreIP(AllocaIP); 728 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 729 Builder.CreateStore(F->arg_begin(), PrivAI); 730 731 Builder.restoreIP(CodeGenIP); 732 Value *PrivLoad = 733 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 734 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 735 Instruction *ThenTerm, *ElseTerm; 736 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 737 &ThenTerm, &ElseTerm); 738 }; 739 740 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 741 Value &Orig, Value &Inner, 742 Value *&ReplacementValue) -> InsertPointTy { 743 ++NumPrivatizedVars; 744 745 if (!isa<AllocaInst>(Orig)) { 746 EXPECT_EQ(&Orig, F->arg_begin()); 747 ReplacementValue = &Inner; 748 return CodeGenIP; 749 } 750 751 // Since the original value is an allocation, it has a pointer type and 752 // 
therefore no additional wrapping should happen. 753 EXPECT_EQ(&Orig, &Inner); 754 755 // Trivial copy (=firstprivate). 756 Builder.restoreIP(AllocaIP); 757 Type *VTy = ReplacementValue->getType(); 758 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 759 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 760 Builder.restoreIP(CodeGenIP); 761 Builder.CreateStore(V, ReplacementValue); 762 return CodeGenIP; 763 }; 764 765 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 766 767 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 768 F->getEntryBlock().getFirstInsertionPt()); 769 IRBuilder<>::InsertPoint AfterIP = 770 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 771 nullptr, nullptr, OMP_PROC_BIND_default, false); 772 EXPECT_EQ(NumBodiesGenerated, 1U); 773 EXPECT_EQ(NumPrivatizedVars, 1U); 774 EXPECT_EQ(NumFinalizationPoints, 1U); 775 776 Builder.restoreIP(AfterIP); 777 Builder.CreateRetVoid(); 778 779 OMPBuilder.finalize(); 780 781 EXPECT_NE(PrivAI, nullptr); 782 Function *OutlinedFn = PrivAI->getFunction(); 783 EXPECT_NE(F, OutlinedFn); 784 EXPECT_FALSE(verifyModule(*M, &errs())); 785 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 786 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 787 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 788 789 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 790 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 791 792 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 793 EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 794 User *Usr = OutlinedFn->user_back(); 795 ASSERT_TRUE(isa<CallInst>(Usr)); 796 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 797 ASSERT_NE(ForkCI, nullptr); 798 799 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 800 EXPECT_EQ(ForkCI->arg_size(), 4U); 801 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 802 EXPECT_EQ(ForkCI->getArgOperand(1), 803 
ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); 804 EXPECT_EQ(ForkCI, Usr); 805 Value *StoredValue = 806 findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0); 807 EXPECT_EQ(StoredValue, F->arg_begin()); 808 } 809 810 TEST_F(OpenMPIRBuilderTest, ParallelNested) { 811 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 812 OpenMPIRBuilder OMPBuilder(*M); 813 OMPBuilder.Config.IsTargetDevice = false; 814 OMPBuilder.initialize(); 815 F->setName("func"); 816 IRBuilder<> Builder(BB); 817 818 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 819 Builder.CreateBr(EnterBB); 820 Builder.SetInsertPoint(EnterBB); 821 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 822 823 unsigned NumInnerBodiesGenerated = 0; 824 unsigned NumOuterBodiesGenerated = 0; 825 unsigned NumFinalizationPoints = 0; 826 827 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 828 ++NumInnerBodiesGenerated; 829 }; 830 831 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 832 Value &Orig, Value &Inner, 833 Value *&ReplacementValue) -> InsertPointTy { 834 // Trivial copy (=firstprivate). 
835 Builder.restoreIP(AllocaIP); 836 Type *VTy = ReplacementValue->getType(); 837 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 838 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 839 Builder.restoreIP(CodeGenIP); 840 Builder.CreateStore(V, ReplacementValue); 841 return CodeGenIP; 842 }; 843 844 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 845 846 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 847 ++NumOuterBodiesGenerated; 848 Builder.restoreIP(CodeGenIP); 849 BasicBlock *CGBB = CodeGenIP.getBlock(); 850 BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 851 CGBB->getTerminator()->eraseFromParent(); 852 ; 853 854 IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel( 855 InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, 856 FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); 857 858 Builder.restoreIP(AfterIP); 859 Builder.CreateBr(NewBB); 860 }; 861 862 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 863 F->getEntryBlock().getFirstInsertionPt()); 864 IRBuilder<>::InsertPoint AfterIP = 865 OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, 866 nullptr, nullptr, OMP_PROC_BIND_default, false); 867 868 EXPECT_EQ(NumInnerBodiesGenerated, 1U); 869 EXPECT_EQ(NumOuterBodiesGenerated, 1U); 870 EXPECT_EQ(NumFinalizationPoints, 2U); 871 872 Builder.restoreIP(AfterIP); 873 Builder.CreateRetVoid(); 874 875 OMPBuilder.finalize(); 876 877 EXPECT_EQ(M->size(), 5U); 878 for (Function &OutlinedFn : *M) { 879 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 880 continue; 881 EXPECT_FALSE(verifyModule(*M, &errs())); 882 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 883 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 884 EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 885 886 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 887 
EXPECT_EQ(OutlinedFn.arg_size(), 2U); 888 889 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 890 User *Usr = OutlinedFn.user_back(); 891 ASSERT_TRUE(isa<CallInst>(Usr)); 892 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 893 ASSERT_NE(ForkCI, nullptr); 894 895 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 896 EXPECT_EQ(ForkCI->arg_size(), 3U); 897 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 898 EXPECT_EQ(ForkCI->getArgOperand(1), 899 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 900 EXPECT_EQ(ForkCI, Usr); 901 } 902 } 903 904 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { 905 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 906 OpenMPIRBuilder OMPBuilder(*M); 907 OMPBuilder.Config.IsTargetDevice = false; 908 OMPBuilder.initialize(); 909 F->setName("func"); 910 IRBuilder<> Builder(BB); 911 912 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 913 Builder.CreateBr(EnterBB); 914 Builder.SetInsertPoint(EnterBB); 915 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 916 917 unsigned NumInnerBodiesGenerated = 0; 918 unsigned NumOuterBodiesGenerated = 0; 919 unsigned NumFinalizationPoints = 0; 920 921 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 922 ++NumInnerBodiesGenerated; 923 }; 924 925 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 926 Value &Orig, Value &Inner, 927 Value *&ReplacementValue) -> InsertPointTy { 928 // Trivial copy (=firstprivate). 
929 Builder.restoreIP(AllocaIP); 930 Type *VTy = ReplacementValue->getType(); 931 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 932 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 933 Builder.restoreIP(CodeGenIP); 934 Builder.CreateStore(V, ReplacementValue); 935 return CodeGenIP; 936 }; 937 938 auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; 939 940 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 941 ++NumOuterBodiesGenerated; 942 Builder.restoreIP(CodeGenIP); 943 BasicBlock *CGBB = CodeGenIP.getBlock(); 944 BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 945 BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); 946 CGBB->getTerminator()->eraseFromParent(); 947 ; 948 NewBB1->getTerminator()->eraseFromParent(); 949 ; 950 951 IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel( 952 InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, 953 FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); 954 955 Builder.restoreIP(AfterIP1); 956 Builder.CreateBr(NewBB1); 957 958 IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel( 959 InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB, 960 FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); 961 962 Builder.restoreIP(AfterIP2); 963 Builder.CreateBr(NewBB2); 964 }; 965 966 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 967 F->getEntryBlock().getFirstInsertionPt()); 968 IRBuilder<>::InsertPoint AfterIP = 969 OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, 970 nullptr, nullptr, OMP_PROC_BIND_default, false); 971 972 EXPECT_EQ(NumInnerBodiesGenerated, 2U); 973 EXPECT_EQ(NumOuterBodiesGenerated, 1U); 974 EXPECT_EQ(NumFinalizationPoints, 3U); 975 976 Builder.restoreIP(AfterIP); 977 Builder.CreateRetVoid(); 978 979 OMPBuilder.finalize(); 980 981 EXPECT_EQ(M->size(), 6U); 982 for (Function &OutlinedFn : 
*M) { 983 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 984 continue; 985 EXPECT_FALSE(verifyModule(*M, &errs())); 986 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 987 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 988 EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 989 990 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 991 EXPECT_EQ(OutlinedFn.arg_size(), 2U); 992 993 unsigned NumAllocas = 0; 994 for (Instruction &I : instructions(OutlinedFn)) 995 NumAllocas += isa<AllocaInst>(I); 996 EXPECT_EQ(NumAllocas, 1U); 997 998 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 999 User *Usr = OutlinedFn.user_back(); 1000 ASSERT_TRUE(isa<CallInst>(Usr)); 1001 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 1002 ASSERT_NE(ForkCI, nullptr); 1003 1004 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 1005 EXPECT_EQ(ForkCI->arg_size(), 3U); 1006 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1007 EXPECT_EQ(ForkCI->getArgOperand(1), 1008 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 1009 EXPECT_EQ(ForkCI, Usr); 1010 } 1011 } 1012 1013 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { 1014 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1015 OpenMPIRBuilder OMPBuilder(*M); 1016 OMPBuilder.Config.IsTargetDevice = false; 1017 OMPBuilder.initialize(); 1018 F->setName("func"); 1019 IRBuilder<> Builder(BB); 1020 1021 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1022 Builder.CreateBr(EnterBB); 1023 Builder.SetInsertPoint(EnterBB); 1024 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1025 1026 AllocaInst *PrivAI = nullptr; 1027 1028 unsigned NumBodiesGenerated = 0; 1029 unsigned NumPrivatizedVars = 0; 1030 unsigned NumFinalizationPoints = 0; 1031 1032 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1033 ++NumBodiesGenerated; 1034 1035 Builder.restoreIP(AllocaIP); 1036 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 1037 
Builder.CreateStore(F->arg_begin(), PrivAI); 1038 1039 Builder.restoreIP(CodeGenIP); 1040 Value *PrivLoad = 1041 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 1042 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 1043 Instruction *ThenTerm, *ElseTerm; 1044 SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm, 1045 &ElseTerm); 1046 }; 1047 1048 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1049 Value &Orig, Value &Inner, 1050 Value *&ReplacementValue) -> InsertPointTy { 1051 ++NumPrivatizedVars; 1052 1053 if (!isa<AllocaInst>(Orig)) { 1054 EXPECT_EQ(&Orig, F->arg_begin()); 1055 ReplacementValue = &Inner; 1056 return CodeGenIP; 1057 } 1058 1059 // Since the original value is an allocation, it has a pointer type and 1060 // therefore no additional wrapping should happen. 1061 EXPECT_EQ(&Orig, &Inner); 1062 1063 // Trivial copy (=firstprivate). 1064 Builder.restoreIP(AllocaIP); 1065 Type *VTy = ReplacementValue->getType(); 1066 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 1067 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 1068 Builder.restoreIP(CodeGenIP); 1069 Builder.CreateStore(V, ReplacementValue); 1070 return CodeGenIP; 1071 }; 1072 1073 auto FiniCB = [&](InsertPointTy CodeGenIP) { 1074 ++NumFinalizationPoints; 1075 // No destructors. 
1076 }; 1077 1078 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1079 F->getEntryBlock().getFirstInsertionPt()); 1080 IRBuilder<>::InsertPoint AfterIP = 1081 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1082 Builder.CreateIsNotNull(F->arg_begin()), 1083 nullptr, OMP_PROC_BIND_default, false); 1084 1085 EXPECT_EQ(NumBodiesGenerated, 1U); 1086 EXPECT_EQ(NumPrivatizedVars, 1U); 1087 EXPECT_EQ(NumFinalizationPoints, 1U); 1088 1089 Builder.restoreIP(AfterIP); 1090 Builder.CreateRetVoid(); 1091 OMPBuilder.finalize(); 1092 1093 EXPECT_NE(PrivAI, nullptr); 1094 Function *OutlinedFn = PrivAI->getFunction(); 1095 EXPECT_NE(F, OutlinedFn); 1096 EXPECT_FALSE(verifyModule(*M, &errs())); 1097 1098 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 1099 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 1100 1101 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 1102 ASSERT_EQ(OutlinedFn->getNumUses(), 1U); 1103 1104 CallInst *ForkCI = nullptr; 1105 for (User *Usr : OutlinedFn->users()) { 1106 ASSERT_TRUE(isa<CallInst>(Usr)); 1107 ForkCI = cast<CallInst>(Usr); 1108 } 1109 1110 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if"); 1111 EXPECT_EQ(ForkCI->arg_size(), 5U); 1112 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1113 EXPECT_EQ(ForkCI->getArgOperand(1), 1114 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 1115 EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx)); 1116 } 1117 1118 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { 1119 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1120 OpenMPIRBuilder OMPBuilder(*M); 1121 OMPBuilder.Config.IsTargetDevice = false; 1122 OMPBuilder.initialize(); 1123 F->setName("func"); 1124 IRBuilder<> Builder(BB); 1125 1126 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1127 Builder.CreateBr(EnterBB); 1128 Builder.SetInsertPoint(EnterBB); 1129 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1130 1131 unsigned 
NumBodiesGenerated = 0; 1132 unsigned NumPrivatizedVars = 0; 1133 unsigned NumFinalizationPoints = 0; 1134 1135 CallInst *CheckedBarrier = nullptr; 1136 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1137 ++NumBodiesGenerated; 1138 1139 Builder.restoreIP(CodeGenIP); 1140 1141 // Create three barriers, two cancel barriers but only one checked. 1142 Function *CBFn, *BFn; 1143 1144 Builder.restoreIP( 1145 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); 1146 1147 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1148 BFn = M->getFunction("__kmpc_barrier"); 1149 ASSERT_NE(CBFn, nullptr); 1150 ASSERT_EQ(BFn, nullptr); 1151 ASSERT_EQ(CBFn->getNumUses(), 1U); 1152 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1153 ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); 1154 CheckedBarrier = cast<CallInst>(CBFn->user_back()); 1155 1156 Builder.restoreIP( 1157 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); 1158 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1159 BFn = M->getFunction("__kmpc_barrier"); 1160 ASSERT_NE(CBFn, nullptr); 1161 ASSERT_NE(BFn, nullptr); 1162 ASSERT_EQ(CBFn->getNumUses(), 1U); 1163 ASSERT_EQ(BFn->getNumUses(), 1U); 1164 ASSERT_TRUE(isa<CallInst>(BFn->user_back())); 1165 ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); 1166 1167 Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, 1168 false, false)); 1169 ASSERT_EQ(CBFn->getNumUses(), 2U); 1170 ASSERT_EQ(BFn->getNumUses(), 1U); 1171 ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); 1172 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1173 ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); 1174 }; 1175 1176 auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, 1177 Value *&) -> InsertPointTy { 1178 ++NumPrivatizedVars; 1179 llvm_unreachable("No privatization callback call expected!"); 1180 }; 1181 1182 FunctionType *FakeDestructorTy = 1183 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 1184 
/*isVarArg=*/false); 1185 auto *FakeDestructor = Function::Create( 1186 FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); 1187 1188 auto FiniCB = [&](InsertPointTy IP) { 1189 ++NumFinalizationPoints; 1190 Builder.restoreIP(IP); 1191 Builder.CreateCall(FakeDestructor, 1192 {Builder.getInt32(NumFinalizationPoints)}); 1193 }; 1194 1195 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1196 F->getEntryBlock().getFirstInsertionPt()); 1197 IRBuilder<>::InsertPoint AfterIP = 1198 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1199 Builder.CreateIsNotNull(F->arg_begin()), 1200 nullptr, OMP_PROC_BIND_default, true); 1201 1202 EXPECT_EQ(NumBodiesGenerated, 1U); 1203 EXPECT_EQ(NumPrivatizedVars, 0U); 1204 EXPECT_EQ(NumFinalizationPoints, 2U); 1205 EXPECT_EQ(FakeDestructor->getNumUses(), 2U); 1206 1207 Builder.restoreIP(AfterIP); 1208 Builder.CreateRetVoid(); 1209 OMPBuilder.finalize(); 1210 1211 EXPECT_FALSE(verifyModule(*M, &errs())); 1212 1213 BasicBlock *ExitBB = nullptr; 1214 for (const User *Usr : FakeDestructor->users()) { 1215 const CallInst *CI = dyn_cast<CallInst>(Usr); 1216 ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); 1217 ASSERT_TRUE(isa<BranchInst>(CI->getNextNode())); 1218 ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); 1219 if (ExitBB) 1220 ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); 1221 else 1222 ExitBB = CI->getNextNode()->getSuccessor(0); 1223 ASSERT_EQ(ExitBB->size(), 1U); 1224 if (!isa<ReturnInst>(ExitBB->front())) { 1225 ASSERT_TRUE(isa<BranchInst>(ExitBB->front())); 1226 ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U); 1227 ASSERT_TRUE(isa<ReturnInst>( 1228 cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front())); 1229 } 1230 } 1231 } 1232 1233 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { 1234 OpenMPIRBuilder OMPBuilder(*M); 1235 OMPBuilder.Config.IsTargetDevice = false; 1236 OMPBuilder.initialize(); 1237 F->setName("func"); 1238 IRBuilder<> 
Builder(BB); 1239 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1240 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1241 1242 Type *I32Ty = Type::getInt32Ty(M->getContext()); 1243 Type *PtrTy = PointerType::get(M->getContext(), 0); 1244 Type *StructTy = StructType::get(I32Ty, PtrTy); 1245 Type *VoidTy = Type::getVoidTy(M->getContext()); 1246 FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty); 1247 FunctionCallee TakeI32Func = 1248 M->getOrInsertFunction("take_i32", VoidTy, I32Ty); 1249 FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy); 1250 FunctionCallee TakeI32PtrFunc = 1251 M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy); 1252 FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy); 1253 FunctionCallee TakeStructFunc = 1254 M->getOrInsertFunction("take_struct", VoidTy, StructTy); 1255 FunctionCallee RetStructPtrFunc = 1256 M->getOrInsertFunction("ret_structptr", PtrTy); 1257 FunctionCallee TakeStructPtrFunc = 1258 M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy); 1259 Value *I32Val = Builder.CreateCall(RetI32Func); 1260 Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc); 1261 Value *StructVal = Builder.CreateCall(RetStructFunc); 1262 Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); 1263 1264 Instruction *Internal; 1265 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1266 IRBuilder<>::InsertPointGuard Guard(Builder); 1267 Builder.restoreIP(CodeGenIP); 1268 Internal = Builder.CreateCall(TakeI32Func, I32Val); 1269 Builder.CreateCall(TakeI32PtrFunc, I32PtrVal); 1270 Builder.CreateCall(TakeStructFunc, StructVal); 1271 Builder.CreateCall(TakeStructPtrFunc, StructPtrVal); 1272 }; 1273 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, 1274 Value &Inner, Value *&ReplacementValue) { 1275 ReplacementValue = &Inner; 1276 return CodeGenIP; 1277 }; 1278 auto FiniCB = [](InsertPointTy) {}; 1279 1280 
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1281 F->getEntryBlock().getFirstInsertionPt()); 1282 IRBuilder<>::InsertPoint AfterIP = 1283 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1284 nullptr, nullptr, OMP_PROC_BIND_default, false); 1285 Builder.restoreIP(AfterIP); 1286 Builder.CreateRetVoid(); 1287 1288 OMPBuilder.finalize(); 1289 1290 EXPECT_FALSE(verifyModule(*M, &errs())); 1291 Function *OutlinedFn = Internal->getFunction(); 1292 1293 Type *Arg2Type = OutlinedFn->getArg(2)->getType(); 1294 EXPECT_TRUE(Arg2Type->isPointerTy()); 1295 } 1296 1297 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { 1298 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1299 OpenMPIRBuilder OMPBuilder(*M); 1300 OMPBuilder.initialize(); 1301 IRBuilder<> Builder(BB); 1302 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1303 Value *TripCount = F->getArg(0); 1304 1305 unsigned NumBodiesGenerated = 0; 1306 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { 1307 NumBodiesGenerated += 1; 1308 1309 Builder.restoreIP(CodeGenIP); 1310 1311 Value *Cmp = Builder.CreateICmpEQ(LC, TripCount); 1312 Instruction *ThenTerm, *ElseTerm; 1313 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 1314 &ThenTerm, &ElseTerm); 1315 }; 1316 1317 CanonicalLoopInfo *Loop = 1318 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount); 1319 1320 Builder.restoreIP(Loop->getAfterIP()); 1321 ReturnInst *RetInst = Builder.CreateRetVoid(); 1322 OMPBuilder.finalize(); 1323 1324 Loop->assertOK(); 1325 EXPECT_FALSE(verifyModule(*M, &errs())); 1326 1327 EXPECT_EQ(NumBodiesGenerated, 1U); 1328 1329 // Verify control flow structure (in addition to Loop->assertOK()). 
EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
  EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());

  // The induction variable is a PHI in the header with the trip count's type.
  Instruction *IndVar = Loop->getIndVar();
  EXPECT_TRUE(isa<PHINode>(IndVar));
  EXPECT_EQ(IndVar->getType(), TripCount->getType());
  EXPECT_EQ(IndVar->getParent(), Loop->getHeader());

  EXPECT_EQ(Loop->getTripCount(), TripCount);

  // The first instruction of the body is expected to be the compare emitted
  // by the body callback, with the induction variable as its first operand.
  BasicBlock *Body = Loop->getBody();
  Instruction *CmpInst = &Body->front();
  EXPECT_TRUE(isa<ICmpInst>(CmpInst));
  EXPECT_EQ(CmpInst->getOperand(0), IndVar);

  // Every successor of the body must lead directly to the latch's single
  // predecessor.
  BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
  EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
    return SuccBB->getSingleSuccessor() == LatchPred;
  }));

  EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
}

// Checks the trip counts createCanonicalLoop computes for constant
// start/stop/step values on a 16-bit loop counter.
TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);

  // Check the trip count is computed correctly. We generate the canonical loop
  // but rely on the IRBuilder's constant folder to compute the final result
  // since all inputs are constant. To verify overflow situations, limit the
  // trip count / loop counter widths to 16 bits.
  //
  // Each call emits one loop with an empty body at the builder's current
  // position, moves the builder to after that loop, and returns the loop's
  // trip count zero-extended to 64 bits. The cast<ConstantInt> requires the
  // trip count to have been folded to a constant.
  auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
                           bool IsSigned, bool InclusiveStop) -> int64_t {
    OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
    Type *LCTy = Type::getInt16Ty(Ctx);
    Value *StartVal = ConstantInt::get(LCTy, Start);
    Value *StopVal = ConstantInt::get(LCTy, Stop);
    Value *StepVal = ConstantInt::get(LCTy, Step);
    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
    CanonicalLoopInfo *Loop =
        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
                                       StepVal, IsSigned, InclusiveStop);
    Loop->assertOK();
    Builder.restoreIP(Loop->getAfterIP());
    Value *TripCount = Loop->getTripCount();
    return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
  };

  // Unsigned counter, exclusive stop.
  EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
  EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
  EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
  EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
  EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
  EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
  EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
  EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
  EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
  EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);

  // Further unsigned cases, including inclusive stops.
  EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
  EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
  EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
  EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
  EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);

  // Signed counter, including negative steps and negative bounds.
  EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
  EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
  EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
  EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
  EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
  EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
  EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);

  // Signed counter at the limits of the 16-bit range.
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
  EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
  EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
  EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
  EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
  EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
  EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
  EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);

  // Finalize the function and verify it.
  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);

  Type *LCTy = F->getArg(0)->getType();
  Constant *One = ConstantInt::get(LCTy, 1);
  Constant *Two = ConstantInt::get(LCTy, 2);
  Value *OuterTripCount =
      Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
  Value *InnerTripCount =
      Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");

  // Fix an insertion point for ComputeIP.
BasicBlock *LoopNextEnter =
      BasicBlock::Create(M->getContext(), "loopnest.enter", F,
                         Builder.GetInsertBlock()->getNextNode());
  BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
  InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};

  Builder.SetInsertPoint(LoopNextEnter);
  OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);

  // Filled in by the body callbacks below so the generated calls and the
  // inner loop can be inspected after collapsing.
  CanonicalLoopInfo *InnerLoop = nullptr;
  CallInst *InbetweenLead = nullptr;
  CallInst *InbetweenTrail = nullptr;
  CallInst *Call = nullptr;
  // The outer body emits a call before the inner loop, the inner loop itself
  // (whose body emits a call using both loop counters), and a call after it.
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
    Builder.restoreIP(OuterCodeGenIP);
    InbetweenLead =
        createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});

    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
                                  Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
    };
    InnerLoop = OMPBuilder.createCanonicalLoop(
        Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");

    Builder.restoreIP(InnerLoop->getAfterIP());
    InbetweenTrail =
        createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
  };
  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
      OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");

  // Finish the function.
  Builder.restoreIP(OuterLoop->getAfterIP());
  Builder.CreateRetVoid();

  CanonicalLoopInfo *Collapsed =
      OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Verify control flow and BB order.
  BasicBlock *RefOrder[] = {
      Collapsed->getPreheader(),   Collapsed->getHeader(),
      Collapsed->getCond(),        Collapsed->getBody(),
      InbetweenLead->getParent(),  Call->getParent(),
      InbetweenTrail->getParent(), Collapsed->getLatch(),
      Collapsed->getExit(),        Collapsed->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));

  // Verify the total trip count.
  auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
  EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
  EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);

  // Verify the changed indvar.
  // The outer counter is expected to be CollapsedIV udiv InnerTripCount.
  auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
  EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
  EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
  EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
  EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());

  // The inner counter is expected to be CollapsedIV urem InnerTripCount.
  auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
  EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
  EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
  EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
  EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);

  EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
  EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
}

// Tiles a single canonical loop with a constant tile size of 7 and checks the
// resulting floor/tile loop structure and the rewritten induction variable.
TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
  OpenMPIRBuilder OMPBuilder(*M);
  CallInst *Call;
  BasicBlock *BodyCode;
  // NOTE(review): buildSingleLoopFunction is defined elsewhere in this file;
  // presumably it sets Call and BodyCode to the call in the loop body and the
  // block containing it -- confirm against the helper.
  CanonicalLoopInfo *Loop =
      buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);

  Instruction *OrigIndVar = Loop->getIndVar();
  EXPECT_EQ(Call->getOperand(1), OrigIndVar);

  // Tile the loop.
  Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops =
      OMPBuilder.tileLoops(DL, {Loop}, {TileSize});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Tiling one loop yields two loops: the floor loop first, then the tile
  // loop.
  EXPECT_EQ(GenLoops.size(), 2u);
  CanonicalLoopInfo *Floor = GenLoops[0];
  CanonicalLoopInfo *Tile = GenLoops[1];

  BasicBlock *RefOrder[] = {
      Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
      Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
      Tile->getCond(),       Tile->getBody(),      BodyCode,
      Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
      Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));

  // Check the induction variable.
  // The call's operand is expected to be (TileSize * FloorIV) + TileIV,
  // computed in the tile loop's body block.
  EXPECT_EQ(Call->getParent(), BodyCode);
  auto *Shift = cast<AddOperator>(Call->getOperand(1));
  EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
  EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
  auto *Scale = cast<MulOperator>(Shift->getOperand(0));
  EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
  EXPECT_EQ(Scale->getOperand(0), TileSize);
  EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
}

TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  Value *TripCount = F->getArg(0);
  Type *LCTy = TripCount->getType();

  BasicBlock *BodyCode = nullptr;
  CanonicalLoopInfo *InnerLoop = nullptr;
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
                                llvm::Value *OuterLC) {
    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
llvm::Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      BodyCode = Builder.GetInsertBlock();

      // Add something that consumes the induction variables to the body.
      createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
    };
    InnerLoop = OMPBuilder.createCanonicalLoop(
        OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
  };
  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
      Loc, OuterLoopBodyGenCB, TripCount, "outer");

  // Finalize the function.
  Builder.restoreIP(OuterLoop->getAfterIP());
  Builder.CreateRetVoid();

  // Tile the loop nest.
  Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
  Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
      DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Tiling two loops yields four loops: both floor loops, then both tile
  // loops.
  EXPECT_EQ(GenLoops.size(), 4u);
  CanonicalLoopInfo *Floor1 = GenLoops[0];
  CanonicalLoopInfo *Floor2 = GenLoops[1];
  CanonicalLoopInfo *Tile1 = GenLoops[2];
  CanonicalLoopInfo *Tile2 = GenLoops[3];

  BasicBlock *RefOrder[] = {
      Floor1->getPreheader(),
      Floor1->getHeader(),
      Floor1->getCond(),
      Floor1->getBody(),
      Floor2->getPreheader(),
      Floor2->getHeader(),
      Floor2->getCond(),
      Floor2->getBody(),
      Tile1->getPreheader(),
      Tile1->getHeader(),
      Tile1->getCond(),
      Tile1->getBody(),
      Tile2->getPreheader(),
      Tile2->getHeader(),
      Tile2->getCond(),
      Tile2->getBody(),
      BodyCode,
      Tile2->getLatch(),
      Tile2->getExit(),
      Tile2->getAfter(),
      Tile1->getLatch(),
      Tile1->getExit(),
      Tile1->getAfter(),
      Floor2->getLatch(),
      Floor2->getExit(),
      Floor2->getAfter(),
      Floor1->getLatch(),
      Floor1->getExit(),
      Floor1->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));
}

// Tiles a two-deep loop nest whose loops have explicit start/stop/step values
// and checks both the block order and how each original loop counter is
// recomputed from the floor and tile induction variables.
TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);
  Value *TripCount = F->getArg(0);
  Type *LCTy = TripCount->getType();

  // Both loops stop at F's first argument but use different constant starts
  // and steps.
  Value *OuterStartVal = ConstantInt::get(LCTy, 2);
  Value *OuterStopVal = TripCount;
  Value *OuterStep = ConstantInt::get(LCTy, 5);
  Value *InnerStartVal = ConstantInt::get(LCTy, 13);
  Value *InnerStopVal = TripCount;
  Value *InnerStep = ConstantInt::get(LCTy, 3);

  // Fix an insertion point for ComputeIP.
  BasicBlock *LoopNextEnter =
      BasicBlock::Create(M->getContext(), "loopnest.enter", F,
                         Builder.GetInsertBlock()->getNextNode());
  BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
  InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};

  InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
  OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});

  // Filled in by the body callbacks below for inspection after tiling.
  BasicBlock *BodyCode = nullptr;
  CanonicalLoopInfo *InnerLoop = nullptr;
  CallInst *Call = nullptr;
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
                                llvm::Value *OuterLC) {
    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
                                  llvm::Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      BodyCode = Builder.GetInsertBlock();

      // Add something that consumes the induction variable to the body.
      Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
    };
    InnerLoop = OMPBuilder.createCanonicalLoop(
        OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
        InnerStep, false, false, ComputeIP, "inner");
  };
  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
      Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
      false, ComputeIP, "outer");

  // Finalize the function.
  Builder.restoreIP(OuterLoop->getAfterIP());
  Builder.CreateRetVoid();

  // Tile the loop nest.
  Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
  Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
  std::vector<CanonicalLoopInfo *> GenLoops =
      OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});

  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // Tiling two loops yields four loops: both floor loops, then both tile
  // loops.
  EXPECT_EQ(GenLoops.size(), 4u);
  CanonicalLoopInfo *Floor0 = GenLoops[0];
  CanonicalLoopInfo *Floor1 = GenLoops[1];
  CanonicalLoopInfo *Tile0 = GenLoops[2];
  CanonicalLoopInfo *Tile1 = GenLoops[3];

  BasicBlock *RefOrder[] = {
      Floor0->getPreheader(),
      Floor0->getHeader(),
      Floor0->getCond(),
      Floor0->getBody(),
      Floor1->getPreheader(),
      Floor1->getHeader(),
      Floor1->getCond(),
      Floor1->getBody(),
      Tile0->getPreheader(),
      Tile0->getHeader(),
      Tile0->getCond(),
      Tile0->getBody(),
      Tile1->getPreheader(),
      Tile1->getHeader(),
      Tile1->getCond(),
      Tile1->getBody(),
      BodyCode,
      Tile1->getLatch(),
      Tile1->getExit(),
      Tile1->getAfter(),
      Tile0->getLatch(),
      Tile0->getExit(),
      Tile0->getAfter(),
      Floor1->getLatch(),
      Floor1->getExit(),
      Floor1->getAfter(),
      Floor0->getLatch(),
      Floor0->getExit(),
      Floor0->getAfter(),
  };
  EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
  EXPECT_TRUE(verifyListOrder(F, RefOrder));

  EXPECT_EQ(Call->getParent(), BodyCode);

  // Outer counter: ((TileSize0 * Floor0IV) + Tile0IV) * OuterStep
  // + OuterStartVal, with the tile arithmetic located in Tile1's body block.
  auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
  EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
  auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
  EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
  auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
  auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
  EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());

  // Inner counter: ((TileSize1 * Floor1IV) + Tile1IV) * InnerStep
  // + InnerStartVal; the range arithmetic sits in BodyCode, the tile
  // arithmetic in Tile1's body block.
  auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
  EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
  EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
  auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
  EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
  auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
  auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
  EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
  EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
  EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
}

TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);

  // Create a loop, tile it, and extract its trip count.
All input values are 1783 // constant and IRBuilder evaluates all-constant arithmetic inplace, such that 1784 // the floor trip count itself will be a ConstantInt. Unfortunately we cannot 1785 // do the same for the tile loop. 1786 auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step, 1787 bool IsSigned, bool InclusiveStop, 1788 int64_t TileSize) -> uint64_t { 1789 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 1790 Type *LCTy = Type::getInt16Ty(Ctx); 1791 Value *StartVal = ConstantInt::get(LCTy, Start); 1792 Value *StopVal = ConstantInt::get(LCTy, Stop); 1793 Value *StepVal = ConstantInt::get(LCTy, Step); 1794 1795 // Generate a loop. 1796 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {}; 1797 CanonicalLoopInfo *Loop = 1798 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, 1799 StepVal, IsSigned, InclusiveStop); 1800 InsertPointTy AfterIP = Loop->getAfterIP(); 1801 1802 // Tile the loop. 1803 Value *TileSizeVal = ConstantInt::get(LCTy, TileSize); 1804 std::vector<CanonicalLoopInfo *> GenLoops = 1805 OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal}); 1806 1807 // Set the insertion pointer to after loop, where the next loop will be 1808 // emitted. 1809 Builder.restoreIP(AfterIP); 1810 1811 // Extract the trip count. 1812 CanonicalLoopInfo *FloorLoop = GenLoops[0]; 1813 Value *FloorTripCount = FloorLoop->getTripCount(); 1814 return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue(); 1815 }; 1816 1817 // Empty iteration domain. 1818 EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u); 1819 EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u); 1820 EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u); 1821 EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u); 1822 EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u); 1823 1824 // Only complete tiles. 
1825 EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u); 1826 EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u); 1827 EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u); 1828 EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u); 1829 EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u); 1830 EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u); 1831 1832 // Only a partial tile. 1833 EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u); 1834 EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u); 1835 EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u); 1836 EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u); 1837 EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u); 1838 1839 // Complete and partial tiles. 1840 EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u); 1841 EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u); 1842 EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u); 1843 EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u); 1844 EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u); 1845 1846 // Close to 16-bit integer range. 1847 EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu); 1848 EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1); 1849 EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1); 1850 EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1); 1851 EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1); 1852 EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u); 1853 EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u); 1854 1855 // Finalize the function. 
1856 Builder.CreateRetVoid(); 1857 OMPBuilder.finalize(); 1858 1859 EXPECT_FALSE(verifyModule(*M, &errs())); 1860 } 1861 1862 TEST_F(OpenMPIRBuilderTest, ApplySimd) { 1863 OpenMPIRBuilder OMPBuilder(*M); 1864 MapVector<Value *, Value *> AlignedVars; 1865 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 1866 1867 // Simd-ize the loop. 1868 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 1869 OrderKind::OMP_ORDER_unknown, 1870 /* Simdlen */ nullptr, 1871 /* Safelen */ nullptr); 1872 1873 OMPBuilder.finalize(); 1874 EXPECT_FALSE(verifyModule(*M, &errs())); 1875 1876 PassBuilder PB; 1877 FunctionAnalysisManager FAM; 1878 PB.registerFunctionAnalyses(FAM); 1879 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 1880 1881 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 1882 EXPECT_EQ(TopLvl.size(), 1u); 1883 1884 Loop *L = TopLvl.front(); 1885 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 1886 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 1887 1888 // Check for llvm.access.group metadata attached to the printf 1889 // function in the loop body. 1890 BasicBlock *LoopBody = CLI->getBody(); 1891 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 1892 return I.getMetadata("llvm.access.group") != nullptr; 1893 })); 1894 } 1895 1896 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) { 1897 OpenMPIRBuilder OMPBuilder(*M); 1898 IRBuilder<> Builder(BB); 1899 const int AlignmentValue = 32; 1900 AllocaInst *Alloc1 = 1901 Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1)); 1902 LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); 1903 MapVector<Value *, Value *> AlignedVars; 1904 AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)}); 1905 1906 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 1907 1908 // Simd-ize the loop. 
1909 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 1910 OrderKind::OMP_ORDER_unknown, 1911 /* Simdlen */ nullptr, 1912 /* Safelen */ nullptr); 1913 1914 OMPBuilder.finalize(); 1915 EXPECT_FALSE(verifyModule(*M, &errs())); 1916 1917 PassBuilder PB; 1918 FunctionAnalysisManager FAM; 1919 PB.registerFunctionAnalyses(FAM); 1920 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 1921 1922 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 1923 EXPECT_EQ(TopLvl.size(), 1u); 1924 1925 Loop *L = TopLvl.front(); 1926 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 1927 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 1928 1929 // Check for llvm.access.group metadata attached to the printf 1930 // function in the loop body. 1931 BasicBlock *LoopBody = CLI->getBody(); 1932 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 1933 return I.getMetadata("llvm.access.group") != nullptr; 1934 })); 1935 1936 // Check if number of assumption instructions is equal to number of aligned 1937 // variables 1938 BasicBlock *LoopPreheader = CLI->getPreheader(); 1939 size_t NumAssummptionCallsInPreheader = count_if( 1940 *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); }); 1941 EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); 1942 1943 // Check if variables are correctly aligned 1944 for (Instruction &Instr : *LoopPreheader) { 1945 if (!isa<AssumeInst>(Instr)) 1946 continue; 1947 AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr); 1948 if (AssumeInstruction->getNumTotalBundleOperands()) { 1949 auto Bundle = AssumeInstruction->getOperandBundleAt(0); 1950 if (Bundle.getTagName() == "align") { 1951 EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1])); 1952 auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]); 1953 EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); 1954 } 1955 } 1956 } 1957 } 1958 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { 1959 OpenMPIRBuilder OMPBuilder(*M); 1960 
MapVector<Value *, Value *> AlignedVars; 1961 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 1962 1963 // Simd-ize the loop. 1964 OMPBuilder.applySimd(CLI, AlignedVars, 1965 /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, 1966 ConstantInt::get(Type::getInt32Ty(Ctx), 3), 1967 /* Safelen */ nullptr); 1968 1969 OMPBuilder.finalize(); 1970 EXPECT_FALSE(verifyModule(*M, &errs())); 1971 1972 PassBuilder PB; 1973 FunctionAnalysisManager FAM; 1974 PB.registerFunctionAnalyses(FAM); 1975 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 1976 1977 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 1978 EXPECT_EQ(TopLvl.size(), 1u); 1979 1980 Loop *L = TopLvl.front(); 1981 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 1982 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 1983 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 1984 1985 // Check for llvm.access.group metadata attached to the printf 1986 // function in the loop body. 1987 BasicBlock *LoopBody = CLI->getBody(); 1988 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 1989 return I.getMetadata("llvm.access.group") != nullptr; 1990 })); 1991 } 1992 1993 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { 1994 OpenMPIRBuilder OMPBuilder(*M); 1995 MapVector<Value *, Value *> AlignedVars; 1996 1997 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 1998 1999 // Simd-ize the loop. 
2000 OMPBuilder.applySimd( 2001 CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, 2002 /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2003 2004 OMPBuilder.finalize(); 2005 EXPECT_FALSE(verifyModule(*M, &errs())); 2006 2007 PassBuilder PB; 2008 FunctionAnalysisManager FAM; 2009 PB.registerFunctionAnalyses(FAM); 2010 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2011 2012 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2013 EXPECT_EQ(TopLvl.size(), 1u); 2014 2015 Loop *L = TopLvl.front(); 2016 // Parallel metadata shoudl be attached because of presence of 2017 // the order(concurrent) OpenMP clause 2018 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2019 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2020 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2021 2022 // Check for llvm.access.group metadata attached to the printf 2023 // function in the loop body. 2024 BasicBlock *LoopBody = CLI->getBody(); 2025 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2026 return I.getMetadata("llvm.access.group") != nullptr; 2027 })); 2028 } 2029 2030 TEST_F(OpenMPIRBuilderTest, ApplySafelen) { 2031 OpenMPIRBuilder OMPBuilder(*M); 2032 MapVector<Value *, Value *> AlignedVars; 2033 2034 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2035 2036 OMPBuilder.applySimd( 2037 CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, 2038 /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2039 2040 OMPBuilder.finalize(); 2041 EXPECT_FALSE(verifyModule(*M, &errs())); 2042 2043 PassBuilder PB; 2044 FunctionAnalysisManager FAM; 2045 PB.registerFunctionAnalyses(FAM); 2046 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2047 2048 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2049 EXPECT_EQ(TopLvl.size(), 1u); 2050 2051 Loop *L = TopLvl.front(); 2052 EXPECT_FALSE(findStringMetadataForLoop(L, 
"llvm.loop.parallel_accesses")); 2053 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2054 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2055 2056 // Check for llvm.access.group metadata attached to the printf 2057 // function in the loop body. 2058 BasicBlock *LoopBody = CLI->getBody(); 2059 EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) { 2060 return I.getMetadata("llvm.access.group") != nullptr; 2061 })); 2062 } 2063 2064 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { 2065 OpenMPIRBuilder OMPBuilder(*M); 2066 MapVector<Value *, Value *> AlignedVars; 2067 2068 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2069 2070 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 2071 OrderKind::OMP_ORDER_unknown, 2072 ConstantInt::get(Type::getInt32Ty(Ctx), 2), 2073 ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2074 2075 OMPBuilder.finalize(); 2076 EXPECT_FALSE(verifyModule(*M, &errs())); 2077 2078 PassBuilder PB; 2079 FunctionAnalysisManager FAM; 2080 PB.registerFunctionAnalyses(FAM); 2081 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2082 2083 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2084 EXPECT_EQ(TopLvl.size(), 1u); 2085 2086 Loop *L = TopLvl.front(); 2087 EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2088 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2089 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2); 2090 2091 // Check for llvm.access.group metadata attached to the printf 2092 // function in the loop body. 
2093 BasicBlock *LoopBody = CLI->getBody(); 2094 EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) { 2095 return I.getMetadata("llvm.access.group") != nullptr; 2096 })); 2097 } 2098 2099 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) { 2100 OpenMPIRBuilder OMPBuilder(*M); 2101 IRBuilder<> Builder(BB); 2102 MapVector<Value *, Value *> AlignedVars; 2103 AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); 2104 AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); 2105 2106 // Generation of if condition 2107 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1); 2108 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2); 2109 LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); 2110 LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2); 2111 2112 Value *IfCmp = Builder.CreateICmpNE(Load1, Load2); 2113 2114 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2115 2116 // Simd-ize the loop with if condition 2117 OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown, 2118 ConstantInt::get(Type::getInt32Ty(Ctx), 3), 2119 /* Safelen */ nullptr); 2120 2121 OMPBuilder.finalize(); 2122 EXPECT_FALSE(verifyModule(*M, &errs())); 2123 2124 PassBuilder PB; 2125 FunctionAnalysisManager FAM; 2126 PB.registerFunctionAnalyses(FAM); 2127 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2128 2129 // Check if there are two loops (one with enabled vectorization) 2130 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2131 EXPECT_EQ(TopLvl.size(), 2u); 2132 2133 Loop *L = TopLvl[0]; 2134 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2135 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2136 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2137 2138 // The second loop should have disabled vectorization 2139 L = TopLvl[1]; 2140 EXPECT_FALSE(findStringMetadataForLoop(L, 
"llvm.loop.parallel_accesses")); 2141 EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2142 // Check for llvm.access.group metadata attached to the printf 2143 // function in the loop body. 2144 BasicBlock *LoopBody = CLI->getBody(); 2145 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2146 return I.getMetadata("llvm.access.group") != nullptr; 2147 })); 2148 } 2149 2150 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { 2151 OpenMPIRBuilder OMPBuilder(*M); 2152 2153 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2154 2155 // Unroll the loop. 2156 OMPBuilder.unrollLoopFull(DL, CLI); 2157 2158 OMPBuilder.finalize(); 2159 EXPECT_FALSE(verifyModule(*M, &errs())); 2160 2161 PassBuilder PB; 2162 FunctionAnalysisManager FAM; 2163 PB.registerFunctionAnalyses(FAM); 2164 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2165 2166 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2167 EXPECT_EQ(TopLvl.size(), 1u); 2168 2169 Loop *L = TopLvl.front(); 2170 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); 2171 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full")); 2172 } 2173 2174 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) { 2175 OpenMPIRBuilder OMPBuilder(*M); 2176 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2177 2178 // Unroll the loop. 
2179 CanonicalLoopInfo *UnrolledLoop = nullptr; 2180 OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop); 2181 ASSERT_NE(UnrolledLoop, nullptr); 2182 2183 OMPBuilder.finalize(); 2184 EXPECT_FALSE(verifyModule(*M, &errs())); 2185 UnrolledLoop->assertOK(); 2186 2187 PassBuilder PB; 2188 FunctionAnalysisManager FAM; 2189 PB.registerFunctionAnalyses(FAM); 2190 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2191 2192 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2193 EXPECT_EQ(TopLvl.size(), 1u); 2194 Loop *Outer = TopLvl.front(); 2195 EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader()); 2196 EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch()); 2197 EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond()); 2198 EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit()); 2199 2200 EXPECT_EQ(Outer->getSubLoops().size(), 1u); 2201 Loop *Inner = Outer->getSubLoops().front(); 2202 2203 EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable")); 2204 EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5); 2205 } 2206 2207 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) { 2208 OpenMPIRBuilder OMPBuilder(*M); 2209 2210 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2211 2212 // Unroll the loop. 
2213 OMPBuilder.unrollLoopHeuristic(DL, CLI); 2214 2215 OMPBuilder.finalize(); 2216 EXPECT_FALSE(verifyModule(*M, &errs())); 2217 2218 PassBuilder PB; 2219 FunctionAnalysisManager FAM; 2220 PB.registerFunctionAnalyses(FAM); 2221 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2222 2223 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2224 EXPECT_EQ(TopLvl.size(), 1u); 2225 2226 Loop *L = TopLvl.front(); 2227 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); 2228 } 2229 2230 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) { 2231 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2232 std::string oldDLStr = M->getDataLayoutStr(); 2233 M->setDataLayout( 2234 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 2235 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 2236 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 2237 OpenMPIRBuilder OMPBuilder(*M); 2238 OMPBuilder.Config.IsTargetDevice = true; 2239 OMPBuilder.initialize(); 2240 IRBuilder<> Builder(BB); 2241 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2242 InsertPointTy AllocaIP = Builder.saveIP(); 2243 2244 Type *LCTy = Type::getInt32Ty(Ctx); 2245 Value *StartVal = ConstantInt::get(LCTy, 10); 2246 Value *StopVal = ConstantInt::get(LCTy, 52); 2247 Value *StepVal = ConstantInt::get(LCTy, 2); 2248 auto LoopBodyGen = [&](InsertPointTy, Value *) {}; 2249 2250 CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( 2251 Loc, LoopBodyGen, StartVal, StopVal, StepVal, false, false); 2252 BasicBlock *Preheader = CLI->getPreheader(); 2253 Value *TripCount = CLI->getTripCount(); 2254 2255 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2256 2257 IRBuilder<>::InsertPoint AfterIP = OMPBuilder.applyWorkshareLoop( 2258 DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, nullptr, false, false, 2259 false, false, WorksharingLoopType::ForStaticLoop); 2260 Builder.restoreIP(AfterIP); 2261 
Builder.CreateRetVoid(); 2262 2263 OMPBuilder.finalize(); 2264 EXPECT_FALSE(verifyModule(*M, &errs())); 2265 2266 CallInst *WorkshareLoopRuntimeCall = nullptr; 2267 int WorkshareLoopRuntimeCallCnt = 0; 2268 for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) { 2269 CallInst *Call = dyn_cast<CallInst>(Inst); 2270 if (!Call) 2271 continue; 2272 if (!Call->getCalledFunction()) 2273 continue; 2274 2275 if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") { 2276 WorkshareLoopRuntimeCall = Call; 2277 WorkshareLoopRuntimeCallCnt++; 2278 } 2279 } 2280 EXPECT_NE(WorkshareLoopRuntimeCall, nullptr); 2281 // Verify that there is only one call to workshare loop function 2282 EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1); 2283 // Check that pointer to loop body function is passed as second argument 2284 Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1); 2285 EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType()); 2286 Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg); 2287 EXPECT_NE(ArgFunction, nullptr); 2288 EXPECT_EQ(ArgFunction->arg_size(), 1u); 2289 EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType()); 2290 // Check that no variables except for loop counter are used in loop body 2291 EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()), 2292 WorkshareLoopRuntimeCall->getArgOperand(2)); 2293 // Check loop trip count argument 2294 EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3)); 2295 } 2296 2297 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { 2298 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2299 OpenMPIRBuilder OMPBuilder(*M); 2300 OMPBuilder.Config.IsTargetDevice = false; 2301 OMPBuilder.initialize(); 2302 IRBuilder<> Builder(BB); 2303 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2304 2305 Type *LCTy = Type::getInt32Ty(Ctx); 2306 Value *StartVal = ConstantInt::get(LCTy, 10); 2307 Value *StopVal = ConstantInt::get(LCTy, 52); 2308 Value *StepVal = 
ConstantInt::get(LCTy, 2); 2309 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; 2310 2311 CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( 2312 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2313 /*IsSigned=*/false, /*InclusiveStop=*/false); 2314 BasicBlock *Preheader = CLI->getPreheader(); 2315 BasicBlock *Body = CLI->getBody(); 2316 Value *IV = CLI->getIndVar(); 2317 BasicBlock *ExitBlock = CLI->getExit(); 2318 2319 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2320 InsertPointTy AllocaIP = Builder.saveIP(); 2321 2322 OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true, 2323 OMP_SCHEDULE_Static); 2324 2325 BasicBlock *Cond = Body->getSinglePredecessor(); 2326 Instruction *Cmp = &*Cond->begin(); 2327 Value *TripCount = Cmp->getOperand(1); 2328 2329 auto AllocaIter = BB->begin(); 2330 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2331 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2332 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2333 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2334 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2335 EXPECT_NE(PLastIter, nullptr); 2336 EXPECT_NE(PLowerBound, nullptr); 2337 EXPECT_NE(PUpperBound, nullptr); 2338 EXPECT_NE(PStride, nullptr); 2339 2340 auto PreheaderIter = Preheader->begin(); 2341 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7); 2342 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2343 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2344 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2345 ASSERT_NE(LowerBoundStore, nullptr); 2346 ASSERT_NE(UpperBoundStore, nullptr); 2347 ASSERT_NE(StrideStore, nullptr); 2348 2349 auto *OrigLowerBound = 2350 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2351 auto *OrigUpperBound = 2352 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2353 auto *OrigStride = 
dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2354 ASSERT_NE(OrigLowerBound, nullptr); 2355 ASSERT_NE(OrigUpperBound, nullptr); 2356 ASSERT_NE(OrigStride, nullptr); 2357 EXPECT_EQ(OrigLowerBound->getValue(), 0); 2358 EXPECT_EQ(OrigUpperBound->getValue(), 20); 2359 EXPECT_EQ(OrigStride->getValue(), 1); 2360 2361 // Check that the loop IV is updated to account for the lower bound returned 2362 // by the OpenMP runtime call. 2363 BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front()); 2364 EXPECT_EQ(Add->getOperand(0), IV); 2365 auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1)); 2366 ASSERT_NE(LoadedLowerBound, nullptr); 2367 EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound); 2368 2369 // Check that the trip count is updated to account for the lower and upper 2370 // bounds return by the OpenMP runtime call. 2371 auto *AddOne = dyn_cast<Instruction>(TripCount); 2372 ASSERT_NE(AddOne, nullptr); 2373 ASSERT_TRUE(AddOne->isBinaryOp()); 2374 auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1)); 2375 ASSERT_NE(One, nullptr); 2376 EXPECT_EQ(One->getValue(), 1); 2377 auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0)); 2378 ASSERT_NE(Difference, nullptr); 2379 ASSERT_TRUE(Difference->isBinaryOp()); 2380 EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound); 2381 auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0)); 2382 ASSERT_NE(LoadedUpperBound, nullptr); 2383 EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound); 2384 2385 // The original loop iterator should only be used in the condition, in the 2386 // increment and in the statement that adds the lower bound to it. 2387 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2388 2389 // The exit block should contain the "fini" call and the barrier call, 2390 // plus the call to obtain the thread ID. 
2391 size_t NumCallsInExitBlock = 2392 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2393 EXPECT_EQ(NumCallsInExitBlock, 3u); 2394 } 2395 2396 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) { 2397 unsigned IVBits = GetParam(); 2398 2399 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2400 OpenMPIRBuilder OMPBuilder(*M); 2401 OMPBuilder.Config.IsTargetDevice = false; 2402 2403 BasicBlock *Body; 2404 CallInst *Call; 2405 CanonicalLoopInfo *CLI = 2406 buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body); 2407 2408 Instruction *OrigIndVar = CLI->getIndVar(); 2409 EXPECT_EQ(Call->getOperand(1), OrigIndVar); 2410 2411 Type *LCTy = Type::getInt32Ty(Ctx); 2412 Value *ChunkSize = ConstantInt::get(LCTy, 5); 2413 InsertPointTy AllocaIP{&F->getEntryBlock(), 2414 F->getEntryBlock().getFirstInsertionPt()}; 2415 OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true, 2416 OMP_SCHEDULE_Static, ChunkSize); 2417 2418 OMPBuilder.finalize(); 2419 EXPECT_FALSE(verifyModule(*M, &errs())); 2420 2421 BasicBlock *Entry = &F->getEntryBlock(); 2422 BasicBlock *Preheader = Entry->getSingleSuccessor(); 2423 2424 BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor(); 2425 BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor(); 2426 BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor(); 2427 BasicBlock *DispatchBody = succ_begin(DispatchCond)[0]; 2428 BasicBlock *DispatchExit = succ_begin(DispatchCond)[1]; 2429 BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor(); 2430 BasicBlock *Return = DispatchAfter->getSingleSuccessor(); 2431 2432 BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor(); 2433 BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor(); 2434 BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor(); 2435 BasicBlock *ChunkBody = succ_begin(ChunkCond)[0]; 2436 BasicBlock *ChunkExit = succ_begin(ChunkCond)[1]; 2437 BasicBlock *ChunkInc = 
ChunkBody->getSingleSuccessor(); 2438 BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor(); 2439 2440 BasicBlock *DispatchInc = ChunkAfter; 2441 2442 EXPECT_EQ(ChunkBody, Body); 2443 EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader); 2444 EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader); 2445 2446 EXPECT_TRUE(isa<ReturnInst>(Return->front())); 2447 2448 Value *NewIV = Call->getOperand(1); 2449 EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits); 2450 2451 CallInst *InitCall = findSingleCall( 2452 F, 2453 (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u 2454 : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u, 2455 OMPBuilder); 2456 EXPECT_EQ(InitCall->getParent(), Preheader); 2457 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33); 2458 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1); 2459 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5); 2460 2461 CallInst *FiniCall = findSingleCall( 2462 F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder); 2463 EXPECT_EQ(FiniCall->getParent(), DispatchExit); 2464 2465 CallInst *BarrierCall = findSingleCall( 2466 F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder); 2467 EXPECT_EQ(BarrierCall->getParent(), DispatchExit); 2468 } 2469 2470 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits, 2471 ::testing::Values(8, 16, 32, 64)); 2472 2473 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { 2474 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2475 OpenMPIRBuilder OMPBuilder(*M); 2476 OMPBuilder.Config.IsTargetDevice = false; 2477 OMPBuilder.initialize(); 2478 IRBuilder<> Builder(BB); 2479 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2480 2481 omp::OMPScheduleType SchedType = GetParam(); 2482 uint32_t ChunkSize = 1; 2483 switch (SchedType & ~OMPScheduleType::ModifierMask) { 2484 case omp::OMPScheduleType::BaseDynamicChunked: 2485 
case omp::OMPScheduleType::BaseGuidedChunked: 2486 ChunkSize = 7; 2487 break; 2488 case omp::OMPScheduleType::BaseAuto: 2489 case omp::OMPScheduleType::BaseRuntime: 2490 ChunkSize = 1; 2491 break; 2492 default: 2493 assert(0 && "unknown type for this test"); 2494 break; 2495 } 2496 2497 Type *LCTy = Type::getInt32Ty(Ctx); 2498 Value *StartVal = ConstantInt::get(LCTy, 10); 2499 Value *StopVal = ConstantInt::get(LCTy, 52); 2500 Value *StepVal = ConstantInt::get(LCTy, 2); 2501 Value *ChunkVal = 2502 (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize); 2503 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; 2504 2505 CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( 2506 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2507 /*IsSigned=*/false, /*InclusiveStop=*/false); 2508 2509 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2510 InsertPointTy AllocaIP = Builder.saveIP(); 2511 2512 // Collect all the info from CLI, as it isn't usable after the call to 2513 // createDynamicWorkshareLoop. 2514 InsertPointTy AfterIP = CLI->getAfterIP(); 2515 BasicBlock *Preheader = CLI->getPreheader(); 2516 BasicBlock *ExitBlock = CLI->getExit(); 2517 BasicBlock *LatchBlock = CLI->getLatch(); 2518 Value *IV = CLI->getIndVar(); 2519 2520 InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop( 2521 DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType), 2522 ChunkVal, /*Simd=*/false, 2523 (SchedType & omp::OMPScheduleType::ModifierMonotonic) == 2524 omp::OMPScheduleType::ModifierMonotonic, 2525 (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) == 2526 omp::OMPScheduleType::ModifierNonmonotonic, 2527 /*Ordered=*/false); 2528 2529 // The returned value should be the "after" point. 
2530 ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock()); 2531 ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint()); 2532 2533 auto AllocaIter = BB->begin(); 2534 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2535 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2536 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2537 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2538 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2539 EXPECT_NE(PLastIter, nullptr); 2540 EXPECT_NE(PLowerBound, nullptr); 2541 EXPECT_NE(PUpperBound, nullptr); 2542 EXPECT_NE(PStride, nullptr); 2543 2544 auto PreheaderIter = Preheader->begin(); 2545 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6); 2546 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2547 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2548 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2549 ASSERT_NE(LowerBoundStore, nullptr); 2550 ASSERT_NE(UpperBoundStore, nullptr); 2551 ASSERT_NE(StrideStore, nullptr); 2552 2553 CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++)); 2554 ASSERT_NE(ThreadIdCall, nullptr); 2555 EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(), 2556 "__kmpc_global_thread_num"); 2557 2558 CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter); 2559 2560 ASSERT_NE(InitCall, nullptr); 2561 EXPECT_EQ(InitCall->getCalledFunction()->getName(), 2562 "__kmpc_dispatch_init_4u"); 2563 EXPECT_EQ(InitCall->arg_size(), 7U); 2564 EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize)); 2565 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2566 if ((SchedType & OMPScheduleType::MonotonicityMask) == 2567 OMPScheduleType::None) { 2568 // Implementation is allowed to add default nonmonotonicity flag 2569 EXPECT_EQ( 2570 static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) | 2571 OMPScheduleType::ModifierNonmonotonic, 
2572 SchedType | OMPScheduleType::ModifierNonmonotonic); 2573 } else { 2574 EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()), 2575 SchedType); 2576 } 2577 2578 ConstantInt *OrigLowerBound = 2579 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2580 ConstantInt *OrigUpperBound = 2581 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2582 ConstantInt *OrigStride = 2583 dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2584 ASSERT_NE(OrigLowerBound, nullptr); 2585 ASSERT_NE(OrigUpperBound, nullptr); 2586 ASSERT_NE(OrigStride, nullptr); 2587 EXPECT_EQ(OrigLowerBound->getValue(), 1); 2588 EXPECT_EQ(OrigUpperBound->getValue(), 21); 2589 EXPECT_EQ(OrigStride->getValue(), 1); 2590 2591 CallInst *FiniCall = dyn_cast<CallInst>( 2592 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2593 EXPECT_EQ(FiniCall, nullptr); 2594 2595 // The original loop iterator should only be used in the condition, in the 2596 // increment and in the statement that adds the lower bound to it. 2597 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2598 2599 // The exit block should contain the barrier call, plus the call to obtain 2600 // the thread ID. 2601 size_t NumCallsInExitBlock = 2602 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2603 EXPECT_EQ(NumCallsInExitBlock, 2u); 2604 2605 // Add a termination to our block and check that it is internally consistent. 
2606 Builder.restoreIP(EndIP); 2607 Builder.CreateRetVoid(); 2608 OMPBuilder.finalize(); 2609 EXPECT_FALSE(verifyModule(*M, &errs())); 2610 } 2611 2612 INSTANTIATE_TEST_SUITE_P( 2613 OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, 2614 ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked, 2615 omp::OMPScheduleType::UnorderedGuidedChunked, 2616 omp::OMPScheduleType::UnorderedAuto, 2617 omp::OMPScheduleType::UnorderedRuntime, 2618 omp::OMPScheduleType::UnorderedDynamicChunked | 2619 omp::OMPScheduleType::ModifierMonotonic, 2620 omp::OMPScheduleType::UnorderedDynamicChunked | 2621 omp::OMPScheduleType::ModifierNonmonotonic, 2622 omp::OMPScheduleType::UnorderedGuidedChunked | 2623 omp::OMPScheduleType::ModifierMonotonic, 2624 omp::OMPScheduleType::UnorderedGuidedChunked | 2625 omp::OMPScheduleType::ModifierNonmonotonic, 2626 omp::OMPScheduleType::UnorderedAuto | 2627 omp::OMPScheduleType::ModifierMonotonic, 2628 omp::OMPScheduleType::UnorderedRuntime | 2629 omp::OMPScheduleType::ModifierMonotonic)); 2630 2631 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) { 2632 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2633 OpenMPIRBuilder OMPBuilder(*M); 2634 OMPBuilder.Config.IsTargetDevice = false; 2635 OMPBuilder.initialize(); 2636 IRBuilder<> Builder(BB); 2637 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2638 2639 uint32_t ChunkSize = 1; 2640 Type *LCTy = Type::getInt32Ty(Ctx); 2641 Value *StartVal = ConstantInt::get(LCTy, 10); 2642 Value *StopVal = ConstantInt::get(LCTy, 52); 2643 Value *StepVal = ConstantInt::get(LCTy, 2); 2644 Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize); 2645 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; 2646 2647 CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( 2648 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2649 /*IsSigned=*/false, /*InclusiveStop=*/false); 2650 2651 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2652 InsertPointTy AllocaIP = 
Builder.saveIP(); 2653 2654 // Collect all the info from CLI, as it isn't usable after the call to 2655 // createDynamicWorkshareLoop. 2656 BasicBlock *Preheader = CLI->getPreheader(); 2657 BasicBlock *ExitBlock = CLI->getExit(); 2658 BasicBlock *LatchBlock = CLI->getLatch(); 2659 Value *IV = CLI->getIndVar(); 2660 2661 InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop( 2662 DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal, 2663 /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false, 2664 /*HasNonmonotonicModifier=*/false, 2665 /*HasOrderedClause=*/true); 2666 2667 // Add a termination to our block and check that it is internally consistent. 2668 Builder.restoreIP(EndIP); 2669 Builder.CreateRetVoid(); 2670 OMPBuilder.finalize(); 2671 EXPECT_FALSE(verifyModule(*M, &errs())); 2672 2673 CallInst *InitCall = nullptr; 2674 for (Instruction &EI : *Preheader) { 2675 Instruction *Cur = &EI; 2676 if (isa<CallInst>(Cur)) { 2677 InitCall = cast<CallInst>(Cur); 2678 if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u") 2679 break; 2680 InitCall = nullptr; 2681 } 2682 } 2683 EXPECT_NE(InitCall, nullptr); 2684 EXPECT_EQ(InitCall->arg_size(), 7U); 2685 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2686 EXPECT_EQ(SchedVal->getValue(), 2687 static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked)); 2688 2689 CallInst *FiniCall = dyn_cast<CallInst>( 2690 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2691 ASSERT_NE(FiniCall, nullptr); 2692 EXPECT_EQ(FiniCall->getCalledFunction()->getName(), 2693 "__kmpc_dispatch_fini_4u"); 2694 EXPECT_EQ(FiniCall->arg_size(), 2U); 2695 EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0)); 2696 EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1)); 2697 2698 // The original loop iterator should only be used in the condition, in the 2699 // increment and in the statement that adds the lower bound to it. 
2700 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2701 2702 // The exit block should contain the barrier call, plus the call to obtain 2703 // the thread ID. 2704 size_t NumCallsInExitBlock = 2705 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2706 EXPECT_EQ(NumCallsInExitBlock, 2u); 2707 } 2708 2709 TEST_F(OpenMPIRBuilderTest, MasterDirective) { 2710 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2711 OpenMPIRBuilder OMPBuilder(*M); 2712 OMPBuilder.initialize(); 2713 F->setName("func"); 2714 IRBuilder<> Builder(BB); 2715 2716 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2717 2718 AllocaInst *PrivAI = nullptr; 2719 2720 BasicBlock *EntryBB = nullptr; 2721 BasicBlock *ThenBB = nullptr; 2722 2723 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2724 if (AllocaIP.isSet()) 2725 Builder.restoreIP(AllocaIP); 2726 else 2727 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2728 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2729 Builder.CreateStore(F->arg_begin(), PrivAI); 2730 2731 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2732 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2733 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2734 2735 Builder.restoreIP(CodeGenIP); 2736 2737 // collect some info for checks later 2738 ThenBB = Builder.GetInsertBlock(); 2739 EntryBB = ThenBB->getUniquePredecessor(); 2740 2741 // simple instructions for body 2742 Value *PrivLoad = 2743 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2744 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2745 }; 2746 2747 auto FiniCB = [&](InsertPointTy IP) { 2748 BasicBlock *IPBB = IP.getBlock(); 2749 EXPECT_NE(IPBB->end(), IP.getPoint()); 2750 }; 2751 2752 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); 2753 Value *EntryBBTI = EntryBB->getTerminator(); 2754 EXPECT_NE(EntryBBTI, nullptr); 2755 
EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2756 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 2757 EXPECT_TRUE(EntryBr->isConditional()); 2758 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2759 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2760 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2761 2762 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 2763 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 2764 2765 CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0)); 2766 EXPECT_EQ(MasterEntryCI->arg_size(), 2U); 2767 EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master"); 2768 EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0))); 2769 2770 CallInst *MasterEndCI = nullptr; 2771 for (auto &FI : *ThenBB) { 2772 Instruction *cur = &FI; 2773 if (isa<CallInst>(cur)) { 2774 MasterEndCI = cast<CallInst>(cur); 2775 if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master") 2776 break; 2777 MasterEndCI = nullptr; 2778 } 2779 } 2780 EXPECT_NE(MasterEndCI, nullptr); 2781 EXPECT_EQ(MasterEndCI->arg_size(), 2U); 2782 EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0))); 2783 EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1)); 2784 } 2785 2786 TEST_F(OpenMPIRBuilderTest, MaskedDirective) { 2787 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2788 OpenMPIRBuilder OMPBuilder(*M); 2789 OMPBuilder.initialize(); 2790 F->setName("func"); 2791 IRBuilder<> Builder(BB); 2792 2793 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2794 2795 AllocaInst *PrivAI = nullptr; 2796 2797 BasicBlock *EntryBB = nullptr; 2798 BasicBlock *ThenBB = nullptr; 2799 2800 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2801 if (AllocaIP.isSet()) 2802 Builder.restoreIP(AllocaIP); 2803 else 2804 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2805 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2806 
Builder.CreateStore(F->arg_begin(), PrivAI); 2807 2808 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2809 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2810 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2811 2812 Builder.restoreIP(CodeGenIP); 2813 2814 // collect some info for checks later 2815 ThenBB = Builder.GetInsertBlock(); 2816 EntryBB = ThenBB->getUniquePredecessor(); 2817 2818 // simple instructions for body 2819 Value *PrivLoad = 2820 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2821 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2822 }; 2823 2824 auto FiniCB = [&](InsertPointTy IP) { 2825 BasicBlock *IPBB = IP.getBlock(); 2826 EXPECT_NE(IPBB->end(), IP.getPoint()); 2827 }; 2828 2829 Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 2830 Builder.restoreIP( 2831 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter)); 2832 Value *EntryBBTI = EntryBB->getTerminator(); 2833 EXPECT_NE(EntryBBTI, nullptr); 2834 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2835 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 2836 EXPECT_TRUE(EntryBr->isConditional()); 2837 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2838 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2839 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2840 2841 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 2842 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 2843 2844 CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0)); 2845 EXPECT_EQ(MaskedEntryCI->arg_size(), 3U); 2846 EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked"); 2847 EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0))); 2848 2849 CallInst *MaskedEndCI = nullptr; 2850 for (auto &FI : *ThenBB) { 2851 Instruction *cur = &FI; 2852 if (isa<CallInst>(cur)) { 2853 MaskedEndCI = cast<CallInst>(cur); 2854 if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked") 2855 break; 2856 
MaskedEndCI = nullptr; 2857 } 2858 } 2859 EXPECT_NE(MaskedEndCI, nullptr); 2860 EXPECT_EQ(MaskedEndCI->arg_size(), 2U); 2861 EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0))); 2862 EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1)); 2863 } 2864 2865 TEST_F(OpenMPIRBuilderTest, CriticalDirective) { 2866 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2867 OpenMPIRBuilder OMPBuilder(*M); 2868 OMPBuilder.initialize(); 2869 F->setName("func"); 2870 IRBuilder<> Builder(BB); 2871 2872 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2873 2874 AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2875 2876 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2877 // actual start for bodyCB 2878 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2879 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2880 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2881 2882 // body begin 2883 Builder.restoreIP(CodeGenIP); 2884 Builder.CreateStore(F->arg_begin(), PrivAI); 2885 Value *PrivLoad = 2886 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2887 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2888 }; 2889 2890 auto FiniCB = [&](InsertPointTy IP) { 2891 BasicBlock *IPBB = IP.getBlock(); 2892 EXPECT_NE(IPBB->end(), IP.getPoint()); 2893 }; 2894 BasicBlock *EntryBB = Builder.GetInsertBlock(); 2895 2896 Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, 2897 "testCRT", nullptr)); 2898 2899 CallInst *CriticalEntryCI = nullptr; 2900 for (auto &EI : *EntryBB) { 2901 Instruction *cur = &EI; 2902 if (isa<CallInst>(cur)) { 2903 CriticalEntryCI = cast<CallInst>(cur); 2904 if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical") 2905 break; 2906 CriticalEntryCI = nullptr; 2907 } 2908 } 2909 EXPECT_NE(CriticalEntryCI, nullptr); 2910 EXPECT_EQ(CriticalEntryCI->arg_size(), 3U); 2911 
EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical"); 2912 EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0))); 2913 2914 CallInst *CriticalEndCI = nullptr; 2915 for (auto &FI : *EntryBB) { 2916 Instruction *cur = &FI; 2917 if (isa<CallInst>(cur)) { 2918 CriticalEndCI = cast<CallInst>(cur); 2919 if (CriticalEndCI->getCalledFunction()->getName() == 2920 "__kmpc_end_critical") 2921 break; 2922 CriticalEndCI = nullptr; 2923 } 2924 } 2925 EXPECT_NE(CriticalEndCI, nullptr); 2926 EXPECT_EQ(CriticalEndCI->arg_size(), 3U); 2927 EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0))); 2928 EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1)); 2929 PointerType *CriticalNamePtrTy = 2930 PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8)); 2931 EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2)); 2932 GlobalVariable *GV = 2933 dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2)); 2934 ASSERT_NE(GV, nullptr); 2935 EXPECT_EQ(GV->getType(), CriticalNamePtrTy); 2936 const DataLayout &DL = M->getDataLayout(); 2937 const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy); 2938 const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace()); 2939 if (const llvm::MaybeAlign Alignment = GV->getAlign()) 2940 EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign)); 2941 } 2942 2943 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) { 2944 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2945 OpenMPIRBuilder OMPBuilder(*M); 2946 OMPBuilder.initialize(); 2947 F->setName("func"); 2948 IRBuilder<> Builder(BB); 2949 LLVMContext &Ctx = M->getContext(); 2950 2951 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2952 2953 InsertPointTy AllocaIP(&F->getEntryBlock(), 2954 F->getEntryBlock().getFirstInsertionPt()); 2955 2956 unsigned NumLoops = 2; 2957 SmallVector<Value *, 2> StoreValues; 2958 Type *LCTy = Type::getInt64Ty(Ctx); 
2959 StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 2960 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 2961 2962 // Test for "#omp ordered depend(source)" 2963 Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 2964 StoreValues, ".cnt.addr", 2965 /*IsDependSource=*/true)); 2966 2967 Builder.CreateRetVoid(); 2968 OMPBuilder.finalize(); 2969 EXPECT_FALSE(verifyModule(*M, &errs())); 2970 2971 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 2972 ASSERT_NE(AllocInst, nullptr); 2973 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 2974 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 2975 EXPECT_TRUE( 2976 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 2977 2978 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 2979 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 2980 GetElementPtrInst *DependAddrGEPIter = 2981 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 2982 ASSERT_NE(DependAddrGEPIter, nullptr); 2983 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 2984 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 2985 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 2986 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 2987 ASSERT_NE(FirstIdx, nullptr); 2988 ASSERT_NE(SecondIdx, nullptr); 2989 EXPECT_EQ(FirstIdx->getValue(), 0); 2990 EXPECT_EQ(SecondIdx->getValue(), Iter); 2991 StoreInst *StoreValue = 2992 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 2993 ASSERT_NE(StoreValue, nullptr); 2994 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 2995 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 2996 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 2997 IterInst = dyn_cast<Instruction>(StoreValue); 2998 } 2999 3000 GetElementPtrInst *DependBaseAddrGEP = 3001 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3002 ASSERT_NE(DependBaseAddrGEP, nullptr); 3003 
EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3004 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3005 auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3006 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3007 ASSERT_NE(FirstIdx, nullptr); 3008 ASSERT_NE(SecondIdx, nullptr); 3009 EXPECT_EQ(FirstIdx->getValue(), 0); 3010 EXPECT_EQ(SecondIdx->getValue(), 0); 3011 3012 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3013 ASSERT_NE(GTID, nullptr); 3014 EXPECT_EQ(GTID->arg_size(), 1U); 3015 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3016 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3017 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3018 3019 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3020 ASSERT_NE(Depend, nullptr); 3021 EXPECT_EQ(Depend->arg_size(), 3U); 3022 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post"); 3023 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3024 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3025 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3026 } 3027 3028 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) { 3029 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3030 OpenMPIRBuilder OMPBuilder(*M); 3031 OMPBuilder.initialize(); 3032 F->setName("func"); 3033 IRBuilder<> Builder(BB); 3034 LLVMContext &Ctx = M->getContext(); 3035 3036 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3037 3038 InsertPointTy AllocaIP(&F->getEntryBlock(), 3039 F->getEntryBlock().getFirstInsertionPt()); 3040 3041 unsigned NumLoops = 2; 3042 SmallVector<Value *, 2> StoreValues; 3043 Type *LCTy = Type::getInt64Ty(Ctx); 3044 StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 3045 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 3046 3047 // Test for "#omp ordered depend(sink: vec)" 3048 
Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 3049 StoreValues, ".cnt.addr", 3050 /*IsDependSource=*/false)); 3051 3052 Builder.CreateRetVoid(); 3053 OMPBuilder.finalize(); 3054 EXPECT_FALSE(verifyModule(*M, &errs())); 3055 3056 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 3057 ASSERT_NE(AllocInst, nullptr); 3058 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 3059 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 3060 EXPECT_TRUE( 3061 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 3062 3063 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 3064 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 3065 GetElementPtrInst *DependAddrGEPIter = 3066 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3067 ASSERT_NE(DependAddrGEPIter, nullptr); 3068 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 3069 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 3070 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 3071 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 3072 ASSERT_NE(FirstIdx, nullptr); 3073 ASSERT_NE(SecondIdx, nullptr); 3074 EXPECT_EQ(FirstIdx->getValue(), 0); 3075 EXPECT_EQ(SecondIdx->getValue(), Iter); 3076 StoreInst *StoreValue = 3077 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 3078 ASSERT_NE(StoreValue, nullptr); 3079 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 3080 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 3081 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 3082 IterInst = dyn_cast<Instruction>(StoreValue); 3083 } 3084 3085 GetElementPtrInst *DependBaseAddrGEP = 3086 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3087 ASSERT_NE(DependBaseAddrGEP, nullptr); 3088 EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3089 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3090 auto *FirstIdx = 
dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3091 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3092 ASSERT_NE(FirstIdx, nullptr); 3093 ASSERT_NE(SecondIdx, nullptr); 3094 EXPECT_EQ(FirstIdx->getValue(), 0); 3095 EXPECT_EQ(SecondIdx->getValue(), 0); 3096 3097 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3098 ASSERT_NE(GTID, nullptr); 3099 EXPECT_EQ(GTID->arg_size(), 1U); 3100 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3101 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3102 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3103 3104 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3105 ASSERT_NE(Depend, nullptr); 3106 EXPECT_EQ(Depend->arg_size(), 3U); 3107 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait"); 3108 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3109 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3110 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3111 } 3112 3113 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { 3114 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3115 OpenMPIRBuilder OMPBuilder(*M); 3116 OMPBuilder.initialize(); 3117 F->setName("func"); 3118 IRBuilder<> Builder(BB); 3119 3120 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3121 3122 AllocaInst *PrivAI = 3123 Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3124 3125 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3126 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3127 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3128 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3129 3130 Builder.restoreIP(CodeGenIP); 3131 Builder.CreateStore(F->arg_begin(), PrivAI); 3132 Value *PrivLoad = 3133 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3134 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 
3135 }; 3136 3137 auto FiniCB = [&](InsertPointTy IP) { 3138 BasicBlock *IPBB = IP.getBlock(); 3139 EXPECT_NE(IPBB->end(), IP.getPoint()); 3140 }; 3141 3142 // Test for "#omp ordered [threads]" 3143 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3144 Builder.restoreIP( 3145 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true)); 3146 3147 Builder.CreateRetVoid(); 3148 OMPBuilder.finalize(); 3149 EXPECT_FALSE(verifyModule(*M, &errs())); 3150 3151 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3152 3153 CallInst *OrderedEntryCI = nullptr; 3154 for (auto &EI : *EntryBB) { 3155 Instruction *Cur = &EI; 3156 if (isa<CallInst>(Cur)) { 3157 OrderedEntryCI = cast<CallInst>(Cur); 3158 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3159 break; 3160 OrderedEntryCI = nullptr; 3161 } 3162 } 3163 EXPECT_NE(OrderedEntryCI, nullptr); 3164 EXPECT_EQ(OrderedEntryCI->arg_size(), 2U); 3165 EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered"); 3166 EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0))); 3167 3168 CallInst *OrderedEndCI = nullptr; 3169 for (auto &FI : *EntryBB) { 3170 Instruction *Cur = &FI; 3171 if (isa<CallInst>(Cur)) { 3172 OrderedEndCI = cast<CallInst>(Cur); 3173 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3174 break; 3175 OrderedEndCI = nullptr; 3176 } 3177 } 3178 EXPECT_NE(OrderedEndCI, nullptr); 3179 EXPECT_EQ(OrderedEndCI->arg_size(), 2U); 3180 EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0))); 3181 EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1)); 3182 } 3183 3184 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { 3185 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3186 OpenMPIRBuilder OMPBuilder(*M); 3187 OMPBuilder.initialize(); 3188 F->setName("func"); 3189 IRBuilder<> Builder(BB); 3190 3191 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3192 3193 AllocaInst *PrivAI = 3194 
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3195 3196 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3197 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3198 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3199 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3200 3201 Builder.restoreIP(CodeGenIP); 3202 Builder.CreateStore(F->arg_begin(), PrivAI); 3203 Value *PrivLoad = 3204 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3205 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3206 }; 3207 3208 auto FiniCB = [&](InsertPointTy IP) { 3209 BasicBlock *IPBB = IP.getBlock(); 3210 EXPECT_NE(IPBB->end(), IP.getPoint()); 3211 }; 3212 3213 // Test for "#omp ordered simd" 3214 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3215 Builder.restoreIP( 3216 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false)); 3217 3218 Builder.CreateRetVoid(); 3219 OMPBuilder.finalize(); 3220 EXPECT_FALSE(verifyModule(*M, &errs())); 3221 3222 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3223 3224 CallInst *OrderedEntryCI = nullptr; 3225 for (auto &EI : *EntryBB) { 3226 Instruction *Cur = &EI; 3227 if (isa<CallInst>(Cur)) { 3228 OrderedEntryCI = cast<CallInst>(Cur); 3229 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3230 break; 3231 OrderedEntryCI = nullptr; 3232 } 3233 } 3234 EXPECT_EQ(OrderedEntryCI, nullptr); 3235 3236 CallInst *OrderedEndCI = nullptr; 3237 for (auto &FI : *EntryBB) { 3238 Instruction *Cur = &FI; 3239 if (isa<CallInst>(Cur)) { 3240 OrderedEndCI = cast<CallInst>(Cur); 3241 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3242 break; 3243 OrderedEndCI = nullptr; 3244 } 3245 } 3246 EXPECT_EQ(OrderedEndCI, nullptr); 3247 } 3248 3249 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { 3250 OpenMPIRBuilder OMPBuilder(*M); 3251 OMPBuilder.initialize(); 3252 F->setName("func"); 3253 IRBuilder<> Builder(BB); 3254 3255 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3256 3257 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3258 AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3259 AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3260 3261 BasicBlock *EntryBB = BB; 3262 3263 OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, 3264 PrivAddress, Int32, /*BranchtoEnd*/ true); 3265 3266 BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator()); 3267 3268 EXPECT_NE(EntryBr, nullptr); 3269 EXPECT_TRUE(EntryBr->isConditional()); 3270 3271 BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); 3272 BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); 3273 CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition()); 3274 3275 EXPECT_NE(CMP, nullptr); 3276 EXPECT_NE(NotMasterBB, nullptr); 3277 EXPECT_NE(CopyinEnd, nullptr); 3278 3279 BranchInst *NotMasterBr = 3280 dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator()); 3281 EXPECT_NE(NotMasterBr, nullptr); 3282 EXPECT_FALSE(NotMasterBr->isConditional()); 3283 EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); 3284 } 3285 3286 TEST_F(OpenMPIRBuilderTest, SingleDirective) { 3287 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3288 OpenMPIRBuilder OMPBuilder(*M); 3289 OMPBuilder.initialize(); 3290 F->setName("func"); 3291 IRBuilder<> Builder(BB); 3292 3293 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3294 3295 AllocaInst *PrivAI = nullptr; 3296 3297 BasicBlock *EntryBB = nullptr; 3298 BasicBlock *ThenBB = nullptr; 3299 3300 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3301 if (AllocaIP.isSet()) 3302 Builder.restoreIP(AllocaIP); 3303 else 3304 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3305 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3306 Builder.CreateStore(F->arg_begin(), PrivAI); 3307 3308 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 
3309 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3310 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3311 3312 Builder.restoreIP(CodeGenIP); 3313 3314 // collect some info for checks later 3315 ThenBB = Builder.GetInsertBlock(); 3316 EntryBB = ThenBB->getUniquePredecessor(); 3317 3318 // simple instructions for body 3319 Value *PrivLoad = 3320 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3321 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3322 }; 3323 3324 auto FiniCB = [&](InsertPointTy IP) { 3325 BasicBlock *IPBB = IP.getBlock(); 3326 EXPECT_NE(IPBB->end(), IP.getPoint()); 3327 }; 3328 3329 Builder.restoreIP( 3330 OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ false)); 3331 Value *EntryBBTI = EntryBB->getTerminator(); 3332 EXPECT_NE(EntryBBTI, nullptr); 3333 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3334 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3335 EXPECT_TRUE(EntryBr->isConditional()); 3336 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3337 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3338 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3339 3340 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3341 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3342 3343 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3344 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3345 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3346 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3347 3348 CallInst *SingleEndCI = nullptr; 3349 for (auto &FI : *ThenBB) { 3350 Instruction *cur = &FI; 3351 if (isa<CallInst>(cur)) { 3352 SingleEndCI = cast<CallInst>(cur); 3353 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3354 break; 3355 SingleEndCI = nullptr; 3356 } 3357 } 3358 EXPECT_NE(SingleEndCI, nullptr); 3359 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3360 
EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3361 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3362 3363 bool FoundBarrier = false; 3364 for (auto &FI : *ExitBB) { 3365 Instruction *cur = &FI; 3366 if (auto CI = dyn_cast<CallInst>(cur)) { 3367 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3368 FoundBarrier = true; 3369 break; 3370 } 3371 } 3372 } 3373 EXPECT_TRUE(FoundBarrier); 3374 } 3375 3376 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { 3377 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3378 OpenMPIRBuilder OMPBuilder(*M); 3379 OMPBuilder.initialize(); 3380 F->setName("func"); 3381 IRBuilder<> Builder(BB); 3382 3383 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3384 3385 AllocaInst *PrivAI = nullptr; 3386 3387 BasicBlock *EntryBB = nullptr; 3388 BasicBlock *ThenBB = nullptr; 3389 3390 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3391 if (AllocaIP.isSet()) 3392 Builder.restoreIP(AllocaIP); 3393 else 3394 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3395 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3396 Builder.CreateStore(F->arg_begin(), PrivAI); 3397 3398 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3399 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3400 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3401 3402 Builder.restoreIP(CodeGenIP); 3403 3404 // collect some info for checks later 3405 ThenBB = Builder.GetInsertBlock(); 3406 EntryBB = ThenBB->getUniquePredecessor(); 3407 3408 // simple instructions for body 3409 Value *PrivLoad = 3410 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3411 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3412 }; 3413 3414 auto FiniCB = [&](InsertPointTy IP) { 3415 BasicBlock *IPBB = IP.getBlock(); 3416 EXPECT_NE(IPBB->end(), IP.getPoint()); 3417 }; 3418 3419 Builder.restoreIP( 3420 
OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ true)); 3421 Value *EntryBBTI = EntryBB->getTerminator(); 3422 EXPECT_NE(EntryBBTI, nullptr); 3423 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3424 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3425 EXPECT_TRUE(EntryBr->isConditional()); 3426 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3427 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3428 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3429 3430 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3431 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3432 3433 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3434 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3435 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3436 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3437 3438 CallInst *SingleEndCI = nullptr; 3439 for (auto &FI : *ThenBB) { 3440 Instruction *cur = &FI; 3441 if (isa<CallInst>(cur)) { 3442 SingleEndCI = cast<CallInst>(cur); 3443 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3444 break; 3445 SingleEndCI = nullptr; 3446 } 3447 } 3448 EXPECT_NE(SingleEndCI, nullptr); 3449 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3450 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3451 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3452 3453 CallInst *ExitBarrier = nullptr; 3454 for (auto &FI : *ExitBB) { 3455 Instruction *cur = &FI; 3456 if (auto CI = dyn_cast<CallInst>(cur)) { 3457 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3458 ExitBarrier = CI; 3459 break; 3460 } 3461 } 3462 } 3463 EXPECT_EQ(ExitBarrier, nullptr); 3464 } 3465 3466 // Helper class to check each instruction of a BB. 
3467 class BBInstIter { 3468 BasicBlock *BB; 3469 BasicBlock::iterator BBI; 3470 3471 public: 3472 BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {} 3473 3474 bool hasNext() const { return BBI != BB->end(); } 3475 3476 template <typename InstTy> InstTy *next() { 3477 if (!hasNext()) 3478 return nullptr; 3479 Instruction *Cur = &*BBI++; 3480 if (!isa<InstTy>(Cur)) 3481 return nullptr; 3482 return cast<InstTy>(Cur); 3483 } 3484 }; 3485 3486 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { 3487 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3488 OpenMPIRBuilder OMPBuilder(*M); 3489 OMPBuilder.initialize(); 3490 F->setName("func"); 3491 IRBuilder<> Builder(BB); 3492 3493 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3494 3495 AllocaInst *PrivAI = nullptr; 3496 3497 BasicBlock *EntryBB = nullptr; 3498 BasicBlock *ThenBB = nullptr; 3499 3500 Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType()); 3501 Builder.CreateStore(F->arg_begin(), CPVar); 3502 3503 FunctionType *CopyFuncTy = FunctionType::get( 3504 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false); 3505 Function *CopyFunc = 3506 Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); 3507 3508 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3509 if (AllocaIP.isSet()) 3510 Builder.restoreIP(AllocaIP); 3511 else 3512 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3513 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3514 Builder.CreateStore(F->arg_begin(), PrivAI); 3515 3516 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3517 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3518 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3519 3520 Builder.restoreIP(CodeGenIP); 3521 3522 // collect some info for checks later 3523 ThenBB = Builder.GetInsertBlock(); 3524 EntryBB = ThenBB->getUniquePredecessor(); 3525 3526 // simple instructions for body 3527 Value 
*PrivLoad = 3528 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3529 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3530 }; 3531 3532 auto FiniCB = [&](InsertPointTy IP) { 3533 BasicBlock *IPBB = IP.getBlock(); 3534 // IP must be before the unconditional branch to ExitBB 3535 EXPECT_NE(IPBB->end(), IP.getPoint()); 3536 }; 3537 3538 Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, 3539 /*IsNowait*/ false, {CPVar}, 3540 {CopyFunc})); 3541 Value *EntryBBTI = EntryBB->getTerminator(); 3542 EXPECT_NE(EntryBBTI, nullptr); 3543 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3544 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3545 EXPECT_TRUE(EntryBr->isConditional()); 3546 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3547 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3548 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3549 3550 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3551 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3552 3553 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3554 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3555 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3556 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3557 3558 // check ThenBB 3559 BBInstIter ThenBBI(ThenBB); 3560 // load PrivAI 3561 auto *PrivLI = ThenBBI.next<LoadInst>(); 3562 EXPECT_NE(PrivLI, nullptr); 3563 EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI); 3564 // icmp 3565 EXPECT_TRUE(ThenBBI.next<ICmpInst>()); 3566 // store 1, DidIt 3567 auto *DidItSI = ThenBBI.next<StoreInst>(); 3568 EXPECT_NE(DidItSI, nullptr); 3569 EXPECT_EQ(DidItSI->getValueOperand(), 3570 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 3571 Value *DidIt = DidItSI->getPointerOperand(); 3572 // call __kmpc_end_single 3573 auto *SingleEndCI = ThenBBI.next<CallInst>(); 3574 EXPECT_NE(SingleEndCI, nullptr); 3575 EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), 
"__kmpc_end_single"); 3576 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3577 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3578 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3579 // br ExitBB 3580 auto *ExitBBBI = ThenBBI.next<BranchInst>(); 3581 EXPECT_NE(ExitBBBI, nullptr); 3582 EXPECT_TRUE(ExitBBBI->isUnconditional()); 3583 EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB); 3584 EXPECT_FALSE(ThenBBI.hasNext()); 3585 3586 // check ExitBB 3587 BBInstIter ExitBBI(ExitBB); 3588 // call __kmpc_global_thread_num 3589 auto *ThreadNumCI = ExitBBI.next<CallInst>(); 3590 EXPECT_NE(ThreadNumCI, nullptr); 3591 EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(), 3592 "__kmpc_global_thread_num"); 3593 // load DidIt 3594 auto *DidItLI = ExitBBI.next<LoadInst>(); 3595 EXPECT_NE(DidItLI, nullptr); 3596 EXPECT_EQ(DidItLI->getPointerOperand(), DidIt); 3597 // call __kmpc_copyprivate 3598 auto *CopyPrivateCI = ExitBBI.next<CallInst>(); 3599 EXPECT_NE(CopyPrivateCI, nullptr); 3600 EXPECT_EQ(CopyPrivateCI->arg_size(), 6U); 3601 EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3))); 3602 EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar); 3603 EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4))); 3604 EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc); 3605 EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5))); 3606 DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5)); 3607 EXPECT_EQ(DidItLI->getOperand(0), DidIt); 3608 EXPECT_FALSE(ExitBBI.hasNext()); 3609 } 3610 3611 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) { 3612 OpenMPIRBuilder OMPBuilder(*M); 3613 OMPBuilder.initialize(); 3614 F->setName("func"); 3615 IRBuilder<> Builder(BB); 3616 3617 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3618 3619 Type *Float32 = Type::getFloatTy(M->getContext()); 3620 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3621 XVal->setName("AtomicVar"); 3622 AllocaInst *VVal = Builder.CreateAlloca(Float32); 3623 
VVal->setName("AtomicRead"); 3624 AtomicOrdering AO = AtomicOrdering::Monotonic; 3625 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3626 OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false}; 3627 3628 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3629 3630 IntegerType *IntCastTy = 3631 IntegerType::get(M->getContext(), Float32->getScalarSizeInBits()); 3632 3633 LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode()); 3634 EXPECT_TRUE(AtomicLoad->isAtomic()); 3635 EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal); 3636 3637 BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode()); 3638 EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy); 3639 EXPECT_EQ(CastToFlt->getDestTy(), Float32); 3640 EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad); 3641 3642 StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode()); 3643 EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt); 3644 EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal); 3645 3646 Builder.CreateRetVoid(); 3647 OMPBuilder.finalize(); 3648 EXPECT_FALSE(verifyModule(*M, &errs())); 3649 } 3650 3651 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) { 3652 OpenMPIRBuilder OMPBuilder(*M); 3653 OMPBuilder.initialize(); 3654 F->setName("func"); 3655 IRBuilder<> Builder(BB); 3656 3657 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3658 3659 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3660 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3661 XVal->setName("AtomicVar"); 3662 AllocaInst *VVal = Builder.CreateAlloca(Int32); 3663 VVal->setName("AtomicRead"); 3664 AtomicOrdering AO = AtomicOrdering::Monotonic; 3665 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3666 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 3667 3668 BasicBlock *EntryBB = BB; 3669 3670 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3671 LoadInst *AtomicLoad = nullptr; 3672 StoreInst *StoreofAtomic = nullptr; 3673 
3674 for (Instruction &Cur : *EntryBB) { 3675 if (isa<LoadInst>(Cur)) { 3676 AtomicLoad = cast<LoadInst>(&Cur); 3677 if (AtomicLoad->getPointerOperand() == XVal) 3678 continue; 3679 AtomicLoad = nullptr; 3680 } else if (isa<StoreInst>(Cur)) { 3681 StoreofAtomic = cast<StoreInst>(&Cur); 3682 if (StoreofAtomic->getPointerOperand() == VVal) 3683 continue; 3684 StoreofAtomic = nullptr; 3685 } 3686 } 3687 3688 EXPECT_NE(AtomicLoad, nullptr); 3689 EXPECT_TRUE(AtomicLoad->isAtomic()); 3690 3691 EXPECT_NE(StoreofAtomic, nullptr); 3692 EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad); 3693 3694 Builder.CreateRetVoid(); 3695 OMPBuilder.finalize(); 3696 3697 EXPECT_FALSE(verifyModule(*M, &errs())); 3698 } 3699 3700 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) { 3701 OpenMPIRBuilder OMPBuilder(*M); 3702 OMPBuilder.initialize(); 3703 F->setName("func"); 3704 IRBuilder<> Builder(BB); 3705 3706 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3707 3708 LLVMContext &Ctx = M->getContext(); 3709 Type *Float32 = Type::getFloatTy(Ctx); 3710 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3711 XVal->setName("AtomicVar"); 3712 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3713 AtomicOrdering AO = AtomicOrdering::Monotonic; 3714 Constant *ValToWrite = ConstantFP::get(Float32, 1.0); 3715 3716 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3717 3718 IntegerType *IntCastTy = 3719 IntegerType::get(M->getContext(), Float32->getScalarSizeInBits()); 3720 3721 Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy); 3722 3723 StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode()); 3724 EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast); 3725 EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal); 3726 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3727 3728 Builder.CreateRetVoid(); 3729 OMPBuilder.finalize(); 3730 EXPECT_FALSE(verifyModule(*M, &errs())); 3731 } 3732 3733 TEST_F(OpenMPIRBuilderTest, 
OMPAtomicWriteInt) { 3734 OpenMPIRBuilder OMPBuilder(*M); 3735 OMPBuilder.initialize(); 3736 F->setName("func"); 3737 IRBuilder<> Builder(BB); 3738 3739 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3740 3741 LLVMContext &Ctx = M->getContext(); 3742 IntegerType *Int32 = Type::getInt32Ty(Ctx); 3743 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3744 XVal->setName("AtomicVar"); 3745 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3746 AtomicOrdering AO = AtomicOrdering::Monotonic; 3747 ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3748 3749 BasicBlock *EntryBB = BB; 3750 3751 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3752 3753 StoreInst *StoreofAtomic = nullptr; 3754 3755 for (Instruction &Cur : *EntryBB) { 3756 if (isa<StoreInst>(Cur)) { 3757 StoreofAtomic = cast<StoreInst>(&Cur); 3758 if (StoreofAtomic->getPointerOperand() == XVal) 3759 continue; 3760 StoreofAtomic = nullptr; 3761 } 3762 } 3763 3764 EXPECT_NE(StoreofAtomic, nullptr); 3765 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3766 EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite); 3767 3768 Builder.CreateRetVoid(); 3769 OMPBuilder.finalize(); 3770 EXPECT_FALSE(verifyModule(*M, &errs())); 3771 } 3772 3773 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) { 3774 OpenMPIRBuilder OMPBuilder(*M); 3775 OMPBuilder.initialize(); 3776 F->setName("func"); 3777 IRBuilder<> Builder(BB); 3778 3779 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3780 3781 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3782 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3783 XVal->setName("AtomicVar"); 3784 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 3785 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3786 AtomicOrdering AO = AtomicOrdering::Monotonic; 3787 ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3788 Value *Expr = nullptr; 3789 AtomicRMWInst::BinOp 
RMWOp = AtomicRMWInst::Sub; 3790 bool IsXLHSInRHSPart = false; 3791 3792 BasicBlock *EntryBB = BB; 3793 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 3794 EntryBB->getFirstInsertionPt()); 3795 Value *Sub = nullptr; 3796 3797 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 3798 Sub = IRB.CreateSub(ConstVal, Atomic); 3799 return Sub; 3800 }; 3801 Builder.restoreIP(OMPBuilder.createAtomicUpdate( 3802 Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart)); 3803 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 3804 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 3805 EXPECT_NE(ContTI, nullptr); 3806 BasicBlock *EndBB = ContTI->getSuccessor(0); 3807 EXPECT_TRUE(ContTI->isConditional()); 3808 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 3809 EXPECT_NE(EndBB, nullptr); 3810 3811 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 3812 EXPECT_NE(Phi, nullptr); 3813 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 3814 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 3815 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 3816 3817 EXPECT_EQ(Sub->getNumUses(), 1U); 3818 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 3819 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 3820 3821 ExtractValueInst *ExVI1 = 3822 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 3823 EXPECT_NE(ExVI1, nullptr); 3824 AtomicCmpXchgInst *CmpExchg = 3825 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 3826 EXPECT_NE(CmpExchg, nullptr); 3827 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 3828 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 3829 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 3830 3831 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 3832 EXPECT_NE(Ld, nullptr); 3833 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 3834 3835 Builder.CreateRetVoid(); 3836 OMPBuilder.finalize(); 3837 EXPECT_FALSE(verifyModule(*M, &errs())); 3838 } 3839 3840 
/// createAtomicUpdate with a float `ConstVal - x` update (FSub): expects the
/// same compare-exchange retry loop shape as the integer case. The block
/// following the entry block starts with a two-input PHI, loops back to itself
/// on its conditional branch, and contains a cmpxchg on XVal whose new value
/// is loaded from the temporary that the update result was stored to.
TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  Type *FloatTy = Type::getFloatTy(M->getContext());
  AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
  XVal->setName("AtomicVar");
  Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
  OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
  Value *Expr = nullptr;
  AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
  bool IsXLHSInRHSPart = false;

  BasicBlock *EntryBB = BB;
  OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
                                          EntryBB->getFirstInsertionPt());
  // Set by UpdateOp; stays null if the callback is never invoked.
  Value *Sub = nullptr;

  auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
    Sub = IRB.CreateFSub(ConstVal, Atomic);
    return Sub;
  };
  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));

  // Use fatal assertions before every dereference so that unexpected IR makes
  // the test fail instead of crashing the whole test binary.
  BasicBlock *ContBB = EntryBB->getSingleSuccessor();
  ASSERT_NE(ContBB, nullptr);
  auto *ContTI = dyn_cast_or_null<BranchInst>(ContBB->getTerminator());
  ASSERT_NE(ContTI, nullptr);
  ASSERT_TRUE(ContTI->isConditional());
  BasicBlock *EndBB = ContTI->getSuccessor(0);
  EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
  EXPECT_NE(EndBB, nullptr);

  auto *Phi = dyn_cast<PHINode>(&ContBB->front());
  ASSERT_NE(Phi, nullptr);
  ASSERT_EQ(Phi->getNumIncomingValues(), 2U);
  EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
  ASSERT_EQ(Phi->getIncomingBlock(1), ContBB);

  ASSERT_NE(Sub, nullptr);
  ASSERT_EQ(Sub->getNumUses(), 1U);
  auto *St = dyn_cast<StoreInst>(Sub->user_back());
  ASSERT_NE(St, nullptr);
  AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());

  auto *ExVI1 =
      dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
  ASSERT_NE(ExVI1, nullptr);
  auto *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
  ASSERT_NE(CmpExchg, nullptr);
  EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
  EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
  EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);

  auto *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
  ASSERT_NE(Ld, nullptr);
  EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// createAtomicUpdate with RMWOp = UMax, a non-null Expr, and an UpdateOp
/// callback that computes `ConstVal - x`: expects the same compare-exchange
/// retry loop shape as the other atomic update tests, built around the value
/// produced by the callback.
TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  Type *IntTy = Type::getInt32Ty(M->getContext());
  AllocaInst *XVal = Builder.CreateAlloca(IntTy);
  XVal->setName("AtomicVar");
  Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
  OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
  bool IsXLHSInRHSPart = false;

  BasicBlock *EntryBB = BB;
  OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
                                          EntryBB->getFirstInsertionPt());
  // Set by UpdateOp; stays null if the callback is never invoked.
  Value *Sub = nullptr;

  auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
    Sub = IRB.CreateSub(ConstVal, Atomic);
    return Sub;
  };
  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));

  // Use fatal assertions before every dereference so that unexpected IR makes
  // the test fail instead of crashing the whole test binary.
  BasicBlock *ContBB = EntryBB->getSingleSuccessor();
  ASSERT_NE(ContBB, nullptr);
  auto *ContTI = dyn_cast_or_null<BranchInst>(ContBB->getTerminator());
  ASSERT_NE(ContTI, nullptr);
  ASSERT_TRUE(ContTI->isConditional());
  BasicBlock *EndBB = ContTI->getSuccessor(0);
  EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
  EXPECT_NE(EndBB, nullptr);

  auto *Phi = dyn_cast<PHINode>(&ContBB->front());
  ASSERT_NE(Phi, nullptr);
  ASSERT_EQ(Phi->getNumIncomingValues(), 2U);
  EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
  ASSERT_EQ(Phi->getIncomingBlock(1), ContBB);

  ASSERT_NE(Sub, nullptr);
  ASSERT_EQ(Sub->getNumUses(), 1U);
  auto *St = dyn_cast<StoreInst>(Sub->user_back());
  ASSERT_NE(St, nullptr);
  AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());

  auto *ExVI1 =
      dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
  ASSERT_NE(ExVI1, nullptr);
  auto *CmpExchg = dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
  ASSERT_NE(CmpExchg, nullptr);
  EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
  EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
  EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);

  auto *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
  ASSERT_NE(Ld, nullptr);
  EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// createAtomicCapture for a postfix `x += 1` capture into VVal: expects no
/// extra basic blocks, an atomicrmw add on XVal directly after the
/// initializing store, and a store of the atomicrmw result to VVal.
TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  LLVMContext &Ctx = M->getContext();
  IntegerType *Int32 = Type::getInt32Ty(Ctx);
  AllocaInst *XVal = Builder.CreateAlloca(Int32);
  XVal->setName("AtomicVar");
  AllocaInst *VVal = Builder.CreateAlloca(Int32);
  VVal->setName("AtomicCapTar");
  StoreInst *Init =
      Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);

  OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
  OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
  bool IsXLHSInRHSPart = true;
  bool IsPostfixUpdate = true;
  bool UpdateExpr = true;

  BasicBlock *EntryBB = BB;
  OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
                                          EntryBB->getFirstInsertionPt());

  // integer update - not used
  auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };

  Builder.restoreIP(OMPBuilder.createAtomicCapture(
      Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
      IsPostfixUpdate, IsXLHSInRHSPart));
  EXPECT_EQ(EntryBB->getParent()->size(), 1U);

  // Use fatal assertions before every dereference so that unexpected IR makes
  // the test fail instead of crashing the whole test binary.
  auto *ARWM = dyn_cast_or_null<AtomicRMWInst>(Init->getNextNode());
  ASSERT_NE(ARWM, nullptr);
  EXPECT_EQ(ARWM->getPointerOperand(), XVal);
  EXPECT_EQ(ARWM->getOperation(), RMWOp);
  // user_back() requires at least one user.
  ASSERT_FALSE(ARWM->user_empty());
  auto *St = dyn_cast<StoreInst>(ARWM->user_back());
  ASSERT_NE(St, nullptr);
  EXPECT_EQ(St->getPointerOperand(), VVal);

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

/// createAtomicCompare without capture, issued three times on the same
/// variable: a signed MAX with IsXBinopExpr = true, an unsigned MAX with
/// IsXBinopExpr = false, and an EQ compare-and-swap. Expects everything to
/// stay in the entry block (five instructions in total) as atomicrmw min,
/// atomicrmw umax and cmpxchg, in that order, after the initializing store.
TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  LLVMContext &Ctx = M->getContext();
  IntegerType *Int32 = Type::getInt32Ty(Ctx);
  AllocaInst *XVal = Builder.CreateAlloca(Int32);
  XVal->setName("x");
  StoreInst *Init =
      Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);

  OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
  OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
  // V and R are not used in atomic compare
  OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
  OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
  AtomicOrdering AO = AtomicOrdering::Monotonic;
  // NOTE(review): Expr and D are both the i32 constant 1; if LLVM uniques
  // these constants, the compare-operand and new-value checks on the cmpxchg
  // below cannot tell the two apart. Consider distinct values.
  ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
  OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;

  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));

  BasicBlock *EntryBB = BB;
  EXPECT_EQ(EntryBB->getParent()->size(), 1U);
  EXPECT_EQ(EntryBB->size(), 5U);

  // Use fatal assertions before every dereference so that unexpected IR makes
  // the test fail instead of crashing the whole test binary.
  auto *ARWM1 = dyn_cast_or_null<AtomicRMWInst>(Init->getNextNode());
  ASSERT_NE(ARWM1, nullptr);
  EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
  EXPECT_EQ(ARWM1->getValOperand(), Expr);
  EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);

  auto *ARWM2 = dyn_cast_or_null<AtomicRMWInst>(ARWM1->getNextNode());
  ASSERT_NE(ARWM2, nullptr);
  EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
  EXPECT_EQ(ARWM2->getValOperand(), Expr);
  EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);

  auto *AXCHG = dyn_cast_or_null<AtomicCmpXchgInst>(ARWM2->getNextNode());
  ASSERT_NE(AXCHG, nullptr);
  EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
  EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
  EXPECT_EQ(AXCHG->getNewValOperand(), D);

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  LLVMContext &Ctx = M->getContext();
  IntegerType *Int32 = Type::getInt32Ty(Ctx);
  AllocaInst *XVal = Builder.CreateAlloca(Int32);
  XVal->setName("x");
  AllocaInst *VVal = Builder.CreateAlloca(Int32);
  VVal->setName("v");
  AllocaInst *RVal = Builder.CreateAlloca(Int32);
  RVal->setName("r");

  StoreInst *Init =
      Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);

  OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
  OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
  OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
  OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
  OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};

  AtomicOrdering AO = AtomicOrdering::Monotonic;
  ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
  OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
  OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;

  // { cond-update-stmt v = x; }
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
      /* IsPostfixUpdate */ false,
      /* IsFailOnly */ false));
  // { v = x; cond-update-stmt }
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
      /* IsPostfixUpdate */ true,
      /* IsFailOnly */ false));
  // if(x == e) { x = d; } else { v = x; }
  Builder.restoreIP(OMPBuilder.createAtomicCompare(
      Builder, X, V, NoR, Expr,
D, AO, OpEQ, /* IsXBinopExpr */ true, 4128 /* IsPostfixUpdate */ false, 4129 /* IsFailOnly */ true)); 4130 // { r = x == e; if(r) { x = d; } } 4131 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4132 Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4133 /* IsPostfixUpdate */ false, 4134 /* IsFailOnly */ false)); 4135 // { r = x == e; if(r) { x = d; } else { v = x; } } 4136 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4137 Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4138 /* IsPostfixUpdate */ false, 4139 /* IsFailOnly */ true)); 4140 4141 // { v = x; cond-update-stmt } 4142 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4143 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true, 4144 /* IsPostfixUpdate */ true, 4145 /* IsFailOnly */ false)); 4146 // { cond-update-stmt v = x; } 4147 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4148 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false, 4149 /* IsPostfixUpdate */ false, 4150 /* IsFailOnly */ false)); 4151 4152 BasicBlock *EntryBB = BB; 4153 EXPECT_EQ(EntryBB->getParent()->size(), 5U); 4154 BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode()); 4155 EXPECT_NE(Cont1, nullptr); 4156 BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode()); 4157 EXPECT_NE(Exit1, nullptr); 4158 BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode()); 4159 EXPECT_NE(Cont2, nullptr); 4160 BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode()); 4161 EXPECT_NE(Exit2, nullptr); 4162 4163 AtomicCmpXchgInst *CmpXchg1 = 4164 dyn_cast<AtomicCmpXchgInst>(Init->getNextNode()); 4165 EXPECT_NE(CmpXchg1, nullptr); 4166 EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal); 4167 EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr); 4168 EXPECT_EQ(CmpXchg1->getNewValOperand(), D); 4169 ExtractValueInst *ExtVal1 = 4170 dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode()); 4171 EXPECT_NE(ExtVal1, nullptr); 4172 EXPECT_EQ(ExtVal1->getAggregateOperand(), 
CmpXchg1); 4173 EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U)); 4174 ExtractValueInst *ExtVal2 = 4175 dyn_cast<ExtractValueInst>(ExtVal1->getNextNode()); 4176 EXPECT_NE(ExtVal2, nullptr); 4177 EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1); 4178 EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U)); 4179 SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode()); 4180 EXPECT_NE(Sel1, nullptr); 4181 EXPECT_EQ(Sel1->getCondition(), ExtVal2); 4182 EXPECT_EQ(Sel1->getTrueValue(), Expr); 4183 EXPECT_EQ(Sel1->getFalseValue(), ExtVal1); 4184 StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode()); 4185 EXPECT_NE(Store1, nullptr); 4186 EXPECT_EQ(Store1->getPointerOperand(), VVal); 4187 EXPECT_EQ(Store1->getValueOperand(), Sel1); 4188 4189 AtomicCmpXchgInst *CmpXchg2 = 4190 dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode()); 4191 EXPECT_NE(CmpXchg2, nullptr); 4192 EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal); 4193 EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr); 4194 EXPECT_EQ(CmpXchg2->getNewValOperand(), D); 4195 ExtractValueInst *ExtVal3 = 4196 dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode()); 4197 EXPECT_NE(ExtVal3, nullptr); 4198 EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2); 4199 EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U)); 4200 StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode()); 4201 EXPECT_NE(Store2, nullptr); 4202 EXPECT_EQ(Store2->getPointerOperand(), VVal); 4203 EXPECT_EQ(Store2->getValueOperand(), ExtVal3); 4204 4205 AtomicCmpXchgInst *CmpXchg3 = 4206 dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode()); 4207 EXPECT_NE(CmpXchg3, nullptr); 4208 EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal); 4209 EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr); 4210 EXPECT_EQ(CmpXchg3->getNewValOperand(), D); 4211 ExtractValueInst *ExtVal4 = 4212 dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode()); 4213 EXPECT_NE(ExtVal4, nullptr); 4214 EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3); 4215 
EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U)); 4216 ExtractValueInst *ExtVal5 = 4217 dyn_cast<ExtractValueInst>(ExtVal4->getNextNode()); 4218 EXPECT_NE(ExtVal5, nullptr); 4219 EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3); 4220 EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U)); 4221 BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode()); 4222 EXPECT_NE(Br1, nullptr); 4223 EXPECT_EQ(Br1->isConditional(), true); 4224 EXPECT_EQ(Br1->getCondition(), ExtVal5); 4225 EXPECT_EQ(Br1->getSuccessor(0), Exit1); 4226 EXPECT_EQ(Br1->getSuccessor(1), Cont1); 4227 4228 StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front()); 4229 EXPECT_NE(Store3, nullptr); 4230 EXPECT_EQ(Store3->getPointerOperand(), VVal); 4231 EXPECT_EQ(Store3->getValueOperand(), ExtVal4); 4232 BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode()); 4233 EXPECT_NE(Br2, nullptr); 4234 EXPECT_EQ(Br2->isUnconditional(), true); 4235 EXPECT_EQ(Br2->getSuccessor(0), Exit1); 4236 4237 AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front()); 4238 EXPECT_NE(CmpXchg4, nullptr); 4239 EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal); 4240 EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr); 4241 EXPECT_EQ(CmpXchg4->getNewValOperand(), D); 4242 ExtractValueInst *ExtVal6 = 4243 dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode()); 4244 EXPECT_NE(ExtVal6, nullptr); 4245 EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4); 4246 EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U)); 4247 ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode()); 4248 EXPECT_NE(ZExt1, nullptr); 4249 EXPECT_EQ(ZExt1->getDestTy(), Int32); 4250 StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode()); 4251 EXPECT_NE(Store4, nullptr); 4252 EXPECT_EQ(Store4->getPointerOperand(), RVal); 4253 EXPECT_EQ(Store4->getValueOperand(), ZExt1); 4254 4255 AtomicCmpXchgInst *CmpXchg5 = 4256 dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode()); 4257 EXPECT_NE(CmpXchg5, nullptr); 4258 
EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal); 4259 EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr); 4260 EXPECT_EQ(CmpXchg5->getNewValOperand(), D); 4261 ExtractValueInst *ExtVal7 = 4262 dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode()); 4263 EXPECT_NE(ExtVal7, nullptr); 4264 EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5); 4265 EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U)); 4266 ExtractValueInst *ExtVal8 = 4267 dyn_cast<ExtractValueInst>(ExtVal7->getNextNode()); 4268 EXPECT_NE(ExtVal8, nullptr); 4269 EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5); 4270 EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U)); 4271 BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode()); 4272 EXPECT_NE(Br3, nullptr); 4273 EXPECT_EQ(Br3->isConditional(), true); 4274 EXPECT_EQ(Br3->getCondition(), ExtVal8); 4275 EXPECT_EQ(Br3->getSuccessor(0), Exit2); 4276 EXPECT_EQ(Br3->getSuccessor(1), Cont2); 4277 4278 StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front()); 4279 EXPECT_NE(Store5, nullptr); 4280 EXPECT_EQ(Store5->getPointerOperand(), VVal); 4281 EXPECT_EQ(Store5->getValueOperand(), ExtVal7); 4282 BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode()); 4283 EXPECT_NE(Br4, nullptr); 4284 EXPECT_EQ(Br4->isUnconditional(), true); 4285 EXPECT_EQ(Br4->getSuccessor(0), Exit2); 4286 4287 ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front()); 4288 EXPECT_NE(ExtVal9, nullptr); 4289 EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5); 4290 EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U)); 4291 ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode()); 4292 EXPECT_NE(ZExt2, nullptr); 4293 EXPECT_EQ(ZExt2->getDestTy(), Int32); 4294 StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode()); 4295 EXPECT_NE(Store6, nullptr); 4296 EXPECT_EQ(Store6->getPointerOperand(), RVal); 4297 EXPECT_EQ(Store6->getValueOperand(), ZExt2); 4298 4299 AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode()); 4300 
EXPECT_NE(ARWM1, nullptr); 4301 EXPECT_EQ(ARWM1->getPointerOperand(), XVal); 4302 EXPECT_EQ(ARWM1->getValOperand(), Expr); 4303 EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min); 4304 StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode()); 4305 EXPECT_NE(Store7, nullptr); 4306 EXPECT_EQ(Store7->getPointerOperand(), VVal); 4307 EXPECT_EQ(Store7->getValueOperand(), ARWM1); 4308 4309 AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode()); 4310 EXPECT_NE(ARWM2, nullptr); 4311 EXPECT_EQ(ARWM2->getPointerOperand(), XVal); 4312 EXPECT_EQ(ARWM2->getValOperand(), Expr); 4313 EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max); 4314 CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode()); 4315 EXPECT_NE(Cmp1, nullptr); 4316 EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT); 4317 EXPECT_EQ(Cmp1->getOperand(0), ARWM2); 4318 EXPECT_EQ(Cmp1->getOperand(1), Expr); 4319 SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode()); 4320 EXPECT_NE(Sel2, nullptr); 4321 EXPECT_EQ(Sel2->getCondition(), Cmp1); 4322 EXPECT_EQ(Sel2->getTrueValue(), Expr); 4323 EXPECT_EQ(Sel2->getFalseValue(), ARWM2); 4324 StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode()); 4325 EXPECT_NE(Store8, nullptr); 4326 EXPECT_EQ(Store8->getPointerOperand(), VVal); 4327 EXPECT_EQ(Store8->getValueOperand(), Sel2); 4328 4329 Builder.CreateRetVoid(); 4330 OMPBuilder.finalize(); 4331 EXPECT_FALSE(verifyModule(*M, &errs())); 4332 } 4333 4334 TEST_F(OpenMPIRBuilderTest, CreateTeams) { 4335 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4336 OpenMPIRBuilder OMPBuilder(*M); 4337 OMPBuilder.Config.IsTargetDevice = false; 4338 OMPBuilder.initialize(); 4339 F->setName("func"); 4340 IRBuilder<> Builder(BB); 4341 4342 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 4343 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 4344 Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load"); 4345 4346 auto BodyGenCB = [&](InsertPointTy 
AllocaIP, InsertPointTy CodeGenIP) { 4347 Builder.restoreIP(AllocaIP); 4348 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 4349 "bodygen.alloca128"); 4350 4351 Builder.restoreIP(CodeGenIP); 4352 // Loading and storing captured pointer and values 4353 Builder.CreateStore(Val128, Local128); 4354 Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 4355 "bodygen.load32"); 4356 4357 LoadInst *PrivLoad128 = Builder.CreateLoad( 4358 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 4359 Value *Cmp = Builder.CreateICmpNE( 4360 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 4361 Instruction *ThenTerm, *ElseTerm; 4362 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 4363 &ThenTerm, &ElseTerm); 4364 }; 4365 4366 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4367 Builder.restoreIP(OMPBuilder.createTeams( 4368 Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, 4369 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4370 4371 OMPBuilder.finalize(); 4372 Builder.CreateRetVoid(); 4373 4374 EXPECT_FALSE(verifyModule(*M, &errs())); 4375 4376 CallInst *TeamsForkCall = dyn_cast<CallInst>( 4377 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams) 4378 ->user_back()); 4379 4380 // Verify the Ident argument 4381 GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0)); 4382 ASSERT_NE(Ident, nullptr); 4383 EXPECT_TRUE(Ident->hasInitializer()); 4384 Constant *Initializer = Ident->getInitializer(); 4385 GlobalVariable *SrcStrGlob = 4386 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 4387 ASSERT_NE(SrcStrGlob, nullptr); 4388 ConstantDataArray *SrcSrc = 4389 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 4390 ASSERT_NE(SrcSrc, nullptr); 4391 4392 // Verify the outlined function signature. 
4393 Function *OutlinedFn = 4394 dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts()); 4395 ASSERT_NE(OutlinedFn, nullptr); 4396 EXPECT_FALSE(OutlinedFn->isDeclaration()); 4397 EXPECT_TRUE(OutlinedFn->arg_size() >= 3); 4398 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid 4399 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid 4400 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 4401 Builder.getPtrTy()); // captured args 4402 4403 // Check for TruncInst and ICmpInst in the outlined function. 4404 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4405 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 4406 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4407 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 4408 } 4409 4410 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { 4411 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4412 OpenMPIRBuilder OMPBuilder(*M); 4413 OMPBuilder.Config.IsTargetDevice = false; 4414 OMPBuilder.initialize(); 4415 F->setName("func"); 4416 IRBuilder<> &Builder = OMPBuilder.Builder; 4417 Builder.SetInsertPoint(BB); 4418 4419 Function *FakeFunction = 4420 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4421 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4422 4423 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4424 Builder.restoreIP(CodeGenIP); 4425 Builder.CreateCall(FakeFunction, {}); 4426 }; 4427 4428 // `F` has an argument - an integer, so we use that as the thread limit. 
4429 Builder.restoreIP(OMPBuilder.createTeams(/*=*/Builder, BodyGenCB, 4430 /*NumTeamsLower=*/nullptr, 4431 /*NumTeamsUpper=*/nullptr, 4432 /*ThreadLimit=*/F->arg_begin(), 4433 /*IfExpr=*/nullptr)); 4434 4435 Builder.CreateRetVoid(); 4436 OMPBuilder.finalize(); 4437 4438 ASSERT_FALSE(verifyModule(*M)); 4439 4440 CallInst *PushNumTeamsCallInst = 4441 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4442 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4443 4444 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0)); 4445 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0)); 4446 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin()); 4447 4448 // Verifying that the next instruction to execute is kmpc_fork_teams 4449 BranchInst *BrInst = 4450 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4451 ASSERT_NE(BrInst, nullptr); 4452 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4453 Instruction *NextInstruction = 4454 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4455 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4456 ASSERT_NE(ForkTeamsCI, nullptr); 4457 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4458 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4459 } 4460 4461 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { 4462 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4463 OpenMPIRBuilder OMPBuilder(*M); 4464 OMPBuilder.Config.IsTargetDevice = false; 4465 OMPBuilder.initialize(); 4466 F->setName("func"); 4467 IRBuilder<> &Builder = OMPBuilder.Builder; 4468 Builder.SetInsertPoint(BB); 4469 4470 Function *FakeFunction = 4471 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4472 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4473 4474 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4475 Builder.restoreIP(CodeGenIP); 4476 Builder.CreateCall(FakeFunction, {}); 4477 }; 4478 4479 // 
`F` already has an integer argument, so we use that as upper bound to 4480 // `num_teams` 4481 Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, 4482 /*NumTeamsLower=*/nullptr, 4483 /*NumTeamsUpper=*/F->arg_begin(), 4484 /*ThreadLimit=*/nullptr, 4485 /*IfExpr=*/nullptr)); 4486 4487 Builder.CreateRetVoid(); 4488 OMPBuilder.finalize(); 4489 4490 ASSERT_FALSE(verifyModule(*M)); 4491 4492 CallInst *PushNumTeamsCallInst = 4493 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4494 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4495 4496 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin()); 4497 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin()); 4498 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0)); 4499 4500 // Verifying that the next instruction to execute is kmpc_fork_teams 4501 BranchInst *BrInst = 4502 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4503 ASSERT_NE(BrInst, nullptr); 4504 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4505 Instruction *NextInstruction = 4506 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4507 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4508 ASSERT_NE(ForkTeamsCI, nullptr); 4509 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4510 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4511 } 4512 4513 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { 4514 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4515 OpenMPIRBuilder OMPBuilder(*M); 4516 OMPBuilder.Config.IsTargetDevice = false; 4517 OMPBuilder.initialize(); 4518 F->setName("func"); 4519 IRBuilder<> &Builder = OMPBuilder.Builder; 4520 Builder.SetInsertPoint(BB); 4521 4522 Function *FakeFunction = 4523 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4524 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4525 4526 Value *NumTeamsLower = 4527 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), 
"numTeamsLower"); 4528 Value *NumTeamsUpper = 4529 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); 4530 4531 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4532 Builder.restoreIP(CodeGenIP); 4533 Builder.CreateCall(FakeFunction, {}); 4534 }; 4535 4536 // `F` already has an integer argument, so we use that as upper bound to 4537 // `num_teams` 4538 Builder.restoreIP( 4539 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, 4540 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4541 4542 Builder.CreateRetVoid(); 4543 OMPBuilder.finalize(); 4544 4545 ASSERT_FALSE(verifyModule(*M)); 4546 4547 CallInst *PushNumTeamsCallInst = 4548 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4549 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4550 4551 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower); 4552 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper); 4553 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0)); 4554 4555 // Verifying that the next instruction to execute is kmpc_fork_teams 4556 BranchInst *BrInst = 4557 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4558 ASSERT_NE(BrInst, nullptr); 4559 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4560 Instruction *NextInstruction = 4561 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4562 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4563 ASSERT_NE(ForkTeamsCI, nullptr); 4564 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4565 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4566 } 4567 4568 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { 4569 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4570 OpenMPIRBuilder OMPBuilder(*M); 4571 OMPBuilder.Config.IsTargetDevice = false; 4572 OMPBuilder.initialize(); 4573 F->setName("func"); 4574 IRBuilder<> &Builder = OMPBuilder.Builder; 4575 
Builder.SetInsertPoint(BB); 4576 4577 BasicBlock *CodegenBB = splitBB(Builder, true); 4578 Builder.SetInsertPoint(CodegenBB); 4579 4580 // Generate values for `num_teams` and `thread_limit` using the first argument 4581 // of the testing function. 4582 Value *NumTeamsLower = 4583 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower"); 4584 Value *NumTeamsUpper = 4585 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); 4586 Value *ThreadLimit = 4587 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit"); 4588 4589 Function *FakeFunction = 4590 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4591 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4592 4593 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4594 Builder.restoreIP(CodeGenIP); 4595 Builder.CreateCall(FakeFunction, {}); 4596 }; 4597 4598 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4599 Builder.restoreIP(OMPBuilder.createTeams( 4600 Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr)); 4601 4602 Builder.CreateRetVoid(); 4603 OMPBuilder.finalize(); 4604 4605 ASSERT_FALSE(verifyModule(*M)); 4606 4607 CallInst *PushNumTeamsCallInst = 4608 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4609 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4610 4611 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower); 4612 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper); 4613 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit); 4614 4615 // Verifying that the next instruction to execute is kmpc_fork_teams 4616 BranchInst *BrInst = 4617 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4618 ASSERT_NE(BrInst, nullptr); 4619 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4620 Instruction *NextInstruction = 4621 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4622 CallInst *ForkTeamsCI = 
dyn_cast_if_present<CallInst>(NextInstruction); 4623 ASSERT_NE(ForkTeamsCI, nullptr); 4624 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4625 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4626 } 4627 4628 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { 4629 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4630 OpenMPIRBuilder OMPBuilder(*M); 4631 OMPBuilder.Config.IsTargetDevice = false; 4632 OMPBuilder.initialize(); 4633 F->setName("func"); 4634 IRBuilder<> &Builder = OMPBuilder.Builder; 4635 Builder.SetInsertPoint(BB); 4636 4637 Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(), 4638 Builder.CreateAlloca(Builder.getInt1Ty())); 4639 4640 Function *FakeFunction = 4641 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4642 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4643 4644 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4645 Builder.restoreIP(CodeGenIP); 4646 Builder.CreateCall(FakeFunction, {}); 4647 }; 4648 4649 // `F` already has an integer argument, so we use that as upper bound to 4650 // `num_teams` 4651 Builder.restoreIP(OMPBuilder.createTeams( 4652 Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, 4653 /*ThreadLimit=*/nullptr, IfExpr)); 4654 4655 Builder.CreateRetVoid(); 4656 OMPBuilder.finalize(); 4657 4658 ASSERT_FALSE(verifyModule(*M)); 4659 4660 CallInst *PushNumTeamsCallInst = 4661 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4662 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4663 Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2); 4664 Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3); 4665 Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4); 4666 4667 // Check the lower_bound 4668 ASSERT_NE(NumTeamsLower, nullptr); 4669 SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower); 4670 ASSERT_NE(NumTeamsLowerSelectInst, nullptr); 4671 
EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr); 4672 EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0)); 4673 EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); 4674 4675 // Check the upper_bound 4676 ASSERT_NE(NumTeamsUpper, nullptr); 4677 SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper); 4678 ASSERT_NE(NumTeamsUpperSelectInst, nullptr); 4679 EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr); 4680 EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0)); 4681 EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); 4682 4683 // Check thread_limit 4684 EXPECT_EQ(ThreadLimit, Builder.getInt32(0)); 4685 } 4686 4687 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { 4688 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4689 OpenMPIRBuilder OMPBuilder(*M); 4690 OMPBuilder.Config.IsTargetDevice = false; 4691 OMPBuilder.initialize(); 4692 F->setName("func"); 4693 IRBuilder<> &Builder = OMPBuilder.Builder; 4694 Builder.SetInsertPoint(BB); 4695 4696 Value *IfExpr = Builder.CreateLoad( 4697 Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty())); 4698 Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5)); 4699 Value *NumTeamsUpper = 4700 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10)); 4701 Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20)); 4702 4703 Function *FakeFunction = 4704 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4705 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4706 4707 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4708 Builder.restoreIP(CodeGenIP); 4709 Builder.CreateCall(FakeFunction, {}); 4710 }; 4711 4712 // `F` already has an integer argument, so we use that as upper bound to 4713 // `num_teams` 4714 Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, 4715 
NumTeamsUpper, ThreadLimit, IfExpr)); 4716 4717 Builder.CreateRetVoid(); 4718 OMPBuilder.finalize(); 4719 4720 ASSERT_FALSE(verifyModule(*M)); 4721 4722 CallInst *PushNumTeamsCallInst = 4723 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4724 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4725 Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2); 4726 Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3); 4727 Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4); 4728 4729 // Get the boolean conversion of if expression 4730 ASSERT_EQ(IfExpr->getNumUses(), 1U); 4731 User *IfExprInst = IfExpr->user_back(); 4732 ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst); 4733 ASSERT_NE(IfExprCmpInst, nullptr); 4734 EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE); 4735 EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr); 4736 EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0)); 4737 4738 // Check the lower_bound 4739 ASSERT_NE(NumTeamsLowerArg, nullptr); 4740 SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg); 4741 ASSERT_NE(NumTeamsLowerSelectInst, nullptr); 4742 EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst); 4743 EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower); 4744 EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); 4745 4746 // Check the upper_bound 4747 ASSERT_NE(NumTeamsUpperArg, nullptr); 4748 SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg); 4749 ASSERT_NE(NumTeamsUpperSelectInst, nullptr); 4750 EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst); 4751 EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper); 4752 EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); 4753 4754 // Check thread_limit 4755 EXPECT_EQ(ThreadLimitArg, ThreadLimit); 4756 } 4757 4758 /// Returns the single instruction of InstTy type in BB that uses the value V. 
4759 /// If there is more than one such instruction, returns null. 4760 template <typename InstTy> 4761 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) { 4762 InstTy *Result = nullptr; 4763 for (User *U : V->users()) { 4764 auto *Inst = dyn_cast<InstTy>(U); 4765 if (!Inst || Inst->getParent() != BB) 4766 continue; 4767 if (Result) { 4768 if (auto *SI = dyn_cast<StoreInst>(Inst)) { 4769 if (V == SI->getValueOperand()) 4770 continue; 4771 } else { 4772 return nullptr; 4773 } 4774 } 4775 Result = Inst; 4776 } 4777 return Result; 4778 } 4779 4780 /// Returns true if BB contains a simple binary reduction that loads a value 4781 /// from Accum, performs some binary operation with it, and stores it back to 4782 /// Accum. 4783 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB, 4784 Instruction::BinaryOps *OpCode = nullptr) { 4785 StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB); 4786 if (!Store) 4787 return false; 4788 auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0)); 4789 if (!Stored) 4790 return false; 4791 if (OpCode && *OpCode != Stored->getOpcode()) 4792 return false; 4793 auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0)); 4794 return Load && Load->getOperand(0) == Accum; 4795 } 4796 4797 /// Returns true if BB contains a binary reduction that reduces V using a binary 4798 /// operator into an accumulator that is a function argument. 
4799 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) { 4800 auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB); 4801 if (!ReductionOp) 4802 return false; 4803 4804 auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0)); 4805 if (!GlobalLoad) 4806 return false; 4807 4808 auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB); 4809 if (!Store) 4810 return false; 4811 4812 return Store->getPointerOperand() == GlobalLoad->getPointerOperand() && 4813 isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand())); 4814 } 4815 4816 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and 4817 /// [0, 1], respectively, and assigns results of these instructions to Zero and 4818 /// One. Returns true on success, false on failure or if such instructions are 4819 /// not unique among the users of Ptr. 4820 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) { 4821 Zero = nullptr; 4822 One = nullptr; 4823 for (User *U : Ptr->users()) { 4824 if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { 4825 if (GEP->getNumIndices() != 2) 4826 continue; 4827 auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); 4828 auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2)); 4829 EXPECT_NE(FirstIdx, nullptr); 4830 EXPECT_NE(SecondIdx, nullptr); 4831 4832 EXPECT_TRUE(FirstIdx->isZero()); 4833 if (SecondIdx->isZero()) { 4834 if (Zero) 4835 return false; 4836 Zero = GEP; 4837 } else if (SecondIdx->isOne()) { 4838 if (One) 4839 return false; 4840 One = GEP; 4841 } else { 4842 return false; 4843 } 4844 } 4845 } 4846 return Zero != nullptr && One != nullptr; 4847 } 4848 4849 static OpenMPIRBuilder::InsertPointTy 4850 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 4851 Value *&Result) { 4852 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4853 Result = Builder.CreateFAdd(LHS, RHS, "red.add"); 4854 return Builder.saveIP(); 4855 } 4856 4857 static 
OpenMPIRBuilder::InsertPointTy 4858 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 4859 Value *RHS) { 4860 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4861 Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 4862 Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt, 4863 AtomicOrdering::Monotonic); 4864 return Builder.saveIP(); 4865 } 4866 4867 static OpenMPIRBuilder::InsertPointTy 4868 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 4869 Value *&Result) { 4870 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4871 Result = Builder.CreateXor(LHS, RHS, "red.xor"); 4872 return Builder.saveIP(); 4873 } 4874 4875 static OpenMPIRBuilder::InsertPointTy 4876 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 4877 Value *RHS) { 4878 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 4879 Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 4880 Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt, 4881 AtomicOrdering::Monotonic); 4882 return Builder.saveIP(); 4883 } 4884 4885 TEST_F(OpenMPIRBuilderTest, CreateReductions) { 4886 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4887 OpenMPIRBuilder OMPBuilder(*M); 4888 OMPBuilder.Config.IsTargetDevice = false; 4889 OMPBuilder.initialize(); 4890 F->setName("func"); 4891 IRBuilder<> Builder(BB); 4892 4893 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 4894 Builder.CreateBr(EnterBB); 4895 Builder.SetInsertPoint(EnterBB); 4896 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4897 4898 // Create variables to be reduced. 
4899 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 4900 F->getEntryBlock().getFirstInsertionPt()); 4901 Type *SumType = Builder.getFloatTy(); 4902 Type *XorType = Builder.getInt32Ty(); 4903 Value *SumReduced; 4904 Value *XorReduced; 4905 { 4906 IRBuilderBase::InsertPointGuard Guard(Builder); 4907 Builder.restoreIP(OuterAllocaIP); 4908 SumReduced = Builder.CreateAlloca(SumType); 4909 XorReduced = Builder.CreateAlloca(XorType); 4910 } 4911 4912 // Store initial values of reductions into global variables. 4913 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 4914 Builder.CreateStore(Builder.getInt32(1), XorReduced); 4915 4916 // The loop body computes two reductions: 4917 // sum of (float) thread-id; 4918 // xor of thread-id; 4919 // and store the result in global variables. 4920 InsertPointTy BodyIP, BodyAllocaIP; 4921 auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { 4922 IRBuilderBase::InsertPointGuard Guard(Builder); 4923 Builder.restoreIP(CodeGenIP); 4924 4925 uint32_t StrSize; 4926 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 4927 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 4928 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 4929 Value *SumLocal = 4930 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 4931 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 4932 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 4933 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 4934 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 4935 Builder.CreateStore(Sum, SumReduced); 4936 Builder.CreateStore(Xor, XorReduced); 4937 4938 BodyIP = Builder.saveIP(); 4939 BodyAllocaIP = InnerAllocaIP; 4940 }; 4941 4942 // Privatization for reduction creates local copies of reduction variables and 4943 // initializes them to reduction-neutral values. 
4944 Value *SumPrivatized; 4945 Value *XorPrivatized; 4946 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 4947 Value &Original, Value &Inner, Value *&ReplVal) { 4948 IRBuilderBase::InsertPointGuard Guard(Builder); 4949 Builder.restoreIP(InnerAllocaIP); 4950 if (&Original == SumReduced) { 4951 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 4952 ReplVal = SumPrivatized; 4953 } else if (&Original == XorReduced) { 4954 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 4955 ReplVal = XorPrivatized; 4956 } else { 4957 ReplVal = &Inner; 4958 return CodeGenIP; 4959 } 4960 4961 Builder.restoreIP(CodeGenIP); 4962 if (&Original == SumReduced) 4963 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 4964 SumPrivatized); 4965 else if (&Original == XorReduced) 4966 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 4967 4968 return Builder.saveIP(); 4969 }; 4970 4971 // Do nothing in finalization. 4972 auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; 4973 4974 InsertPointTy AfterIP = 4975 OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, 4976 /* IfCondition */ nullptr, 4977 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 4978 /* IsCancellable */ false); 4979 Builder.restoreIP(AfterIP); 4980 4981 OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { 4982 {SumType, SumReduced, SumPrivatized, 4983 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, 4984 /*ReductionGenClang=*/nullptr, sumAtomicReduction}, 4985 {XorType, XorReduced, XorPrivatized, 4986 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, 4987 /*ReductionGenClang=*/nullptr, xorAtomicReduction}}; 4988 OMPBuilder.Config.setIsGPU(false); 4989 4990 bool ReduceVariableByRef[] = {false, false}; 4991 4992 OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos, 4993 ReduceVariableByRef); 4994 4995 Builder.restoreIP(AfterIP); 4996 Builder.CreateRetVoid(); 4997 4998 
OMPBuilder.finalize(F); 4999 5000 // The IR must be valid. 5001 EXPECT_FALSE(verifyModule(*M)); 5002 5003 // Outlining must have happened. 5004 SmallVector<CallInst *> ForkCalls; 5005 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5006 ForkCalls); 5007 ASSERT_EQ(ForkCalls.size(), 1u); 5008 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5009 Function *Outlined = dyn_cast<Function>(CalleeVal); 5010 EXPECT_NE(Outlined, nullptr); 5011 5012 // Check that the lock variable was created with the expected name. 5013 GlobalVariable *LockVar = 5014 M->getGlobalVariable(".gomp_critical_user_.reduction.var"); 5015 EXPECT_NE(LockVar, nullptr); 5016 5017 // Find the allocation of a local array that will be used to call the runtime 5018 // reduciton function. 5019 BasicBlock &AllocBlock = Outlined->getEntryBlock(); 5020 Value *LocalArray = nullptr; 5021 for (Instruction &I : AllocBlock) { 5022 if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) { 5023 if (!Alloc->getAllocatedType()->isArrayTy() || 5024 !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy()) 5025 continue; 5026 LocalArray = Alloc; 5027 break; 5028 } 5029 } 5030 ASSERT_NE(LocalArray, nullptr); 5031 5032 // Find the call to the runtime reduction function. 5033 BasicBlock *BB = AllocBlock.getUniqueSuccessor(); 5034 Value *LocalArrayPtr = nullptr; 5035 Value *ReductionFnVal = nullptr; 5036 Value *SwitchArg = nullptr; 5037 for (Instruction &I : *BB) { 5038 if (CallInst *Call = dyn_cast<CallInst>(&I)) { 5039 if (Call->getCalledFunction() != 5040 OMPBuilder.getOrCreateRuntimeFunctionPtr( 5041 RuntimeFunction::OMPRTL___kmpc_reduce)) 5042 continue; 5043 LocalArrayPtr = Call->getOperand(4); 5044 ReductionFnVal = Call->getOperand(5); 5045 SwitchArg = Call; 5046 break; 5047 } 5048 } 5049 5050 // Check that the local array is passed to the function. 
5051 ASSERT_NE(LocalArrayPtr, nullptr); 5052 EXPECT_EQ(LocalArrayPtr, LocalArray); 5053 5054 // Find the GEP instructions preceding stores to the local array. 5055 Value *FirstArrayElemPtr = nullptr; 5056 Value *SecondArrayElemPtr = nullptr; 5057 EXPECT_EQ(LocalArray->getNumUses(), 3u); 5058 ASSERT_TRUE( 5059 findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr)); 5060 5061 // Check that the values stored into the local array are privatized reduction 5062 // variables. 5063 auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>( 5064 findStoredValue<GetElementPtrInst>(FirstArrayElemPtr)); 5065 auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>( 5066 findStoredValue<GetElementPtrInst>(SecondArrayElemPtr)); 5067 ASSERT_NE(FirstPrivatized, nullptr); 5068 ASSERT_NE(SecondPrivatized, nullptr); 5069 ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr)); 5070 EXPECT_TRUE(isSimpleBinaryReduction( 5071 FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5072 EXPECT_TRUE(isSimpleBinaryReduction( 5073 SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5074 5075 // Check that the result of the runtime reduction call is used for further 5076 // dispatch. 5077 ASSERT_EQ(SwitchArg->getNumUses(), 1u); 5078 SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin()); 5079 ASSERT_NE(Switch, nullptr); 5080 EXPECT_EQ(Switch->getNumSuccessors(), 3u); 5081 BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor(); 5082 BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor(); 5083 5084 // Non-atomic block contains reductions to the global reduction variable, 5085 // which is passed into the outlined function as an argument. 
5086 Value *FirstLoad = 5087 findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB); 5088 Value *SecondLoad = 5089 findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB); 5090 EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB)); 5091 EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB)); 5092 5093 // Atomic block also constains reductions to the global reduction variable. 5094 FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB); 5095 SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB); 5096 auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB); 5097 auto *SecondAtomic = 5098 findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB); 5099 ASSERT_NE(FirstAtomic, nullptr); 5100 Value *AtomicStorePointer = FirstAtomic->getPointerOperand(); 5101 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5102 ASSERT_NE(SecondAtomic, nullptr); 5103 AtomicStorePointer = SecondAtomic->getPointerOperand(); 5104 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5105 5106 // Check that the separate reduction function also performs (non-atomic) 5107 // reductions after extracting reduction variables from its arguments. 
5108 Function *ReductionFn = cast<Function>(ReductionFnVal); 5109 BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock(); 5110 Value *FirstLHSPtr; 5111 Value *SecondLHSPtr; 5112 ASSERT_TRUE( 5113 findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr)); 5114 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5115 ASSERT_NE(Opaque, nullptr); 5116 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5117 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5118 ASSERT_NE(Opaque, nullptr); 5119 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5120 5121 Value *FirstRHS; 5122 Value *SecondRHS; 5123 EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS)); 5124 } 5125 5126 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { 5127 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5128 OpenMPIRBuilder OMPBuilder(*M); 5129 OMPBuilder.Config.IsTargetDevice = false; 5130 OMPBuilder.initialize(); 5131 F->setName("func"); 5132 IRBuilder<> Builder(BB); 5133 5134 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 5135 Builder.CreateBr(EnterBB); 5136 Builder.SetInsertPoint(EnterBB); 5137 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5138 5139 // Create variables to be reduced. 5140 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 5141 F->getEntryBlock().getFirstInsertionPt()); 5142 Type *SumType = Builder.getFloatTy(); 5143 Type *XorType = Builder.getInt32Ty(); 5144 Value *SumReduced; 5145 Value *XorReduced; 5146 { 5147 IRBuilderBase::InsertPointGuard Guard(Builder); 5148 Builder.restoreIP(OuterAllocaIP); 5149 SumReduced = Builder.CreateAlloca(SumType); 5150 XorReduced = Builder.CreateAlloca(XorType); 5151 } 5152 5153 // Store initial values of reductions into global variables. 
5154 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 5155 Builder.CreateStore(Builder.getInt32(1), XorReduced); 5156 5157 InsertPointTy FirstBodyIP, FirstBodyAllocaIP; 5158 auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5159 InsertPointTy CodeGenIP) { 5160 IRBuilderBase::InsertPointGuard Guard(Builder); 5161 Builder.restoreIP(CodeGenIP); 5162 5163 uint32_t StrSize; 5164 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5165 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5166 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5167 Value *SumLocal = 5168 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 5169 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 5170 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 5171 Builder.CreateStore(Sum, SumReduced); 5172 5173 FirstBodyIP = Builder.saveIP(); 5174 FirstBodyAllocaIP = InnerAllocaIP; 5175 }; 5176 5177 InsertPointTy SecondBodyIP, SecondBodyAllocaIP; 5178 auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5179 InsertPointTy CodeGenIP) { 5180 IRBuilderBase::InsertPointGuard Guard(Builder); 5181 Builder.restoreIP(CodeGenIP); 5182 5183 uint32_t StrSize; 5184 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5185 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5186 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5187 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 5188 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 5189 Builder.CreateStore(Xor, XorReduced); 5190 5191 SecondBodyIP = Builder.saveIP(); 5192 SecondBodyAllocaIP = InnerAllocaIP; 5193 }; 5194 5195 // Privatization for reduction creates local copies of reduction variables and 5196 // initializes them to reduction-neutral values. The same privatization 5197 // callback is used for both loops, with dispatch based on the value being 5198 // privatized. 
5199 Value *SumPrivatized; 5200 Value *XorPrivatized; 5201 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 5202 Value &Original, Value &Inner, Value *&ReplVal) { 5203 IRBuilderBase::InsertPointGuard Guard(Builder); 5204 Builder.restoreIP(InnerAllocaIP); 5205 if (&Original == SumReduced) { 5206 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 5207 ReplVal = SumPrivatized; 5208 } else if (&Original == XorReduced) { 5209 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 5210 ReplVal = XorPrivatized; 5211 } else { 5212 ReplVal = &Inner; 5213 return CodeGenIP; 5214 } 5215 5216 Builder.restoreIP(CodeGenIP); 5217 if (&Original == SumReduced) 5218 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 5219 SumPrivatized); 5220 else if (&Original == XorReduced) 5221 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 5222 5223 return Builder.saveIP(); 5224 }; 5225 5226 // Do nothing in finalization. 5227 auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; 5228 5229 Builder.restoreIP( 5230 OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, 5231 FiniCB, /* IfCondition */ nullptr, 5232 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5233 /* IsCancellable */ false)); 5234 InsertPointTy AfterIP = OMPBuilder.createParallel( 5235 {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB, 5236 /* IfCondition */ nullptr, 5237 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5238 /* IsCancellable */ false); 5239 5240 OMPBuilder.Config.setIsGPU(false); 5241 bool ReduceVariableByRef[] = {false}; 5242 5243 OMPBuilder.createReductions( 5244 FirstBodyIP, FirstBodyAllocaIP, 5245 {{SumType, SumReduced, SumPrivatized, 5246 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, 5247 /*ReductionGenClang=*/nullptr, sumAtomicReduction}}, 5248 ReduceVariableByRef); 5249 OMPBuilder.createReductions( 5250 SecondBodyIP, SecondBodyAllocaIP, 5251 {{XorType, XorReduced, XorPrivatized, 
5252 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, 5253 /*ReductionGenClang=*/nullptr, xorAtomicReduction}}, 5254 ReduceVariableByRef); 5255 5256 Builder.restoreIP(AfterIP); 5257 Builder.CreateRetVoid(); 5258 5259 OMPBuilder.finalize(F); 5260 5261 // The IR must be valid. 5262 EXPECT_FALSE(verifyModule(*M)); 5263 5264 // Two different outlined functions must have been created. 5265 SmallVector<CallInst *> ForkCalls; 5266 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5267 ForkCalls); 5268 ASSERT_EQ(ForkCalls.size(), 2u); 5269 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5270 Function *FirstCallee = cast<Function>(CalleeVal); 5271 CalleeVal = ForkCalls[1]->getOperand(2); 5272 Function *SecondCallee = cast<Function>(CalleeVal); 5273 EXPECT_NE(FirstCallee, SecondCallee); 5274 5275 // Two different reduction functions must have been created. 5276 SmallVector<CallInst *> ReduceCalls; 5277 findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder, 5278 ReduceCalls); 5279 ASSERT_EQ(ReduceCalls.size(), 1u); 5280 auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5281 ReduceCalls.clear(); 5282 findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, 5283 OMPBuilder, ReduceCalls); 5284 auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5285 EXPECT_NE(AddReduction, XorReduction); 5286 5287 // Each reduction function does its own kind of reduction. 
5288 BasicBlock *FnReductionBB = &AddReduction->getEntryBlock(); 5289 Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5290 AddReduction->getArg(0), FnReductionBB); 5291 ASSERT_NE(FirstLHSPtr, nullptr); 5292 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5293 ASSERT_NE(Opaque, nullptr); 5294 Instruction::BinaryOps Opcode = Instruction::FAdd; 5295 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5296 5297 FnReductionBB = &XorReduction->getEntryBlock(); 5298 Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5299 XorReduction->getArg(0), FnReductionBB); 5300 ASSERT_NE(FirstLHSPtr, nullptr); 5301 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5302 ASSERT_NE(Opaque, nullptr); 5303 Opcode = Instruction::Xor; 5304 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5305 } 5306 5307 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { 5308 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5309 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5310 OpenMPIRBuilder OMPBuilder(*M); 5311 OMPBuilder.initialize(); 5312 F->setName("func"); 5313 IRBuilder<> Builder(BB); 5314 5315 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5316 Builder.CreateBr(EnterBB); 5317 Builder.SetInsertPoint(EnterBB); 5318 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5319 5320 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5321 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5322 5323 auto FiniCB = [&](InsertPointTy IP) {}; 5324 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 5325 SectionCBVector.push_back(SectionCB); 5326 5327 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5328 llvm::Value &, llvm::Value &Val, 5329 llvm::Value *&ReplVal) { return CodeGenIP; }; 5330 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5331 
F->getEntryBlock().getFirstInsertionPt()); 5332 Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5333 PrivCB, FiniCB, false, false)); 5334 Builder.CreateRetVoid(); // Required at the end of the function 5335 EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); 5336 EXPECT_FALSE(verifyModule(*M, &errs())); 5337 } 5338 5339 TEST_F(OpenMPIRBuilderTest, CreateSections) { 5340 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5341 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5342 OpenMPIRBuilder OMPBuilder(*M); 5343 OMPBuilder.initialize(); 5344 F->setName("func"); 5345 IRBuilder<> Builder(BB); 5346 5347 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5348 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5349 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5350 5351 BasicBlock *SwitchBB = nullptr; 5352 AllocaInst *PrivAI = nullptr; 5353 SwitchInst *Switch = nullptr; 5354 5355 unsigned NumBodiesGenerated = 0; 5356 unsigned NumFiniCBCalls = 0; 5357 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 5358 5359 auto FiniCB = [&](InsertPointTy IP) { 5360 ++NumFiniCBCalls; 5361 BasicBlock *IPBB = IP.getBlock(); 5362 EXPECT_NE(IPBB->end(), IP.getPoint()); 5363 }; 5364 5365 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 5366 ++NumBodiesGenerated; 5367 CaseBBs.push_back(CodeGenIP.getBlock()); 5368 SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor(); 5369 Builder.restoreIP(CodeGenIP); 5370 Builder.CreateStore(F->arg_begin(), PrivAI); 5371 Value *PrivLoad = 5372 Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca"); 5373 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 5374 }; 5375 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5376 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { 5377 // TODO: Privatization not implemented yet 5378 return CodeGenIP; 5379 }; 5380 5381 SectionCBVector.push_back(SectionCB); 5382 
SectionCBVector.push_back(SectionCB); 5383 5384 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5385 F->getEntryBlock().getFirstInsertionPt()); 5386 Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5387 PrivCB, FiniCB, false, false)); 5388 Builder.CreateRetVoid(); // Required at the end of the function 5389 5390 // Switch BB's predecessor is loop condition BB, whose successor at index 1 is 5391 // loop's exit BB 5392 BasicBlock *ForExitBB = 5393 SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1); 5394 EXPECT_NE(ForExitBB, nullptr); 5395 5396 EXPECT_NE(PrivAI, nullptr); 5397 Function *OutlinedFn = PrivAI->getFunction(); 5398 EXPECT_EQ(F, OutlinedFn); 5399 EXPECT_FALSE(verifyModule(*M, &errs())); 5400 EXPECT_EQ(OutlinedFn->arg_size(), 1U); 5401 5402 BasicBlock *LoopPreheaderBB = 5403 OutlinedFn->getEntryBlock().getSingleSuccessor(); 5404 // loop variables are 5 - lower bound, upper bound, stride, islastiter, and 5405 // iterator/counter 5406 bool FoundForInit = false; 5407 for (Instruction &Inst : *LoopPreheaderBB) { 5408 if (isa<CallInst>(Inst)) { 5409 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5410 "__kmpc_for_static_init_4u") { 5411 FoundForInit = true; 5412 } 5413 } 5414 } 5415 EXPECT_EQ(FoundForInit, true); 5416 5417 bool FoundForExit = false; 5418 bool FoundBarrier = false; 5419 for (Instruction &Inst : *ForExitBB) { 5420 if (isa<CallInst>(Inst)) { 5421 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5422 "__kmpc_for_static_fini") { 5423 FoundForExit = true; 5424 } 5425 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5426 "__kmpc_barrier") { 5427 FoundBarrier = true; 5428 } 5429 if (FoundForExit && FoundBarrier) 5430 break; 5431 } 5432 } 5433 EXPECT_EQ(FoundForExit, true); 5434 EXPECT_EQ(FoundBarrier, true); 5435 5436 EXPECT_NE(SwitchBB, nullptr); 5437 EXPECT_NE(SwitchBB->getTerminator(), nullptr); 5438 EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true); 5439 
Switch = cast<SwitchInst>(SwitchBB->getTerminator()); 5440 EXPECT_EQ(Switch->getNumCases(), 2U); 5441 5442 EXPECT_EQ(CaseBBs.size(), 2U); 5443 for (auto *&CaseBB : CaseBBs) { 5444 EXPECT_EQ(CaseBB->getParent(), OutlinedFn); 5445 } 5446 5447 ASSERT_EQ(NumBodiesGenerated, 2U); 5448 ASSERT_EQ(NumFiniCBCalls, 1U); 5449 EXPECT_FALSE(verifyModule(*M, &errs())); 5450 } 5451 5452 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { 5453 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5454 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5455 OpenMPIRBuilder OMPBuilder(*M); 5456 OMPBuilder.initialize(); 5457 F->setName("func"); 5458 IRBuilder<> Builder(BB); 5459 5460 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5461 Builder.CreateBr(EnterBB); 5462 Builder.SetInsertPoint(EnterBB); 5463 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5464 5465 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5466 F->getEntryBlock().getFirstInsertionPt()); 5467 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5468 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5469 llvm::Value &, llvm::Value &Val, 5470 llvm::Value *&ReplVal) { return CodeGenIP; }; 5471 auto FiniCB = [&](InsertPointTy IP) {}; 5472 5473 Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5474 PrivCB, FiniCB, false, true)); 5475 Builder.CreateRetVoid(); // Required at the end of the function 5476 for (auto &Inst : instructions(*F)) { 5477 EXPECT_FALSE(isa<CallInst>(Inst) && 5478 cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5479 "__kmpc_barrier" && 5480 "call to function __kmpc_barrier found with nowait"); 5481 } 5482 } 5483 5484 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) { 5485 OpenMPIRBuilder OMPBuilder(*M); 5486 OMPBuilder.initialize(); 5487 5488 IRBuilder<> Builder(BB); 5489 5490 SmallVector<uint64_t> Mappings = {0, 1}; 5491 GlobalVariable *OffloadMaptypesGlobal = 5492 
OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes"); 5493 EXPECT_FALSE(M->global_empty()); 5494 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes"); 5495 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5496 EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5497 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5498 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5499 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5500 EXPECT_TRUE(isa<ConstantDataArray>(Initializer)); 5501 ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer); 5502 EXPECT_EQ(MappingInit->getNumElements(), Mappings.size()); 5503 EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64)); 5504 Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings); 5505 EXPECT_EQ(MappingInit, CA); 5506 } 5507 5508 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { 5509 OpenMPIRBuilder OMPBuilder(*M); 5510 OMPBuilder.initialize(); 5511 5512 IRBuilder<> Builder(BB); 5513 5514 uint32_t StrSize; 5515 Constant *Cst1 = 5516 OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5517 Constant *Cst2 = 5518 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5519 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5520 5521 GlobalVariable *OffloadMaptypesGlobal = 5522 OMPBuilder.createOffloadMapnames(Names, "offload_mapnames"); 5523 EXPECT_FALSE(M->global_empty()); 5524 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames"); 5525 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5526 EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5527 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5528 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5529 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5530 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts())); 5531 
EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts())); 5532 5533 GlobalVariable *Name1Gbl = 5534 cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts()); 5535 EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer())); 5536 ConstantDataArray *Name1GblCA = 5537 dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer()); 5538 EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;"); 5539 5540 GlobalVariable *Name2Gbl = 5541 cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts()); 5542 EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer())); 5543 ConstantDataArray *Name2GblCA = 5544 dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer()); 5545 EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;"); 5546 5547 EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy()); 5548 EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size()); 5549 } 5550 5551 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) { 5552 OpenMPIRBuilder OMPBuilder(*M); 5553 OMPBuilder.initialize(); 5554 F->setName("func"); 5555 IRBuilder<> Builder(BB); 5556 5557 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5558 5559 unsigned TotalNbOperand = 2; 5560 5561 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5562 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5563 F->getEntryBlock().getFirstInsertionPt()); 5564 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5565 EXPECT_NE(MapperAllocas.ArgsBase, nullptr); 5566 EXPECT_NE(MapperAllocas.Args, nullptr); 5567 EXPECT_NE(MapperAllocas.ArgSizes, nullptr); 5568 EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy()); 5569 ArrayType *ArrType = 5570 dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType()); 5571 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5572 EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType() 5573 ->getArrayElementType() 5574 ->isPointerTy()); 5575 5576 
EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy()); 5577 ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType()); 5578 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5579 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType() 5580 ->getArrayElementType() 5581 ->isPointerTy()); 5582 5583 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy()); 5584 ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType()); 5585 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5586 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType() 5587 ->getArrayElementType() 5588 ->isIntegerTy(64)); 5589 } 5590 5591 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { 5592 OpenMPIRBuilder OMPBuilder(*M); 5593 OMPBuilder.initialize(); 5594 F->setName("func"); 5595 IRBuilder<> Builder(BB); 5596 LLVMContext &Ctx = M->getContext(); 5597 5598 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5599 5600 unsigned TotalNbOperand = 2; 5601 5602 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5603 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5604 F->getEntryBlock().getFirstInsertionPt()); 5605 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5606 5607 auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr( 5608 omp::OMPRTL___tgt_target_data_begin_mapper); 5609 5610 SmallVector<uint64_t> Flags = {0, 2}; 5611 5612 uint32_t StrSize; 5613 Constant *SrcLocCst = 5614 OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize); 5615 Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); 5616 5617 Constant *Cst1 = 5618 OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5619 Constant *Cst2 = 5620 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5621 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5622 5623 GlobalVariable *Maptypes = 5624 OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes"); 5625 Value *MaptypesArg = 
Builder.CreateConstInBoundsGEP2_32( 5626 ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes, 5627 /*Idx0=*/0, /*Idx1=*/0); 5628 5629 GlobalVariable *Mapnames = 5630 OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames"); 5631 Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32( 5632 ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames, 5633 /*Idx0=*/0, /*Idx1=*/0); 5634 5635 OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo, 5636 MaptypesArg, MapnamesArg, MapperAllocas, -1, 5637 TotalNbOperand); 5638 5639 CallInst *MapperCall = dyn_cast<CallInst>(&BB->back()); 5640 EXPECT_NE(MapperCall, nullptr); 5641 EXPECT_EQ(MapperCall->arg_size(), 9U); 5642 EXPECT_EQ(MapperCall->getCalledFunction()->getName(), 5643 "__tgt_target_data_begin_mapper"); 5644 EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo); 5645 EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64)); 5646 EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32)); 5647 5648 EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg); 5649 EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg); 5650 EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy()); 5651 } 5652 5653 TEST_F(OpenMPIRBuilderTest, TargetEnterData) { 5654 OpenMPIRBuilder OMPBuilder(*M); 5655 OMPBuilder.initialize(); 5656 F->setName("func"); 5657 IRBuilder<> Builder(BB); 5658 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5659 5660 int64_t DeviceID = 2; 5661 5662 AllocaInst *Val1 = 5663 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5664 ASSERT_NE(Val1, nullptr); 5665 5666 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5667 F->getEntryBlock().getFirstInsertionPt()); 5668 5669 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5670 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5671 auto GenMapInfoCB = 5672 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5673 // Get map clause information. 
5674 Builder.restoreIP(codeGenIP); 5675 5676 CombinedInfo.BasePointers.emplace_back(Val1); 5677 CombinedInfo.Pointers.emplace_back(Val1); 5678 CombinedInfo.DevicePointers.emplace_back( 5679 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5680 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5681 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1)); 5682 uint32_t temp; 5683 CombinedInfo.Names.emplace_back( 5684 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5685 return CombinedInfo; 5686 }; 5687 5688 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5689 /*RequiresDevicePointerInfo=*/false, 5690 /*SeparateBeginEndCalls=*/true); 5691 5692 OMPBuilder.Config.setIsGPU(true); 5693 5694 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper; 5695 Builder.restoreIP(OMPBuilder.createTargetData( 5696 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5697 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5698 5699 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5700 EXPECT_NE(TargetDataCall, nullptr); 5701 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5702 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5703 "__tgt_target_data_begin_mapper"); 5704 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5705 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5706 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5707 5708 Builder.CreateRetVoid(); 5709 EXPECT_FALSE(verifyModule(*M, &errs())); 5710 } 5711 5712 TEST_F(OpenMPIRBuilderTest, TargetExitData) { 5713 OpenMPIRBuilder OMPBuilder(*M); 5714 OMPBuilder.initialize(); 5715 F->setName("func"); 5716 IRBuilder<> Builder(BB); 5717 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5718 5719 int64_t DeviceID = 2; 5720 5721 AllocaInst *Val1 = 5722 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5723 ASSERT_NE(Val1, nullptr); 5724 5725 IRBuilder<>::InsertPoint 
AllocaIP(&F->getEntryBlock(), 5726 F->getEntryBlock().getFirstInsertionPt()); 5727 5728 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5729 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5730 auto GenMapInfoCB = 5731 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5732 // Get map clause information. 5733 Builder.restoreIP(codeGenIP); 5734 5735 CombinedInfo.BasePointers.emplace_back(Val1); 5736 CombinedInfo.Pointers.emplace_back(Val1); 5737 CombinedInfo.DevicePointers.emplace_back( 5738 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5739 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5740 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2)); 5741 uint32_t temp; 5742 CombinedInfo.Names.emplace_back( 5743 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5744 return CombinedInfo; 5745 }; 5746 5747 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5748 /*RequiresDevicePointerInfo=*/false, 5749 /*SeparateBeginEndCalls=*/true); 5750 5751 OMPBuilder.Config.setIsGPU(true); 5752 5753 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper; 5754 Builder.restoreIP(OMPBuilder.createTargetData( 5755 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5756 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5757 5758 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5759 EXPECT_NE(TargetDataCall, nullptr); 5760 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5761 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5762 "__tgt_target_data_end_mapper"); 5763 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5764 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5765 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5766 5767 Builder.CreateRetVoid(); 5768 EXPECT_FALSE(verifyModule(*M, &errs())); 5769 } 5770 5771 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { 5772 OpenMPIRBuilder OMPBuilder(*M); 5773 OMPBuilder.initialize(); 5774 
F->setName("func"); 5775 IRBuilder<> Builder(BB); 5776 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5777 5778 int64_t DeviceID = 2; 5779 5780 AllocaInst *Val1 = 5781 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5782 ASSERT_NE(Val1, nullptr); 5783 5784 AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy()); 5785 ASSERT_NE(Val2, nullptr); 5786 5787 AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy()); 5788 ASSERT_NE(Val3, nullptr); 5789 5790 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5791 F->getEntryBlock().getFirstInsertionPt()); 5792 5793 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; 5794 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5795 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5796 auto GenMapInfoCB = 5797 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5798 // Get map clause information. 5799 Builder.restoreIP(codeGenIP); 5800 uint32_t temp; 5801 5802 CombinedInfo.BasePointers.emplace_back(Val1); 5803 CombinedInfo.Pointers.emplace_back(Val1); 5804 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None); 5805 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5806 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3)); 5807 CombinedInfo.Names.emplace_back( 5808 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5809 5810 CombinedInfo.BasePointers.emplace_back(Val2); 5811 CombinedInfo.Pointers.emplace_back(Val2); 5812 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer); 5813 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 5814 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 5815 CombinedInfo.Names.emplace_back( 5816 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5817 5818 CombinedInfo.BasePointers.emplace_back(Val3); 5819 CombinedInfo.Pointers.emplace_back(Val3); 5820 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address); 5821 
CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 5822 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 5823 CombinedInfo.Names.emplace_back( 5824 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5825 return CombinedInfo; 5826 }; 5827 5828 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5829 /*RequiresDevicePointerInfo=*/true, 5830 /*SeparateBeginEndCalls=*/true); 5831 5832 OMPBuilder.Config.setIsGPU(true); 5833 5834 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 5835 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 5836 if (BodyGenType == BodyGenTy::Priv) { 5837 EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u); 5838 Builder.restoreIP(CodeGenIP); 5839 CallInst *TargetDataCall = 5840 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 5841 EXPECT_NE(TargetDataCall, nullptr); 5842 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5843 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5844 "__tgt_target_data_begin_mapper"); 5845 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5846 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5847 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5848 5849 LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode()); 5850 EXPECT_NE(LI, nullptr); 5851 StoreInst *SI = dyn_cast<StoreInst>(&BB->back()); 5852 EXPECT_NE(SI, nullptr); 5853 EXPECT_EQ(SI->getValueOperand(), LI); 5854 EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second); 5855 EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second)); 5856 EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second)); 5857 Builder.CreateStore(Builder.getInt32(99), Val1); 5858 } 5859 return Builder.saveIP(); 5860 }; 5861 5862 Builder.restoreIP(OMPBuilder.createTargetData( 5863 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5864 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB)); 5865 5866 CallInst *TargetDataCall = 
dyn_cast<CallInst>(&BB->back()); 5867 EXPECT_NE(TargetDataCall, nullptr); 5868 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5869 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5870 "__tgt_target_data_end_mapper"); 5871 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5872 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5873 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5874 5875 // Check that BodyGenCB is still made when IsTargetDevice is set to true. 5876 OMPBuilder.Config.setIsTargetDevice(true); 5877 bool CheckDevicePassBodyGen = false; 5878 auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 5879 CheckDevicePassBodyGen = true; 5880 Builder.restoreIP(CodeGenIP); 5881 CallInst *TargetDataCall = 5882 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 5883 // Make sure no begin_mapper call is present for device pass. 5884 EXPECT_EQ(TargetDataCall, nullptr); 5885 return Builder.saveIP(); 5886 }; 5887 Builder.restoreIP(OMPBuilder.createTargetData( 5888 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5889 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB)); 5890 EXPECT_TRUE(CheckDevicePassBodyGen); 5891 5892 Builder.CreateRetVoid(); 5893 EXPECT_FALSE(verifyModule(*M, &errs())); 5894 } 5895 5896 namespace { 5897 // Some basic handling of argument mapping for the moment 5898 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder, 5899 llvm::SmallVectorImpl<llvm::Value *> &Args, 5900 llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) { 5901 for (auto Arg : Args) { 5902 CombinedInfo.BasePointers.emplace_back(Arg); 5903 CombinedInfo.Pointers.emplace_back(Arg); 5904 uint32_t SrcLocStrSize; 5905 CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr( 5906 "Unknown loc - stub implementation", SrcLocStrSize)); 5907 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags( 5908 
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | 5909 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | 5910 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM)); 5911 CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64( 5912 OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType()))); 5913 } 5914 } 5915 } // namespace 5916 5917 TEST_F(OpenMPIRBuilderTest, TargetRegion) { 5918 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5919 OpenMPIRBuilder OMPBuilder(*M); 5920 OMPBuilder.initialize(); 5921 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 5922 OMPBuilder.setConfig(Config); 5923 F->setName("func"); 5924 IRBuilder<> Builder(BB); 5925 auto Int32Ty = Builder.getInt32Ty(); 5926 5927 AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr"); 5928 AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr"); 5929 AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr"); 5930 5931 Builder.CreateStore(Builder.getInt32(10), APtr); 5932 Builder.CreateStore(Builder.getInt32(20), BPtr); 5933 auto BodyGenCB = [&](InsertPointTy AllocaIP, 5934 InsertPointTy CodeGenIP) -> InsertPointTy { 5935 Builder.restoreIP(CodeGenIP); 5936 LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr); 5937 LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr); 5938 Value *Sum = Builder.CreateAdd(AVal, BVal); 5939 Builder.CreateStore(Sum, CPtr); 5940 return Builder.saveIP(); 5941 }; 5942 5943 llvm::SmallVector<llvm::Value *> Inputs; 5944 Inputs.push_back(APtr); 5945 Inputs.push_back(BPtr); 5946 Inputs.push_back(CPtr); 5947 5948 auto SimpleArgAccessorCB = 5949 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 5950 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 5951 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 5952 if (!OMPBuilder.Config.isTargetDevice()) { 5953 RetVal = cast<llvm::Value>(&Arg); 5954 return CodeGenIP; 5955 } 5956 5957 Builder.restoreIP(AllocaIP); 5958 5959 llvm::Value *Addr = 
Builder.CreateAlloca( 5960 Arg.getType()->isPointerTy() 5961 ? Arg.getType() 5962 : Type::getInt64Ty(Builder.getContext()), 5963 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 5964 llvm::Value *AddrAscast = 5965 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 5966 Builder.CreateStore(&Arg, AddrAscast); 5967 5968 Builder.restoreIP(CodeGenIP); 5969 5970 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 5971 5972 return Builder.saveIP(); 5973 }; 5974 5975 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 5976 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 5977 -> llvm::OpenMPIRBuilder::MapInfosTy & { 5978 CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos); 5979 return CombinedInfos; 5980 }; 5981 5982 TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); 5983 OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); 5984 Builder.restoreIP(OMPBuilder.createTarget( 5985 OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(), 5986 EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); 5987 OMPBuilder.finalize(); 5988 Builder.CreateRetVoid(); 5989 5990 // Check the kernel launch sequence 5991 auto Iter = F->getEntryBlock().rbegin(); 5992 EXPECT_TRUE(isa<BranchInst>(&*(Iter))); 5993 BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter)); 5994 EXPECT_TRUE(isa<CmpInst>(&*(++Iter))); 5995 EXPECT_TRUE(isa<CallInst>(&*(++Iter))); 5996 CallInst *Call = dyn_cast<CallInst>(&*(Iter)); 5997 5998 // Check that the kernel launch function is called 5999 Function *KernelLaunchFunc = Call->getCalledFunction(); 6000 EXPECT_NE(KernelLaunchFunc, nullptr); 6001 StringRef FunctionName = KernelLaunchFunc->getName(); 6002 EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel")); 6003 6004 // Check the fallback call 6005 BasicBlock *FallbackBlock = Branch->getSuccessor(0); 6006 Iter = FallbackBlock->rbegin(); 6007 CallInst *FCall = dyn_cast<CallInst>(&*(++Iter)); 6008 // 'F' has a dummy 
DISubprogram which causes OutlinedFunc to also 6009 // have a DISubprogram. In this case, the call to OutlinedFunc needs 6010 // to have a debug loc, otherwise verifier will complain. 6011 FCall->setDebugLoc(DL); 6012 EXPECT_NE(FCall, nullptr); 6013 6014 // Check that the correct aguments are passed in 6015 for (auto ArgInput : zip(FCall->args(), Inputs)) { 6016 EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput)); 6017 } 6018 6019 // Check that the outlined function exists with the expected prefix 6020 Function *OutlinedFunc = FCall->getCalledFunction(); 6021 EXPECT_NE(OutlinedFunc, nullptr); 6022 StringRef FunctionName2 = OutlinedFunc->getName(); 6023 EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading")); 6024 6025 EXPECT_FALSE(verifyModule(*M, &errs())); 6026 } 6027 6028 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { 6029 OpenMPIRBuilder OMPBuilder(*M); 6030 OMPBuilder.setConfig( 6031 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6032 OMPBuilder.initialize(); 6033 6034 F->setName("func"); 6035 IRBuilder<> Builder(BB); 6036 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6037 6038 LoadInst *Value = nullptr; 6039 StoreInst *TargetStore = nullptr; 6040 llvm::SmallVector<llvm::Value *, 2> CapturedArgs = { 6041 Constant::getNullValue(PointerType::get(Ctx, 0)), 6042 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6043 6044 auto SimpleArgAccessorCB = 6045 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6046 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6047 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6048 if (!OMPBuilder.Config.isTargetDevice()) { 6049 RetVal = cast<llvm::Value>(&Arg); 6050 return CodeGenIP; 6051 } 6052 6053 Builder.restoreIP(AllocaIP); 6054 6055 llvm::Value *Addr = Builder.CreateAlloca( 6056 Arg.getType()->isPointerTy() 6057 ? 
Arg.getType() 6058 : Type::getInt64Ty(Builder.getContext()), 6059 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6060 llvm::Value *AddrAscast = 6061 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6062 Builder.CreateStore(&Arg, AddrAscast); 6063 6064 Builder.restoreIP(CodeGenIP); 6065 6066 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6067 6068 return Builder.saveIP(); 6069 }; 6070 6071 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6072 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6073 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6074 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6075 return CombinedInfos; 6076 }; 6077 6078 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6079 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6080 -> OpenMPIRBuilder::InsertPointTy { 6081 Builder.restoreIP(CodeGenIP); 6082 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6083 TargetStore = Builder.CreateStore(Value, CapturedArgs[1]); 6084 return Builder.saveIP(); 6085 }; 6086 6087 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6088 F->getEntryBlock().getFirstInsertionPt()); 6089 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6090 /*Line=*/3, /*Count=*/0); 6091 6092 Builder.restoreIP( 6093 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6094 EntryInfo, /*NumTeams=*/-1, 6095 /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, 6096 BodyGenCB, SimpleArgAccessorCB)); 6097 6098 Builder.CreateRetVoid(); 6099 OMPBuilder.finalize(); 6100 6101 // Check outlined function 6102 EXPECT_FALSE(verifyModule(*M, &errs())); 6103 EXPECT_NE(TargetStore, nullptr); 6104 Function *OutlinedFn = TargetStore->getFunction(); 6105 EXPECT_NE(F, OutlinedFn); 6106 6107 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6108 // Account for the "implicit" first argument. 
6109 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6110 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 6111 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6112 EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy()); 6113 6114 // Check entry block 6115 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6116 Instruction *Alloca1 = EntryBlock.getFirstNonPHI(); 6117 EXPECT_NE(Alloca1, nullptr); 6118 6119 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6120 auto *Store1 = Alloca1->getNextNode(); 6121 EXPECT_TRUE(isa<StoreInst>(Store1)); 6122 auto *Alloca2 = Store1->getNextNode(); 6123 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6124 auto *Store2 = Alloca2->getNextNode(); 6125 EXPECT_TRUE(isa<StoreInst>(Store2)); 6126 6127 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6128 EXPECT_NE(InitCall, nullptr); 6129 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6130 EXPECT_EQ(InitCall->arg_size(), 2U); 6131 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6132 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6133 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6134 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6135 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6136 auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6137 EXPECT_EQ(ConfigC->getAggregateElement(0U), 6138 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6139 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6140 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6141 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6142 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6143 6144 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6145 EXPECT_NE(EntryBlockBranch, nullptr); 6146 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6147 6148 // Check user code block 6149 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6150 
EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6151 auto *Load1 = UserCodeBlock->getFirstNonPHI(); 6152 EXPECT_TRUE(isa<LoadInst>(Load1)); 6153 auto *Load2 = Load1->getNextNode(); 6154 EXPECT_TRUE(isa<LoadInst>(Load2)); 6155 6156 auto *Value1 = Load2->getNextNode(); 6157 EXPECT_EQ(Value1, Value); 6158 EXPECT_EQ(Value1->getNextNode(), TargetStore); 6159 auto *Deinit = TargetStore->getNextNode(); 6160 EXPECT_NE(Deinit, nullptr); 6161 6162 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6163 EXPECT_NE(DeinitCall, nullptr); 6164 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6165 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6166 6167 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6168 6169 // Check exit block 6170 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6171 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6172 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI())); 6173 } 6174 6175 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { 6176 OpenMPIRBuilder OMPBuilder(*M); 6177 OMPBuilder.setConfig( 6178 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6179 OMPBuilder.initialize(); 6180 6181 F->setName("func"); 6182 IRBuilder<> Builder(BB); 6183 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6184 6185 LoadInst *Value = nullptr; 6186 StoreInst *TargetStore = nullptr; 6187 llvm::SmallVector<llvm::Value *, 1> CapturedArgs = { 6188 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6189 6190 auto SimpleArgAccessorCB = 6191 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6192 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6193 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6194 if (!OMPBuilder.Config.isTargetDevice()) { 6195 RetVal = cast<llvm::Value>(&Arg); 6196 return CodeGenIP; 6197 } 6198 6199 Builder.restoreIP(AllocaIP); 6200 6201 llvm::Value *Addr = Builder.CreateAlloca( 6202 Arg.getType()->isPointerTy() 6203 ? 
Arg.getType() 6204 : Type::getInt64Ty(Builder.getContext()), 6205 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6206 llvm::Value *AddrAscast = 6207 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6208 Builder.CreateStore(&Arg, AddrAscast); 6209 6210 Builder.restoreIP(CodeGenIP); 6211 6212 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6213 6214 return Builder.saveIP(); 6215 }; 6216 6217 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6218 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6219 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6220 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6221 return CombinedInfos; 6222 }; 6223 6224 llvm::Value *RaiseAlloca = nullptr; 6225 6226 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6227 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6228 -> OpenMPIRBuilder::InsertPointTy { 6229 Builder.restoreIP(CodeGenIP); 6230 RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty()); 6231 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6232 TargetStore = Builder.CreateStore(Value, RaiseAlloca); 6233 return Builder.saveIP(); 6234 }; 6235 6236 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6237 F->getEntryBlock().getFirstInsertionPt()); 6238 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6239 /*Line=*/3, /*Count=*/0); 6240 6241 Builder.restoreIP( 6242 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6243 EntryInfo, /*NumTeams=*/-1, 6244 /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, 6245 BodyGenCB, SimpleArgAccessorCB)); 6246 6247 Builder.CreateRetVoid(); 6248 OMPBuilder.finalize(); 6249 6250 // Check outlined function 6251 EXPECT_FALSE(verifyModule(*M, &errs())); 6252 EXPECT_NE(TargetStore, nullptr); 6253 Function *OutlinedFn = TargetStore->getFunction(); 6254 EXPECT_NE(F, OutlinedFn); 6255 6256 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6257 // Account for the "implicit" first 
argument. 6258 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6259 EXPECT_EQ(OutlinedFn->arg_size(), 2U); 6260 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6261 6262 // Check entry block, to see if we have raised our alloca 6263 // from the body to the entry block. 6264 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6265 6266 // Check that we have moved our alloca created in the 6267 // BodyGenCB function, to the top of the function. 6268 Instruction *Alloca1 = EntryBlock.getFirstNonPHI(); 6269 EXPECT_NE(Alloca1, nullptr); 6270 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6271 EXPECT_EQ(Alloca1, RaiseAlloca); 6272 6273 // Verify we have not altered the rest of the function 6274 // inappropriately with our alloca movement. 6275 auto *Alloca2 = Alloca1->getNextNode(); 6276 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6277 auto *Store2 = Alloca2->getNextNode(); 6278 EXPECT_TRUE(isa<StoreInst>(Store2)); 6279 6280 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6281 EXPECT_NE(InitCall, nullptr); 6282 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6283 EXPECT_EQ(InitCall->arg_size(), 2U); 6284 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6285 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6286 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6287 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6288 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6289 auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6290 EXPECT_EQ(ConfigC->getAggregateElement(0U), 6291 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6292 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6293 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6294 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6295 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6296 6297 auto *EntryBlockBranch = EntryBlock.getTerminator(); 
6298 EXPECT_NE(EntryBlockBranch, nullptr); 6299 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6300 6301 // Check user code block 6302 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6303 EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6304 auto *Load1 = UserCodeBlock->getFirstNonPHI(); 6305 EXPECT_TRUE(isa<LoadInst>(Load1)); 6306 auto *Load2 = Load1->getNextNode(); 6307 EXPECT_TRUE(isa<LoadInst>(Load2)); 6308 EXPECT_EQ(Load2, Value); 6309 EXPECT_EQ(Load2->getNextNode(), TargetStore); 6310 auto *Deinit = TargetStore->getNextNode(); 6311 EXPECT_NE(Deinit, nullptr); 6312 6313 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6314 EXPECT_NE(DeinitCall, nullptr); 6315 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6316 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6317 6318 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6319 6320 // Check exit block 6321 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6322 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6323 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI())); 6324 } 6325 6326 TEST_F(OpenMPIRBuilderTest, CreateTask) { 6327 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6328 OpenMPIRBuilder OMPBuilder(*M); 6329 OMPBuilder.Config.IsTargetDevice = false; 6330 OMPBuilder.initialize(); 6331 F->setName("func"); 6332 IRBuilder<> Builder(BB); 6333 6334 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 6335 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 6336 Value *Val128 = 6337 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 6338 6339 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6340 Builder.restoreIP(AllocaIP); 6341 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 6342 "bodygen.alloca128"); 6343 6344 Builder.restoreIP(CodeGenIP); 6345 // Loading and storing captured pointer and values 6346 Builder.CreateStore(Val128, Local128); 6347 Value 
*Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 6348 "bodygen.load32"); 6349 6350 LoadInst *PrivLoad128 = Builder.CreateLoad( 6351 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 6352 Value *Cmp = Builder.CreateICmpNE( 6353 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 6354 Instruction *ThenTerm, *ElseTerm; 6355 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 6356 &ThenTerm, &ElseTerm); 6357 }; 6358 6359 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6360 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6361 OpenMPIRBuilder::LocationDescription Loc( 6362 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6363 Builder.restoreIP(OMPBuilder.createTask( 6364 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6365 BodyGenCB)); 6366 OMPBuilder.finalize(); 6367 Builder.CreateRetVoid(); 6368 6369 EXPECT_FALSE(verifyModule(*M, &errs())); 6370 6371 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6372 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6373 ->user_back()); 6374 6375 // Verify the Ident argument 6376 GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0)); 6377 ASSERT_NE(Ident, nullptr); 6378 EXPECT_TRUE(Ident->hasInitializer()); 6379 Constant *Initializer = Ident->getInitializer(); 6380 GlobalVariable *SrcStrGlob = 6381 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 6382 ASSERT_NE(SrcStrGlob, nullptr); 6383 ConstantDataArray *SrcSrc = 6384 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 6385 ASSERT_NE(SrcSrc, nullptr); 6386 6387 // Verify the num_threads argument. 6388 CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1)); 6389 ASSERT_NE(GTID, nullptr); 6390 EXPECT_EQ(GTID->arg_size(), 1U); 6391 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 6392 6393 // Verify the flags 6394 // TODO: Check for others flags. 
Currently testing only for tiedness. 6395 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 6396 ASSERT_NE(Flags, nullptr); 6397 EXPECT_EQ(Flags->getSExtValue(), 1); 6398 6399 // Verify the data size 6400 ConstantInt *DataSize = 6401 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 6402 ASSERT_NE(DataSize, nullptr); 6403 EXPECT_EQ(DataSize->getSExtValue(), 40); 6404 6405 ConstantInt *SharedsSize = 6406 dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4)); 6407 EXPECT_EQ(SharedsSize->getSExtValue(), 6408 24); // 64-bit pointer + 128-bit integer 6409 6410 // Verify Wrapper function 6411 Function *OutlinedFn = 6412 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 6413 ASSERT_NE(OutlinedFn, nullptr); 6414 6415 LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin()); 6416 ASSERT_NE(SharedsLoad, nullptr); 6417 EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1)); 6418 6419 EXPECT_FALSE(OutlinedFn->isDeclaration()); 6420 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty()); 6421 6422 // Verify that the data argument is used only once, and that too in the load 6423 // instruction that is then used for accessing shared data. 
6424 Value *DataPtr = OutlinedFn->getArg(1); 6425 EXPECT_EQ(DataPtr->getNumUses(), 1U); 6426 EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser())); 6427 Value *Data = DataPtr->uses().begin()->getUser(); 6428 EXPECT_TRUE(all_of(Data->uses(), [](Use &U) { 6429 return isa<GetElementPtrInst>(U.getUser()); 6430 })); 6431 6432 // Verify the presence of `trunc` and `icmp` instructions in Outlined function 6433 EXPECT_TRUE(any_of(instructions(OutlinedFn), 6434 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 6435 EXPECT_TRUE(any_of(instructions(OutlinedFn), 6436 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 6437 6438 // Verify the execution of the task 6439 CallInst *TaskCall = dyn_cast<CallInst>( 6440 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 6441 ->user_back()); 6442 ASSERT_NE(TaskCall, nullptr); 6443 EXPECT_EQ(TaskCall->getArgOperand(0), Ident); 6444 EXPECT_EQ(TaskCall->getArgOperand(1), GTID); 6445 EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall); 6446 6447 // Verify that the argument data has been copied 6448 for (User *in : TaskAllocCall->users()) { 6449 if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) { 6450 EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall); 6451 } 6452 } 6453 } 6454 6455 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { 6456 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6457 OpenMPIRBuilder OMPBuilder(*M); 6458 OMPBuilder.Config.IsTargetDevice = false; 6459 OMPBuilder.initialize(); 6460 F->setName("func"); 6461 IRBuilder<> Builder(BB); 6462 6463 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6464 6465 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6466 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6467 OpenMPIRBuilder::LocationDescription Loc( 6468 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6469 Builder.restoreIP(OMPBuilder.createTask( 6470 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 
6471 BodyGenCB)); 6472 OMPBuilder.finalize(); 6473 Builder.CreateRetVoid(); 6474 6475 EXPECT_FALSE(verifyModule(*M, &errs())); 6476 6477 // Check that the outlined function has only one argument. 6478 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6479 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6480 ->user_back()); 6481 Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5)); 6482 ASSERT_NE(OutlinedFn, nullptr); 6483 ASSERT_EQ(OutlinedFn->arg_size(), 1U); 6484 } 6485 6486 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { 6487 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6488 OpenMPIRBuilder OMPBuilder(*M); 6489 OMPBuilder.Config.IsTargetDevice = false; 6490 OMPBuilder.initialize(); 6491 F->setName("func"); 6492 IRBuilder<> Builder(BB); 6493 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6494 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6495 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6496 OpenMPIRBuilder::LocationDescription Loc( 6497 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6498 Builder.restoreIP(OMPBuilder.createTask( 6499 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, 6500 /*Tied=*/false)); 6501 OMPBuilder.finalize(); 6502 Builder.CreateRetVoid(); 6503 6504 // Check for the `Tied` argument 6505 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6506 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6507 ->user_back()); 6508 ASSERT_NE(TaskAllocCall, nullptr); 6509 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 6510 ASSERT_NE(Flags, nullptr); 6511 EXPECT_EQ(Flags->getZExtValue() & 1U, 0U); 6512 6513 EXPECT_FALSE(verifyModule(*M, &errs())); 6514 } 6515 6516 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { 6517 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6518 OpenMPIRBuilder OMPBuilder(*M); 6519 OMPBuilder.Config.IsTargetDevice = false; 6520 
OMPBuilder.initialize(); 6521 F->setName("func"); 6522 IRBuilder<> Builder(BB); 6523 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6524 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6525 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6526 OpenMPIRBuilder::LocationDescription Loc( 6527 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6528 AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext())); 6529 SmallVector<OpenMPIRBuilder::DependData> DDS; 6530 { 6531 OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn, 6532 Type::getInt32Ty(M->getContext()), InDep); 6533 DDS.push_back(DDIn); 6534 } 6535 Builder.restoreIP(OMPBuilder.createTask( 6536 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, 6537 /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); 6538 OMPBuilder.finalize(); 6539 Builder.CreateRetVoid(); 6540 6541 // Check for the `NumDeps` argument 6542 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6543 OMPBuilder 6544 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps) 6545 ->user_back()); 6546 ASSERT_NE(TaskAllocCall, nullptr); 6547 ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 6548 ASSERT_NE(NumDeps, nullptr); 6549 EXPECT_EQ(NumDeps->getZExtValue(), 1U); 6550 6551 // Check for the `DepInfo` array argument 6552 AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4)); 6553 ASSERT_NE(DepArray, nullptr); 6554 Value::user_iterator DepArrayI = DepArray->user_begin(); 6555 ++DepArrayI; 6556 Value::user_iterator DepInfoI = DepArrayI->user_begin(); 6557 // Check for the `DependKind` flag in the `DepInfo` array 6558 Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI); 6559 ASSERT_NE(Flag, nullptr); 6560 ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag); 6561 ASSERT_NE(FlagInt, nullptr); 6562 EXPECT_EQ(FlagInt->getZExtValue(), 6563 static_cast<unsigned 
int>(RTLDependenceKindTy::DepIn)); 6564 ++DepInfoI; 6565 // Check for the size in the `DepInfo` array 6566 Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI); 6567 ASSERT_NE(Size, nullptr); 6568 ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size); 6569 ASSERT_NE(SizeInt, nullptr); 6570 EXPECT_EQ(SizeInt->getZExtValue(), 4U); 6571 ++DepInfoI; 6572 // Check for the variable address in the `DepInfo` array 6573 Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI); 6574 ASSERT_NE(AddrStored, nullptr); 6575 PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored); 6576 ASSERT_NE(AddrInt, nullptr); 6577 Value *Addr = AddrInt->getPointerOperand(); 6578 EXPECT_EQ(Addr, InDep); 6579 6580 ConstantInt *NumDepsNoAlias = 6581 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5)); 6582 ASSERT_NE(NumDepsNoAlias, nullptr); 6583 EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U); 6584 EXPECT_EQ(TaskAllocCall->getOperand(6), 6585 ConstantPointerNull::get(PointerType::getUnqual(M->getContext()))); 6586 6587 EXPECT_FALSE(verifyModule(*M, &errs())); 6588 } 6589 6590 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { 6591 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6592 OpenMPIRBuilder OMPBuilder(*M); 6593 OMPBuilder.Config.IsTargetDevice = false; 6594 OMPBuilder.initialize(); 6595 F->setName("func"); 6596 IRBuilder<> Builder(BB); 6597 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6598 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6599 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 6600 Builder.SetInsertPoint(BodyBB); 6601 Value *Final = Builder.CreateICmp( 6602 CmpInst::Predicate::ICMP_EQ, F->getArg(0), 6603 ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); 6604 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 6605 Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, 6606 /*Tied=*/false, Final)); 6607 OMPBuilder.finalize(); 6608 Builder.CreateRetVoid(); 6609 
6610 // Check for the `Tied` argument 6611 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6612 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6613 ->user_back()); 6614 ASSERT_NE(TaskAllocCall, nullptr); 6615 BinaryOperator *OrInst = 6616 dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2)); 6617 ASSERT_NE(OrInst, nullptr); 6618 EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or); 6619 6620 // One of the arguments to `or` instruction is the tied flag, which is equal 6621 // to zero. 6622 EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) { 6623 if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op)) 6624 return TiedValue->getSExtValue() == 0; 6625 return false; 6626 })); 6627 6628 // One of the arguments to `or` instruction is the final condition. 6629 EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) { 6630 if (SelectInst *Select = dyn_cast<SelectInst>(op)) { 6631 ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue()); 6632 ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue()); 6633 if (!TrueValue || !FalseValue) 6634 return false; 6635 return Select->getCondition() == Final && 6636 TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0; 6637 } 6638 return false; 6639 })); 6640 6641 EXPECT_FALSE(verifyModule(*M, &errs())); 6642 } 6643 6644 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { 6645 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6646 OpenMPIRBuilder OMPBuilder(*M); 6647 OMPBuilder.Config.IsTargetDevice = false; 6648 OMPBuilder.initialize(); 6649 F->setName("func"); 6650 IRBuilder<> Builder(BB); 6651 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; 6652 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6653 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 6654 Builder.SetInsertPoint(BodyBB); 6655 Value *IfCondition = Builder.CreateICmp( 6656 CmpInst::Predicate::ICMP_EQ, F->getArg(0), 6657 
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); 6658 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 6659 Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, 6660 /*Tied=*/false, /*Final=*/nullptr, 6661 IfCondition)); 6662 OMPBuilder.finalize(); 6663 Builder.CreateRetVoid(); 6664 6665 EXPECT_FALSE(verifyModule(*M, &errs())); 6666 6667 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6668 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6669 ->user_back()); 6670 ASSERT_NE(TaskAllocCall, nullptr); 6671 6672 // Check the branching is based on the if condition argument. 6673 BranchInst *IfConditionBranchInst = 6674 dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator()); 6675 ASSERT_NE(IfConditionBranchInst, nullptr); 6676 ASSERT_TRUE(IfConditionBranchInst->isConditional()); 6677 EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition); 6678 6679 // Check that the `__kmpc_omp_task` executes only in the then branch. 6680 CallInst *TaskCall = dyn_cast<CallInst>( 6681 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 6682 ->user_back()); 6683 ASSERT_NE(TaskCall, nullptr); 6684 EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0)); 6685 6686 // Check that the OpenMP Runtime Functions specific to `if` clause execute 6687 // only in the else branch. Also check that the function call is between the 6688 // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls. 
6689 CallInst *TaskBeginIfCall = dyn_cast<CallInst>( 6690 OMPBuilder 6691 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0) 6692 ->user_back()); 6693 CallInst *TaskCompleteCall = dyn_cast<CallInst>( 6694 OMPBuilder 6695 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0) 6696 ->user_back()); 6697 ASSERT_NE(TaskBeginIfCall, nullptr); 6698 ASSERT_NE(TaskCompleteCall, nullptr); 6699 Function *OulinedFn = 6700 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 6701 ASSERT_NE(OulinedFn, nullptr); 6702 CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back()); 6703 ASSERT_NE(OulinedFnCall, nullptr); 6704 EXPECT_EQ(TaskBeginIfCall->getParent(), 6705 IfConditionBranchInst->getSuccessor(1)); 6706 6707 EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); 6708 EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); 6709 } 6710 6711 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { 6712 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6713 OpenMPIRBuilder OMPBuilder(*M); 6714 OMPBuilder.initialize(); 6715 F->setName("func"); 6716 IRBuilder<> Builder(BB); 6717 6718 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 6719 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 6720 Value *Val128 = 6721 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 6722 Instruction *ThenTerm, *ElseTerm; 6723 6724 Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp; 6725 6726 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6727 Builder.restoreIP(AllocaIP); 6728 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 6729 "bodygen.alloca128"); 6730 6731 Builder.restoreIP(CodeGenIP); 6732 // Loading and storing captured pointer and values 6733 InternalStoreInst = Builder.CreateStore(Val128, Local128); 6734 InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), 
ValPtr32, 6735 "bodygen.load32"); 6736 6737 InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128, 6738 "bodygen.local.load128"); 6739 InternalIfCmp = Builder.CreateICmpNE( 6740 InternalLoad32, 6741 Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType())); 6742 SplitBlockAndInsertIfThenElse(InternalIfCmp, 6743 CodeGenIP.getBlock()->getTerminator(), 6744 &ThenTerm, &ElseTerm); 6745 }; 6746 6747 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6748 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6749 OpenMPIRBuilder::LocationDescription Loc( 6750 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6751 Builder.restoreIP(OMPBuilder.createTaskgroup( 6752 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6753 BodyGenCB)); 6754 OMPBuilder.finalize(); 6755 Builder.CreateRetVoid(); 6756 6757 EXPECT_FALSE(verifyModule(*M, &errs())); 6758 6759 CallInst *TaskgroupCall = dyn_cast<CallInst>( 6760 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 6761 ->user_back()); 6762 ASSERT_NE(TaskgroupCall, nullptr); 6763 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 6764 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 6765 ->user_back()); 6766 ASSERT_NE(EndTaskgroupCall, nullptr); 6767 6768 // Verify the Ident argument 6769 GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0)); 6770 ASSERT_NE(Ident, nullptr); 6771 EXPECT_TRUE(Ident->hasInitializer()); 6772 Constant *Initializer = Ident->getInitializer(); 6773 GlobalVariable *SrcStrGlob = 6774 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 6775 ASSERT_NE(SrcStrGlob, nullptr); 6776 ConstantDataArray *SrcSrc = 6777 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 6778 ASSERT_NE(SrcSrc, nullptr); 6779 6780 // Verify the num_threads argument. 
6781 CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1)); 6782 ASSERT_NE(GTID, nullptr); 6783 EXPECT_EQ(GTID->arg_size(), 1U); 6784 EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr( 6785 OMPRTL___kmpc_global_thread_num)); 6786 6787 // Checking the general structure of the IR generated is same as expected. 6788 Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction(); 6789 EXPECT_EQ(GeneratedStoreInst, InternalStoreInst); 6790 Instruction *GeneratedLoad32 = 6791 GeneratedStoreInst->getNextNonDebugInstruction(); 6792 EXPECT_EQ(GeneratedLoad32, InternalLoad32); 6793 Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction(); 6794 EXPECT_EQ(GeneratedLoad128, InternalLoad128); 6795 6796 // Checking the ordering because of the if statements and that 6797 // `__kmp_end_taskgroup` call is after the if branching. 6798 BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(), 6799 ThenTerm->getSuccessor(0), 6800 EndTaskgroupCall->getParent(), 6801 ElseTerm->getParent()}; 6802 verifyDFSOrder(F, RefOrder); 6803 } 6804 6805 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { 6806 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6807 OpenMPIRBuilder OMPBuilder(*M); 6808 OMPBuilder.Config.IsTargetDevice = false; 6809 OMPBuilder.initialize(); 6810 F->setName("func"); 6811 IRBuilder<> Builder(BB); 6812 6813 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6814 Builder.restoreIP(AllocaIP); 6815 AllocaInst *Alloca32 = 6816 Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32"); 6817 AllocaInst *Alloca64 = 6818 Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64"); 6819 Builder.restoreIP(CodeGenIP); 6820 auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6821 Builder.restoreIP(CodeGenIP); 6822 LoadInst *LoadValue = 6823 Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); 6824 Value 
*AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64)); 6825 Builder.CreateStore(AddInst, Alloca64); 6826 }; 6827 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 6828 Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); 6829 6830 auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6831 Builder.restoreIP(CodeGenIP); 6832 LoadInst *LoadValue = 6833 Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); 6834 Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32)); 6835 Builder.CreateStore(AddInst, Alloca32); 6836 }; 6837 OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); 6838 Builder.restoreIP(OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); 6839 }; 6840 6841 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6842 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6843 OpenMPIRBuilder::LocationDescription Loc( 6844 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6845 Builder.restoreIP(OMPBuilder.createTaskgroup( 6846 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6847 BodyGenCB)); 6848 OMPBuilder.finalize(); 6849 Builder.CreateRetVoid(); 6850 6851 EXPECT_FALSE(verifyModule(*M, &errs())); 6852 6853 CallInst *TaskgroupCall = dyn_cast<CallInst>( 6854 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 6855 ->user_back()); 6856 ASSERT_NE(TaskgroupCall, nullptr); 6857 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 6858 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 6859 ->user_back()); 6860 ASSERT_NE(EndTaskgroupCall, nullptr); 6861 6862 Function *TaskAllocFn = 6863 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); 6864 ASSERT_EQ(TaskAllocFn->getNumUses(), 2u); 6865 6866 CallInst *FirstTaskAllocCall = 6867 dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()); 6868 CallInst *SecondTaskAllocCall = 6869 
dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++); 6870 ASSERT_NE(FirstTaskAllocCall, nullptr); 6871 ASSERT_NE(SecondTaskAllocCall, nullptr); 6872 6873 // Verify that the tasks have been generated in order and inside taskgroup 6874 // construct. 6875 BasicBlock *RefOrder[] = { 6876 TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(), 6877 SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()}; 6878 verifyDFSOrder(F, RefOrder); 6879 } 6880 6881 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { 6882 OpenMPIRBuilder OMPBuilder(*M); 6883 OMPBuilder.initialize(); 6884 6885 IRBuilder<> Builder(BB); 6886 6887 OpenMPIRBuilder::TargetDataRTArgs RTArgs; 6888 OpenMPIRBuilder::TargetDataInfo Info(true, false); 6889 6890 auto VoidPtrTy = PointerType::getUnqual(Builder.getContext()); 6891 auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext()); 6892 auto Int64Ty = Type::getInt64Ty(Builder.getContext()); 6893 auto Int64PtrTy = PointerType::getUnqual(Builder.getContext()); 6894 auto Array4VoidPtrTy = ArrayType::get(VoidPtrTy, 4); 6895 auto Array4Int64PtrTy = ArrayType::get(Int64Ty, 4); 6896 6897 Info.RTArgs.BasePointersArray = 6898 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo(0)); 6899 Info.RTArgs.PointersArray = 6900 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); 6901 Info.RTArgs.SizesArray = 6902 ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo()); 6903 Info.RTArgs.MapTypesArray = 6904 ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo()); 6905 Info.RTArgs.MapNamesArray = 6906 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); 6907 Info.RTArgs.MappersArray = 6908 ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); 6909 Info.NumberOfPtrs = 4; 6910 Info.EmitDebug = false; 6911 OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false); 6912 6913 EXPECT_NE(RTArgs.BasePointersArray, nullptr); 6914 EXPECT_NE(RTArgs.PointersArray, nullptr); 6915 EXPECT_NE(RTArgs.SizesArray, 
nullptr); 6916 EXPECT_NE(RTArgs.MapTypesArray, nullptr); 6917 EXPECT_NE(RTArgs.MappersArray, nullptr); 6918 EXPECT_NE(RTArgs.MapNamesArray, nullptr); 6919 EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr); 6920 6921 EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy); 6922 EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy); 6923 EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy); 6924 EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy); 6925 EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy); 6926 EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy); 6927 } 6928 6929 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { 6930 OpenMPIRBuilder OMPBuilder(*M); 6931 OMPBuilder.setConfig( 6932 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6933 OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager; 6934 TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); 6935 InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); 6936 EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo)); 6937 InfoManager.initializeDeviceGlobalVarEntryInfo( 6938 "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0); 6939 InfoManager.registerTargetRegionEntryInfo( 6940 EntryInfo, nullptr, nullptr, 6941 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion); 6942 InfoManager.registerDeviceGlobalVarEntryInfo( 6943 "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 6944 GlobalValue::WeakAnyLinkage); 6945 EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar")); 6946 } 6947 6948 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they 6949 // call each other (recursively in some cases). 
The test case test these 6950 // functions by utilising them for host code generation for declare target 6951 // global variables 6952 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) { 6953 OpenMPIRBuilder OMPBuilder(*M); 6954 OMPBuilder.initialize(); 6955 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 6956 OMPBuilder.setConfig(Config); 6957 6958 std::vector<llvm::Triple> TargetTriple; 6959 TargetTriple.emplace_back("amdgcn-amd-amdhsa"); 6960 6961 TargetRegionEntryInfo EntryInfo("", 42, 4711, 17); 6962 std::vector<GlobalVariable *> RefsGathered; 6963 6964 std::vector<Constant *> Globals; 6965 auto *IntTy = Type::getInt32Ty(Ctx); 6966 for (int I = 0; I < 2; ++I) { 6967 Globals.push_back(M->getOrInsertGlobal( 6968 "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * { 6969 return new GlobalVariable( 6970 *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage, 6971 ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I)); 6972 })); 6973 } 6974 6975 OMPBuilder.registerTargetGlobalVariable( 6976 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 6977 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 6978 EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple, 6979 nullptr, nullptr, Globals[0]->getType(), Globals[0]); 6980 6981 OMPBuilder.registerTargetGlobalVariable( 6982 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink, 6983 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 6984 EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple, 6985 nullptr, nullptr, Globals[1]->getType(), Globals[1]); 6986 6987 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn = 6988 [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, 6989 const llvm::TargetRegionEntryInfo &EntryInfo) -> void { 6990 // If this is invoked, then we want to emit an error, even if it is not 6991 // neccesarily the most readable, as something has went 
wrong. The 6992 // test-suite unfortunately eats up all error output 6993 ASSERT_EQ(Kind, Kind); 6994 }; 6995 6996 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn); 6997 6998 // Clauses for data_int_0 with To + Any clauses for the host 6999 std::vector<GlobalVariable *> OffloadEntries; 7000 OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name")); 7001 OffloadEntries.push_back( 7002 M->getNamedGlobal(".offloading.entry.test_data_int_0")); 7003 7004 // Clauses for data_int_1 with Link + Any clauses for the host 7005 OffloadEntries.push_back( 7006 M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr")); 7007 OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1")); 7008 OffloadEntries.push_back( 7009 M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr")); 7010 7011 for (unsigned I = 0; I < OffloadEntries.size(); ++I) 7012 EXPECT_NE(OffloadEntries[I], nullptr); 7013 7014 // Metadata generated for the host offload module 7015 NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info"); 7016 ASSERT_THAT(OffloadMetadata, testing::NotNull()); 7017 StringRef Nodes[2] = { 7018 cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1)) 7019 ->getString(), 7020 cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1)) 7021 ->getString()}; 7022 EXPECT_THAT( 7023 Nodes, testing::UnorderedElementsAre("test_data_int_0", 7024 "test_data_int_1_decl_tgt_ref_ptr")); 7025 } 7026 7027 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) { 7028 OpenMPIRBuilder OMPBuilder(*M); 7029 OMPBuilder.initialize(); 7030 OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true, 7031 /* IsGPU = */ true, 7032 /* OpenMPOffloadMandatory = */ false, 7033 /* HasRequiresReverseOffload = */ false, 7034 /* HasRequiresUnifiedAddress = */ false, 7035 /* HasRequiresUnifiedSharedMemory = */ false, 7036 /* HasRequiresDynamicAllocators = */ false); 7037 OMPBuilder.setConfig(Config); 7038 7039 FunctionCallee FnTypeAndCallee = 7040 
M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx)); 7041 7042 auto *Fn = cast<Function>(FnTypeAndCallee.getCallee()); 7043 OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn, 7044 /* Size = */ 0, 7045 /* Flags = */ 0, GlobalValue::WeakAnyLinkage); 7046 7047 // Check nvvm.annotations only created for GPU kernels 7048 NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations"); 7049 EXPECT_NE(MD, nullptr); 7050 EXPECT_EQ(MD->getNumOperands(), 1u); 7051 7052 MDNode *Annotations = MD->getOperand(0); 7053 EXPECT_EQ(Annotations->getNumOperands(), 3u); 7054 7055 Constant *ConstVal = 7056 dyn_cast<ConstantAsMetadata>(Annotations->getOperand(0))->getValue(); 7057 EXPECT_TRUE(isa<Function>(Fn)); 7058 EXPECT_EQ(ConstVal, cast<Function>(Fn)); 7059 7060 EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel")); 7061 7062 EXPECT_TRUE(mdconst::hasa<ConstantInt>(Annotations->getOperand(2))); 7063 APInt IntVal = 7064 mdconst::extract<ConstantInt>(Annotations->getOperand(2))->getValue(); 7065 EXPECT_EQ(IntVal, 1); 7066 7067 // Check kernel attributes 7068 EXPECT_TRUE(Fn->hasFnAttribute("kernel")); 7069 EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress)); 7070 } 7071 7072 } // namespace 7073