1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Frontend/OpenMP/OMPConstants.h" 10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" 11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 12 #include "llvm/IR/BasicBlock.h" 13 #include "llvm/IR/DIBuilder.h" 14 #include "llvm/IR/Function.h" 15 #include "llvm/IR/InstIterator.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/IR/LLVMContext.h" 18 #include "llvm/IR/Module.h" 19 #include "llvm/IR/Verifier.h" 20 #include "llvm/Passes/PassBuilder.h" 21 #include "llvm/Support/Casting.h" 22 #include "llvm/Testing/Support/Error.h" 23 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 24 #include "gmock/gmock.h" 25 #include "gtest/gtest.h" 26 #include <optional> 27 28 using namespace llvm; 29 using namespace omp; 30 31 // Helper that intends to be functionally equivalent to `VarType VarName = Init` 32 // for an `Init` that returns an `Expected<VarType>` value. It produces an error 33 // message and returns if `Init` didn't produce a valid result. 34 #define ASSERT_EXPECTED_INIT(VarType, VarName, Init) \ 35 auto __Expected##VarName = Init; \ 36 ASSERT_THAT_EXPECTED(__Expected##VarName, Succeeded()); \ 37 VarType VarName = *__Expected##VarName 38 39 // Similar to ASSERT_EXPECTED_INIT, but returns a given expression in case of 40 // error after printing the error message. 
41 #define ASSERT_EXPECTED_INIT_RETURN(VarType, VarName, Init, Return) \ 42 auto __Expected##VarName = Init; \ 43 EXPECT_THAT_EXPECTED(__Expected##VarName, Succeeded()); \ 44 if (!__Expected##VarName) \ 45 return Return; \ 46 VarType VarName = *__Expected##VarName 47 48 // Wrapper lambdas to allow using EXPECT*() macros inside of error-returning 49 // callbacks. 50 #define FINICB_WRAPPER(cb) \ 51 [&cb](InsertPointTy IP) -> Error { \ 52 cb(IP); \ 53 return Error::success(); \ 54 } 55 56 #define BODYGENCB_WRAPPER(cb) \ 57 [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error { \ 58 cb(AllocaIP, CodeGenIP); \ 59 return Error::success(); \ 60 } 61 62 #define LOOP_BODYGENCB_WRAPPER(cb) \ 63 [&cb](InsertPointTy CodeGenIP, Value *LC) -> Error { \ 64 cb(CodeGenIP, LC); \ 65 return Error::success(); \ 66 } 67 68 namespace { 69 70 /// Create an instruction that uses the values in \p Values. We use "printf" 71 /// just because it is often used for this purpose in test code, but it is never 72 /// executed here. 
73 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr, 74 ArrayRef<Value *> Values) { 75 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 76 77 GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M); 78 Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 79 Constant *Indices[] = {Zero, Zero}; 80 Constant *FormatStrConst = 81 ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices); 82 83 Function *PrintfDecl = M->getFunction("printf"); 84 if (!PrintfDecl) { 85 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 86 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true); 87 PrintfDecl = Function::Create(Ty, Linkage, "printf", M); 88 } 89 90 SmallVector<Value *, 4> Args; 91 Args.push_back(FormatStrConst); 92 Args.append(Values.begin(), Values.end()); 93 return Builder.CreateCall(PrintfDecl, Args); 94 } 95 96 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit 97 /// order the control flow of \p F. 98 /// 99 /// This is an easy way to verify the branching structure of the CFG without 100 /// checking every branch instruction individually. For the CFG of a 101 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering 102 /// the body, i.e. the DFS order corresponds to the execution order with one 103 /// loop iteration. 
104 static testing::AssertionResult 105 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 106 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 107 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 108 109 df_iterator_default_set<BasicBlock *, 16> Visited; 110 auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited); 111 112 BasicBlock *Prev = nullptr; 113 for (BasicBlock *BB : DFS) { 114 if (It != E && BB == *It) { 115 Prev = *It; 116 ++It; 117 } 118 } 119 120 if (It == E) 121 return testing::AssertionSuccess(); 122 if (!Prev) 123 return testing::AssertionFailure() 124 << "Did not find " << (*It)->getName() << " in control flow"; 125 return testing::AssertionFailure() 126 << "Expected " << Prev->getName() << " before " << (*It)->getName() 127 << " in control flow"; 128 } 129 130 /// Verify that blocks in \p RefOrder are in the same relative order in the 131 /// linked lists of blocks in \p F. The linked list may contain additional 132 /// blocks in-between. 133 /// 134 /// While the order in the linked list is not relevant for semantics, keeping 135 /// the order roughly in execution order makes its printout easier to read. 
136 static testing::AssertionResult 137 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 138 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 139 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 140 141 BasicBlock *Prev = nullptr; 142 for (BasicBlock &BB : *F) { 143 if (It != E && &BB == *It) { 144 Prev = *It; 145 ++It; 146 } 147 } 148 149 if (It == E) 150 return testing::AssertionSuccess(); 151 if (!Prev) 152 return testing::AssertionFailure() << "Did not find " << (*It)->getName() 153 << " in function " << F->getName(); 154 return testing::AssertionFailure() 155 << "Expected " << Prev->getName() << " before " << (*It)->getName() 156 << " in function " << F->getName(); 157 } 158 159 /// Populate Calls with call instructions calling the function with the given 160 /// FnID from the given function F. 161 static void findCalls(Function *F, omp::RuntimeFunction FnID, 162 OpenMPIRBuilder &OMPBuilder, 163 SmallVectorImpl<CallInst *> &Calls) { 164 Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); 165 for (BasicBlock &BB : *F) { 166 for (Instruction &I : BB) { 167 auto *Call = dyn_cast<CallInst>(&I); 168 if (Call && Call->getCalledFunction() == Fn) 169 Calls.push_back(Call); 170 } 171 } 172 } 173 174 /// Assuming \p F contains only one call to the function with the given \p FnID, 175 /// return that call. 
176 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID, 177 OpenMPIRBuilder &OMPBuilder) { 178 SmallVector<CallInst *, 1> Calls; 179 findCalls(F, FnID, OMPBuilder, Calls); 180 EXPECT_EQ(1u, Calls.size()); 181 if (Calls.size() != 1) 182 return nullptr; 183 return Calls.front(); 184 } 185 186 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) { 187 switch (SchedType & ~omp::OMPScheduleType::ModifierMask) { 188 case omp::OMPScheduleType::BaseDynamicChunked: 189 return omp::OMP_SCHEDULE_Dynamic; 190 case omp::OMPScheduleType::BaseGuidedChunked: 191 return omp::OMP_SCHEDULE_Guided; 192 case omp::OMPScheduleType::BaseAuto: 193 return omp::OMP_SCHEDULE_Auto; 194 case omp::OMPScheduleType::BaseRuntime: 195 return omp::OMP_SCHEDULE_Runtime; 196 default: 197 llvm_unreachable("unknown type for this test"); 198 } 199 } 200 201 class OpenMPIRBuilderTest : public testing::Test { 202 protected: 203 void SetUp() override { 204 M.reset(new Module("MyModule", Ctx)); 205 FunctionType *FTy = 206 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 207 /*isVarArg=*/false); 208 F = Function::Create(FTy, Function::ExternalLinkage, "", M.get()); 209 BB = BasicBlock::Create(Ctx, "", F); 210 211 DIBuilder DIB(*M); 212 auto File = DIB.createFile("test.dbg", "/src", std::nullopt, 213 std::optional<StringRef>("/src/test.dbg")); 214 auto CU = 215 DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0); 216 auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray({})); 217 auto SP = DIB.createFunction( 218 CU, "foo", "", File, 1, Type, 1, DINode::FlagZero, 219 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); 220 F->setSubprogram(SP); 221 auto Scope = DIB.createLexicalBlockFile(SP, File, 0); 222 DIB.finalize(); 223 DL = DILocation::get(Ctx, 3, 7, Scope); 224 } 225 226 void TearDown() override { 227 BB = nullptr; 228 M.reset(); 229 } 230 231 /// Create a function with a simple loop that calls printf using the 
logical 232 /// loop counter for use with tests that need a CanonicalLoopInfo object. 233 CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL, 234 OpenMPIRBuilder &OMPBuilder, 235 int UseIVBits, 236 CallInst **Call = nullptr, 237 BasicBlock **BodyCode = nullptr) { 238 OMPBuilder.initialize(); 239 F->setName("func"); 240 241 IRBuilder<> Builder(BB); 242 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 243 Value *TripCount = F->getArg(0); 244 245 Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits); 246 Value *CastedTripCount = 247 Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount"); 248 249 auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, 250 llvm::Value *LC) { 251 Builder.restoreIP(CodeGenIP); 252 if (BodyCode) 253 *BodyCode = Builder.GetInsertBlock(); 254 255 // Add something that consumes the induction variable to the body. 256 CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC}); 257 if (Call) 258 *Call = CallInst; 259 260 return Error::success(); 261 }; 262 263 ASSERT_EXPECTED_INIT_RETURN( 264 CanonicalLoopInfo *, Loop, 265 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount), 266 nullptr); 267 268 // Finalize the function. 269 Builder.restoreIP(Loop->getAfterIP()); 270 Builder.CreateRetVoid(); 271 272 return Loop; 273 } 274 275 LLVMContext Ctx; 276 std::unique_ptr<Module> M; 277 Function *F; 278 BasicBlock *BB; 279 DebugLoc DL; 280 }; 281 282 class OpenMPIRBuilderTestWithParams 283 : public OpenMPIRBuilderTest, 284 public ::testing::WithParamInterface<omp::OMPScheduleType> {}; 285 286 class OpenMPIRBuilderTestWithIVBits 287 : public OpenMPIRBuilderTest, 288 public ::testing::WithParamInterface<int> {}; 289 290 // Returns the value stored in the given allocation. Returns null if the given 291 // value is not a result of an InstTy instruction, if no value is stored or if 292 // there is more than one store. 
293 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) { 294 Instruction *Inst = dyn_cast<InstTy>(AllocaValue); 295 if (!Inst) 296 return nullptr; 297 StoreInst *Store = nullptr; 298 for (Use &U : Inst->uses()) { 299 if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) { 300 EXPECT_EQ(Store, nullptr); 301 Store = CandidateStore; 302 } 303 } 304 if (!Store) 305 return nullptr; 306 return Store->getValueOperand(); 307 } 308 309 // Returns the value stored in the aggregate argument of an outlined function, 310 // or nullptr if it is not found. 311 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate, 312 unsigned Idx) { 313 GetElementPtrInst *GEPAtIdx = nullptr; 314 // Find GEP instruction at that index. 315 for (User *Usr : Aggregate->users()) { 316 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr); 317 if (!GEP) 318 continue; 319 320 if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx)) 321 continue; 322 323 EXPECT_EQ(GEPAtIdx, nullptr); 324 GEPAtIdx = GEP; 325 } 326 327 EXPECT_NE(GEPAtIdx, nullptr); 328 EXPECT_EQ(GEPAtIdx->getNumUses(), 1U); 329 330 // Find the value stored to the aggregate. 331 StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin()); 332 Value *StoredAggValue = StoreToAgg->getValueOperand(); 333 334 Value *StoredValue = nullptr; 335 336 // Find the value stored to the value stored in the aggregate. 337 for (User *Usr : StoredAggValue->users()) { 338 StoreInst *Store = dyn_cast<StoreInst>(Usr); 339 if (!Store) 340 continue; 341 342 if (Store->getPointerOperand() != StoredAggValue) 343 continue; 344 345 EXPECT_EQ(StoredValue, nullptr); 346 StoredValue = Store->getValueOperand(); 347 } 348 349 return StoredValue; 350 } 351 352 // Returns the aggregate that the value is originating from. 353 static Value *findAggregateFromValue(Value *V) { 354 // Expects a load instruction that loads from the aggregate. 
355 LoadInst *Load = dyn_cast<LoadInst>(V); 356 EXPECT_NE(Load, nullptr); 357 // Find the GEP instruction used in the load instruction. 358 GetElementPtrInst *GEP = 359 dyn_cast<GetElementPtrInst>(Load->getPointerOperand()); 360 EXPECT_NE(GEP, nullptr); 361 // Find the aggregate used in the GEP instruction. 362 Value *Aggregate = GEP->getPointerOperand(); 363 364 return Aggregate; 365 } 366 367 TEST_F(OpenMPIRBuilderTest, CreateBarrier) { 368 OpenMPIRBuilder OMPBuilder(*M); 369 OMPBuilder.initialize(); 370 371 IRBuilder<> Builder(BB); 372 373 ASSERT_THAT_EXPECTED( 374 OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for), 375 Succeeded()); 376 EXPECT_TRUE(M->global_empty()); 377 EXPECT_EQ(M->size(), 1U); 378 EXPECT_EQ(F->size(), 1U); 379 EXPECT_EQ(BB->size(), 0U); 380 381 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 382 ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded()); 383 EXPECT_FALSE(M->global_empty()); 384 EXPECT_EQ(M->size(), 3U); 385 EXPECT_EQ(F->size(), 1U); 386 EXPECT_EQ(BB->size(), 2U); 387 388 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 389 EXPECT_NE(GTID, nullptr); 390 EXPECT_EQ(GTID->arg_size(), 1U); 391 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 392 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 393 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 394 395 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 396 EXPECT_NE(Barrier, nullptr); 397 EXPECT_EQ(Barrier->arg_size(), 2U); 398 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier"); 399 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 400 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 401 402 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 403 404 Builder.CreateUnreachable(); 405 EXPECT_FALSE(verifyModule(*M, &errs())); 406 } 407 408 TEST_F(OpenMPIRBuilderTest, CreateCancel) { 409 using InsertPointTy = 
OpenMPIRBuilder::InsertPointTy; 410 OpenMPIRBuilder OMPBuilder(*M); 411 OMPBuilder.initialize(); 412 413 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 414 new UnreachableInst(Ctx, CBB); 415 auto FiniCB = [&](InsertPointTy IP) { 416 ASSERT_NE(IP.getBlock(), nullptr); 417 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 418 BranchInst::Create(CBB, IP.getBlock()); 419 }; 420 OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); 421 422 IRBuilder<> Builder(BB); 423 424 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 425 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP, 426 OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel)); 427 Builder.restoreIP(NewIP); 428 EXPECT_FALSE(M->global_empty()); 429 EXPECT_EQ(M->size(), 4U); 430 EXPECT_EQ(F->size(), 4U); 431 EXPECT_EQ(BB->size(), 4U); 432 433 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 434 EXPECT_NE(GTID, nullptr); 435 EXPECT_EQ(GTID->arg_size(), 1U); 436 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 437 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 438 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 439 440 CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode()); 441 EXPECT_NE(Cancel, nullptr); 442 EXPECT_EQ(Cancel->arg_size(), 3U); 443 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 444 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 445 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 446 EXPECT_EQ(Cancel->getNumUses(), 1U); 447 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 448 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 449 EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); 450 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 451 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 452 EXPECT_NE(GTID1, nullptr); 453 EXPECT_EQ(GTID1->arg_size(), 1U); 454 
EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 455 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 456 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 457 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 458 EXPECT_NE(Barrier, nullptr); 459 EXPECT_EQ(Barrier->arg_size(), 2U); 460 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 461 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 462 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 463 EXPECT_EQ(Barrier->getNumUses(), 0U); 464 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 465 1U); 466 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 467 468 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 469 470 OMPBuilder.popFinalizationCB(); 471 472 Builder.CreateUnreachable(); 473 EXPECT_FALSE(verifyModule(*M, &errs())); 474 } 475 476 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { 477 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 478 OpenMPIRBuilder OMPBuilder(*M); 479 OMPBuilder.initialize(); 480 481 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 482 new UnreachableInst(Ctx, CBB); 483 auto FiniCB = [&](InsertPointTy IP) { 484 ASSERT_NE(IP.getBlock(), nullptr); 485 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 486 BranchInst::Create(CBB, IP.getBlock()); 487 }; 488 OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); 489 490 IRBuilder<> Builder(BB); 491 492 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 493 ASSERT_EXPECTED_INIT( 494 OpenMPIRBuilder::InsertPointTy, NewIP, 495 OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel)); 496 Builder.restoreIP(NewIP); 497 EXPECT_FALSE(M->global_empty()); 498 EXPECT_EQ(M->size(), 4U); 499 EXPECT_EQ(F->size(), 7U); 500 EXPECT_EQ(BB->size(), 1U); 501 ASSERT_TRUE(isa<BranchInst>(BB->getTerminator())); 502 
ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); 503 BB = BB->getTerminator()->getSuccessor(0); 504 EXPECT_EQ(BB->size(), 4U); 505 506 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 507 EXPECT_NE(GTID, nullptr); 508 EXPECT_EQ(GTID->arg_size(), 1U); 509 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 510 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 511 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 512 513 CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode()); 514 EXPECT_NE(Cancel, nullptr); 515 EXPECT_EQ(Cancel->arg_size(), 3U); 516 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 517 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 518 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 519 EXPECT_EQ(Cancel->getNumUses(), 1U); 520 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 521 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 522 EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); 523 EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), 524 NewIP.getBlock()); 525 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 526 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 527 EXPECT_NE(GTID1, nullptr); 528 EXPECT_EQ(GTID1->arg_size(), 1U); 529 EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 530 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 531 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 532 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 533 EXPECT_NE(Barrier, nullptr); 534 EXPECT_EQ(Barrier->arg_size(), 2U); 535 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 536 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 537 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 538 EXPECT_EQ(Barrier->getNumUses(), 0U); 539 
EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 540 1U); 541 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 542 543 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 544 545 OMPBuilder.popFinalizationCB(); 546 547 Builder.CreateUnreachable(); 548 EXPECT_FALSE(verifyModule(*M, &errs())); 549 } 550 551 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { 552 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 553 OpenMPIRBuilder OMPBuilder(*M); 554 OMPBuilder.initialize(); 555 556 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 557 new UnreachableInst(Ctx, CBB); 558 auto FiniCB = [&](InsertPointTy IP) { 559 ASSERT_NE(IP.getBlock(), nullptr); 560 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 561 BranchInst::Create(CBB, IP.getBlock()); 562 }; 563 OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); 564 565 IRBuilder<> Builder(BB); 566 567 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 568 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP, 569 OMPBuilder.createBarrier(Loc, OMPD_for)); 570 Builder.restoreIP(NewIP); 571 EXPECT_FALSE(M->global_empty()); 572 EXPECT_EQ(M->size(), 3U); 573 EXPECT_EQ(F->size(), 4U); 574 EXPECT_EQ(BB->size(), 4U); 575 576 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 577 EXPECT_NE(GTID, nullptr); 578 EXPECT_EQ(GTID->arg_size(), 1U); 579 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 580 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 581 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 582 583 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 584 EXPECT_NE(Barrier, nullptr); 585 EXPECT_EQ(Barrier->arg_size(), 2U); 586 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 587 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 588 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 589 
EXPECT_EQ(Barrier->getNumUses(), 1U); 590 Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); 591 EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); 592 EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); 593 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); 594 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 595 1U); 596 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), 597 CBB); 598 599 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 600 601 OMPBuilder.popFinalizationCB(); 602 603 Builder.CreateUnreachable(); 604 EXPECT_FALSE(verifyModule(*M, &errs())); 605 } 606 607 TEST_F(OpenMPIRBuilderTest, DbgLoc) { 608 OpenMPIRBuilder OMPBuilder(*M); 609 OMPBuilder.initialize(); 610 F->setName("func"); 611 612 IRBuilder<> Builder(BB); 613 614 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 615 ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded()); 616 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 617 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 618 EXPECT_EQ(GTID->getDebugLoc(), DL); 619 EXPECT_EQ(Barrier->getDebugLoc(), DL); 620 EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0))); 621 if (!isa<GlobalVariable>(Barrier->getOperand(0))) 622 return; 623 GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0)); 624 EXPECT_TRUE(Ident->hasInitializer()); 625 if (!Ident->hasInitializer()) 626 return; 627 Constant *Initializer = Ident->getInitializer(); 628 EXPECT_TRUE( 629 isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts())); 630 GlobalVariable *SrcStrGlob = 631 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 632 if (!SrcStrGlob) 633 return; 634 EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer())); 635 ConstantDataArray *SrcSrc = 636 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 637 if (!SrcSrc) 638 return; 639 EXPECT_EQ(SrcSrc->getAsCString(), 
";/src/test.dbg;foo;3;7;;"); 640 } 641 642 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { 643 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 644 std::string oldDLStr = M->getDataLayoutStr(); 645 M->setDataLayout( 646 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 647 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 648 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 649 OpenMPIRBuilder OMPBuilder(*M); 650 OMPBuilder.Config.IsTargetDevice = true; 651 OMPBuilder.initialize(); 652 F->setName("func"); 653 IRBuilder<> Builder(BB); 654 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 655 Builder.CreateBr(EnterBB); 656 Builder.SetInsertPoint(EnterBB); 657 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 658 659 AllocaInst *PrivAI = nullptr; 660 661 unsigned NumBodiesGenerated = 0; 662 unsigned NumPrivatizedVars = 0; 663 unsigned NumFinalizationPoints = 0; 664 665 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 666 ++NumBodiesGenerated; 667 668 Builder.restoreIP(AllocaIP); 669 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 670 Builder.CreateStore(F->arg_begin(), PrivAI); 671 672 Builder.restoreIP(CodeGenIP); 673 Value *PrivLoad = 674 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 675 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 676 Instruction *ThenTerm, *ElseTerm; 677 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 678 &ThenTerm, &ElseTerm); 679 return Error::success(); 680 }; 681 682 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 683 Value &Orig, Value &Inner, 684 Value *&ReplacementValue) -> InsertPointTy { 685 ++NumPrivatizedVars; 686 687 if (!isa<AllocaInst>(Orig)) { 688 EXPECT_EQ(&Orig, F->arg_begin()); 689 ReplacementValue = &Inner; 690 return CodeGenIP; 691 } 692 693 // Since the original value is an allocation, it has a pointer 
type and 694 // therefore no additional wrapping should happen. 695 EXPECT_EQ(&Orig, &Inner); 696 697 // Trivial copy (=firstprivate). 698 Builder.restoreIP(AllocaIP); 699 Type *VTy = ReplacementValue->getType(); 700 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 701 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 702 Builder.restoreIP(CodeGenIP); 703 Builder.CreateStore(V, ReplacementValue); 704 return CodeGenIP; 705 }; 706 707 auto FiniCB = [&](InsertPointTy CodeGenIP) { 708 ++NumFinalizationPoints; 709 return Error::success(); 710 }; 711 712 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 713 F->getEntryBlock().getFirstInsertionPt()); 714 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 715 OMPBuilder.createParallel( 716 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, 717 nullptr, OMP_PROC_BIND_default, false)); 718 719 EXPECT_EQ(NumBodiesGenerated, 1U); 720 EXPECT_EQ(NumPrivatizedVars, 1U); 721 EXPECT_EQ(NumFinalizationPoints, 1U); 722 723 Builder.restoreIP(AfterIP); 724 Builder.CreateRetVoid(); 725 726 OMPBuilder.finalize(); 727 Function *OutlinedFn = PrivAI->getFunction(); 728 EXPECT_FALSE(verifyModule(*M, &errs())); 729 EXPECT_NE(OutlinedFn, F); 730 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 731 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 732 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 733 734 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 735 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 736 // Make sure that arguments are pointers in 0 address address space 737 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), 738 PointerType::get(M->getContext(), 0)); 739 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), 740 PointerType::get(M->getContext(), 0)); 741 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 742 PointerType::get(M->getContext(), 0)); 743 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 744 
EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 745 User *Usr = OutlinedFn->user_back(); 746 ASSERT_TRUE(isa<CallInst>(Usr)); 747 CallInst *Parallel51CI = dyn_cast<CallInst>(Usr); 748 ASSERT_NE(Parallel51CI, nullptr); 749 750 EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51"); 751 EXPECT_EQ(Parallel51CI->arg_size(), 9U); 752 EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn); 753 EXPECT_TRUE( 754 isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts())); 755 EXPECT_EQ(Parallel51CI, Usr); 756 M->setDataLayout(oldDLStr); 757 } 758 759 TEST_F(OpenMPIRBuilderTest, ParallelSimple) { 760 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 761 OpenMPIRBuilder OMPBuilder(*M); 762 OMPBuilder.Config.IsTargetDevice = false; 763 OMPBuilder.initialize(); 764 F->setName("func"); 765 IRBuilder<> Builder(BB); 766 767 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 768 Builder.CreateBr(EnterBB); 769 Builder.SetInsertPoint(EnterBB); 770 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 771 772 AllocaInst *PrivAI = nullptr; 773 774 unsigned NumBodiesGenerated = 0; 775 unsigned NumPrivatizedVars = 0; 776 unsigned NumFinalizationPoints = 0; 777 778 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 779 ++NumBodiesGenerated; 780 781 Builder.restoreIP(AllocaIP); 782 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 783 Builder.CreateStore(F->arg_begin(), PrivAI); 784 785 Builder.restoreIP(CodeGenIP); 786 Value *PrivLoad = 787 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 788 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 789 Instruction *ThenTerm, *ElseTerm; 790 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 791 &ThenTerm, &ElseTerm); 792 return Error::success(); 793 }; 794 795 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 796 Value &Orig, Value &Inner, 797 Value *&ReplacementValue) -> InsertPointTy 
{ 798 ++NumPrivatizedVars; 799 800 if (!isa<AllocaInst>(Orig)) { 801 EXPECT_EQ(&Orig, F->arg_begin()); 802 ReplacementValue = &Inner; 803 return CodeGenIP; 804 } 805 806 // Since the original value is an allocation, it has a pointer type and 807 // therefore no additional wrapping should happen. 808 EXPECT_EQ(&Orig, &Inner); 809 810 // Trivial copy (=firstprivate). 811 Builder.restoreIP(AllocaIP); 812 Type *VTy = ReplacementValue->getType(); 813 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 814 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 815 Builder.restoreIP(CodeGenIP); 816 Builder.CreateStore(V, ReplacementValue); 817 return CodeGenIP; 818 }; 819 820 auto FiniCB = [&](InsertPointTy CodeGenIP) { 821 ++NumFinalizationPoints; 822 return Error::success(); 823 }; 824 825 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 826 F->getEntryBlock().getFirstInsertionPt()); 827 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 828 OMPBuilder.createParallel( 829 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, 830 nullptr, OMP_PROC_BIND_default, false)); 831 EXPECT_EQ(NumBodiesGenerated, 1U); 832 EXPECT_EQ(NumPrivatizedVars, 1U); 833 EXPECT_EQ(NumFinalizationPoints, 1U); 834 835 Builder.restoreIP(AfterIP); 836 Builder.CreateRetVoid(); 837 838 OMPBuilder.finalize(); 839 840 EXPECT_NE(PrivAI, nullptr); 841 Function *OutlinedFn = PrivAI->getFunction(); 842 EXPECT_NE(F, OutlinedFn); 843 EXPECT_FALSE(verifyModule(*M, &errs())); 844 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 845 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 846 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 847 848 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 849 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 850 851 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 852 EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 853 User *Usr = OutlinedFn->user_back(); 854 
ASSERT_TRUE(isa<CallInst>(Usr)); 855 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 856 ASSERT_NE(ForkCI, nullptr); 857 858 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 859 EXPECT_EQ(ForkCI->arg_size(), 4U); 860 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 861 EXPECT_EQ(ForkCI->getArgOperand(1), 862 ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); 863 EXPECT_EQ(ForkCI, Usr); 864 Value *StoredValue = 865 findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0); 866 EXPECT_EQ(StoredValue, F->arg_begin()); 867 } 868 869 TEST_F(OpenMPIRBuilderTest, ParallelNested) { 870 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 871 OpenMPIRBuilder OMPBuilder(*M); 872 OMPBuilder.Config.IsTargetDevice = false; 873 OMPBuilder.initialize(); 874 F->setName("func"); 875 IRBuilder<> Builder(BB); 876 877 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 878 Builder.CreateBr(EnterBB); 879 Builder.SetInsertPoint(EnterBB); 880 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 881 882 unsigned NumInnerBodiesGenerated = 0; 883 unsigned NumOuterBodiesGenerated = 0; 884 unsigned NumFinalizationPoints = 0; 885 886 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 887 ++NumInnerBodiesGenerated; 888 return Error::success(); 889 }; 890 891 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 892 Value &Orig, Value &Inner, 893 Value *&ReplacementValue) -> InsertPointTy { 894 // Trivial copy (=firstprivate). 
895 Builder.restoreIP(AllocaIP); 896 Type *VTy = ReplacementValue->getType(); 897 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 898 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 899 Builder.restoreIP(CodeGenIP); 900 Builder.CreateStore(V, ReplacementValue); 901 return CodeGenIP; 902 }; 903 904 auto FiniCB = [&](InsertPointTy CodeGenIP) { 905 ++NumFinalizationPoints; 906 return Error::success(); 907 }; 908 909 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 910 ++NumOuterBodiesGenerated; 911 Builder.restoreIP(CodeGenIP); 912 BasicBlock *CGBB = CodeGenIP.getBlock(); 913 BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 914 CGBB->getTerminator()->eraseFromParent(); 915 916 ASSERT_EXPECTED_INIT( 917 OpenMPIRBuilder::InsertPointTy, AfterIP, 918 OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, 919 InnerBodyGenCB, PrivCB, FiniCB, nullptr, 920 nullptr, OMP_PROC_BIND_default, false)); 921 922 Builder.restoreIP(AfterIP); 923 Builder.CreateBr(NewBB); 924 }; 925 926 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 927 F->getEntryBlock().getFirstInsertionPt()); 928 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 929 OMPBuilder.createParallel( 930 Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), 931 PrivCB, FiniCB, nullptr, nullptr, 932 OMP_PROC_BIND_default, false)); 933 934 EXPECT_EQ(NumInnerBodiesGenerated, 1U); 935 EXPECT_EQ(NumOuterBodiesGenerated, 1U); 936 EXPECT_EQ(NumFinalizationPoints, 2U); 937 938 Builder.restoreIP(AfterIP); 939 Builder.CreateRetVoid(); 940 941 OMPBuilder.finalize(); 942 943 EXPECT_EQ(M->size(), 5U); 944 for (Function &OutlinedFn : *M) { 945 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 946 continue; 947 EXPECT_FALSE(verifyModule(*M, &errs())); 948 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 949 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 950 
EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 951 952 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 953 EXPECT_EQ(OutlinedFn.arg_size(), 2U); 954 955 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 956 User *Usr = OutlinedFn.user_back(); 957 ASSERT_TRUE(isa<CallInst>(Usr)); 958 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 959 ASSERT_NE(ForkCI, nullptr); 960 961 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 962 EXPECT_EQ(ForkCI->arg_size(), 3U); 963 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 964 EXPECT_EQ(ForkCI->getArgOperand(1), 965 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 966 EXPECT_EQ(ForkCI, Usr); 967 } 968 } 969 970 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { 971 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 972 OpenMPIRBuilder OMPBuilder(*M); 973 OMPBuilder.Config.IsTargetDevice = false; 974 OMPBuilder.initialize(); 975 F->setName("func"); 976 IRBuilder<> Builder(BB); 977 978 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 979 Builder.CreateBr(EnterBB); 980 Builder.SetInsertPoint(EnterBB); 981 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 982 983 unsigned NumInnerBodiesGenerated = 0; 984 unsigned NumOuterBodiesGenerated = 0; 985 unsigned NumFinalizationPoints = 0; 986 987 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 988 ++NumInnerBodiesGenerated; 989 return Error::success(); 990 }; 991 992 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 993 Value &Orig, Value &Inner, 994 Value *&ReplacementValue) -> InsertPointTy { 995 // Trivial copy (=firstprivate). 
996 Builder.restoreIP(AllocaIP); 997 Type *VTy = ReplacementValue->getType(); 998 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 999 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 1000 Builder.restoreIP(CodeGenIP); 1001 Builder.CreateStore(V, ReplacementValue); 1002 return CodeGenIP; 1003 }; 1004 1005 auto FiniCB = [&](InsertPointTy CodeGenIP) { 1006 ++NumFinalizationPoints; 1007 return Error::success(); 1008 }; 1009 1010 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1011 ++NumOuterBodiesGenerated; 1012 Builder.restoreIP(CodeGenIP); 1013 BasicBlock *CGBB = CodeGenIP.getBlock(); 1014 BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 1015 BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); 1016 CGBB->getTerminator()->eraseFromParent(); 1017 ; 1018 NewBB1->getTerminator()->eraseFromParent(); 1019 ; 1020 1021 ASSERT_EXPECTED_INIT( 1022 OpenMPIRBuilder::InsertPointTy, AfterIP1, 1023 OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, 1024 InnerBodyGenCB, PrivCB, FiniCB, nullptr, 1025 nullptr, OMP_PROC_BIND_default, false)); 1026 1027 Builder.restoreIP(AfterIP1); 1028 Builder.CreateBr(NewBB1); 1029 1030 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2, 1031 OMPBuilder.createParallel( 1032 InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, 1033 InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, 1034 OMP_PROC_BIND_default, false)); 1035 1036 Builder.restoreIP(AfterIP2); 1037 Builder.CreateBr(NewBB2); 1038 }; 1039 1040 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1041 F->getEntryBlock().getFirstInsertionPt()); 1042 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 1043 OMPBuilder.createParallel( 1044 Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), 1045 PrivCB, FiniCB, nullptr, nullptr, 1046 OMP_PROC_BIND_default, false)); 1047 1048 EXPECT_EQ(NumInnerBodiesGenerated, 2U); 1049 
EXPECT_EQ(NumOuterBodiesGenerated, 1U); 1050 EXPECT_EQ(NumFinalizationPoints, 3U); 1051 1052 Builder.restoreIP(AfterIP); 1053 Builder.CreateRetVoid(); 1054 1055 OMPBuilder.finalize(); 1056 1057 EXPECT_EQ(M->size(), 6U); 1058 for (Function &OutlinedFn : *M) { 1059 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 1060 continue; 1061 EXPECT_FALSE(verifyModule(*M, &errs())); 1062 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 1063 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 1064 EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 1065 1066 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 1067 EXPECT_EQ(OutlinedFn.arg_size(), 2U); 1068 1069 unsigned NumAllocas = 0; 1070 for (Instruction &I : instructions(OutlinedFn)) 1071 NumAllocas += isa<AllocaInst>(I); 1072 EXPECT_EQ(NumAllocas, 1U); 1073 1074 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 1075 User *Usr = OutlinedFn.user_back(); 1076 ASSERT_TRUE(isa<CallInst>(Usr)); 1077 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 1078 ASSERT_NE(ForkCI, nullptr); 1079 1080 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 1081 EXPECT_EQ(ForkCI->arg_size(), 3U); 1082 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1083 EXPECT_EQ(ForkCI->getArgOperand(1), 1084 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 1085 EXPECT_EQ(ForkCI, Usr); 1086 } 1087 } 1088 1089 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { 1090 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1091 OpenMPIRBuilder OMPBuilder(*M); 1092 OMPBuilder.Config.IsTargetDevice = false; 1093 OMPBuilder.initialize(); 1094 F->setName("func"); 1095 IRBuilder<> Builder(BB); 1096 1097 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1098 Builder.CreateBr(EnterBB); 1099 Builder.SetInsertPoint(EnterBB); 1100 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1101 1102 AllocaInst *PrivAI = nullptr; 1103 1104 unsigned NumBodiesGenerated = 0; 1105 unsigned NumPrivatizedVars = 0; 
1106 unsigned NumFinalizationPoints = 0; 1107 1108 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1109 ++NumBodiesGenerated; 1110 1111 Builder.restoreIP(AllocaIP); 1112 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 1113 Builder.CreateStore(F->arg_begin(), PrivAI); 1114 1115 Builder.restoreIP(CodeGenIP); 1116 Value *PrivLoad = 1117 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 1118 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 1119 Instruction *ThenTerm, *ElseTerm; 1120 SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm, 1121 &ElseTerm); 1122 return Error::success(); 1123 }; 1124 1125 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1126 Value &Orig, Value &Inner, 1127 Value *&ReplacementValue) -> InsertPointTy { 1128 ++NumPrivatizedVars; 1129 1130 if (!isa<AllocaInst>(Orig)) { 1131 EXPECT_EQ(&Orig, F->arg_begin()); 1132 ReplacementValue = &Inner; 1133 return CodeGenIP; 1134 } 1135 1136 // Since the original value is an allocation, it has a pointer type and 1137 // therefore no additional wrapping should happen. 1138 EXPECT_EQ(&Orig, &Inner); 1139 1140 // Trivial copy (=firstprivate). 1141 Builder.restoreIP(AllocaIP); 1142 Type *VTy = ReplacementValue->getType(); 1143 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 1144 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 1145 Builder.restoreIP(CodeGenIP); 1146 Builder.CreateStore(V, ReplacementValue); 1147 return CodeGenIP; 1148 }; 1149 1150 auto FiniCB = [&](InsertPointTy CodeGenIP) { 1151 ++NumFinalizationPoints; 1152 // No destructors. 
1153 return Error::success(); 1154 }; 1155 1156 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1157 F->getEntryBlock().getFirstInsertionPt()); 1158 ASSERT_EXPECTED_INIT( 1159 OpenMPIRBuilder::InsertPointTy, AfterIP, 1160 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1161 Builder.CreateIsNotNull(F->arg_begin()), 1162 nullptr, OMP_PROC_BIND_default, false)); 1163 1164 EXPECT_EQ(NumBodiesGenerated, 1U); 1165 EXPECT_EQ(NumPrivatizedVars, 1U); 1166 EXPECT_EQ(NumFinalizationPoints, 1U); 1167 1168 Builder.restoreIP(AfterIP); 1169 Builder.CreateRetVoid(); 1170 OMPBuilder.finalize(); 1171 1172 EXPECT_NE(PrivAI, nullptr); 1173 Function *OutlinedFn = PrivAI->getFunction(); 1174 EXPECT_NE(F, OutlinedFn); 1175 EXPECT_FALSE(verifyModule(*M, &errs())); 1176 1177 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 1178 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 1179 1180 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 1181 ASSERT_EQ(OutlinedFn->getNumUses(), 1U); 1182 1183 CallInst *ForkCI = nullptr; 1184 for (User *Usr : OutlinedFn->users()) { 1185 ASSERT_TRUE(isa<CallInst>(Usr)); 1186 ForkCI = cast<CallInst>(Usr); 1187 } 1188 1189 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if"); 1190 EXPECT_EQ(ForkCI->arg_size(), 5U); 1191 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1192 EXPECT_EQ(ForkCI->getArgOperand(1), 1193 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 1194 EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx)); 1195 } 1196 1197 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { 1198 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1199 OpenMPIRBuilder OMPBuilder(*M); 1200 OMPBuilder.Config.IsTargetDevice = false; 1201 OMPBuilder.initialize(); 1202 F->setName("func"); 1203 IRBuilder<> Builder(BB); 1204 1205 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1206 Builder.CreateBr(EnterBB); 1207 Builder.SetInsertPoint(EnterBB); 1208 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1209 1210 unsigned NumBodiesGenerated = 0; 1211 unsigned NumPrivatizedVars = 0; 1212 unsigned NumFinalizationPoints = 0; 1213 1214 CallInst *CheckedBarrier = nullptr; 1215 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1216 ++NumBodiesGenerated; 1217 1218 Builder.restoreIP(CodeGenIP); 1219 1220 // Create three barriers, two cancel barriers but only one checked. 1221 Function *CBFn, *BFn; 1222 1223 ASSERT_EXPECTED_INIT( 1224 OpenMPIRBuilder::InsertPointTy, BarrierIP1, 1225 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); 1226 Builder.restoreIP(BarrierIP1); 1227 1228 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1229 BFn = M->getFunction("__kmpc_barrier"); 1230 ASSERT_NE(CBFn, nullptr); 1231 ASSERT_EQ(BFn, nullptr); 1232 ASSERT_EQ(CBFn->getNumUses(), 1U); 1233 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1234 ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); 1235 CheckedBarrier = cast<CallInst>(CBFn->user_back()); 1236 1237 ASSERT_EXPECTED_INIT( 1238 OpenMPIRBuilder::InsertPointTy, BarrierIP2, 1239 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); 1240 Builder.restoreIP(BarrierIP2); 1241 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1242 BFn = M->getFunction("__kmpc_barrier"); 1243 ASSERT_NE(CBFn, nullptr); 1244 ASSERT_NE(BFn, nullptr); 1245 ASSERT_EQ(CBFn->getNumUses(), 1U); 1246 ASSERT_EQ(BFn->getNumUses(), 1U); 1247 ASSERT_TRUE(isa<CallInst>(BFn->user_back())); 1248 ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); 1249 1250 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, BarrierIP3, 1251 OMPBuilder.createBarrier(Builder.saveIP(), 1252 OMPD_parallel, false, false)); 1253 Builder.restoreIP(BarrierIP3); 1254 ASSERT_EQ(CBFn->getNumUses(), 2U); 1255 ASSERT_EQ(BFn->getNumUses(), 1U); 1256 ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); 1257 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1258 ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); 1259 
}; 1260 1261 auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, 1262 Value *&) -> InsertPointTy { 1263 ++NumPrivatizedVars; 1264 llvm_unreachable("No privatization callback call expected!"); 1265 }; 1266 1267 FunctionType *FakeDestructorTy = 1268 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 1269 /*isVarArg=*/false); 1270 auto *FakeDestructor = Function::Create( 1271 FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); 1272 1273 auto FiniCB = [&](InsertPointTy IP) { 1274 ++NumFinalizationPoints; 1275 Builder.restoreIP(IP); 1276 Builder.CreateCall(FakeDestructor, 1277 {Builder.getInt32(NumFinalizationPoints)}); 1278 return Error::success(); 1279 }; 1280 1281 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1282 F->getEntryBlock().getFirstInsertionPt()); 1283 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 1284 OMPBuilder.createParallel( 1285 Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB, 1286 FiniCB, Builder.CreateIsNotNull(F->arg_begin()), 1287 nullptr, OMP_PROC_BIND_default, true)); 1288 1289 EXPECT_EQ(NumBodiesGenerated, 1U); 1290 EXPECT_EQ(NumPrivatizedVars, 0U); 1291 EXPECT_EQ(NumFinalizationPoints, 2U); 1292 EXPECT_EQ(FakeDestructor->getNumUses(), 2U); 1293 1294 Builder.restoreIP(AfterIP); 1295 Builder.CreateRetVoid(); 1296 OMPBuilder.finalize(); 1297 1298 EXPECT_FALSE(verifyModule(*M, &errs())); 1299 1300 BasicBlock *ExitBB = nullptr; 1301 for (const User *Usr : FakeDestructor->users()) { 1302 const CallInst *CI = dyn_cast<CallInst>(Usr); 1303 ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); 1304 ASSERT_TRUE(isa<BranchInst>(CI->getNextNode())); 1305 ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); 1306 if (ExitBB) 1307 ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); 1308 else 1309 ExitBB = CI->getNextNode()->getSuccessor(0); 1310 ASSERT_EQ(ExitBB->size(), 1U); 1311 if (!isa<ReturnInst>(ExitBB->front())) { 1312 ASSERT_TRUE(isa<BranchInst>(ExitBB->front())); 1313 
ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
      ASSERT_TRUE(isa<ReturnInst>(
          cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
    }
  }
}

// Uses values of several types (i32, pointer, struct, pointer) defined outside
// a parallel region from inside its body, then checks that argument 2 of the
// outlined function is a pointer.
TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

  Type *I32Ty = Type::getInt32Ty(M->getContext());
  Type *PtrTy = PointerType::get(M->getContext(), 0);
  Type *StructTy = StructType::get(I32Ty, PtrTy);
  Type *VoidTy = Type::getVoidTy(M->getContext());
  // One producer ("ret_*") and one consumer ("take_*") per value kind.
  FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
  FunctionCallee TakeI32Func =
      M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
  FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy);
  FunctionCallee TakeI32PtrFunc =
      M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy);
  FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
  FunctionCallee TakeStructFunc =
      M->getOrInsertFunction("take_struct", VoidTy, StructTy);
  FunctionCallee RetStructPtrFunc =
      M->getOrInsertFunction("ret_structptr", PtrTy);
  FunctionCallee TakeStructPtrFunc =
      M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy);
  Value *I32Val = Builder.CreateCall(RetI32Func);
  Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
  Value *StructVal = Builder.CreateCall(RetStructFunc);
  Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);

  // Set by the body callback; used afterwards to locate the outlined function.
  // Initialized so that a callback that never runs is detected, not UB.
  Instruction *Internal = nullptr;
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.restoreIP(CodeGenIP);
    Internal = Builder.CreateCall(TakeI32Func, I32Val);
    Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
    Builder.CreateCall(TakeStructFunc, StructVal);
    Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
    return Error::success();
  };
  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
                    Value &Inner, Value *&ReplacementValue) {
    ReplacementValue = &Inner;
    return CodeGenIP;
  };
  auto FiniCB = [](InsertPointTy) { return Error::success(); };

  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                    F->getEntryBlock().getFirstInsertionPt());
  ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
                       OMPBuilder.createParallel(
                           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
                           nullptr, OMP_PROC_BIND_default, false));
  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  OMPBuilder.finalize();

  EXPECT_FALSE(verifyModule(*M, &errs()));
  ASSERT_NE(Internal, nullptr);
  Function *OutlinedFn = Internal->getFunction();

  Type *Arg2Type = OutlinedFn->getArg(2)->getType();
  EXPECT_TRUE(Arg2Type->isPointerTy());
}

// Builds a canonical loop whose trip count is the function's first argument
// and whose body compares the loop counter against it.
TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
  Value *TripCount = F->getArg(0);

  unsigned NumBodiesGenerated = 0;
  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
    NumBodiesGenerated += 1;

    Builder.restoreIP(CodeGenIP);

    Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);
    return Error::success();
  };

  ASSERT_EXPECTED_INIT(
      CanonicalLoopInfo *, Loop,
      OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount));

Builder.restoreIP(Loop->getAfterIP());
  ReturnInst *RetInst = Builder.CreateRetVoid();
  OMPBuilder.finalize();

  Loop->assertOK();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  EXPECT_EQ(NumBodiesGenerated, 1U);

  // Verify control flow structure (in addition to Loop->assertOK()).
  EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
  EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());

  // The induction variable must be a PHI in the loop header with the same
  // type as the trip count.
  Instruction *IndVar = Loop->getIndVar();
  EXPECT_TRUE(isa<PHINode>(IndVar));
  EXPECT_EQ(IndVar->getType(), TripCount->getType());
  EXPECT_EQ(IndVar->getParent(), Loop->getHeader());

  EXPECT_EQ(Loop->getTripCount(), TripCount);

  // The first instruction of the body must be the compare emitted by the body
  // callback, with the induction variable as its first operand.
  BasicBlock *Body = Loop->getBody();
  Instruction *CmpInst = &Body->front();
  EXPECT_TRUE(isa<ICmpInst>(CmpInst));
  EXPECT_EQ(CmpInst->getOperand(0), IndVar);

  // Every successor of the body block must lead to the latch's single
  // predecessor.
  BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
  EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
    return SuccBB->getSingleSuccessor() == LatchPred;
  }));

  // The return created above must be the first instruction after the loop.
  EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
}

TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);

  // Check the trip count is computed correctly. We generate the canonical loop
  // but rely on the IRBuilder's constant folder to compute the final result
  // since all inputs are constant. To verify overflow situations, limit the
  // trip count / loop counter widths to 16 bits.
auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
                           bool IsSigned, bool InclusiveStop) -> int64_t {
    // Creates one canonical loop with constant i16 bounds at the builder's
    // current position and returns its trip count, zero-extended. Returns -1
    // (after reporting the error) if the loop could not be created.
    OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
    Type *LCTy = Type::getInt16Ty(Ctx);
    Value *StartVal = ConstantInt::get(LCTy, Start);
    Value *StopVal = ConstantInt::get(LCTy, Stop);
    Value *StepVal = ConstantInt::get(LCTy, Step);
    // The loop body is left empty; only the trip count is of interest.
    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
      return Error::success();
    };
    ASSERT_EXPECTED_INIT_RETURN(
        CanonicalLoopInfo *, Loop,
        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
                                       StepVal, IsSigned, InclusiveStop),
        -1);
    Loop->assertOK();
    // Continue after this loop so the next call appends another one.
    Builder.restoreIP(Loop->getAfterIP());
    Value *TripCount = Loop->getTripCount();
    // cast<> requires the trip count to have been folded to a constant.
    return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
  };

  // Unsigned, exclusive stop.
  EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
  EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
  EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
  EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
  EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
  EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
  EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
  EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
  EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
  EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);

  // Unsigned, non-dividing steps and inclusive stop.
  EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
  EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
  EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
  EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
  EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
  EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);

  // Signed, including negative steps and negative bounds.
  EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
  EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
  EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
  EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
  EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
  EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
  EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);

  // Signed, at the extremes of the 16-bit range.
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
  EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
  EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
  EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
  EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
  EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
  EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
  EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
  EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);

  // Finalize the function and verify it.
  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));
}

// Builds an "outer" canonical loop whose body contains an "inner" canonical
// loop, with a printf call before, inside, and after the inner loop.
TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");

  IRBuilder<> Builder(BB);

  // Both trip counts are derived from the function's first argument.
  Type *LCTy = F->getArg(0)->getType();
  Constant *One = ConstantInt::get(LCTy, 1);
  Constant *Two = ConstantInt::get(LCTy, 2);
  Value *OuterTripCount =
      Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
  Value *InnerTripCount =
      Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");

  // Fix an insertion point for ComputeIP.
  BasicBlock *LoopNextEnter =
      BasicBlock::Create(M->getContext(), "loopnest.enter", F,
                         Builder.GetInsertBlock()->getNextNode());
  BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
  InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};

  Builder.SetInsertPoint(LoopNextEnter);
  OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);

  // Filled in by the body callbacks below.
  CanonicalLoopInfo *InnerLoop = nullptr;
  CallInst *InbetweenLead = nullptr;
  CallInst *InbetweenTrail = nullptr;
  CallInst *Call = nullptr;
  auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
    Builder.restoreIP(OuterCodeGenIP);
    InbetweenLead =
        createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});

    auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
                                  Value *InnerLC) {
      Builder.restoreIP(InnerCodeGenIP);
      Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
      return Error::success();
    };
    ASSERT_EXPECTED_INIT(
        CanonicalLoopInfo *, InnerLoopResult,
        OMPBuilder.createCanonicalLoop(Builder.saveIP(), InnerLoopBodyGenCB,
                                       InnerTripCount, "inner"));
    InnerLoop = InnerLoopResult;

Builder.restoreIP(InnerLoop->getAfterIP()); 1570 InbetweenTrail = 1571 createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC}); 1572 }; 1573 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop, 1574 OMPBuilder.createCanonicalLoop( 1575 OuterLoc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), 1576 OuterTripCount, "outer")); 1577 1578 // Finish the function. 1579 Builder.restoreIP(OuterLoop->getAfterIP()); 1580 Builder.CreateRetVoid(); 1581 1582 CanonicalLoopInfo *Collapsed = 1583 OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP); 1584 1585 OMPBuilder.finalize(); 1586 EXPECT_FALSE(verifyModule(*M, &errs())); 1587 1588 // Verify control flow and BB order. 1589 BasicBlock *RefOrder[] = { 1590 Collapsed->getPreheader(), Collapsed->getHeader(), 1591 Collapsed->getCond(), Collapsed->getBody(), 1592 InbetweenLead->getParent(), Call->getParent(), 1593 InbetweenTrail->getParent(), Collapsed->getLatch(), 1594 Collapsed->getExit(), Collapsed->getAfter(), 1595 }; 1596 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1597 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1598 1599 // Verify the total trip count. 1600 auto *TripCount = cast<MulOperator>(Collapsed->getTripCount()); 1601 EXPECT_EQ(TripCount->getOperand(0), OuterTripCount); 1602 EXPECT_EQ(TripCount->getOperand(1), InnerTripCount); 1603 1604 // Verify the changed indvar. 
1605 auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1)); 1606 EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv); 1607 EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody()); 1608 EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount); 1609 EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar()); 1610 1611 auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2)); 1612 EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem); 1613 EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody()); 1614 EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar()); 1615 EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount); 1616 1617 EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV); 1618 EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV); 1619 } 1620 1621 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) { 1622 OpenMPIRBuilder OMPBuilder(*M); 1623 CallInst *Call; 1624 BasicBlock *BodyCode; 1625 CanonicalLoopInfo *Loop = 1626 buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode); 1627 ASSERT_NE(Loop, nullptr); 1628 1629 Instruction *OrigIndVar = Loop->getIndVar(); 1630 EXPECT_EQ(Call->getOperand(1), OrigIndVar); 1631 1632 // Tile the loop. 
1633 Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7)); 1634 std::vector<CanonicalLoopInfo *> GenLoops = 1635 OMPBuilder.tileLoops(DL, {Loop}, {TileSize}); 1636 1637 OMPBuilder.finalize(); 1638 EXPECT_FALSE(verifyModule(*M, &errs())); 1639 1640 EXPECT_EQ(GenLoops.size(), 2u); 1641 CanonicalLoopInfo *Floor = GenLoops[0]; 1642 CanonicalLoopInfo *Tile = GenLoops[1]; 1643 1644 BasicBlock *RefOrder[] = { 1645 Floor->getPreheader(), Floor->getHeader(), Floor->getCond(), 1646 Floor->getBody(), Tile->getPreheader(), Tile->getHeader(), 1647 Tile->getCond(), Tile->getBody(), BodyCode, 1648 Tile->getLatch(), Tile->getExit(), Tile->getAfter(), 1649 Floor->getLatch(), Floor->getExit(), Floor->getAfter(), 1650 }; 1651 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1652 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1653 1654 // Check the induction variable. 1655 EXPECT_EQ(Call->getParent(), BodyCode); 1656 auto *Shift = cast<AddOperator>(Call->getOperand(1)); 1657 EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody()); 1658 EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar()); 1659 auto *Scale = cast<MulOperator>(Shift->getOperand(0)); 1660 EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody()); 1661 EXPECT_EQ(Scale->getOperand(0), TileSize); 1662 EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar()); 1663 } 1664 1665 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) { 1666 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1667 OpenMPIRBuilder OMPBuilder(*M); 1668 OMPBuilder.initialize(); 1669 F->setName("func"); 1670 1671 IRBuilder<> Builder(BB); 1672 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1673 Value *TripCount = F->getArg(0); 1674 Type *LCTy = TripCount->getType(); 1675 1676 BasicBlock *BodyCode = nullptr; 1677 CanonicalLoopInfo *InnerLoop = nullptr; 1678 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, 1679 llvm::Value *OuterLC) { 1680 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1681 
llvm::Value *InnerLC) { 1682 Builder.restoreIP(InnerCodeGenIP); 1683 BodyCode = Builder.GetInsertBlock(); 1684 1685 // Add something that consumes the induction variables to the body. 1686 createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); 1687 return Error::success(); 1688 }; 1689 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, InnerLoopResult, 1690 OMPBuilder.createCanonicalLoop(OuterCodeGenIP, 1691 InnerLoopBodyGenCB, 1692 TripCount, "inner")); 1693 InnerLoop = InnerLoopResult; 1694 }; 1695 ASSERT_EXPECTED_INIT( 1696 CanonicalLoopInfo *, OuterLoop, 1697 OMPBuilder.createCanonicalLoop( 1698 Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), TripCount, "outer")); 1699 1700 // Finalize the function. 1701 Builder.restoreIP(OuterLoop->getAfterIP()); 1702 Builder.CreateRetVoid(); 1703 1704 // Tile to loop nest. 1705 Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11)); 1706 Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7)); 1707 std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops( 1708 DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize}); 1709 1710 OMPBuilder.finalize(); 1711 EXPECT_FALSE(verifyModule(*M, &errs())); 1712 1713 EXPECT_EQ(GenLoops.size(), 4u); 1714 CanonicalLoopInfo *Floor1 = GenLoops[0]; 1715 CanonicalLoopInfo *Floor2 = GenLoops[1]; 1716 CanonicalLoopInfo *Tile1 = GenLoops[2]; 1717 CanonicalLoopInfo *Tile2 = GenLoops[3]; 1718 1719 BasicBlock *RefOrder[] = { 1720 Floor1->getPreheader(), 1721 Floor1->getHeader(), 1722 Floor1->getCond(), 1723 Floor1->getBody(), 1724 Floor2->getPreheader(), 1725 Floor2->getHeader(), 1726 Floor2->getCond(), 1727 Floor2->getBody(), 1728 Tile1->getPreheader(), 1729 Tile1->getHeader(), 1730 Tile1->getCond(), 1731 Tile1->getBody(), 1732 Tile2->getPreheader(), 1733 Tile2->getHeader(), 1734 Tile2->getCond(), 1735 Tile2->getBody(), 1736 BodyCode, 1737 Tile2->getLatch(), 1738 Tile2->getExit(), 1739 Tile2->getAfter(), 1740 Tile1->getLatch(), 1741 Tile1->getExit(), 1742 
Tile1->getAfter(), 1743 Floor2->getLatch(), 1744 Floor2->getExit(), 1745 Floor2->getAfter(), 1746 Floor1->getLatch(), 1747 Floor1->getExit(), 1748 Floor1->getAfter(), 1749 }; 1750 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1751 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1752 } 1753 1754 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) { 1755 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1756 OpenMPIRBuilder OMPBuilder(*M); 1757 OMPBuilder.initialize(); 1758 F->setName("func"); 1759 1760 IRBuilder<> Builder(BB); 1761 Value *TripCount = F->getArg(0); 1762 Type *LCTy = TripCount->getType(); 1763 1764 Value *OuterStartVal = ConstantInt::get(LCTy, 2); 1765 Value *OuterStopVal = TripCount; 1766 Value *OuterStep = ConstantInt::get(LCTy, 5); 1767 Value *InnerStartVal = ConstantInt::get(LCTy, 13); 1768 Value *InnerStopVal = TripCount; 1769 Value *InnerStep = ConstantInt::get(LCTy, 3); 1770 1771 // Fix an insertion point for ComputeIP. 1772 BasicBlock *LoopNextEnter = 1773 BasicBlock::Create(M->getContext(), "loopnest.enter", F, 1774 Builder.GetInsertBlock()->getNextNode()); 1775 BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter); 1776 InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()}; 1777 1778 InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()}; 1779 OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL}); 1780 1781 BasicBlock *BodyCode = nullptr; 1782 CanonicalLoopInfo *InnerLoop = nullptr; 1783 CallInst *Call = nullptr; 1784 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, 1785 llvm::Value *OuterLC) { 1786 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1787 llvm::Value *InnerLC) { 1788 Builder.restoreIP(InnerCodeGenIP); 1789 BodyCode = Builder.GetInsertBlock(); 1790 1791 // Add something that consumes the induction variable to the body. 
1792 Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); 1793 return Error::success(); 1794 }; 1795 ASSERT_EXPECTED_INIT( 1796 CanonicalLoopInfo *, InnerLoopResult, 1797 OMPBuilder.createCanonicalLoop(OuterCodeGenIP, InnerLoopBodyGenCB, 1798 InnerStartVal, InnerStopVal, InnerStep, 1799 false, false, ComputeIP, "inner")); 1800 InnerLoop = InnerLoopResult; 1801 }; 1802 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop, 1803 OMPBuilder.createCanonicalLoop( 1804 Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), 1805 OuterStartVal, OuterStopVal, OuterStep, false, false, 1806 ComputeIP, "outer")); 1807 1808 // Finalize the function 1809 Builder.restoreIP(OuterLoop->getAfterIP()); 1810 Builder.CreateRetVoid(); 1811 1812 // Tile the loop nest. 1813 Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11)); 1814 Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7)); 1815 std::vector<CanonicalLoopInfo *> GenLoops = 1816 OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1}); 1817 1818 OMPBuilder.finalize(); 1819 EXPECT_FALSE(verifyModule(*M, &errs())); 1820 1821 EXPECT_EQ(GenLoops.size(), 4u); 1822 CanonicalLoopInfo *Floor0 = GenLoops[0]; 1823 CanonicalLoopInfo *Floor1 = GenLoops[1]; 1824 CanonicalLoopInfo *Tile0 = GenLoops[2]; 1825 CanonicalLoopInfo *Tile1 = GenLoops[3]; 1826 1827 BasicBlock *RefOrder[] = { 1828 Floor0->getPreheader(), 1829 Floor0->getHeader(), 1830 Floor0->getCond(), 1831 Floor0->getBody(), 1832 Floor1->getPreheader(), 1833 Floor1->getHeader(), 1834 Floor1->getCond(), 1835 Floor1->getBody(), 1836 Tile0->getPreheader(), 1837 Tile0->getHeader(), 1838 Tile0->getCond(), 1839 Tile0->getBody(), 1840 Tile1->getPreheader(), 1841 Tile1->getHeader(), 1842 Tile1->getCond(), 1843 Tile1->getBody(), 1844 BodyCode, 1845 Tile1->getLatch(), 1846 Tile1->getExit(), 1847 Tile1->getAfter(), 1848 Tile0->getLatch(), 1849 Tile0->getExit(), 1850 Tile0->getAfter(), 1851 Floor1->getLatch(), 1852 Floor1->getExit(), 1853 Floor1->getAfter(), 
1854 Floor0->getLatch(), 1855 Floor0->getExit(), 1856 Floor0->getAfter(), 1857 }; 1858 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1859 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1860 1861 EXPECT_EQ(Call->getParent(), BodyCode); 1862 1863 auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1)); 1864 EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal); 1865 auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0)); 1866 EXPECT_EQ(RangeScale0->getOperand(1), OuterStep); 1867 auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0)); 1868 EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody()); 1869 EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar()); 1870 auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0)); 1871 EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody()); 1872 EXPECT_EQ(TileScale0->getOperand(0), TileSize0); 1873 EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar()); 1874 1875 auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2)); 1876 EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode); 1877 EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal); 1878 auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0)); 1879 EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode); 1880 EXPECT_EQ(RangeScale1->getOperand(1), InnerStep); 1881 auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0)); 1882 EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody()); 1883 EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar()); 1884 auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0)); 1885 EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody()); 1886 EXPECT_EQ(TileScale1->getOperand(0), TileSize1); 1887 EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar()); 1888 } 1889 1890 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { 1891 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1892 OpenMPIRBuilder 
OMPBuilder(*M); 1893 OMPBuilder.initialize(); 1894 IRBuilder<> Builder(BB); 1895 1896 // Create a loop, tile it, and extract its trip count. All input values are 1897 // constant and IRBuilder evaluates all-constant arithmetic inplace, such that 1898 // the floor trip count itself will be a ConstantInt. Unfortunately we cannot 1899 // do the same for the tile loop. 1900 auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step, 1901 bool IsSigned, bool InclusiveStop, 1902 int64_t TileSize) -> uint64_t { 1903 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 1904 Type *LCTy = Type::getInt16Ty(Ctx); 1905 Value *StartVal = ConstantInt::get(LCTy, Start); 1906 Value *StopVal = ConstantInt::get(LCTy, Stop); 1907 Value *StepVal = ConstantInt::get(LCTy, Step); 1908 1909 // Generate a loop. 1910 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { 1911 return Error::success(); 1912 }; 1913 ASSERT_EXPECTED_INIT_RETURN( 1914 CanonicalLoopInfo *, Loop, 1915 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, 1916 StepVal, IsSigned, InclusiveStop), 1917 (unsigned)-1); 1918 InsertPointTy AfterIP = Loop->getAfterIP(); 1919 1920 // Tile the loop. 1921 Value *TileSizeVal = ConstantInt::get(LCTy, TileSize); 1922 std::vector<CanonicalLoopInfo *> GenLoops = 1923 OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal}); 1924 1925 // Set the insertion pointer to after loop, where the next loop will be 1926 // emitted. 1927 Builder.restoreIP(AfterIP); 1928 1929 // Extract the trip count. 1930 CanonicalLoopInfo *FloorLoop = GenLoops[0]; 1931 Value *FloorTripCount = FloorLoop->getTripCount(); 1932 return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue(); 1933 }; 1934 1935 // Empty iteration domain. 
1936 EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u); 1937 EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u); 1938 EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u); 1939 EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u); 1940 EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u); 1941 1942 // Only complete tiles. 1943 EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u); 1944 EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u); 1945 EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u); 1946 EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u); 1947 EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u); 1948 EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u); 1949 1950 // Only a partial tile. 1951 EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u); 1952 EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u); 1953 EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u); 1954 EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u); 1955 EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u); 1956 1957 // Complete and partial tiles. 1958 EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u); 1959 EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u); 1960 EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u); 1961 EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u); 1962 EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u); 1963 1964 // Close to 16-bit integer range. 
1965 EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu); 1966 EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1); 1967 EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1); 1968 EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1); 1969 EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1); 1970 EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u); 1971 EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u); 1972 1973 // Finalize the function. 1974 Builder.CreateRetVoid(); 1975 OMPBuilder.finalize(); 1976 1977 EXPECT_FALSE(verifyModule(*M, &errs())); 1978 } 1979 1980 TEST_F(OpenMPIRBuilderTest, ApplySimd) { 1981 OpenMPIRBuilder OMPBuilder(*M); 1982 MapVector<Value *, Value *> AlignedVars; 1983 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 1984 ASSERT_NE(CLI, nullptr); 1985 1986 // Simd-ize the loop. 1987 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 1988 OrderKind::OMP_ORDER_unknown, 1989 /* Simdlen */ nullptr, 1990 /* Safelen */ nullptr); 1991 1992 OMPBuilder.finalize(); 1993 EXPECT_FALSE(verifyModule(*M, &errs())); 1994 1995 PassBuilder PB; 1996 FunctionAnalysisManager FAM; 1997 PB.registerFunctionAnalyses(FAM); 1998 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 1999 2000 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2001 EXPECT_EQ(TopLvl.size(), 1u); 2002 2003 Loop *L = TopLvl.front(); 2004 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2005 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2006 2007 // Check for llvm.access.group metadata attached to the printf 2008 // function in the loop body. 
2009 BasicBlock *LoopBody = CLI->getBody(); 2010 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2011 return I.getMetadata("llvm.access.group") != nullptr; 2012 })); 2013 } 2014 2015 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) { 2016 OpenMPIRBuilder OMPBuilder(*M); 2017 IRBuilder<> Builder(BB); 2018 const int AlignmentValue = 32; 2019 llvm::BasicBlock *sourceBlock = Builder.GetInsertBlock(); 2020 AllocaInst *Alloc1 = 2021 Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1)); 2022 LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); 2023 MapVector<Value *, Value *> AlignedVars; 2024 AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)}); 2025 2026 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2027 ASSERT_NE(CLI, nullptr); 2028 2029 // Simd-ize the loop. 2030 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 2031 OrderKind::OMP_ORDER_unknown, 2032 /* Simdlen */ nullptr, 2033 /* Safelen */ nullptr); 2034 2035 OMPBuilder.finalize(); 2036 EXPECT_FALSE(verifyModule(*M, &errs())); 2037 2038 PassBuilder PB; 2039 FunctionAnalysisManager FAM; 2040 PB.registerFunctionAnalyses(FAM); 2041 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2042 2043 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2044 EXPECT_EQ(TopLvl.size(), 1u); 2045 2046 Loop *L = TopLvl.front(); 2047 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2048 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2049 2050 // Check for llvm.access.group metadata attached to the printf 2051 // function in the loop body. 
2052 BasicBlock *LoopBody = CLI->getBody(); 2053 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2054 return I.getMetadata("llvm.access.group") != nullptr; 2055 })); 2056 2057 // Check if number of assumption instructions is equal to number of aligned 2058 // variables 2059 size_t NumAssummptionCallsInPreheader = 2060 count_if(*sourceBlock, [](Instruction &I) { return isa<AssumeInst>(I); }); 2061 EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); 2062 2063 // Check if variables are correctly aligned 2064 for (Instruction &Instr : *sourceBlock) { 2065 if (!isa<AssumeInst>(Instr)) 2066 continue; 2067 AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr); 2068 if (AssumeInstruction->getNumTotalBundleOperands()) { 2069 auto Bundle = AssumeInstruction->getOperandBundleAt(0); 2070 if (Bundle.getTagName() == "align") { 2071 EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1])); 2072 auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]); 2073 EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); 2074 } 2075 } 2076 } 2077 } 2078 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { 2079 OpenMPIRBuilder OMPBuilder(*M); 2080 MapVector<Value *, Value *> AlignedVars; 2081 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2082 ASSERT_NE(CLI, nullptr); 2083 2084 // Simd-ize the loop. 
2085 OMPBuilder.applySimd(CLI, AlignedVars, 2086 /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, 2087 ConstantInt::get(Type::getInt32Ty(Ctx), 3), 2088 /* Safelen */ nullptr); 2089 2090 OMPBuilder.finalize(); 2091 EXPECT_FALSE(verifyModule(*M, &errs())); 2092 2093 PassBuilder PB; 2094 FunctionAnalysisManager FAM; 2095 PB.registerFunctionAnalyses(FAM); 2096 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2097 2098 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2099 EXPECT_EQ(TopLvl.size(), 1u); 2100 2101 Loop *L = TopLvl.front(); 2102 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2103 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2104 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2105 2106 // Check for llvm.access.group metadata attached to the printf 2107 // function in the loop body. 2108 BasicBlock *LoopBody = CLI->getBody(); 2109 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2110 return I.getMetadata("llvm.access.group") != nullptr; 2111 })); 2112 } 2113 2114 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { 2115 OpenMPIRBuilder OMPBuilder(*M); 2116 MapVector<Value *, Value *> AlignedVars; 2117 2118 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2119 ASSERT_NE(CLI, nullptr); 2120 2121 // Simd-ize the loop. 
2122 OMPBuilder.applySimd( 2123 CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, 2124 /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2125 2126 OMPBuilder.finalize(); 2127 EXPECT_FALSE(verifyModule(*M, &errs())); 2128 2129 PassBuilder PB; 2130 FunctionAnalysisManager FAM; 2131 PB.registerFunctionAnalyses(FAM); 2132 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2133 2134 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2135 EXPECT_EQ(TopLvl.size(), 1u); 2136 2137 Loop *L = TopLvl.front(); 2138 // Parallel metadata shoudl be attached because of presence of 2139 // the order(concurrent) OpenMP clause 2140 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2141 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2142 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2143 2144 // Check for llvm.access.group metadata attached to the printf 2145 // function in the loop body. 2146 BasicBlock *LoopBody = CLI->getBody(); 2147 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2148 return I.getMetadata("llvm.access.group") != nullptr; 2149 })); 2150 } 2151 2152 TEST_F(OpenMPIRBuilderTest, ApplySafelen) { 2153 OpenMPIRBuilder OMPBuilder(*M); 2154 MapVector<Value *, Value *> AlignedVars; 2155 2156 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2157 ASSERT_NE(CLI, nullptr); 2158 2159 OMPBuilder.applySimd( 2160 CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, 2161 /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2162 2163 OMPBuilder.finalize(); 2164 EXPECT_FALSE(verifyModule(*M, &errs())); 2165 2166 PassBuilder PB; 2167 FunctionAnalysisManager FAM; 2168 PB.registerFunctionAnalyses(FAM); 2169 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2170 2171 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2172 EXPECT_EQ(TopLvl.size(), 1u); 2173 2174 Loop *L = TopLvl.front(); 2175 
EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2176 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2177 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2178 2179 // Check for llvm.access.group metadata attached to the printf 2180 // function in the loop body. 2181 BasicBlock *LoopBody = CLI->getBody(); 2182 EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) { 2183 return I.getMetadata("llvm.access.group") != nullptr; 2184 })); 2185 } 2186 2187 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { 2188 OpenMPIRBuilder OMPBuilder(*M); 2189 MapVector<Value *, Value *> AlignedVars; 2190 2191 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2192 ASSERT_NE(CLI, nullptr); 2193 2194 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 2195 OrderKind::OMP_ORDER_unknown, 2196 ConstantInt::get(Type::getInt32Ty(Ctx), 2), 2197 ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2198 2199 OMPBuilder.finalize(); 2200 EXPECT_FALSE(verifyModule(*M, &errs())); 2201 2202 PassBuilder PB; 2203 FunctionAnalysisManager FAM; 2204 PB.registerFunctionAnalyses(FAM); 2205 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2206 2207 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2208 EXPECT_EQ(TopLvl.size(), 1u); 2209 2210 Loop *L = TopLvl.front(); 2211 EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2212 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2213 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2); 2214 2215 // Check for llvm.access.group metadata attached to the printf 2216 // function in the loop body. 
2217 BasicBlock *LoopBody = CLI->getBody(); 2218 EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) { 2219 return I.getMetadata("llvm.access.group") != nullptr; 2220 })); 2221 } 2222 2223 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) { 2224 OpenMPIRBuilder OMPBuilder(*M); 2225 IRBuilder<> Builder(BB); 2226 MapVector<Value *, Value *> AlignedVars; 2227 AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); 2228 AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); 2229 2230 // Generation of if condition 2231 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1); 2232 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2); 2233 LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); 2234 LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2); 2235 2236 Value *IfCmp = Builder.CreateICmpNE(Load1, Load2); 2237 2238 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2239 ASSERT_NE(CLI, nullptr); 2240 2241 // Simd-ize the loop with if condition 2242 OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown, 2243 ConstantInt::get(Type::getInt32Ty(Ctx), 3), 2244 /* Safelen */ nullptr); 2245 2246 OMPBuilder.finalize(); 2247 EXPECT_FALSE(verifyModule(*M, &errs())); 2248 2249 PassBuilder PB; 2250 FunctionAnalysisManager FAM; 2251 PB.registerFunctionAnalyses(FAM); 2252 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2253 2254 // Check if there are two loops (one with enabled vectorization) 2255 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2256 EXPECT_EQ(TopLvl.size(), 2u); 2257 2258 Loop *L = TopLvl[0]; 2259 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2260 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2261 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2262 2263 // The second loop should have disabled vectorization 2264 L = TopLvl[1]; 2265 
EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2266 EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2267 // Check for llvm.access.group metadata attached to the printf 2268 // function in the loop body. 2269 BasicBlock *LoopBody = CLI->getBody(); 2270 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2271 return I.getMetadata("llvm.access.group") != nullptr; 2272 })); 2273 } 2274 2275 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { 2276 OpenMPIRBuilder OMPBuilder(*M); 2277 2278 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2279 ASSERT_NE(CLI, nullptr); 2280 2281 // Unroll the loop. 2282 OMPBuilder.unrollLoopFull(DL, CLI); 2283 2284 OMPBuilder.finalize(); 2285 EXPECT_FALSE(verifyModule(*M, &errs())); 2286 2287 PassBuilder PB; 2288 FunctionAnalysisManager FAM; 2289 PB.registerFunctionAnalyses(FAM); 2290 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2291 2292 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2293 EXPECT_EQ(TopLvl.size(), 1u); 2294 2295 Loop *L = TopLvl.front(); 2296 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); 2297 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full")); 2298 } 2299 2300 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) { 2301 OpenMPIRBuilder OMPBuilder(*M); 2302 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2303 ASSERT_NE(CLI, nullptr); 2304 2305 // Unroll the loop. 
2306 CanonicalLoopInfo *UnrolledLoop = nullptr; 2307 OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop); 2308 ASSERT_NE(UnrolledLoop, nullptr); 2309 2310 OMPBuilder.finalize(); 2311 EXPECT_FALSE(verifyModule(*M, &errs())); 2312 UnrolledLoop->assertOK(); 2313 2314 PassBuilder PB; 2315 FunctionAnalysisManager FAM; 2316 PB.registerFunctionAnalyses(FAM); 2317 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2318 2319 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2320 EXPECT_EQ(TopLvl.size(), 1u); 2321 Loop *Outer = TopLvl.front(); 2322 EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader()); 2323 EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch()); 2324 EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond()); 2325 EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit()); 2326 2327 EXPECT_EQ(Outer->getSubLoops().size(), 1u); 2328 Loop *Inner = Outer->getSubLoops().front(); 2329 2330 EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable")); 2331 EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5); 2332 } 2333 2334 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) { 2335 OpenMPIRBuilder OMPBuilder(*M); 2336 2337 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2338 ASSERT_NE(CLI, nullptr); 2339 2340 // Unroll the loop. 
2341 OMPBuilder.unrollLoopHeuristic(DL, CLI); 2342 2343 OMPBuilder.finalize(); 2344 EXPECT_FALSE(verifyModule(*M, &errs())); 2345 2346 PassBuilder PB; 2347 FunctionAnalysisManager FAM; 2348 PB.registerFunctionAnalyses(FAM); 2349 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2350 2351 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2352 EXPECT_EQ(TopLvl.size(), 1u); 2353 2354 Loop *L = TopLvl.front(); 2355 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); 2356 } 2357 2358 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) { 2359 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2360 std::string oldDLStr = M->getDataLayoutStr(); 2361 M->setDataLayout( 2362 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 2363 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 2364 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 2365 OpenMPIRBuilder OMPBuilder(*M); 2366 OMPBuilder.Config.IsTargetDevice = true; 2367 OMPBuilder.initialize(); 2368 IRBuilder<> Builder(BB); 2369 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2370 InsertPointTy AllocaIP = Builder.saveIP(); 2371 2372 Type *LCTy = Type::getInt32Ty(Ctx); 2373 Value *StartVal = ConstantInt::get(LCTy, 10); 2374 Value *StopVal = ConstantInt::get(LCTy, 52); 2375 Value *StepVal = ConstantInt::get(LCTy, 2); 2376 auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); }; 2377 2378 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2379 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGen, 2380 StartVal, StopVal, 2381 StepVal, false, false)); 2382 BasicBlock *Preheader = CLI->getPreheader(); 2383 Value *TripCount = CLI->getTripCount(); 2384 2385 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2386 2387 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 2388 OMPBuilder.applyWorkshareLoop( 2389 DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, 2390 nullptr, false, false, false, false, 
2391 WorksharingLoopType::ForStaticLoop)); 2392 Builder.restoreIP(AfterIP); 2393 Builder.CreateRetVoid(); 2394 2395 OMPBuilder.finalize(); 2396 EXPECT_FALSE(verifyModule(*M, &errs())); 2397 2398 CallInst *WorkshareLoopRuntimeCall = nullptr; 2399 int WorkshareLoopRuntimeCallCnt = 0; 2400 for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) { 2401 CallInst *Call = dyn_cast<CallInst>(Inst); 2402 if (!Call) 2403 continue; 2404 if (!Call->getCalledFunction()) 2405 continue; 2406 2407 if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") { 2408 WorkshareLoopRuntimeCall = Call; 2409 WorkshareLoopRuntimeCallCnt++; 2410 } 2411 } 2412 EXPECT_NE(WorkshareLoopRuntimeCall, nullptr); 2413 // Verify that there is only one call to workshare loop function 2414 EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1); 2415 // Check that pointer to loop body function is passed as second argument 2416 Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1); 2417 EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType()); 2418 Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg); 2419 EXPECT_NE(ArgFunction, nullptr); 2420 EXPECT_EQ(ArgFunction->arg_size(), 1u); 2421 EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType()); 2422 // Check that no variables except for loop counter are used in loop body 2423 EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()), 2424 WorkshareLoopRuntimeCall->getArgOperand(2)); 2425 // Check loop trip count argument 2426 EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3)); 2427 } 2428 2429 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { 2430 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2431 OpenMPIRBuilder OMPBuilder(*M); 2432 OMPBuilder.Config.IsTargetDevice = false; 2433 OMPBuilder.initialize(); 2434 IRBuilder<> Builder(BB); 2435 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2436 2437 Type *LCTy = Type::getInt32Ty(Ctx); 2438 Value *StartVal = 
ConstantInt::get(LCTy, 10); 2439 Value *StopVal = ConstantInt::get(LCTy, 52); 2440 Value *StepVal = ConstantInt::get(LCTy, 2); 2441 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { 2442 return Error::success(); 2443 }; 2444 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2445 OMPBuilder.createCanonicalLoop( 2446 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2447 /*IsSigned=*/false, /*InclusiveStop=*/false)); 2448 BasicBlock *Preheader = CLI->getPreheader(); 2449 BasicBlock *Body = CLI->getBody(); 2450 Value *IV = CLI->getIndVar(); 2451 BasicBlock *ExitBlock = CLI->getExit(); 2452 2453 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2454 InsertPointTy AllocaIP = Builder.saveIP(); 2455 2456 ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, 2457 /*NeedsBarrier=*/true, 2458 OMP_SCHEDULE_Static), 2459 Succeeded()); 2460 2461 BasicBlock *Cond = Body->getSinglePredecessor(); 2462 Instruction *Cmp = &*Cond->begin(); 2463 Value *TripCount = Cmp->getOperand(1); 2464 2465 auto AllocaIter = BB->begin(); 2466 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2467 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2468 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2469 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2470 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2471 EXPECT_NE(PLastIter, nullptr); 2472 EXPECT_NE(PLowerBound, nullptr); 2473 EXPECT_NE(PUpperBound, nullptr); 2474 EXPECT_NE(PStride, nullptr); 2475 2476 auto PreheaderIter = Preheader->begin(); 2477 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7); 2478 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2479 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2480 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2481 ASSERT_NE(LowerBoundStore, nullptr); 2482 ASSERT_NE(UpperBoundStore, nullptr); 2483 ASSERT_NE(StrideStore, nullptr); 2484 2485 
auto *OrigLowerBound = 2486 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2487 auto *OrigUpperBound = 2488 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2489 auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2490 ASSERT_NE(OrigLowerBound, nullptr); 2491 ASSERT_NE(OrigUpperBound, nullptr); 2492 ASSERT_NE(OrigStride, nullptr); 2493 EXPECT_EQ(OrigLowerBound->getValue(), 0); 2494 EXPECT_EQ(OrigUpperBound->getValue(), 20); 2495 EXPECT_EQ(OrigStride->getValue(), 1); 2496 2497 // Check that the loop IV is updated to account for the lower bound returned 2498 // by the OpenMP runtime call. 2499 BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front()); 2500 EXPECT_EQ(Add->getOperand(0), IV); 2501 auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1)); 2502 ASSERT_NE(LoadedLowerBound, nullptr); 2503 EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound); 2504 2505 // Check that the trip count is updated to account for the lower and upper 2506 // bounds return by the OpenMP runtime call. 2507 auto *AddOne = dyn_cast<Instruction>(TripCount); 2508 ASSERT_NE(AddOne, nullptr); 2509 ASSERT_TRUE(AddOne->isBinaryOp()); 2510 auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1)); 2511 ASSERT_NE(One, nullptr); 2512 EXPECT_EQ(One->getValue(), 1); 2513 auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0)); 2514 ASSERT_NE(Difference, nullptr); 2515 ASSERT_TRUE(Difference->isBinaryOp()); 2516 EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound); 2517 auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0)); 2518 ASSERT_NE(LoadedUpperBound, nullptr); 2519 EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound); 2520 2521 // The original loop iterator should only be used in the condition, in the 2522 // increment and in the statement that adds the lower bound to it. 
2523 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2524 2525 // The exit block should contain the "fini" call and the barrier call, 2526 // plus the call to obtain the thread ID. 2527 size_t NumCallsInExitBlock = 2528 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2529 EXPECT_EQ(NumCallsInExitBlock, 3u); 2530 } 2531 2532 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) { 2533 unsigned IVBits = GetParam(); 2534 2535 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2536 OpenMPIRBuilder OMPBuilder(*M); 2537 OMPBuilder.Config.IsTargetDevice = false; 2538 2539 BasicBlock *Body; 2540 CallInst *Call; 2541 CanonicalLoopInfo *CLI = 2542 buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body); 2543 ASSERT_NE(CLI, nullptr); 2544 2545 Instruction *OrigIndVar = CLI->getIndVar(); 2546 EXPECT_EQ(Call->getOperand(1), OrigIndVar); 2547 2548 Type *LCTy = Type::getInt32Ty(Ctx); 2549 Value *ChunkSize = ConstantInt::get(LCTy, 5); 2550 InsertPointTy AllocaIP{&F->getEntryBlock(), 2551 F->getEntryBlock().getFirstInsertionPt()}; 2552 ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, 2553 /*NeedsBarrier=*/true, 2554 OMP_SCHEDULE_Static, 2555 ChunkSize), 2556 Succeeded()); 2557 2558 OMPBuilder.finalize(); 2559 EXPECT_FALSE(verifyModule(*M, &errs())); 2560 2561 BasicBlock *Entry = &F->getEntryBlock(); 2562 BasicBlock *Preheader = Entry->getSingleSuccessor(); 2563 2564 BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor(); 2565 BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor(); 2566 BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor(); 2567 BasicBlock *DispatchBody = succ_begin(DispatchCond)[0]; 2568 BasicBlock *DispatchExit = succ_begin(DispatchCond)[1]; 2569 BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor(); 2570 BasicBlock *Return = DispatchAfter->getSingleSuccessor(); 2571 2572 BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor(); 2573 
BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor(); 2574 BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor(); 2575 BasicBlock *ChunkBody = succ_begin(ChunkCond)[0]; 2576 BasicBlock *ChunkExit = succ_begin(ChunkCond)[1]; 2577 BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor(); 2578 BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor(); 2579 2580 BasicBlock *DispatchInc = ChunkAfter; 2581 2582 EXPECT_EQ(ChunkBody, Body); 2583 EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader); 2584 EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader); 2585 2586 EXPECT_TRUE(isa<ReturnInst>(Return->front())); 2587 2588 Value *NewIV = Call->getOperand(1); 2589 EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits); 2590 2591 CallInst *InitCall = findSingleCall( 2592 F, 2593 (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u 2594 : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u, 2595 OMPBuilder); 2596 EXPECT_EQ(InitCall->getParent(), Preheader); 2597 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33); 2598 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1); 2599 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5); 2600 2601 CallInst *FiniCall = findSingleCall( 2602 F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder); 2603 EXPECT_EQ(FiniCall->getParent(), DispatchExit); 2604 2605 CallInst *BarrierCall = findSingleCall( 2606 F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder); 2607 EXPECT_EQ(BarrierCall->getParent(), DispatchExit); 2608 } 2609 2610 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits, 2611 ::testing::Values(8, 16, 32, 64)); 2612 2613 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { 2614 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2615 OpenMPIRBuilder OMPBuilder(*M); 2616 OMPBuilder.Config.IsTargetDevice = false; 2617 OMPBuilder.initialize(); 2618 IRBuilder<> Builder(BB); 2619 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2620 2621 omp::OMPScheduleType SchedType = GetParam(); 2622 uint32_t ChunkSize = 1; 2623 switch (SchedType & ~OMPScheduleType::ModifierMask) { 2624 case omp::OMPScheduleType::BaseDynamicChunked: 2625 case omp::OMPScheduleType::BaseGuidedChunked: 2626 ChunkSize = 7; 2627 break; 2628 case omp::OMPScheduleType::BaseAuto: 2629 case omp::OMPScheduleType::BaseRuntime: 2630 ChunkSize = 1; 2631 break; 2632 default: 2633 assert(0 && "unknown type for this test"); 2634 break; 2635 } 2636 2637 Type *LCTy = Type::getInt32Ty(Ctx); 2638 Value *StartVal = ConstantInt::get(LCTy, 10); 2639 Value *StopVal = ConstantInt::get(LCTy, 52); 2640 Value *StepVal = ConstantInt::get(LCTy, 2); 2641 Value *ChunkVal = 2642 (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize); 2643 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { 2644 return Error::success(); 2645 }; 2646 2647 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2648 OMPBuilder.createCanonicalLoop( 2649 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2650 /*IsSigned=*/false, /*InclusiveStop=*/false)); 2651 2652 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2653 InsertPointTy AllocaIP = Builder.saveIP(); 2654 2655 // Collect all the info from CLI, as it isn't usable after the call to 2656 // createDynamicWorkshareLoop. 
2657 InsertPointTy AfterIP = CLI->getAfterIP(); 2658 BasicBlock *Preheader = CLI->getPreheader(); 2659 BasicBlock *ExitBlock = CLI->getExit(); 2660 BasicBlock *LatchBlock = CLI->getLatch(); 2661 Value *IV = CLI->getIndVar(); 2662 2663 ASSERT_EXPECTED_INIT( 2664 OpenMPIRBuilder::InsertPointTy, EndIP, 2665 OMPBuilder.applyWorkshareLoop( 2666 DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType), 2667 ChunkVal, /*Simd=*/false, 2668 (SchedType & omp::OMPScheduleType::ModifierMonotonic) == 2669 omp::OMPScheduleType::ModifierMonotonic, 2670 (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) == 2671 omp::OMPScheduleType::ModifierNonmonotonic, 2672 /*Ordered=*/false)); 2673 2674 // The returned value should be the "after" point. 2675 ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock()); 2676 ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint()); 2677 2678 auto AllocaIter = BB->begin(); 2679 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2680 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2681 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2682 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2683 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2684 EXPECT_NE(PLastIter, nullptr); 2685 EXPECT_NE(PLowerBound, nullptr); 2686 EXPECT_NE(PUpperBound, nullptr); 2687 EXPECT_NE(PStride, nullptr); 2688 2689 auto PreheaderIter = Preheader->begin(); 2690 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6); 2691 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2692 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2693 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2694 ASSERT_NE(LowerBoundStore, nullptr); 2695 ASSERT_NE(UpperBoundStore, nullptr); 2696 ASSERT_NE(StrideStore, nullptr); 2697 2698 CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++)); 2699 ASSERT_NE(ThreadIdCall, nullptr); 2700 
EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(), 2701 "__kmpc_global_thread_num"); 2702 2703 CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter); 2704 2705 ASSERT_NE(InitCall, nullptr); 2706 EXPECT_EQ(InitCall->getCalledFunction()->getName(), 2707 "__kmpc_dispatch_init_4u"); 2708 EXPECT_EQ(InitCall->arg_size(), 7U); 2709 EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize)); 2710 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2711 if ((SchedType & OMPScheduleType::MonotonicityMask) == 2712 OMPScheduleType::None) { 2713 // Implementation is allowed to add default nonmonotonicity flag 2714 EXPECT_EQ( 2715 static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) | 2716 OMPScheduleType::ModifierNonmonotonic, 2717 SchedType | OMPScheduleType::ModifierNonmonotonic); 2718 } else { 2719 EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()), 2720 SchedType); 2721 } 2722 2723 ConstantInt *OrigLowerBound = 2724 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2725 ConstantInt *OrigUpperBound = 2726 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2727 ConstantInt *OrigStride = 2728 dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2729 ASSERT_NE(OrigLowerBound, nullptr); 2730 ASSERT_NE(OrigUpperBound, nullptr); 2731 ASSERT_NE(OrigStride, nullptr); 2732 EXPECT_EQ(OrigLowerBound->getValue(), 1); 2733 EXPECT_EQ(OrigUpperBound->getValue(), 21); 2734 EXPECT_EQ(OrigStride->getValue(), 1); 2735 2736 CallInst *FiniCall = dyn_cast<CallInst>( 2737 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2738 EXPECT_EQ(FiniCall, nullptr); 2739 2740 // The original loop iterator should only be used in the condition, in the 2741 // increment and in the statement that adds the lower bound to it. 2742 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2743 2744 // The exit block should contain the barrier call, plus the call to obtain 2745 // the thread ID. 
2746 size_t NumCallsInExitBlock = 2747 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2748 EXPECT_EQ(NumCallsInExitBlock, 2u); 2749 2750 // Add a termination to our block and check that it is internally consistent. 2751 Builder.restoreIP(EndIP); 2752 Builder.CreateRetVoid(); 2753 OMPBuilder.finalize(); 2754 EXPECT_FALSE(verifyModule(*M, &errs())); 2755 } 2756 2757 INSTANTIATE_TEST_SUITE_P( 2758 OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, 2759 ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked, 2760 omp::OMPScheduleType::UnorderedGuidedChunked, 2761 omp::OMPScheduleType::UnorderedAuto, 2762 omp::OMPScheduleType::UnorderedRuntime, 2763 omp::OMPScheduleType::UnorderedDynamicChunked | 2764 omp::OMPScheduleType::ModifierMonotonic, 2765 omp::OMPScheduleType::UnorderedDynamicChunked | 2766 omp::OMPScheduleType::ModifierNonmonotonic, 2767 omp::OMPScheduleType::UnorderedGuidedChunked | 2768 omp::OMPScheduleType::ModifierMonotonic, 2769 omp::OMPScheduleType::UnorderedGuidedChunked | 2770 omp::OMPScheduleType::ModifierNonmonotonic, 2771 omp::OMPScheduleType::UnorderedAuto | 2772 omp::OMPScheduleType::ModifierMonotonic, 2773 omp::OMPScheduleType::UnorderedRuntime | 2774 omp::OMPScheduleType::ModifierMonotonic)); 2775 2776 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) { 2777 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2778 OpenMPIRBuilder OMPBuilder(*M); 2779 OMPBuilder.Config.IsTargetDevice = false; 2780 OMPBuilder.initialize(); 2781 IRBuilder<> Builder(BB); 2782 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2783 2784 uint32_t ChunkSize = 1; 2785 Type *LCTy = Type::getInt32Ty(Ctx); 2786 Value *StartVal = ConstantInt::get(LCTy, 10); 2787 Value *StopVal = ConstantInt::get(LCTy, 52); 2788 Value *StepVal = ConstantInt::get(LCTy, 2); 2789 Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize); 2790 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { 2791 return llvm::Error::success(); 2792 
}; 2793 2794 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2795 OMPBuilder.createCanonicalLoop( 2796 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2797 /*IsSigned=*/false, /*InclusiveStop=*/false)); 2798 2799 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2800 InsertPointTy AllocaIP = Builder.saveIP(); 2801 2802 // Collect all the info from CLI, as it isn't usable after the call to 2803 // createDynamicWorkshareLoop. 2804 BasicBlock *Preheader = CLI->getPreheader(); 2805 BasicBlock *ExitBlock = CLI->getExit(); 2806 BasicBlock *LatchBlock = CLI->getLatch(); 2807 Value *IV = CLI->getIndVar(); 2808 2809 ASSERT_EXPECTED_INIT( 2810 OpenMPIRBuilder::InsertPointTy, EndIP, 2811 OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true, 2812 OMP_SCHEDULE_Static, ChunkVal, 2813 /*HasSimdModifier=*/false, 2814 /*HasMonotonicModifier=*/false, 2815 /*HasNonmonotonicModifier=*/false, 2816 /*HasOrderedClause=*/true)); 2817 2818 // Add a termination to our block and check that it is internally consistent. 
2819 Builder.restoreIP(EndIP); 2820 Builder.CreateRetVoid(); 2821 OMPBuilder.finalize(); 2822 EXPECT_FALSE(verifyModule(*M, &errs())); 2823 2824 CallInst *InitCall = nullptr; 2825 for (Instruction &EI : *Preheader) { 2826 Instruction *Cur = &EI; 2827 if (isa<CallInst>(Cur)) { 2828 InitCall = cast<CallInst>(Cur); 2829 if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u") 2830 break; 2831 InitCall = nullptr; 2832 } 2833 } 2834 EXPECT_NE(InitCall, nullptr); 2835 EXPECT_EQ(InitCall->arg_size(), 7U); 2836 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2837 EXPECT_EQ(SchedVal->getValue(), 2838 static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked)); 2839 2840 CallInst *FiniCall = dyn_cast<CallInst>( 2841 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2842 ASSERT_NE(FiniCall, nullptr); 2843 EXPECT_EQ(FiniCall->getCalledFunction()->getName(), 2844 "__kmpc_dispatch_fini_4u"); 2845 EXPECT_EQ(FiniCall->arg_size(), 2U); 2846 EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0)); 2847 EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1)); 2848 2849 // The original loop iterator should only be used in the condition, in the 2850 // increment and in the statement that adds the lower bound to it. 2851 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2852 2853 // The exit block should contain the barrier call, plus the call to obtain 2854 // the thread ID. 
2855 size_t NumCallsInExitBlock = 2856 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2857 EXPECT_EQ(NumCallsInExitBlock, 2u); 2858 } 2859 2860 TEST_F(OpenMPIRBuilderTest, MasterDirective) { 2861 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2862 OpenMPIRBuilder OMPBuilder(*M); 2863 OMPBuilder.initialize(); 2864 F->setName("func"); 2865 IRBuilder<> Builder(BB); 2866 2867 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2868 2869 AllocaInst *PrivAI = nullptr; 2870 2871 BasicBlock *EntryBB = nullptr; 2872 BasicBlock *ThenBB = nullptr; 2873 2874 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2875 if (AllocaIP.isSet()) 2876 Builder.restoreIP(AllocaIP); 2877 else 2878 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2879 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2880 Builder.CreateStore(F->arg_begin(), PrivAI); 2881 2882 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2883 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2884 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2885 2886 Builder.restoreIP(CodeGenIP); 2887 2888 // collect some info for checks later 2889 ThenBB = Builder.GetInsertBlock(); 2890 EntryBB = ThenBB->getUniquePredecessor(); 2891 2892 // simple instructions for body 2893 Value *PrivLoad = 2894 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2895 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2896 }; 2897 2898 auto FiniCB = [&](InsertPointTy IP) { 2899 BasicBlock *IPBB = IP.getBlock(); 2900 EXPECT_NE(IPBB->end(), IP.getPoint()); 2901 }; 2902 2903 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 2904 OMPBuilder.createMaster(Builder, 2905 BODYGENCB_WRAPPER(BodyGenCB), 2906 FINICB_WRAPPER(FiniCB))); 2907 Builder.restoreIP(AfterIP); 2908 Value *EntryBBTI = EntryBB->getTerminator(); 2909 EXPECT_NE(EntryBBTI, nullptr); 2910 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2911 BranchInst *EntryBr = 
cast<BranchInst>(EntryBB->getTerminator()); 2912 EXPECT_TRUE(EntryBr->isConditional()); 2913 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2914 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2915 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2916 2917 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 2918 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 2919 2920 CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0)); 2921 EXPECT_EQ(MasterEntryCI->arg_size(), 2U); 2922 EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master"); 2923 EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0))); 2924 2925 CallInst *MasterEndCI = nullptr; 2926 for (auto &FI : *ThenBB) { 2927 Instruction *cur = &FI; 2928 if (isa<CallInst>(cur)) { 2929 MasterEndCI = cast<CallInst>(cur); 2930 if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master") 2931 break; 2932 MasterEndCI = nullptr; 2933 } 2934 } 2935 EXPECT_NE(MasterEndCI, nullptr); 2936 EXPECT_EQ(MasterEndCI->arg_size(), 2U); 2937 EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0))); 2938 EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1)); 2939 } 2940 2941 TEST_F(OpenMPIRBuilderTest, MaskedDirective) { 2942 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2943 OpenMPIRBuilder OMPBuilder(*M); 2944 OMPBuilder.initialize(); 2945 F->setName("func"); 2946 IRBuilder<> Builder(BB); 2947 2948 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2949 2950 AllocaInst *PrivAI = nullptr; 2951 2952 BasicBlock *EntryBB = nullptr; 2953 BasicBlock *ThenBB = nullptr; 2954 2955 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2956 if (AllocaIP.isSet()) 2957 Builder.restoreIP(AllocaIP); 2958 else 2959 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2960 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2961 Builder.CreateStore(F->arg_begin(), PrivAI); 2962 2963 llvm::BasicBlock 
*CodeGenIPBB = CodeGenIP.getBlock(); 2964 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2965 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2966 2967 Builder.restoreIP(CodeGenIP); 2968 2969 // collect some info for checks later 2970 ThenBB = Builder.GetInsertBlock(); 2971 EntryBB = ThenBB->getUniquePredecessor(); 2972 2973 // simple instructions for body 2974 Value *PrivLoad = 2975 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2976 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2977 }; 2978 2979 auto FiniCB = [&](InsertPointTy IP) { 2980 BasicBlock *IPBB = IP.getBlock(); 2981 EXPECT_NE(IPBB->end(), IP.getPoint()); 2982 }; 2983 2984 Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 2985 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 2986 OMPBuilder.createMasked(Builder, 2987 BODYGENCB_WRAPPER(BodyGenCB), 2988 FINICB_WRAPPER(FiniCB), Filter)); 2989 Builder.restoreIP(AfterIP); 2990 Value *EntryBBTI = EntryBB->getTerminator(); 2991 EXPECT_NE(EntryBBTI, nullptr); 2992 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2993 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 2994 EXPECT_TRUE(EntryBr->isConditional()); 2995 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2996 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2997 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2998 2999 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3000 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3001 3002 CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3003 EXPECT_EQ(MaskedEntryCI->arg_size(), 3U); 3004 EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked"); 3005 EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0))); 3006 3007 CallInst *MaskedEndCI = nullptr; 3008 for (auto &FI : *ThenBB) { 3009 Instruction *cur = &FI; 3010 if (isa<CallInst>(cur)) { 3011 MaskedEndCI = cast<CallInst>(cur); 3012 if 
(MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked") 3013 break; 3014 MaskedEndCI = nullptr; 3015 } 3016 } 3017 EXPECT_NE(MaskedEndCI, nullptr); 3018 EXPECT_EQ(MaskedEndCI->arg_size(), 2U); 3019 EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0))); 3020 EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1)); 3021 } 3022 3023 TEST_F(OpenMPIRBuilderTest, CriticalDirective) { 3024 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3025 OpenMPIRBuilder OMPBuilder(*M); 3026 OMPBuilder.initialize(); 3027 F->setName("func"); 3028 IRBuilder<> Builder(BB); 3029 3030 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3031 3032 AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3033 3034 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3035 // actual start for bodyCB 3036 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3037 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3038 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3039 3040 // body begin 3041 Builder.restoreIP(CodeGenIP); 3042 Builder.CreateStore(F->arg_begin(), PrivAI); 3043 Value *PrivLoad = 3044 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3045 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3046 }; 3047 3048 auto FiniCB = [&](InsertPointTy IP) { 3049 BasicBlock *IPBB = IP.getBlock(); 3050 EXPECT_NE(IPBB->end(), IP.getPoint()); 3051 }; 3052 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3053 3054 ASSERT_EXPECTED_INIT( 3055 OpenMPIRBuilder::InsertPointTy, AfterIP, 3056 OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3057 FINICB_WRAPPER(FiniCB), "testCRT", nullptr)); 3058 Builder.restoreIP(AfterIP); 3059 3060 CallInst *CriticalEntryCI = nullptr; 3061 for (auto &EI : *EntryBB) { 3062 Instruction *cur = &EI; 3063 if (isa<CallInst>(cur)) { 3064 CriticalEntryCI = cast<CallInst>(cur); 3065 if (CriticalEntryCI->getCalledFunction()->getName() == 
"__kmpc_critical") 3066 break; 3067 CriticalEntryCI = nullptr; 3068 } 3069 } 3070 EXPECT_NE(CriticalEntryCI, nullptr); 3071 EXPECT_EQ(CriticalEntryCI->arg_size(), 3U); 3072 EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical"); 3073 EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0))); 3074 3075 CallInst *CriticalEndCI = nullptr; 3076 for (auto &FI : *EntryBB) { 3077 Instruction *cur = &FI; 3078 if (isa<CallInst>(cur)) { 3079 CriticalEndCI = cast<CallInst>(cur); 3080 if (CriticalEndCI->getCalledFunction()->getName() == 3081 "__kmpc_end_critical") 3082 break; 3083 CriticalEndCI = nullptr; 3084 } 3085 } 3086 EXPECT_NE(CriticalEndCI, nullptr); 3087 EXPECT_EQ(CriticalEndCI->arg_size(), 3U); 3088 EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0))); 3089 EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1)); 3090 PointerType *CriticalNamePtrTy = PointerType::getUnqual(Ctx); 3091 EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2)); 3092 GlobalVariable *GV = 3093 dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2)); 3094 ASSERT_NE(GV, nullptr); 3095 EXPECT_EQ(GV->getType(), CriticalNamePtrTy); 3096 const DataLayout &DL = M->getDataLayout(); 3097 const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy); 3098 const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace()); 3099 if (const llvm::MaybeAlign Alignment = GV->getAlign()) 3100 EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign)); 3101 } 3102 3103 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) { 3104 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3105 OpenMPIRBuilder OMPBuilder(*M); 3106 OMPBuilder.initialize(); 3107 F->setName("func"); 3108 IRBuilder<> Builder(BB); 3109 LLVMContext &Ctx = M->getContext(); 3110 3111 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3112 3113 InsertPointTy AllocaIP(&F->getEntryBlock(), 3114 
F->getEntryBlock().getFirstInsertionPt()); 3115 3116 unsigned NumLoops = 2; 3117 SmallVector<Value *, 2> StoreValues; 3118 Type *LCTy = Type::getInt64Ty(Ctx); 3119 StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 3120 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 3121 3122 // Test for "#omp ordered depend(source)" 3123 Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 3124 StoreValues, ".cnt.addr", 3125 /*IsDependSource=*/true)); 3126 3127 Builder.CreateRetVoid(); 3128 OMPBuilder.finalize(); 3129 EXPECT_FALSE(verifyModule(*M, &errs())); 3130 3131 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 3132 ASSERT_NE(AllocInst, nullptr); 3133 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 3134 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 3135 EXPECT_TRUE( 3136 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 3137 3138 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 3139 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 3140 GetElementPtrInst *DependAddrGEPIter = 3141 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3142 ASSERT_NE(DependAddrGEPIter, nullptr); 3143 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 3144 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 3145 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 3146 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 3147 ASSERT_NE(FirstIdx, nullptr); 3148 ASSERT_NE(SecondIdx, nullptr); 3149 EXPECT_EQ(FirstIdx->getValue(), 0); 3150 EXPECT_EQ(SecondIdx->getValue(), Iter); 3151 StoreInst *StoreValue = 3152 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 3153 ASSERT_NE(StoreValue, nullptr); 3154 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 3155 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 3156 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 3157 IterInst = dyn_cast<Instruction>(StoreValue); 3158 
} 3159 3160 GetElementPtrInst *DependBaseAddrGEP = 3161 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3162 ASSERT_NE(DependBaseAddrGEP, nullptr); 3163 EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3164 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3165 auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3166 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3167 ASSERT_NE(FirstIdx, nullptr); 3168 ASSERT_NE(SecondIdx, nullptr); 3169 EXPECT_EQ(FirstIdx->getValue(), 0); 3170 EXPECT_EQ(SecondIdx->getValue(), 0); 3171 3172 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3173 ASSERT_NE(GTID, nullptr); 3174 EXPECT_EQ(GTID->arg_size(), 1U); 3175 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3176 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3177 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3178 3179 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3180 ASSERT_NE(Depend, nullptr); 3181 EXPECT_EQ(Depend->arg_size(), 3U); 3182 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post"); 3183 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3184 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3185 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3186 } 3187 3188 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) { 3189 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3190 OpenMPIRBuilder OMPBuilder(*M); 3191 OMPBuilder.initialize(); 3192 F->setName("func"); 3193 IRBuilder<> Builder(BB); 3194 LLVMContext &Ctx = M->getContext(); 3195 3196 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3197 3198 InsertPointTy AllocaIP(&F->getEntryBlock(), 3199 F->getEntryBlock().getFirstInsertionPt()); 3200 3201 unsigned NumLoops = 2; 3202 SmallVector<Value *, 2> StoreValues; 3203 Type *LCTy = Type::getInt64Ty(Ctx); 3204 
StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 3205 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 3206 3207 // Test for "#omp ordered depend(sink: vec)" 3208 Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 3209 StoreValues, ".cnt.addr", 3210 /*IsDependSource=*/false)); 3211 3212 Builder.CreateRetVoid(); 3213 OMPBuilder.finalize(); 3214 EXPECT_FALSE(verifyModule(*M, &errs())); 3215 3216 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 3217 ASSERT_NE(AllocInst, nullptr); 3218 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 3219 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 3220 EXPECT_TRUE( 3221 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 3222 3223 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 3224 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 3225 GetElementPtrInst *DependAddrGEPIter = 3226 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3227 ASSERT_NE(DependAddrGEPIter, nullptr); 3228 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 3229 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 3230 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 3231 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 3232 ASSERT_NE(FirstIdx, nullptr); 3233 ASSERT_NE(SecondIdx, nullptr); 3234 EXPECT_EQ(FirstIdx->getValue(), 0); 3235 EXPECT_EQ(SecondIdx->getValue(), Iter); 3236 StoreInst *StoreValue = 3237 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 3238 ASSERT_NE(StoreValue, nullptr); 3239 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 3240 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 3241 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 3242 IterInst = dyn_cast<Instruction>(StoreValue); 3243 } 3244 3245 GetElementPtrInst *DependBaseAddrGEP = 3246 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3247 ASSERT_NE(DependBaseAddrGEP, nullptr); 3248 
EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3249 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3250 auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3251 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3252 ASSERT_NE(FirstIdx, nullptr); 3253 ASSERT_NE(SecondIdx, nullptr); 3254 EXPECT_EQ(FirstIdx->getValue(), 0); 3255 EXPECT_EQ(SecondIdx->getValue(), 0); 3256 3257 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3258 ASSERT_NE(GTID, nullptr); 3259 EXPECT_EQ(GTID->arg_size(), 1U); 3260 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3261 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3262 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3263 3264 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3265 ASSERT_NE(Depend, nullptr); 3266 EXPECT_EQ(Depend->arg_size(), 3U); 3267 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait"); 3268 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3269 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3270 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3271 } 3272 3273 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { 3274 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3275 OpenMPIRBuilder OMPBuilder(*M); 3276 OMPBuilder.initialize(); 3277 F->setName("func"); 3278 IRBuilder<> Builder(BB); 3279 3280 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3281 3282 AllocaInst *PrivAI = 3283 Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3284 3285 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3286 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3287 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3288 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3289 3290 Builder.restoreIP(CodeGenIP); 3291 Builder.CreateStore(F->arg_begin(), PrivAI); 
3292 Value *PrivLoad = 3293 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3294 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3295 }; 3296 3297 auto FiniCB = [&](InsertPointTy IP) { 3298 BasicBlock *IPBB = IP.getBlock(); 3299 EXPECT_NE(IPBB->end(), IP.getPoint()); 3300 }; 3301 3302 // Test for "#omp ordered [threads]" 3303 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3304 ASSERT_EXPECTED_INIT( 3305 OpenMPIRBuilder::InsertPointTy, AfterIP, 3306 OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3307 FINICB_WRAPPER(FiniCB), true)); 3308 Builder.restoreIP(AfterIP); 3309 3310 Builder.CreateRetVoid(); 3311 OMPBuilder.finalize(); 3312 EXPECT_FALSE(verifyModule(*M, &errs())); 3313 3314 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3315 3316 CallInst *OrderedEntryCI = nullptr; 3317 for (auto &EI : *EntryBB) { 3318 Instruction *Cur = &EI; 3319 if (isa<CallInst>(Cur)) { 3320 OrderedEntryCI = cast<CallInst>(Cur); 3321 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3322 break; 3323 OrderedEntryCI = nullptr; 3324 } 3325 } 3326 EXPECT_NE(OrderedEntryCI, nullptr); 3327 EXPECT_EQ(OrderedEntryCI->arg_size(), 2U); 3328 EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered"); 3329 EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0))); 3330 3331 CallInst *OrderedEndCI = nullptr; 3332 for (auto &FI : *EntryBB) { 3333 Instruction *Cur = &FI; 3334 if (isa<CallInst>(Cur)) { 3335 OrderedEndCI = cast<CallInst>(Cur); 3336 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3337 break; 3338 OrderedEndCI = nullptr; 3339 } 3340 } 3341 EXPECT_NE(OrderedEndCI, nullptr); 3342 EXPECT_EQ(OrderedEndCI->arg_size(), 2U); 3343 EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0))); 3344 EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1)); 3345 } 3346 3347 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { 3348 using InsertPointTy = 
OpenMPIRBuilder::InsertPointTy; 3349 OpenMPIRBuilder OMPBuilder(*M); 3350 OMPBuilder.initialize(); 3351 F->setName("func"); 3352 IRBuilder<> Builder(BB); 3353 3354 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3355 3356 AllocaInst *PrivAI = 3357 Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3358 3359 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3360 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3361 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3362 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3363 3364 Builder.restoreIP(CodeGenIP); 3365 Builder.CreateStore(F->arg_begin(), PrivAI); 3366 Value *PrivLoad = 3367 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3368 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3369 }; 3370 3371 auto FiniCB = [&](InsertPointTy IP) { 3372 BasicBlock *IPBB = IP.getBlock(); 3373 EXPECT_NE(IPBB->end(), IP.getPoint()); 3374 }; 3375 3376 // Test for "#omp ordered simd" 3377 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3378 ASSERT_EXPECTED_INIT( 3379 OpenMPIRBuilder::InsertPointTy, AfterIP, 3380 OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3381 FINICB_WRAPPER(FiniCB), false)); 3382 Builder.restoreIP(AfterIP); 3383 3384 Builder.CreateRetVoid(); 3385 OMPBuilder.finalize(); 3386 EXPECT_FALSE(verifyModule(*M, &errs())); 3387 3388 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3389 3390 CallInst *OrderedEntryCI = nullptr; 3391 for (auto &EI : *EntryBB) { 3392 Instruction *Cur = &EI; 3393 if (isa<CallInst>(Cur)) { 3394 OrderedEntryCI = cast<CallInst>(Cur); 3395 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3396 break; 3397 OrderedEntryCI = nullptr; 3398 } 3399 } 3400 EXPECT_EQ(OrderedEntryCI, nullptr); 3401 3402 CallInst *OrderedEndCI = nullptr; 3403 for (auto &FI : *EntryBB) { 3404 Instruction *Cur = &FI; 3405 if (isa<CallInst>(Cur)) { 3406 OrderedEndCI = 
cast<CallInst>(Cur); 3407 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3408 break; 3409 OrderedEndCI = nullptr; 3410 } 3411 } 3412 EXPECT_EQ(OrderedEndCI, nullptr); 3413 } 3414 3415 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { 3416 OpenMPIRBuilder OMPBuilder(*M); 3417 OMPBuilder.initialize(); 3418 F->setName("func"); 3419 IRBuilder<> Builder(BB); 3420 3421 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3422 3423 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3424 AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3425 AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3426 3427 BasicBlock *EntryBB = BB; 3428 3429 OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, 3430 PrivAddress, Int32, /*BranchtoEnd*/ true); 3431 3432 BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator()); 3433 3434 EXPECT_NE(EntryBr, nullptr); 3435 EXPECT_TRUE(EntryBr->isConditional()); 3436 3437 BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); 3438 BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); 3439 CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition()); 3440 3441 EXPECT_NE(CMP, nullptr); 3442 EXPECT_NE(NotMasterBB, nullptr); 3443 EXPECT_NE(CopyinEnd, nullptr); 3444 3445 BranchInst *NotMasterBr = 3446 dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator()); 3447 EXPECT_NE(NotMasterBr, nullptr); 3448 EXPECT_FALSE(NotMasterBr->isConditional()); 3449 EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); 3450 } 3451 3452 TEST_F(OpenMPIRBuilderTest, SingleDirective) { 3453 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3454 OpenMPIRBuilder OMPBuilder(*M); 3455 OMPBuilder.initialize(); 3456 F->setName("func"); 3457 IRBuilder<> Builder(BB); 3458 3459 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3460 3461 AllocaInst *PrivAI = nullptr; 3462 3463 BasicBlock *EntryBB = nullptr; 3464 BasicBlock *ThenBB = nullptr; 3465 3466 auto 
BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3467 if (AllocaIP.isSet()) 3468 Builder.restoreIP(AllocaIP); 3469 else 3470 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3471 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3472 Builder.CreateStore(F->arg_begin(), PrivAI); 3473 3474 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3475 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3476 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3477 3478 Builder.restoreIP(CodeGenIP); 3479 3480 // collect some info for checks later 3481 ThenBB = Builder.GetInsertBlock(); 3482 EntryBB = ThenBB->getUniquePredecessor(); 3483 3484 // simple instructions for body 3485 Value *PrivLoad = 3486 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3487 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3488 }; 3489 3490 auto FiniCB = [&](InsertPointTy IP) { 3491 BasicBlock *IPBB = IP.getBlock(); 3492 EXPECT_NE(IPBB->end(), IP.getPoint()); 3493 }; 3494 3495 ASSERT_EXPECTED_INIT( 3496 OpenMPIRBuilder::InsertPointTy, AfterIP, 3497 OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3498 FINICB_WRAPPER(FiniCB), /*IsNowait*/ false)); 3499 Builder.restoreIP(AfterIP); 3500 Value *EntryBBTI = EntryBB->getTerminator(); 3501 EXPECT_NE(EntryBBTI, nullptr); 3502 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3503 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3504 EXPECT_TRUE(EntryBr->isConditional()); 3505 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3506 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3507 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3508 3509 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3510 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3511 3512 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3513 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3514 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3515 
EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3516 3517 CallInst *SingleEndCI = nullptr; 3518 for (auto &FI : *ThenBB) { 3519 Instruction *cur = &FI; 3520 if (isa<CallInst>(cur)) { 3521 SingleEndCI = cast<CallInst>(cur); 3522 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3523 break; 3524 SingleEndCI = nullptr; 3525 } 3526 } 3527 EXPECT_NE(SingleEndCI, nullptr); 3528 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3529 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3530 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3531 3532 bool FoundBarrier = false; 3533 for (auto &FI : *ExitBB) { 3534 Instruction *cur = &FI; 3535 if (auto CI = dyn_cast<CallInst>(cur)) { 3536 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3537 FoundBarrier = true; 3538 break; 3539 } 3540 } 3541 } 3542 EXPECT_TRUE(FoundBarrier); 3543 } 3544 3545 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { 3546 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3547 OpenMPIRBuilder OMPBuilder(*M); 3548 OMPBuilder.initialize(); 3549 F->setName("func"); 3550 IRBuilder<> Builder(BB); 3551 3552 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3553 3554 AllocaInst *PrivAI = nullptr; 3555 3556 BasicBlock *EntryBB = nullptr; 3557 BasicBlock *ThenBB = nullptr; 3558 3559 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3560 if (AllocaIP.isSet()) 3561 Builder.restoreIP(AllocaIP); 3562 else 3563 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3564 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3565 Builder.CreateStore(F->arg_begin(), PrivAI); 3566 3567 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3568 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3569 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3570 3571 Builder.restoreIP(CodeGenIP); 3572 3573 // collect some info for checks later 3574 ThenBB = 
Builder.GetInsertBlock(); 3575 EntryBB = ThenBB->getUniquePredecessor(); 3576 3577 // simple instructions for body 3578 Value *PrivLoad = 3579 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3580 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3581 }; 3582 3583 auto FiniCB = [&](InsertPointTy IP) { 3584 BasicBlock *IPBB = IP.getBlock(); 3585 EXPECT_NE(IPBB->end(), IP.getPoint()); 3586 }; 3587 3588 ASSERT_EXPECTED_INIT( 3589 OpenMPIRBuilder::InsertPointTy, AfterIP, 3590 OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3591 FINICB_WRAPPER(FiniCB), /*IsNowait*/ true)); 3592 Builder.restoreIP(AfterIP); 3593 Value *EntryBBTI = EntryBB->getTerminator(); 3594 EXPECT_NE(EntryBBTI, nullptr); 3595 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3596 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3597 EXPECT_TRUE(EntryBr->isConditional()); 3598 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3599 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3600 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3601 3602 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3603 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3604 3605 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3606 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3607 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3608 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3609 3610 CallInst *SingleEndCI = nullptr; 3611 for (auto &FI : *ThenBB) { 3612 Instruction *cur = &FI; 3613 if (isa<CallInst>(cur)) { 3614 SingleEndCI = cast<CallInst>(cur); 3615 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3616 break; 3617 SingleEndCI = nullptr; 3618 } 3619 } 3620 EXPECT_NE(SingleEndCI, nullptr); 3621 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3622 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3623 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3624 3625 
CallInst *ExitBarrier = nullptr; 3626 for (auto &FI : *ExitBB) { 3627 Instruction *cur = &FI; 3628 if (auto CI = dyn_cast<CallInst>(cur)) { 3629 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3630 ExitBarrier = CI; 3631 break; 3632 } 3633 } 3634 } 3635 EXPECT_EQ(ExitBarrier, nullptr); 3636 } 3637 3638 // Helper class to check each instruction of a BB. 3639 class BBInstIter { 3640 BasicBlock *BB; 3641 BasicBlock::iterator BBI; 3642 3643 public: 3644 BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {} 3645 3646 bool hasNext() const { return BBI != BB->end(); } 3647 3648 template <typename InstTy> InstTy *next() { 3649 if (!hasNext()) 3650 return nullptr; 3651 Instruction *Cur = &*BBI++; 3652 if (!isa<InstTy>(Cur)) 3653 return nullptr; 3654 return cast<InstTy>(Cur); 3655 } 3656 }; 3657 3658 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { 3659 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3660 OpenMPIRBuilder OMPBuilder(*M); 3661 OMPBuilder.initialize(); 3662 F->setName("func"); 3663 IRBuilder<> Builder(BB); 3664 3665 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3666 3667 AllocaInst *PrivAI = nullptr; 3668 3669 BasicBlock *EntryBB = nullptr; 3670 BasicBlock *ThenBB = nullptr; 3671 3672 Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType()); 3673 Builder.CreateStore(F->arg_begin(), CPVar); 3674 3675 FunctionType *CopyFuncTy = FunctionType::get( 3676 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false); 3677 Function *CopyFunc = 3678 Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); 3679 3680 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3681 if (AllocaIP.isSet()) 3682 Builder.restoreIP(AllocaIP); 3683 else 3684 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3685 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3686 Builder.CreateStore(F->arg_begin(), PrivAI); 3687 3688 llvm::BasicBlock *CodeGenIPBB = 
CodeGenIP.getBlock(); 3689 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3690 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3691 3692 Builder.restoreIP(CodeGenIP); 3693 3694 // collect some info for checks later 3695 ThenBB = Builder.GetInsertBlock(); 3696 EntryBB = ThenBB->getUniquePredecessor(); 3697 3698 // simple instructions for body 3699 Value *PrivLoad = 3700 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3701 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3702 }; 3703 3704 auto FiniCB = [&](InsertPointTy IP) { 3705 BasicBlock *IPBB = IP.getBlock(); 3706 // IP must be before the unconditional branch to ExitBB 3707 EXPECT_NE(IPBB->end(), IP.getPoint()); 3708 }; 3709 3710 ASSERT_EXPECTED_INIT( 3711 OpenMPIRBuilder::InsertPointTy, AfterIP, 3712 OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3713 FINICB_WRAPPER(FiniCB), 3714 /*IsNowait*/ false, {CPVar}, {CopyFunc})); 3715 Builder.restoreIP(AfterIP); 3716 Value *EntryBBTI = EntryBB->getTerminator(); 3717 EXPECT_NE(EntryBBTI, nullptr); 3718 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3719 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3720 EXPECT_TRUE(EntryBr->isConditional()); 3721 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3722 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3723 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3724 3725 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3726 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3727 3728 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3729 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3730 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3731 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3732 3733 // check ThenBB 3734 BBInstIter ThenBBI(ThenBB); 3735 // load PrivAI 3736 auto *PrivLI = ThenBBI.next<LoadInst>(); 3737 EXPECT_NE(PrivLI, nullptr); 3738 EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI); 
3739 // icmp 3740 EXPECT_TRUE(ThenBBI.next<ICmpInst>()); 3741 // store 1, DidIt 3742 auto *DidItSI = ThenBBI.next<StoreInst>(); 3743 EXPECT_NE(DidItSI, nullptr); 3744 EXPECT_EQ(DidItSI->getValueOperand(), 3745 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 3746 Value *DidIt = DidItSI->getPointerOperand(); 3747 // call __kmpc_end_single 3748 auto *SingleEndCI = ThenBBI.next<CallInst>(); 3749 EXPECT_NE(SingleEndCI, nullptr); 3750 EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single"); 3751 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3752 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3753 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3754 // br ExitBB 3755 auto *ExitBBBI = ThenBBI.next<BranchInst>(); 3756 EXPECT_NE(ExitBBBI, nullptr); 3757 EXPECT_TRUE(ExitBBBI->isUnconditional()); 3758 EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB); 3759 EXPECT_FALSE(ThenBBI.hasNext()); 3760 3761 // check ExitBB 3762 BBInstIter ExitBBI(ExitBB); 3763 // call __kmpc_global_thread_num 3764 auto *ThreadNumCI = ExitBBI.next<CallInst>(); 3765 EXPECT_NE(ThreadNumCI, nullptr); 3766 EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(), 3767 "__kmpc_global_thread_num"); 3768 // load DidIt 3769 auto *DidItLI = ExitBBI.next<LoadInst>(); 3770 EXPECT_NE(DidItLI, nullptr); 3771 EXPECT_EQ(DidItLI->getPointerOperand(), DidIt); 3772 // call __kmpc_copyprivate 3773 auto *CopyPrivateCI = ExitBBI.next<CallInst>(); 3774 EXPECT_NE(CopyPrivateCI, nullptr); 3775 EXPECT_EQ(CopyPrivateCI->arg_size(), 6U); 3776 EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3))); 3777 EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar); 3778 EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4))); 3779 EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc); 3780 EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5))); 3781 DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5)); 3782 EXPECT_EQ(DidItLI->getOperand(0), DidIt); 3783 
EXPECT_FALSE(ExitBBI.hasNext()); 3784 } 3785 3786 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) { 3787 OpenMPIRBuilder OMPBuilder(*M); 3788 OMPBuilder.initialize(); 3789 F->setName("func"); 3790 IRBuilder<> Builder(BB); 3791 3792 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3793 3794 Type *Float32 = Type::getFloatTy(M->getContext()); 3795 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3796 XVal->setName("AtomicVar"); 3797 AllocaInst *VVal = Builder.CreateAlloca(Float32); 3798 VVal->setName("AtomicRead"); 3799 AtomicOrdering AO = AtomicOrdering::Monotonic; 3800 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3801 OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false}; 3802 3803 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3804 3805 IntegerType *IntCastTy = 3806 IntegerType::get(M->getContext(), Float32->getScalarSizeInBits()); 3807 3808 LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode()); 3809 EXPECT_TRUE(AtomicLoad->isAtomic()); 3810 EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal); 3811 3812 BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode()); 3813 EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy); 3814 EXPECT_EQ(CastToFlt->getDestTy(), Float32); 3815 EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad); 3816 3817 StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode()); 3818 EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt); 3819 EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal); 3820 3821 Builder.CreateRetVoid(); 3822 OMPBuilder.finalize(); 3823 EXPECT_FALSE(verifyModule(*M, &errs())); 3824 } 3825 3826 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) { 3827 OpenMPIRBuilder OMPBuilder(*M); 3828 OMPBuilder.initialize(); 3829 F->setName("func"); 3830 IRBuilder<> Builder(BB); 3831 3832 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3833 3834 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3835 AllocaInst *XVal = Builder.CreateAlloca(Int32); 
3836 XVal->setName("AtomicVar"); 3837 AllocaInst *VVal = Builder.CreateAlloca(Int32); 3838 VVal->setName("AtomicRead"); 3839 AtomicOrdering AO = AtomicOrdering::Monotonic; 3840 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3841 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 3842 3843 BasicBlock *EntryBB = BB; 3844 3845 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3846 LoadInst *AtomicLoad = nullptr; 3847 StoreInst *StoreofAtomic = nullptr; 3848 3849 for (Instruction &Cur : *EntryBB) { 3850 if (isa<LoadInst>(Cur)) { 3851 AtomicLoad = cast<LoadInst>(&Cur); 3852 if (AtomicLoad->getPointerOperand() == XVal) 3853 continue; 3854 AtomicLoad = nullptr; 3855 } else if (isa<StoreInst>(Cur)) { 3856 StoreofAtomic = cast<StoreInst>(&Cur); 3857 if (StoreofAtomic->getPointerOperand() == VVal) 3858 continue; 3859 StoreofAtomic = nullptr; 3860 } 3861 } 3862 3863 EXPECT_NE(AtomicLoad, nullptr); 3864 EXPECT_TRUE(AtomicLoad->isAtomic()); 3865 3866 EXPECT_NE(StoreofAtomic, nullptr); 3867 EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad); 3868 3869 Builder.CreateRetVoid(); 3870 OMPBuilder.finalize(); 3871 3872 EXPECT_FALSE(verifyModule(*M, &errs())); 3873 } 3874 3875 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) { 3876 OpenMPIRBuilder OMPBuilder(*M); 3877 OMPBuilder.initialize(); 3878 F->setName("func"); 3879 IRBuilder<> Builder(BB); 3880 3881 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3882 3883 LLVMContext &Ctx = M->getContext(); 3884 Type *Float32 = Type::getFloatTy(Ctx); 3885 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3886 XVal->setName("AtomicVar"); 3887 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3888 AtomicOrdering AO = AtomicOrdering::Monotonic; 3889 Constant *ValToWrite = ConstantFP::get(Float32, 1.0); 3890 3891 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3892 3893 IntegerType *IntCastTy = 3894 IntegerType::get(M->getContext(), 
Float32->getScalarSizeInBits()); 3895 3896 Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy); 3897 3898 StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode()); 3899 EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast); 3900 EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal); 3901 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3902 3903 Builder.CreateRetVoid(); 3904 OMPBuilder.finalize(); 3905 EXPECT_FALSE(verifyModule(*M, &errs())); 3906 } 3907 3908 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) { 3909 OpenMPIRBuilder OMPBuilder(*M); 3910 OMPBuilder.initialize(); 3911 F->setName("func"); 3912 IRBuilder<> Builder(BB); 3913 3914 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3915 3916 LLVMContext &Ctx = M->getContext(); 3917 IntegerType *Int32 = Type::getInt32Ty(Ctx); 3918 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3919 XVal->setName("AtomicVar"); 3920 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3921 AtomicOrdering AO = AtomicOrdering::Monotonic; 3922 ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3923 3924 BasicBlock *EntryBB = BB; 3925 3926 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3927 3928 StoreInst *StoreofAtomic = nullptr; 3929 3930 for (Instruction &Cur : *EntryBB) { 3931 if (isa<StoreInst>(Cur)) { 3932 StoreofAtomic = cast<StoreInst>(&Cur); 3933 if (StoreofAtomic->getPointerOperand() == XVal) 3934 continue; 3935 StoreofAtomic = nullptr; 3936 } 3937 } 3938 3939 EXPECT_NE(StoreofAtomic, nullptr); 3940 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3941 EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite); 3942 3943 Builder.CreateRetVoid(); 3944 OMPBuilder.finalize(); 3945 EXPECT_FALSE(verifyModule(*M, &errs())); 3946 } 3947 3948 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) { 3949 OpenMPIRBuilder OMPBuilder(*M); 3950 OMPBuilder.initialize(); 3951 F->setName("func"); 3952 IRBuilder<> Builder(BB); 3953 3954 OpenMPIRBuilder::LocationDescription 
Loc({Builder.saveIP(), DL}); 3955 3956 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3957 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3958 XVal->setName("AtomicVar"); 3959 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 3960 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3961 AtomicOrdering AO = AtomicOrdering::Monotonic; 3962 ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3963 Value *Expr = nullptr; 3964 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub; 3965 bool IsXLHSInRHSPart = false; 3966 3967 BasicBlock *EntryBB = BB; 3968 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 3969 EntryBB->getFirstInsertionPt()); 3970 Value *Sub = nullptr; 3971 3972 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 3973 Sub = IRB.CreateSub(ConstVal, Atomic); 3974 return Sub; 3975 }; 3976 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 3977 OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, 3978 AO, RMWOp, UpdateOp, 3979 IsXLHSInRHSPart)); 3980 Builder.restoreIP(AfterIP); 3981 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 3982 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 3983 EXPECT_NE(ContTI, nullptr); 3984 BasicBlock *EndBB = ContTI->getSuccessor(0); 3985 EXPECT_TRUE(ContTI->isConditional()); 3986 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 3987 EXPECT_NE(EndBB, nullptr); 3988 3989 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 3990 EXPECT_NE(Phi, nullptr); 3991 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 3992 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 3993 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 3994 3995 EXPECT_EQ(Sub->getNumUses(), 1U); 3996 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 3997 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 3998 3999 ExtractValueInst *ExVI1 = 4000 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 4001 EXPECT_NE(ExVI1, nullptr); 4002 AtomicCmpXchgInst 
*CmpExchg = 4003 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 4004 EXPECT_NE(CmpExchg, nullptr); 4005 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 4006 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 4007 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 4008 4009 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 4010 EXPECT_NE(Ld, nullptr); 4011 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 4012 4013 Builder.CreateRetVoid(); 4014 OMPBuilder.finalize(); 4015 EXPECT_FALSE(verifyModule(*M, &errs())); 4016 } 4017 4018 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) { 4019 OpenMPIRBuilder OMPBuilder(*M); 4020 OMPBuilder.initialize(); 4021 F->setName("func"); 4022 IRBuilder<> Builder(BB); 4023 4024 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4025 4026 Type *FloatTy = Type::getFloatTy(M->getContext()); 4027 AllocaInst *XVal = Builder.CreateAlloca(FloatTy); 4028 XVal->setName("AtomicVar"); 4029 Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal); 4030 OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false}; 4031 AtomicOrdering AO = AtomicOrdering::Monotonic; 4032 Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0); 4033 Value *Expr = nullptr; 4034 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub; 4035 bool IsXLHSInRHSPart = false; 4036 4037 BasicBlock *EntryBB = BB; 4038 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 4039 EntryBB->getFirstInsertionPt()); 4040 Value *Sub = nullptr; 4041 4042 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 4043 Sub = IRB.CreateFSub(ConstVal, Atomic); 4044 return Sub; 4045 }; 4046 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4047 OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, 4048 AO, RMWOp, UpdateOp, 4049 IsXLHSInRHSPart)); 4050 Builder.restoreIP(AfterIP); 4051 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 4052 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 4053 
EXPECT_NE(ContTI, nullptr); 4054 BasicBlock *EndBB = ContTI->getSuccessor(0); 4055 EXPECT_TRUE(ContTI->isConditional()); 4056 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 4057 EXPECT_NE(EndBB, nullptr); 4058 4059 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 4060 EXPECT_NE(Phi, nullptr); 4061 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 4062 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 4063 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 4064 4065 EXPECT_EQ(Sub->getNumUses(), 1U); 4066 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 4067 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 4068 4069 ExtractValueInst *ExVI1 = 4070 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 4071 EXPECT_NE(ExVI1, nullptr); 4072 AtomicCmpXchgInst *CmpExchg = 4073 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 4074 EXPECT_NE(CmpExchg, nullptr); 4075 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 4076 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 4077 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 4078 4079 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 4080 EXPECT_NE(Ld, nullptr); 4081 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 4082 Builder.CreateRetVoid(); 4083 OMPBuilder.finalize(); 4084 EXPECT_FALSE(verifyModule(*M, &errs())); 4085 } 4086 4087 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) { 4088 OpenMPIRBuilder OMPBuilder(*M); 4089 OMPBuilder.initialize(); 4090 F->setName("func"); 4091 IRBuilder<> Builder(BB); 4092 4093 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4094 4095 Type *IntTy = Type::getInt32Ty(M->getContext()); 4096 AllocaInst *XVal = Builder.CreateAlloca(IntTy); 4097 XVal->setName("AtomicVar"); 4098 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal); 4099 OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false}; 4100 AtomicOrdering AO = AtomicOrdering::Monotonic; 4101 Constant *ConstVal = 
ConstantInt::get(Type::getInt32Ty(Ctx), 1); 4102 Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1); 4103 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax; 4104 bool IsXLHSInRHSPart = false; 4105 4106 BasicBlock *EntryBB = BB; 4107 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 4108 EntryBB->getFirstInsertionPt()); 4109 Value *Sub = nullptr; 4110 4111 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 4112 Sub = IRB.CreateSub(ConstVal, Atomic); 4113 return Sub; 4114 }; 4115 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4116 OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, 4117 AO, RMWOp, UpdateOp, 4118 IsXLHSInRHSPart)); 4119 Builder.restoreIP(AfterIP); 4120 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 4121 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 4122 EXPECT_NE(ContTI, nullptr); 4123 BasicBlock *EndBB = ContTI->getSuccessor(0); 4124 EXPECT_TRUE(ContTI->isConditional()); 4125 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 4126 EXPECT_NE(EndBB, nullptr); 4127 4128 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 4129 EXPECT_NE(Phi, nullptr); 4130 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 4131 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 4132 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 4133 4134 EXPECT_EQ(Sub->getNumUses(), 1U); 4135 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 4136 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 4137 4138 ExtractValueInst *ExVI1 = 4139 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 4140 EXPECT_NE(ExVI1, nullptr); 4141 AtomicCmpXchgInst *CmpExchg = 4142 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 4143 EXPECT_NE(CmpExchg, nullptr); 4144 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 4145 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 4146 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 4147 4148 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 4149 
EXPECT_NE(Ld, nullptr); 4150 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 4151 4152 Builder.CreateRetVoid(); 4153 OMPBuilder.finalize(); 4154 EXPECT_FALSE(verifyModule(*M, &errs())); 4155 } 4156 4157 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) { 4158 OpenMPIRBuilder OMPBuilder(*M); 4159 OMPBuilder.initialize(); 4160 F->setName("func"); 4161 IRBuilder<> Builder(BB); 4162 4163 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4164 4165 LLVMContext &Ctx = M->getContext(); 4166 IntegerType *Int32 = Type::getInt32Ty(Ctx); 4167 AllocaInst *XVal = Builder.CreateAlloca(Int32); 4168 XVal->setName("AtomicVar"); 4169 AllocaInst *VVal = Builder.CreateAlloca(Int32); 4170 VVal->setName("AtomicCapTar"); 4171 StoreInst *Init = 4172 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 4173 4174 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 4175 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 4176 AtomicOrdering AO = AtomicOrdering::Monotonic; 4177 ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4178 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add; 4179 bool IsXLHSInRHSPart = true; 4180 bool IsPostfixUpdate = true; 4181 bool UpdateExpr = true; 4182 4183 BasicBlock *EntryBB = BB; 4184 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 4185 EntryBB->getFirstInsertionPt()); 4186 4187 // integer update - not used 4188 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; }; 4189 4190 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4191 OMPBuilder.createAtomicCapture( 4192 Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, 4193 UpdateExpr, IsPostfixUpdate, IsXLHSInRHSPart)); 4194 Builder.restoreIP(AfterIP); 4195 EXPECT_EQ(EntryBB->getParent()->size(), 1U); 4196 AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode()); 4197 EXPECT_NE(ARWM, nullptr); 4198 EXPECT_EQ(ARWM->getPointerOperand(), XVal); 4199 EXPECT_EQ(ARWM->getOperation(), RMWOp); 4200 StoreInst *St 
= dyn_cast<StoreInst>(ARWM->user_back()); 4201 EXPECT_NE(St, nullptr); 4202 EXPECT_EQ(St->getPointerOperand(), VVal); 4203 4204 Builder.CreateRetVoid(); 4205 OMPBuilder.finalize(); 4206 EXPECT_FALSE(verifyModule(*M, &errs())); 4207 } 4208 4209 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) { 4210 OpenMPIRBuilder OMPBuilder(*M); 4211 OMPBuilder.initialize(); 4212 F->setName("func"); 4213 IRBuilder<> Builder(BB); 4214 4215 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4216 4217 LLVMContext &Ctx = M->getContext(); 4218 IntegerType *Int32 = Type::getInt32Ty(Ctx); 4219 AllocaInst *XVal = Builder.CreateAlloca(Int32); 4220 XVal->setName("x"); 4221 StoreInst *Init = 4222 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 4223 4224 OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false}; 4225 OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false}; 4226 // V and R are not used in atomic compare 4227 OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false}; 4228 OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false}; 4229 AtomicOrdering AO = AtomicOrdering::Monotonic; 4230 ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4231 ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4232 OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX; 4233 OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ; 4234 4235 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4236 Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false)); 4237 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4238 Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false)); 4239 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4240 Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false)); 4241 4242 BasicBlock *EntryBB = BB; 4243 EXPECT_EQ(EntryBB->getParent()->size(), 1U); 4244 EXPECT_EQ(EntryBB->size(), 5U); 4245 4246 AtomicRMWInst *ARWM1 = 
dyn_cast<AtomicRMWInst>(Init->getNextNode()); 4247 EXPECT_NE(ARWM1, nullptr); 4248 EXPECT_EQ(ARWM1->getPointerOperand(), XVal); 4249 EXPECT_EQ(ARWM1->getValOperand(), Expr); 4250 EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min); 4251 4252 AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode()); 4253 EXPECT_NE(ARWM2, nullptr); 4254 EXPECT_EQ(ARWM2->getPointerOperand(), XVal); 4255 EXPECT_EQ(ARWM2->getValOperand(), Expr); 4256 EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax); 4257 4258 AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode()); 4259 EXPECT_NE(AXCHG, nullptr); 4260 EXPECT_EQ(AXCHG->getPointerOperand(), XVal); 4261 EXPECT_EQ(AXCHG->getCompareOperand(), Expr); 4262 EXPECT_EQ(AXCHG->getNewValOperand(), D); 4263 4264 Builder.CreateRetVoid(); 4265 OMPBuilder.finalize(); 4266 EXPECT_FALSE(verifyModule(*M, &errs())); 4267 } 4268 4269 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) { 4270 OpenMPIRBuilder OMPBuilder(*M); 4271 OMPBuilder.initialize(); 4272 F->setName("func"); 4273 IRBuilder<> Builder(BB); 4274 4275 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4276 4277 LLVMContext &Ctx = M->getContext(); 4278 IntegerType *Int32 = Type::getInt32Ty(Ctx); 4279 AllocaInst *XVal = Builder.CreateAlloca(Int32); 4280 XVal->setName("x"); 4281 AllocaInst *VVal = Builder.CreateAlloca(Int32); 4282 VVal->setName("v"); 4283 AllocaInst *RVal = Builder.CreateAlloca(Int32); 4284 RVal->setName("r"); 4285 4286 StoreInst *Init = 4287 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 4288 4289 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false}; 4290 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 4291 OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false}; 4292 OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false}; 4293 OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false}; 4294 4295 AtomicOrdering AO = AtomicOrdering::Monotonic; 
4296 ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4297 ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4298 OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX; 4299 OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ; 4300 4301 // { cond-update-stmt v = x; } 4302 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4303 Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4304 /* IsPostfixUpdate */ false, 4305 /* IsFailOnly */ false)); 4306 // { v = x; cond-update-stmt } 4307 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4308 Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4309 /* IsPostfixUpdate */ true, 4310 /* IsFailOnly */ false)); 4311 // if(x == e) { x = d; } else { v = x; } 4312 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4313 Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4314 /* IsPostfixUpdate */ false, 4315 /* IsFailOnly */ true)); 4316 // { r = x == e; if(r) { x = d; } } 4317 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4318 Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4319 /* IsPostfixUpdate */ false, 4320 /* IsFailOnly */ false)); 4321 // { r = x == e; if(r) { x = d; } else { v = x; } } 4322 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4323 Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4324 /* IsPostfixUpdate */ false, 4325 /* IsFailOnly */ true)); 4326 4327 // { v = x; cond-update-stmt } 4328 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4329 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true, 4330 /* IsPostfixUpdate */ true, 4331 /* IsFailOnly */ false)); 4332 // { cond-update-stmt v = x; } 4333 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4334 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false, 4335 /* IsPostfixUpdate */ false, 4336 /* IsFailOnly */ false)); 4337 4338 BasicBlock *EntryBB = BB; 4339 EXPECT_EQ(EntryBB->getParent()->size(), 5U); 4340 BasicBlock *Cont1 = 
dyn_cast<BasicBlock>(EntryBB->getNextNode()); 4341 EXPECT_NE(Cont1, nullptr); 4342 BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode()); 4343 EXPECT_NE(Exit1, nullptr); 4344 BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode()); 4345 EXPECT_NE(Cont2, nullptr); 4346 BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode()); 4347 EXPECT_NE(Exit2, nullptr); 4348 4349 AtomicCmpXchgInst *CmpXchg1 = 4350 dyn_cast<AtomicCmpXchgInst>(Init->getNextNode()); 4351 EXPECT_NE(CmpXchg1, nullptr); 4352 EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal); 4353 EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr); 4354 EXPECT_EQ(CmpXchg1->getNewValOperand(), D); 4355 ExtractValueInst *ExtVal1 = 4356 dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode()); 4357 EXPECT_NE(ExtVal1, nullptr); 4358 EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1); 4359 EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U)); 4360 ExtractValueInst *ExtVal2 = 4361 dyn_cast<ExtractValueInst>(ExtVal1->getNextNode()); 4362 EXPECT_NE(ExtVal2, nullptr); 4363 EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1); 4364 EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U)); 4365 SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode()); 4366 EXPECT_NE(Sel1, nullptr); 4367 EXPECT_EQ(Sel1->getCondition(), ExtVal2); 4368 EXPECT_EQ(Sel1->getTrueValue(), Expr); 4369 EXPECT_EQ(Sel1->getFalseValue(), ExtVal1); 4370 StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode()); 4371 EXPECT_NE(Store1, nullptr); 4372 EXPECT_EQ(Store1->getPointerOperand(), VVal); 4373 EXPECT_EQ(Store1->getValueOperand(), Sel1); 4374 4375 AtomicCmpXchgInst *CmpXchg2 = 4376 dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode()); 4377 EXPECT_NE(CmpXchg2, nullptr); 4378 EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal); 4379 EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr); 4380 EXPECT_EQ(CmpXchg2->getNewValOperand(), D); 4381 ExtractValueInst *ExtVal3 = 4382 dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode()); 4383 
EXPECT_NE(ExtVal3, nullptr); 4384 EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2); 4385 EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U)); 4386 StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode()); 4387 EXPECT_NE(Store2, nullptr); 4388 EXPECT_EQ(Store2->getPointerOperand(), VVal); 4389 EXPECT_EQ(Store2->getValueOperand(), ExtVal3); 4390 4391 AtomicCmpXchgInst *CmpXchg3 = 4392 dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode()); 4393 EXPECT_NE(CmpXchg3, nullptr); 4394 EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal); 4395 EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr); 4396 EXPECT_EQ(CmpXchg3->getNewValOperand(), D); 4397 ExtractValueInst *ExtVal4 = 4398 dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode()); 4399 EXPECT_NE(ExtVal4, nullptr); 4400 EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3); 4401 EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U)); 4402 ExtractValueInst *ExtVal5 = 4403 dyn_cast<ExtractValueInst>(ExtVal4->getNextNode()); 4404 EXPECT_NE(ExtVal5, nullptr); 4405 EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3); 4406 EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U)); 4407 BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode()); 4408 EXPECT_NE(Br1, nullptr); 4409 EXPECT_EQ(Br1->isConditional(), true); 4410 EXPECT_EQ(Br1->getCondition(), ExtVal5); 4411 EXPECT_EQ(Br1->getSuccessor(0), Exit1); 4412 EXPECT_EQ(Br1->getSuccessor(1), Cont1); 4413 4414 StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front()); 4415 EXPECT_NE(Store3, nullptr); 4416 EXPECT_EQ(Store3->getPointerOperand(), VVal); 4417 EXPECT_EQ(Store3->getValueOperand(), ExtVal4); 4418 BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode()); 4419 EXPECT_NE(Br2, nullptr); 4420 EXPECT_EQ(Br2->isUnconditional(), true); 4421 EXPECT_EQ(Br2->getSuccessor(0), Exit1); 4422 4423 AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front()); 4424 EXPECT_NE(CmpXchg4, nullptr); 4425 EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal); 4426 
EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr); 4427 EXPECT_EQ(CmpXchg4->getNewValOperand(), D); 4428 ExtractValueInst *ExtVal6 = 4429 dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode()); 4430 EXPECT_NE(ExtVal6, nullptr); 4431 EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4); 4432 EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U)); 4433 ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode()); 4434 EXPECT_NE(ZExt1, nullptr); 4435 EXPECT_EQ(ZExt1->getDestTy(), Int32); 4436 StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode()); 4437 EXPECT_NE(Store4, nullptr); 4438 EXPECT_EQ(Store4->getPointerOperand(), RVal); 4439 EXPECT_EQ(Store4->getValueOperand(), ZExt1); 4440 4441 AtomicCmpXchgInst *CmpXchg5 = 4442 dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode()); 4443 EXPECT_NE(CmpXchg5, nullptr); 4444 EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal); 4445 EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr); 4446 EXPECT_EQ(CmpXchg5->getNewValOperand(), D); 4447 ExtractValueInst *ExtVal7 = 4448 dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode()); 4449 EXPECT_NE(ExtVal7, nullptr); 4450 EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5); 4451 EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U)); 4452 ExtractValueInst *ExtVal8 = 4453 dyn_cast<ExtractValueInst>(ExtVal7->getNextNode()); 4454 EXPECT_NE(ExtVal8, nullptr); 4455 EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5); 4456 EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U)); 4457 BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode()); 4458 EXPECT_NE(Br3, nullptr); 4459 EXPECT_EQ(Br3->isConditional(), true); 4460 EXPECT_EQ(Br3->getCondition(), ExtVal8); 4461 EXPECT_EQ(Br3->getSuccessor(0), Exit2); 4462 EXPECT_EQ(Br3->getSuccessor(1), Cont2); 4463 4464 StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front()); 4465 EXPECT_NE(Store5, nullptr); 4466 EXPECT_EQ(Store5->getPointerOperand(), VVal); 4467 EXPECT_EQ(Store5->getValueOperand(), ExtVal7); 4468 BranchInst *Br4 = 
dyn_cast<BranchInst>(Store5->getNextNode()); 4469 EXPECT_NE(Br4, nullptr); 4470 EXPECT_EQ(Br4->isUnconditional(), true); 4471 EXPECT_EQ(Br4->getSuccessor(0), Exit2); 4472 4473 ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front()); 4474 EXPECT_NE(ExtVal9, nullptr); 4475 EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5); 4476 EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U)); 4477 ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode()); 4478 EXPECT_NE(ZExt2, nullptr); 4479 EXPECT_EQ(ZExt2->getDestTy(), Int32); 4480 StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode()); 4481 EXPECT_NE(Store6, nullptr); 4482 EXPECT_EQ(Store6->getPointerOperand(), RVal); 4483 EXPECT_EQ(Store6->getValueOperand(), ZExt2); 4484 4485 AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode()); 4486 EXPECT_NE(ARWM1, nullptr); 4487 EXPECT_EQ(ARWM1->getPointerOperand(), XVal); 4488 EXPECT_EQ(ARWM1->getValOperand(), Expr); 4489 EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min); 4490 StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode()); 4491 EXPECT_NE(Store7, nullptr); 4492 EXPECT_EQ(Store7->getPointerOperand(), VVal); 4493 EXPECT_EQ(Store7->getValueOperand(), ARWM1); 4494 4495 AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode()); 4496 EXPECT_NE(ARWM2, nullptr); 4497 EXPECT_EQ(ARWM2->getPointerOperand(), XVal); 4498 EXPECT_EQ(ARWM2->getValOperand(), Expr); 4499 EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max); 4500 CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode()); 4501 EXPECT_NE(Cmp1, nullptr); 4502 EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT); 4503 EXPECT_EQ(Cmp1->getOperand(0), ARWM2); 4504 EXPECT_EQ(Cmp1->getOperand(1), Expr); 4505 SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode()); 4506 EXPECT_NE(Sel2, nullptr); 4507 EXPECT_EQ(Sel2->getCondition(), Cmp1); 4508 EXPECT_EQ(Sel2->getTrueValue(), Expr); 4509 EXPECT_EQ(Sel2->getFalseValue(), ARWM2); 4510 StoreInst *Store8 = 
dyn_cast<StoreInst>(Sel2->getNextNode()); 4511 EXPECT_NE(Store8, nullptr); 4512 EXPECT_EQ(Store8->getPointerOperand(), VVal); 4513 EXPECT_EQ(Store8->getValueOperand(), Sel2); 4514 4515 Builder.CreateRetVoid(); 4516 OMPBuilder.finalize(); 4517 EXPECT_FALSE(verifyModule(*M, &errs())); 4518 } 4519 4520 TEST_F(OpenMPIRBuilderTest, CreateTeams) { 4521 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4522 OpenMPIRBuilder OMPBuilder(*M); 4523 OMPBuilder.Config.IsTargetDevice = false; 4524 OMPBuilder.initialize(); 4525 F->setName("func"); 4526 IRBuilder<> Builder(BB); 4527 4528 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 4529 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 4530 Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load"); 4531 4532 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4533 Builder.restoreIP(AllocaIP); 4534 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 4535 "bodygen.alloca128"); 4536 4537 Builder.restoreIP(CodeGenIP); 4538 // Loading and storing captured pointer and values 4539 Builder.CreateStore(Val128, Local128); 4540 Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 4541 "bodygen.load32"); 4542 4543 LoadInst *PrivLoad128 = Builder.CreateLoad( 4544 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 4545 Value *Cmp = Builder.CreateICmpNE( 4546 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 4547 Instruction *ThenTerm, *ElseTerm; 4548 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 4549 &ThenTerm, &ElseTerm); 4550 return Error::success(); 4551 }; 4552 4553 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4554 ASSERT_EXPECTED_INIT( 4555 OpenMPIRBuilder::InsertPointTy, AfterIP, 4556 OMPBuilder.createTeams(Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, 4557 /*NumTeamsUpper=*/nullptr, 4558 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4559 
Builder.restoreIP(AfterIP); 4560 4561 OMPBuilder.finalize(); 4562 Builder.CreateRetVoid(); 4563 4564 EXPECT_FALSE(verifyModule(*M, &errs())); 4565 4566 CallInst *TeamsForkCall = dyn_cast<CallInst>( 4567 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams) 4568 ->user_back()); 4569 4570 // Verify the Ident argument 4571 GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0)); 4572 ASSERT_NE(Ident, nullptr); 4573 EXPECT_TRUE(Ident->hasInitializer()); 4574 Constant *Initializer = Ident->getInitializer(); 4575 GlobalVariable *SrcStrGlob = 4576 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 4577 ASSERT_NE(SrcStrGlob, nullptr); 4578 ConstantDataArray *SrcSrc = 4579 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 4580 ASSERT_NE(SrcSrc, nullptr); 4581 4582 // Verify the outlined function signature. 4583 Function *OutlinedFn = 4584 dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts()); 4585 ASSERT_NE(OutlinedFn, nullptr); 4586 EXPECT_FALSE(OutlinedFn->isDeclaration()); 4587 EXPECT_TRUE(OutlinedFn->arg_size() >= 3); 4588 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid 4589 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid 4590 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 4591 Builder.getPtrTy()); // captured args 4592 4593 // Check for TruncInst and ICmpInst in the outlined function. 
4594 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4595 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 4596 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4597 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 4598 } 4599 4600 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { 4601 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4602 OpenMPIRBuilder OMPBuilder(*M); 4603 OMPBuilder.Config.IsTargetDevice = false; 4604 OMPBuilder.initialize(); 4605 F->setName("func"); 4606 IRBuilder<> &Builder = OMPBuilder.Builder; 4607 Builder.SetInsertPoint(BB); 4608 4609 Function *FakeFunction = 4610 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4611 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4612 4613 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4614 Builder.restoreIP(CodeGenIP); 4615 Builder.CreateCall(FakeFunction, {}); 4616 return Error::success(); 4617 }; 4618 4619 // `F` has an argument - an integer, so we use that as the thread limit. 
4620 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4621 OMPBuilder.createTeams( 4622 /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, 4623 /*NumTeamsUpper=*/nullptr, 4624 /*ThreadLimit=*/F->arg_begin(), 4625 /*IfExpr=*/nullptr)); 4626 Builder.restoreIP(AfterIP); 4627 4628 Builder.CreateRetVoid(); 4629 OMPBuilder.finalize(); 4630 4631 ASSERT_FALSE(verifyModule(*M)); 4632 4633 CallInst *PushNumTeamsCallInst = 4634 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4635 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4636 4637 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0)); 4638 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0)); 4639 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin()); 4640 4641 // Verifying that the next instruction to execute is kmpc_fork_teams 4642 BranchInst *BrInst = 4643 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4644 ASSERT_NE(BrInst, nullptr); 4645 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4646 BasicBlock::iterator NextInstruction = 4647 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4648 CallInst *ForkTeamsCI = nullptr; 4649 if (NextInstruction != BrInst->getSuccessor(0)->end()) 4650 ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4651 ASSERT_NE(ForkTeamsCI, nullptr); 4652 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4653 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4654 } 4655 4656 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { 4657 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4658 OpenMPIRBuilder OMPBuilder(*M); 4659 OMPBuilder.Config.IsTargetDevice = false; 4660 OMPBuilder.initialize(); 4661 F->setName("func"); 4662 IRBuilder<> &Builder = OMPBuilder.Builder; 4663 Builder.SetInsertPoint(BB); 4664 4665 Function *FakeFunction = 4666 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4667 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 
4668 4669 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4670 Builder.restoreIP(CodeGenIP); 4671 Builder.CreateCall(FakeFunction, {}); 4672 return Error::success(); 4673 }; 4674 4675 // `F` already has an integer argument, so we use that as upper bound to 4676 // `num_teams` 4677 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4678 OMPBuilder.createTeams(Builder, BodyGenCB, 4679 /*NumTeamsLower=*/nullptr, 4680 /*NumTeamsUpper=*/F->arg_begin(), 4681 /*ThreadLimit=*/nullptr, 4682 /*IfExpr=*/nullptr)); 4683 Builder.restoreIP(AfterIP); 4684 4685 Builder.CreateRetVoid(); 4686 OMPBuilder.finalize(); 4687 4688 ASSERT_FALSE(verifyModule(*M)); 4689 4690 CallInst *PushNumTeamsCallInst = 4691 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4692 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4693 4694 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin()); 4695 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin()); 4696 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0)); 4697 4698 // Verifying that the next instruction to execute is kmpc_fork_teams 4699 BranchInst *BrInst = 4700 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4701 ASSERT_NE(BrInst, nullptr); 4702 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4703 BasicBlock::iterator NextInstruction = 4704 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4705 CallInst *ForkTeamsCI = nullptr; 4706 if (NextInstruction != BrInst->getSuccessor(0)->end()) 4707 ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4708 ASSERT_NE(ForkTeamsCI, nullptr); 4709 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4710 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4711 } 4712 4713 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { 4714 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4715 OpenMPIRBuilder OMPBuilder(*M); 4716 OMPBuilder.Config.IsTargetDevice = false; 4717 
OMPBuilder.initialize(); 4718 F->setName("func"); 4719 IRBuilder<> &Builder = OMPBuilder.Builder; 4720 Builder.SetInsertPoint(BB); 4721 4722 Function *FakeFunction = 4723 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4724 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4725 4726 Value *NumTeamsLower = 4727 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower"); 4728 Value *NumTeamsUpper = 4729 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); 4730 4731 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4732 Builder.restoreIP(CodeGenIP); 4733 Builder.CreateCall(FakeFunction, {}); 4734 return Error::success(); 4735 }; 4736 4737 // `F` already has an integer argument, so we use that as upper bound to 4738 // `num_teams` 4739 ASSERT_EXPECTED_INIT( 4740 OpenMPIRBuilder::InsertPointTy, AfterIP, 4741 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, 4742 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4743 Builder.restoreIP(AfterIP); 4744 4745 Builder.CreateRetVoid(); 4746 OMPBuilder.finalize(); 4747 4748 ASSERT_FALSE(verifyModule(*M)); 4749 4750 CallInst *PushNumTeamsCallInst = 4751 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4752 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4753 4754 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower); 4755 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper); 4756 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0)); 4757 4758 // Verifying that the next instruction to execute is kmpc_fork_teams 4759 BranchInst *BrInst = 4760 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4761 ASSERT_NE(BrInst, nullptr); 4762 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4763 BasicBlock::iterator NextInstruction = 4764 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4765 CallInst *ForkTeamsCI = nullptr; 4766 if (NextInstruction != 
BrInst->getSuccessor(0)->end()) 4767 ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4768 ASSERT_NE(ForkTeamsCI, nullptr); 4769 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4770 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4771 } 4772 4773 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { 4774 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4775 OpenMPIRBuilder OMPBuilder(*M); 4776 OMPBuilder.Config.IsTargetDevice = false; 4777 OMPBuilder.initialize(); 4778 F->setName("func"); 4779 IRBuilder<> &Builder = OMPBuilder.Builder; 4780 Builder.SetInsertPoint(BB); 4781 4782 BasicBlock *CodegenBB = splitBB(Builder, true); 4783 Builder.SetInsertPoint(CodegenBB); 4784 4785 // Generate values for `num_teams` and `thread_limit` using the first argument 4786 // of the testing function. 4787 Value *NumTeamsLower = 4788 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower"); 4789 Value *NumTeamsUpper = 4790 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); 4791 Value *ThreadLimit = 4792 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit"); 4793 4794 Function *FakeFunction = 4795 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4796 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4797 4798 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4799 Builder.restoreIP(CodeGenIP); 4800 Builder.CreateCall(FakeFunction, {}); 4801 return Error::success(); 4802 }; 4803 4804 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4805 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4806 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, 4807 NumTeamsUpper, ThreadLimit, 4808 nullptr)); 4809 Builder.restoreIP(AfterIP); 4810 4811 Builder.CreateRetVoid(); 4812 OMPBuilder.finalize(); 4813 4814 ASSERT_FALSE(verifyModule(*M)); 4815 4816 CallInst *PushNumTeamsCallInst = 4817 findSingleCall(F, 
OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4818 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4819 4820 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower); 4821 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper); 4822 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit); 4823 4824 // Verifying that the next instruction to execute is kmpc_fork_teams 4825 BranchInst *BrInst = 4826 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4827 ASSERT_NE(BrInst, nullptr); 4828 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4829 BasicBlock::iterator NextInstruction = 4830 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4831 CallInst *ForkTeamsCI = nullptr; 4832 if (NextInstruction != BrInst->getSuccessor(0)->end()) 4833 ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4834 ASSERT_NE(ForkTeamsCI, nullptr); 4835 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4836 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4837 } 4838 4839 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { 4840 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4841 OpenMPIRBuilder OMPBuilder(*M); 4842 OMPBuilder.Config.IsTargetDevice = false; 4843 OMPBuilder.initialize(); 4844 F->setName("func"); 4845 IRBuilder<> &Builder = OMPBuilder.Builder; 4846 Builder.SetInsertPoint(BB); 4847 4848 Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(), 4849 Builder.CreateAlloca(Builder.getInt1Ty())); 4850 4851 Function *FakeFunction = 4852 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4853 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4854 4855 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4856 Builder.restoreIP(CodeGenIP); 4857 Builder.CreateCall(FakeFunction, {}); 4858 return Error::success(); 4859 }; 4860 4861 // `F` already has an integer argument, so we use that as upper bound to 4862 // `num_teams` 4863 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, 
AfterIP, 4864 OMPBuilder.createTeams(Builder, BodyGenCB, 4865 /*NumTeamsLower=*/nullptr, 4866 /*NumTeamsUpper=*/nullptr, 4867 /*ThreadLimit=*/nullptr, IfExpr)); 4868 Builder.restoreIP(AfterIP); 4869 4870 Builder.CreateRetVoid(); 4871 OMPBuilder.finalize(); 4872 4873 ASSERT_FALSE(verifyModule(*M)); 4874 4875 CallInst *PushNumTeamsCallInst = 4876 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4877 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4878 Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2); 4879 Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3); 4880 Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4); 4881 4882 // Check the lower_bound 4883 ASSERT_NE(NumTeamsLower, nullptr); 4884 SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower); 4885 ASSERT_NE(NumTeamsLowerSelectInst, nullptr); 4886 EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr); 4887 EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0)); 4888 EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); 4889 4890 // Check the upper_bound 4891 ASSERT_NE(NumTeamsUpper, nullptr); 4892 SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper); 4893 ASSERT_NE(NumTeamsUpperSelectInst, nullptr); 4894 EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr); 4895 EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0)); 4896 EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); 4897 4898 // Check thread_limit 4899 EXPECT_EQ(ThreadLimit, Builder.getInt32(0)); 4900 } 4901 4902 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { 4903 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4904 OpenMPIRBuilder OMPBuilder(*M); 4905 OMPBuilder.Config.IsTargetDevice = false; 4906 OMPBuilder.initialize(); 4907 F->setName("func"); 4908 IRBuilder<> &Builder = OMPBuilder.Builder; 4909 Builder.SetInsertPoint(BB); 4910 4911 Value *IfExpr = 
Builder.CreateLoad( 4912 Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty())); 4913 Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5)); 4914 Value *NumTeamsUpper = 4915 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10)); 4916 Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20)); 4917 4918 Function *FakeFunction = 4919 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4920 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4921 4922 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4923 Builder.restoreIP(CodeGenIP); 4924 Builder.CreateCall(FakeFunction, {}); 4925 return Error::success(); 4926 }; 4927 4928 // `F` already has an integer argument, so we use that as upper bound to 4929 // `num_teams` 4930 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4931 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, 4932 NumTeamsUpper, ThreadLimit, 4933 IfExpr)); 4934 Builder.restoreIP(AfterIP); 4935 4936 Builder.CreateRetVoid(); 4937 OMPBuilder.finalize(); 4938 4939 ASSERT_FALSE(verifyModule(*M)); 4940 4941 CallInst *PushNumTeamsCallInst = 4942 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4943 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4944 Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2); 4945 Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3); 4946 Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4); 4947 4948 // Get the boolean conversion of if expression 4949 ASSERT_EQ(IfExpr->getNumUses(), 1U); 4950 User *IfExprInst = IfExpr->user_back(); 4951 ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst); 4952 ASSERT_NE(IfExprCmpInst, nullptr); 4953 EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE); 4954 EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr); 4955 EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0)); 4956 4957 // Check the lower_bound 4958 
ASSERT_NE(NumTeamsLowerArg, nullptr); 4959 SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg); 4960 ASSERT_NE(NumTeamsLowerSelectInst, nullptr); 4961 EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst); 4962 EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower); 4963 EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); 4964 4965 // Check the upper_bound 4966 ASSERT_NE(NumTeamsUpperArg, nullptr); 4967 SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg); 4968 ASSERT_NE(NumTeamsUpperSelectInst, nullptr); 4969 EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst); 4970 EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper); 4971 EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); 4972 4973 // Check thread_limit 4974 EXPECT_EQ(ThreadLimitArg, ThreadLimit); 4975 } 4976 4977 /// Returns the single instruction of InstTy type in BB that uses the value V. 4978 /// If there is more than one such instruction, returns null. 4979 template <typename InstTy> 4980 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) { 4981 InstTy *Result = nullptr; 4982 for (User *U : V->users()) { 4983 auto *Inst = dyn_cast<InstTy>(U); 4984 if (!Inst || Inst->getParent() != BB) 4985 continue; 4986 if (Result) { 4987 if (auto *SI = dyn_cast<StoreInst>(Inst)) { 4988 if (V == SI->getValueOperand()) 4989 continue; 4990 } else { 4991 return nullptr; 4992 } 4993 } 4994 Result = Inst; 4995 } 4996 return Result; 4997 } 4998 4999 /// Returns true if BB contains a simple binary reduction that loads a value 5000 /// from Accum, performs some binary operation with it, and stores it back to 5001 /// Accum. 
5002 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB, 5003 Instruction::BinaryOps *OpCode = nullptr) { 5004 StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB); 5005 if (!Store) 5006 return false; 5007 auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0)); 5008 if (!Stored) 5009 return false; 5010 if (OpCode && *OpCode != Stored->getOpcode()) 5011 return false; 5012 auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0)); 5013 return Load && Load->getOperand(0) == Accum; 5014 } 5015 5016 /// Returns true if BB contains a binary reduction that reduces V using a binary 5017 /// operator into an accumulator that is a function argument. 5018 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) { 5019 auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB); 5020 if (!ReductionOp) 5021 return false; 5022 5023 auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0)); 5024 if (!GlobalLoad) 5025 return false; 5026 5027 auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB); 5028 if (!Store) 5029 return false; 5030 5031 return Store->getPointerOperand() == GlobalLoad->getPointerOperand() && 5032 isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand())); 5033 } 5034 5035 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and 5036 /// [0, 1], respectively, and assigns results of these instructions to Zero and 5037 /// One. Returns true on success, false on failure or if such instructions are 5038 /// not unique among the users of Ptr. 
5039 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) { 5040 Zero = nullptr; 5041 One = nullptr; 5042 for (User *U : Ptr->users()) { 5043 if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { 5044 if (GEP->getNumIndices() != 2) 5045 continue; 5046 auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); 5047 auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2)); 5048 EXPECT_NE(FirstIdx, nullptr); 5049 EXPECT_NE(SecondIdx, nullptr); 5050 5051 EXPECT_TRUE(FirstIdx->isZero()); 5052 if (SecondIdx->isZero()) { 5053 if (Zero) 5054 return false; 5055 Zero = GEP; 5056 } else if (SecondIdx->isOne()) { 5057 if (One) 5058 return false; 5059 One = GEP; 5060 } else { 5061 return false; 5062 } 5063 } 5064 } 5065 return Zero != nullptr && One != nullptr; 5066 } 5067 5068 static OpenMPIRBuilder::InsertPointTy 5069 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 5070 Value *&Result) { 5071 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5072 Result = Builder.CreateFAdd(LHS, RHS, "red.add"); 5073 return Builder.saveIP(); 5074 } 5075 5076 static OpenMPIRBuilder::InsertPointTy 5077 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 5078 Value *RHS) { 5079 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5080 Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 5081 Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt, 5082 AtomicOrdering::Monotonic); 5083 return Builder.saveIP(); 5084 } 5085 5086 static OpenMPIRBuilder::InsertPointTy 5087 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 5088 Value *&Result) { 5089 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5090 Result = Builder.CreateXor(LHS, RHS, "red.xor"); 5091 return Builder.saveIP(); 5092 } 5093 5094 static OpenMPIRBuilder::InsertPointTy 5095 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 5096 Value *RHS) { 5097 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5098 
Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 5099 Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt, 5100 AtomicOrdering::Monotonic); 5101 return Builder.saveIP(); 5102 } 5103 5104 TEST_F(OpenMPIRBuilderTest, CreateReductions) { 5105 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5106 OpenMPIRBuilder OMPBuilder(*M); 5107 OMPBuilder.Config.IsTargetDevice = false; 5108 OMPBuilder.initialize(); 5109 F->setName("func"); 5110 IRBuilder<> Builder(BB); 5111 5112 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 5113 Builder.CreateBr(EnterBB); 5114 Builder.SetInsertPoint(EnterBB); 5115 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5116 5117 // Create variables to be reduced. 5118 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 5119 F->getEntryBlock().getFirstInsertionPt()); 5120 Type *SumType = Builder.getFloatTy(); 5121 Type *XorType = Builder.getInt32Ty(); 5122 Value *SumReduced; 5123 Value *XorReduced; 5124 { 5125 IRBuilderBase::InsertPointGuard Guard(Builder); 5126 Builder.restoreIP(OuterAllocaIP); 5127 SumReduced = Builder.CreateAlloca(SumType); 5128 XorReduced = Builder.CreateAlloca(XorType); 5129 } 5130 5131 // Store initial values of reductions into global variables. 5132 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 5133 Builder.CreateStore(Builder.getInt32(1), XorReduced); 5134 5135 // The loop body computes two reductions: 5136 // sum of (float) thread-id; 5137 // xor of thread-id; 5138 // and store the result in global variables. 
5139 InsertPointTy BodyIP, BodyAllocaIP; 5140 auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { 5141 IRBuilderBase::InsertPointGuard Guard(Builder); 5142 Builder.restoreIP(CodeGenIP); 5143 5144 uint32_t StrSize; 5145 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5146 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5147 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5148 Value *SumLocal = 5149 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 5150 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 5151 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 5152 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 5153 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 5154 Builder.CreateStore(Sum, SumReduced); 5155 Builder.CreateStore(Xor, XorReduced); 5156 5157 BodyIP = Builder.saveIP(); 5158 BodyAllocaIP = InnerAllocaIP; 5159 return Error::success(); 5160 }; 5161 5162 // Privatization for reduction creates local copies of reduction variables and 5163 // initializes them to reduction-neutral values. 
5164 Value *SumPrivatized; 5165 Value *XorPrivatized; 5166 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 5167 Value &Original, Value &Inner, Value *&ReplVal) { 5168 IRBuilderBase::InsertPointGuard Guard(Builder); 5169 Builder.restoreIP(InnerAllocaIP); 5170 if (&Original == SumReduced) { 5171 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 5172 ReplVal = SumPrivatized; 5173 } else if (&Original == XorReduced) { 5174 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 5175 ReplVal = XorPrivatized; 5176 } else { 5177 ReplVal = &Inner; 5178 return CodeGenIP; 5179 } 5180 5181 Builder.restoreIP(CodeGenIP); 5182 if (&Original == SumReduced) 5183 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 5184 SumPrivatized); 5185 else if (&Original == XorReduced) 5186 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 5187 5188 return Builder.saveIP(); 5189 }; 5190 5191 // Do nothing in finalization. 5192 auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; 5193 5194 ASSERT_EXPECTED_INIT( 5195 OpenMPIRBuilder::InsertPointTy, AfterIP, 5196 OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, 5197 /* IfCondition */ nullptr, 5198 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5199 /* IsCancellable */ false)); 5200 Builder.restoreIP(AfterIP); 5201 5202 OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { 5203 {SumType, SumReduced, SumPrivatized, 5204 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, 5205 /*ReductionGenClang=*/nullptr, sumAtomicReduction}, 5206 {XorType, XorReduced, XorPrivatized, 5207 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, 5208 /*ReductionGenClang=*/nullptr, xorAtomicReduction}}; 5209 OMPBuilder.Config.setIsGPU(false); 5210 5211 bool ReduceVariableByRef[] = {false, false}; 5212 ASSERT_THAT_EXPECTED(OMPBuilder.createReductions(BodyIP, BodyAllocaIP, 5213 ReductionInfos, 5214 ReduceVariableByRef), 5215 Succeeded()); 5216 
5217 Builder.restoreIP(AfterIP); 5218 Builder.CreateRetVoid(); 5219 5220 OMPBuilder.finalize(F); 5221 5222 // The IR must be valid. 5223 EXPECT_FALSE(verifyModule(*M)); 5224 5225 // Outlining must have happened. 5226 SmallVector<CallInst *> ForkCalls; 5227 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5228 ForkCalls); 5229 ASSERT_EQ(ForkCalls.size(), 1u); 5230 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5231 Function *Outlined = dyn_cast<Function>(CalleeVal); 5232 EXPECT_NE(Outlined, nullptr); 5233 5234 // Check that the lock variable was created with the expected name. 5235 GlobalVariable *LockVar = 5236 M->getGlobalVariable(".gomp_critical_user_.reduction.var"); 5237 EXPECT_NE(LockVar, nullptr); 5238 5239 // Find the allocation of a local array that will be used to call the runtime 5240 // reduciton function. 5241 BasicBlock &AllocBlock = Outlined->getEntryBlock(); 5242 Value *LocalArray = nullptr; 5243 for (Instruction &I : AllocBlock) { 5244 if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) { 5245 if (!Alloc->getAllocatedType()->isArrayTy() || 5246 !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy()) 5247 continue; 5248 LocalArray = Alloc; 5249 break; 5250 } 5251 } 5252 ASSERT_NE(LocalArray, nullptr); 5253 5254 // Find the call to the runtime reduction function. 5255 BasicBlock *BB = AllocBlock.getUniqueSuccessor(); 5256 Value *LocalArrayPtr = nullptr; 5257 Value *ReductionFnVal = nullptr; 5258 Value *SwitchArg = nullptr; 5259 for (Instruction &I : *BB) { 5260 if (CallInst *Call = dyn_cast<CallInst>(&I)) { 5261 if (Call->getCalledFunction() != 5262 OMPBuilder.getOrCreateRuntimeFunctionPtr( 5263 RuntimeFunction::OMPRTL___kmpc_reduce)) 5264 continue; 5265 LocalArrayPtr = Call->getOperand(4); 5266 ReductionFnVal = Call->getOperand(5); 5267 SwitchArg = Call; 5268 break; 5269 } 5270 } 5271 5272 // Check that the local array is passed to the function. 
5273 ASSERT_NE(LocalArrayPtr, nullptr); 5274 EXPECT_EQ(LocalArrayPtr, LocalArray); 5275 5276 // Find the GEP instructions preceding stores to the local array. 5277 Value *FirstArrayElemPtr = nullptr; 5278 Value *SecondArrayElemPtr = nullptr; 5279 EXPECT_EQ(LocalArray->getNumUses(), 3u); 5280 ASSERT_TRUE( 5281 findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr)); 5282 5283 // Check that the values stored into the local array are privatized reduction 5284 // variables. 5285 auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>( 5286 findStoredValue<GetElementPtrInst>(FirstArrayElemPtr)); 5287 auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>( 5288 findStoredValue<GetElementPtrInst>(SecondArrayElemPtr)); 5289 ASSERT_NE(FirstPrivatized, nullptr); 5290 ASSERT_NE(SecondPrivatized, nullptr); 5291 ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr)); 5292 EXPECT_TRUE(isSimpleBinaryReduction( 5293 FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5294 EXPECT_TRUE(isSimpleBinaryReduction( 5295 SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5296 5297 // Check that the result of the runtime reduction call is used for further 5298 // dispatch. 5299 ASSERT_EQ(SwitchArg->getNumUses(), 1u); 5300 SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin()); 5301 ASSERT_NE(Switch, nullptr); 5302 EXPECT_EQ(Switch->getNumSuccessors(), 3u); 5303 BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor(); 5304 BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor(); 5305 5306 // Non-atomic block contains reductions to the global reduction variable, 5307 // which is passed into the outlined function as an argument. 
5308 Value *FirstLoad = 5309 findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB); 5310 Value *SecondLoad = 5311 findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB); 5312 EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB)); 5313 EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB)); 5314 5315 // Atomic block also constains reductions to the global reduction variable. 5316 FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB); 5317 SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB); 5318 auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB); 5319 auto *SecondAtomic = 5320 findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB); 5321 ASSERT_NE(FirstAtomic, nullptr); 5322 Value *AtomicStorePointer = FirstAtomic->getPointerOperand(); 5323 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5324 ASSERT_NE(SecondAtomic, nullptr); 5325 AtomicStorePointer = SecondAtomic->getPointerOperand(); 5326 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5327 5328 // Check that the separate reduction function also performs (non-atomic) 5329 // reductions after extracting reduction variables from its arguments. 
5330 Function *ReductionFn = cast<Function>(ReductionFnVal); 5331 BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock(); 5332 Value *FirstLHSPtr; 5333 Value *SecondLHSPtr; 5334 ASSERT_TRUE( 5335 findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr)); 5336 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5337 ASSERT_NE(Opaque, nullptr); 5338 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5339 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5340 ASSERT_NE(Opaque, nullptr); 5341 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5342 5343 Value *FirstRHS; 5344 Value *SecondRHS; 5345 EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS)); 5346 } 5347 5348 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { 5349 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5350 OpenMPIRBuilder OMPBuilder(*M); 5351 OMPBuilder.Config.IsTargetDevice = false; 5352 OMPBuilder.initialize(); 5353 F->setName("func"); 5354 IRBuilder<> Builder(BB); 5355 5356 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 5357 Builder.CreateBr(EnterBB); 5358 Builder.SetInsertPoint(EnterBB); 5359 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5360 5361 // Create variables to be reduced. 5362 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 5363 F->getEntryBlock().getFirstInsertionPt()); 5364 Type *SumType = Builder.getFloatTy(); 5365 Type *XorType = Builder.getInt32Ty(); 5366 Value *SumReduced; 5367 Value *XorReduced; 5368 { 5369 IRBuilderBase::InsertPointGuard Guard(Builder); 5370 Builder.restoreIP(OuterAllocaIP); 5371 SumReduced = Builder.CreateAlloca(SumType); 5372 XorReduced = Builder.CreateAlloca(XorType); 5373 } 5374 5375 // Store initial values of reductions into global variables. 
5376 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 5377 Builder.CreateStore(Builder.getInt32(1), XorReduced); 5378 5379 InsertPointTy FirstBodyIP, FirstBodyAllocaIP; 5380 auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5381 InsertPointTy CodeGenIP) { 5382 IRBuilderBase::InsertPointGuard Guard(Builder); 5383 Builder.restoreIP(CodeGenIP); 5384 5385 uint32_t StrSize; 5386 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5387 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5388 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5389 Value *SumLocal = 5390 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 5391 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 5392 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 5393 Builder.CreateStore(Sum, SumReduced); 5394 5395 FirstBodyIP = Builder.saveIP(); 5396 FirstBodyAllocaIP = InnerAllocaIP; 5397 return Error::success(); 5398 }; 5399 5400 InsertPointTy SecondBodyIP, SecondBodyAllocaIP; 5401 auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5402 InsertPointTy CodeGenIP) { 5403 IRBuilderBase::InsertPointGuard Guard(Builder); 5404 Builder.restoreIP(CodeGenIP); 5405 5406 uint32_t StrSize; 5407 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5408 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5409 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5410 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 5411 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 5412 Builder.CreateStore(Xor, XorReduced); 5413 5414 SecondBodyIP = Builder.saveIP(); 5415 SecondBodyAllocaIP = InnerAllocaIP; 5416 return Error::success(); 5417 }; 5418 5419 // Privatization for reduction creates local copies of reduction variables and 5420 // initializes them to reduction-neutral values. 
The same privatization 5421 // callback is used for both loops, with dispatch based on the value being 5422 // privatized. 5423 Value *SumPrivatized; 5424 Value *XorPrivatized; 5425 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 5426 Value &Original, Value &Inner, Value *&ReplVal) { 5427 IRBuilderBase::InsertPointGuard Guard(Builder); 5428 Builder.restoreIP(InnerAllocaIP); 5429 if (&Original == SumReduced) { 5430 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 5431 ReplVal = SumPrivatized; 5432 } else if (&Original == XorReduced) { 5433 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 5434 ReplVal = XorPrivatized; 5435 } else { 5436 ReplVal = &Inner; 5437 return CodeGenIP; 5438 } 5439 5440 Builder.restoreIP(CodeGenIP); 5441 if (&Original == SumReduced) 5442 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 5443 SumPrivatized); 5444 else if (&Original == XorReduced) 5445 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 5446 5447 return Builder.saveIP(); 5448 }; 5449 5450 // Do nothing in finalization. 
5451 auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; 5452 5453 ASSERT_EXPECTED_INIT( 5454 OpenMPIRBuilder::InsertPointTy, AfterIP1, 5455 OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, 5456 FiniCB, /* IfCondition */ nullptr, 5457 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5458 /* IsCancellable */ false)); 5459 Builder.restoreIP(AfterIP1); 5460 ASSERT_EXPECTED_INIT( 5461 OpenMPIRBuilder::InsertPointTy, AfterIP2, 5462 OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, 5463 SecondBodyGenCB, PrivCB, FiniCB, 5464 /* IfCondition */ nullptr, 5465 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5466 /* IsCancellable */ false)); 5467 Builder.restoreIP(AfterIP2); 5468 5469 OMPBuilder.Config.setIsGPU(false); 5470 bool ReduceVariableByRef[] = {false}; 5471 5472 ASSERT_THAT_EXPECTED( 5473 OMPBuilder.createReductions( 5474 FirstBodyIP, FirstBodyAllocaIP, 5475 {{SumType, SumReduced, SumPrivatized, 5476 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, 5477 /*ReductionGenClang=*/nullptr, sumAtomicReduction}}, 5478 ReduceVariableByRef), 5479 Succeeded()); 5480 ASSERT_THAT_EXPECTED( 5481 OMPBuilder.createReductions( 5482 SecondBodyIP, SecondBodyAllocaIP, 5483 {{XorType, XorReduced, XorPrivatized, 5484 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, 5485 /*ReductionGenClang=*/nullptr, xorAtomicReduction}}, 5486 ReduceVariableByRef), 5487 Succeeded()); 5488 5489 Builder.restoreIP(AfterIP2); 5490 Builder.CreateRetVoid(); 5491 5492 OMPBuilder.finalize(F); 5493 5494 // The IR must be valid. 5495 EXPECT_FALSE(verifyModule(*M)); 5496 5497 // Two different outlined functions must have been created. 
5498 SmallVector<CallInst *> ForkCalls; 5499 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5500 ForkCalls); 5501 ASSERT_EQ(ForkCalls.size(), 2u); 5502 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5503 Function *FirstCallee = cast<Function>(CalleeVal); 5504 CalleeVal = ForkCalls[1]->getOperand(2); 5505 Function *SecondCallee = cast<Function>(CalleeVal); 5506 EXPECT_NE(FirstCallee, SecondCallee); 5507 5508 // Two different reduction functions must have been created. 5509 SmallVector<CallInst *> ReduceCalls; 5510 findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder, 5511 ReduceCalls); 5512 ASSERT_EQ(ReduceCalls.size(), 1u); 5513 auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5514 ReduceCalls.clear(); 5515 findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, 5516 OMPBuilder, ReduceCalls); 5517 auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5518 EXPECT_NE(AddReduction, XorReduction); 5519 5520 // Each reduction function does its own kind of reduction. 
5521 BasicBlock *FnReductionBB = &AddReduction->getEntryBlock(); 5522 Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5523 AddReduction->getArg(0), FnReductionBB); 5524 ASSERT_NE(FirstLHSPtr, nullptr); 5525 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5526 ASSERT_NE(Opaque, nullptr); 5527 Instruction::BinaryOps Opcode = Instruction::FAdd; 5528 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5529 5530 FnReductionBB = &XorReduction->getEntryBlock(); 5531 Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5532 XorReduction->getArg(0), FnReductionBB); 5533 ASSERT_NE(FirstLHSPtr, nullptr); 5534 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5535 ASSERT_NE(Opaque, nullptr); 5536 Opcode = Instruction::Xor; 5537 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5538 } 5539 5540 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { 5541 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5542 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5543 OpenMPIRBuilder OMPBuilder(*M); 5544 OMPBuilder.initialize(); 5545 F->setName("func"); 5546 IRBuilder<> Builder(BB); 5547 5548 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5549 Builder.CreateBr(EnterBB); 5550 Builder.SetInsertPoint(EnterBB); 5551 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5552 5553 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5554 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5555 5556 auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; 5557 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 5558 return Error::success(); 5559 }; 5560 SectionCBVector.push_back(SectionCB); 5561 5562 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5563 llvm::Value &, llvm::Value &Val, 5564 llvm::Value *&ReplVal) { return CodeGenIP; }; 5565 IRBuilder<>::InsertPoint 
AllocaIP(&F->getEntryBlock(), 5566 F->getEntryBlock().getFirstInsertionPt()); 5567 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 5568 OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5569 PrivCB, FiniCB, false, false)); 5570 Builder.restoreIP(AfterIP); 5571 Builder.CreateRetVoid(); // Required at the end of the function 5572 EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); 5573 EXPECT_FALSE(verifyModule(*M, &errs())); 5574 } 5575 5576 TEST_F(OpenMPIRBuilderTest, CreateSections) { 5577 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5578 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5579 OpenMPIRBuilder OMPBuilder(*M); 5580 OMPBuilder.initialize(); 5581 F->setName("func"); 5582 IRBuilder<> Builder(BB); 5583 5584 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5585 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5586 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5587 5588 BasicBlock *SwitchBB = nullptr; 5589 AllocaInst *PrivAI = nullptr; 5590 SwitchInst *Switch = nullptr; 5591 5592 unsigned NumBodiesGenerated = 0; 5593 unsigned NumFiniCBCalls = 0; 5594 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 5595 5596 auto FiniCB = [&](InsertPointTy IP) { 5597 ++NumFiniCBCalls; 5598 BasicBlock *IPBB = IP.getBlock(); 5599 EXPECT_NE(IPBB->end(), IP.getPoint()); 5600 }; 5601 5602 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 5603 ++NumBodiesGenerated; 5604 CaseBBs.push_back(CodeGenIP.getBlock()); 5605 SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor(); 5606 Builder.restoreIP(CodeGenIP); 5607 Builder.CreateStore(F->arg_begin(), PrivAI); 5608 Value *PrivLoad = 5609 Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca"); 5610 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 5611 return Error::success(); 5612 }; 5613 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5614 llvm::Value &, llvm::Value &Val, llvm::Value 
*&ReplVal) { 5615 // TODO: Privatization not implemented yet 5616 return CodeGenIP; 5617 }; 5618 5619 SectionCBVector.push_back(SectionCB); 5620 SectionCBVector.push_back(SectionCB); 5621 5622 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5623 F->getEntryBlock().getFirstInsertionPt()); 5624 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 5625 OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5626 PrivCB, FINICB_WRAPPER(FiniCB), 5627 false, false)); 5628 Builder.restoreIP(AfterIP); 5629 Builder.CreateRetVoid(); // Required at the end of the function 5630 5631 // Switch BB's predecessor is loop condition BB, whose successor at index 1 is 5632 // loop's exit BB 5633 BasicBlock *ForExitBB = 5634 SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1); 5635 EXPECT_NE(ForExitBB, nullptr); 5636 5637 EXPECT_NE(PrivAI, nullptr); 5638 Function *OutlinedFn = PrivAI->getFunction(); 5639 EXPECT_EQ(F, OutlinedFn); 5640 EXPECT_FALSE(verifyModule(*M, &errs())); 5641 EXPECT_EQ(OutlinedFn->arg_size(), 1U); 5642 5643 BasicBlock *LoopPreheaderBB = 5644 OutlinedFn->getEntryBlock().getSingleSuccessor(); 5645 // loop variables are 5 - lower bound, upper bound, stride, islastiter, and 5646 // iterator/counter 5647 bool FoundForInit = false; 5648 for (Instruction &Inst : *LoopPreheaderBB) { 5649 if (isa<CallInst>(Inst)) { 5650 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5651 "__kmpc_for_static_init_4u") { 5652 FoundForInit = true; 5653 } 5654 } 5655 } 5656 EXPECT_EQ(FoundForInit, true); 5657 5658 bool FoundForExit = false; 5659 bool FoundBarrier = false; 5660 for (Instruction &Inst : *ForExitBB) { 5661 if (isa<CallInst>(Inst)) { 5662 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5663 "__kmpc_for_static_fini") { 5664 FoundForExit = true; 5665 } 5666 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5667 "__kmpc_barrier") { 5668 FoundBarrier = true; 5669 } 5670 if (FoundForExit && FoundBarrier) 5671 break; 
5672 } 5673 } 5674 EXPECT_EQ(FoundForExit, true); 5675 EXPECT_EQ(FoundBarrier, true); 5676 5677 EXPECT_NE(SwitchBB, nullptr); 5678 EXPECT_NE(SwitchBB->getTerminator(), nullptr); 5679 EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true); 5680 Switch = cast<SwitchInst>(SwitchBB->getTerminator()); 5681 EXPECT_EQ(Switch->getNumCases(), 2U); 5682 5683 EXPECT_EQ(CaseBBs.size(), 2U); 5684 for (auto *&CaseBB : CaseBBs) { 5685 EXPECT_EQ(CaseBB->getParent(), OutlinedFn); 5686 } 5687 5688 ASSERT_EQ(NumBodiesGenerated, 2U); 5689 ASSERT_EQ(NumFiniCBCalls, 1U); 5690 EXPECT_FALSE(verifyModule(*M, &errs())); 5691 } 5692 5693 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { 5694 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5695 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5696 OpenMPIRBuilder OMPBuilder(*M); 5697 OMPBuilder.initialize(); 5698 F->setName("func"); 5699 IRBuilder<> Builder(BB); 5700 5701 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5702 Builder.CreateBr(EnterBB); 5703 Builder.SetInsertPoint(EnterBB); 5704 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5705 5706 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5707 F->getEntryBlock().getFirstInsertionPt()); 5708 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5709 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5710 llvm::Value &, llvm::Value &Val, 5711 llvm::Value *&ReplVal) { return CodeGenIP; }; 5712 auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; 5713 5714 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 5715 OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5716 PrivCB, FiniCB, false, true)); 5717 Builder.restoreIP(AfterIP); 5718 Builder.CreateRetVoid(); // Required at the end of the function 5719 for (auto &Inst : instructions(*F)) { 5720 EXPECT_FALSE(isa<CallInst>(Inst) && 5721 cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5722 
"__kmpc_barrier" && 5723 "call to function __kmpc_barrier found with nowait"); 5724 } 5725 } 5726 5727 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) { 5728 OpenMPIRBuilder OMPBuilder(*M); 5729 OMPBuilder.initialize(); 5730 5731 IRBuilder<> Builder(BB); 5732 5733 SmallVector<uint64_t> Mappings = {0, 1}; 5734 GlobalVariable *OffloadMaptypesGlobal = 5735 OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes"); 5736 EXPECT_FALSE(M->global_empty()); 5737 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes"); 5738 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5739 EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5740 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5741 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5742 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5743 EXPECT_TRUE(isa<ConstantDataArray>(Initializer)); 5744 ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer); 5745 EXPECT_EQ(MappingInit->getNumElements(), Mappings.size()); 5746 EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64)); 5747 Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings); 5748 EXPECT_EQ(MappingInit, CA); 5749 } 5750 5751 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { 5752 OpenMPIRBuilder OMPBuilder(*M); 5753 OMPBuilder.initialize(); 5754 5755 IRBuilder<> Builder(BB); 5756 5757 uint32_t StrSize; 5758 Constant *Cst1 = 5759 OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5760 Constant *Cst2 = 5761 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5762 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5763 5764 GlobalVariable *OffloadMaptypesGlobal = 5765 OMPBuilder.createOffloadMapnames(Names, "offload_mapnames"); 5766 EXPECT_FALSE(M->global_empty()); 5767 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames"); 5768 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5769 
EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5770 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5771 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5772 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5773 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts())); 5774 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts())); 5775 5776 GlobalVariable *Name1Gbl = 5777 cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts()); 5778 EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer())); 5779 ConstantDataArray *Name1GblCA = 5780 dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer()); 5781 EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;"); 5782 5783 GlobalVariable *Name2Gbl = 5784 cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts()); 5785 EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer())); 5786 ConstantDataArray *Name2GblCA = 5787 dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer()); 5788 EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;"); 5789 5790 EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy()); 5791 EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size()); 5792 } 5793 5794 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) { 5795 OpenMPIRBuilder OMPBuilder(*M); 5796 OMPBuilder.initialize(); 5797 F->setName("func"); 5798 IRBuilder<> Builder(BB); 5799 5800 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5801 5802 unsigned TotalNbOperand = 2; 5803 5804 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5805 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5806 F->getEntryBlock().getFirstInsertionPt()); 5807 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5808 EXPECT_NE(MapperAllocas.ArgsBase, nullptr); 5809 EXPECT_NE(MapperAllocas.Args, nullptr); 5810 EXPECT_NE(MapperAllocas.ArgSizes, nullptr); 5811 
EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy()); 5812 ArrayType *ArrType = 5813 dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType()); 5814 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5815 EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType() 5816 ->getArrayElementType() 5817 ->isPointerTy()); 5818 5819 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy()); 5820 ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType()); 5821 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5822 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType() 5823 ->getArrayElementType() 5824 ->isPointerTy()); 5825 5826 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy()); 5827 ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType()); 5828 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5829 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType() 5830 ->getArrayElementType() 5831 ->isIntegerTy(64)); 5832 } 5833 5834 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { 5835 OpenMPIRBuilder OMPBuilder(*M); 5836 OMPBuilder.initialize(); 5837 F->setName("func"); 5838 IRBuilder<> Builder(BB); 5839 LLVMContext &Ctx = M->getContext(); 5840 5841 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5842 5843 unsigned TotalNbOperand = 2; 5844 5845 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5846 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5847 F->getEntryBlock().getFirstInsertionPt()); 5848 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5849 5850 auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr( 5851 omp::OMPRTL___tgt_target_data_begin_mapper); 5852 5853 SmallVector<uint64_t> Flags = {0, 2}; 5854 5855 uint32_t StrSize; 5856 Constant *SrcLocCst = 5857 OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize); 5858 Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); 5859 5860 Constant *Cst1 = 5861 
OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5862 Constant *Cst2 = 5863 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5864 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5865 5866 GlobalVariable *Maptypes = 5867 OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes"); 5868 Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32( 5869 ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes, 5870 /*Idx0=*/0, /*Idx1=*/0); 5871 5872 GlobalVariable *Mapnames = 5873 OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames"); 5874 Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32( 5875 ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames, 5876 /*Idx0=*/0, /*Idx1=*/0); 5877 5878 OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo, 5879 MaptypesArg, MapnamesArg, MapperAllocas, -1, 5880 TotalNbOperand); 5881 5882 CallInst *MapperCall = dyn_cast<CallInst>(&BB->back()); 5883 EXPECT_NE(MapperCall, nullptr); 5884 EXPECT_EQ(MapperCall->arg_size(), 9U); 5885 EXPECT_EQ(MapperCall->getCalledFunction()->getName(), 5886 "__tgt_target_data_begin_mapper"); 5887 EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo); 5888 EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64)); 5889 EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32)); 5890 5891 EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg); 5892 EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg); 5893 EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy()); 5894 } 5895 5896 TEST_F(OpenMPIRBuilderTest, TargetEnterData) { 5897 OpenMPIRBuilder OMPBuilder(*M); 5898 OMPBuilder.initialize(); 5899 F->setName("func"); 5900 IRBuilder<> Builder(BB); 5901 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5902 5903 int64_t DeviceID = 2; 5904 5905 AllocaInst *Val1 = 5906 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5907 ASSERT_NE(Val1, nullptr); 5908 5909 IRBuilder<>::InsertPoint 
AllocaIP(&F->getEntryBlock(), 5910 F->getEntryBlock().getFirstInsertionPt()); 5911 5912 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5913 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5914 auto GenMapInfoCB = 5915 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5916 // Get map clause information. 5917 Builder.restoreIP(codeGenIP); 5918 5919 CombinedInfo.BasePointers.emplace_back(Val1); 5920 CombinedInfo.Pointers.emplace_back(Val1); 5921 CombinedInfo.DevicePointers.emplace_back( 5922 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5923 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5924 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1)); 5925 uint32_t temp; 5926 CombinedInfo.Names.emplace_back( 5927 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5928 return CombinedInfo; 5929 }; 5930 5931 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5932 /*RequiresDevicePointerInfo=*/false, 5933 /*SeparateBeginEndCalls=*/true); 5934 5935 OMPBuilder.Config.setIsGPU(true); 5936 5937 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper; 5938 ASSERT_EXPECTED_INIT( 5939 OpenMPIRBuilder::InsertPointTy, AfterIP, 5940 OMPBuilder.createTargetData( 5941 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5942 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5943 Builder.restoreIP(AfterIP); 5944 5945 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5946 EXPECT_NE(TargetDataCall, nullptr); 5947 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5948 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5949 "__tgt_target_data_begin_mapper"); 5950 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5951 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5952 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5953 5954 Builder.CreateRetVoid(); 5955 EXPECT_FALSE(verifyModule(*M, &errs())); 5956 } 5957 5958 TEST_F(OpenMPIRBuilderTest, 
TargetExitData) { 5959 OpenMPIRBuilder OMPBuilder(*M); 5960 OMPBuilder.initialize(); 5961 F->setName("func"); 5962 IRBuilder<> Builder(BB); 5963 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5964 5965 int64_t DeviceID = 2; 5966 5967 AllocaInst *Val1 = 5968 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5969 ASSERT_NE(Val1, nullptr); 5970 5971 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5972 F->getEntryBlock().getFirstInsertionPt()); 5973 5974 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5975 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5976 auto GenMapInfoCB = 5977 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5978 // Get map clause information. 5979 Builder.restoreIP(codeGenIP); 5980 5981 CombinedInfo.BasePointers.emplace_back(Val1); 5982 CombinedInfo.Pointers.emplace_back(Val1); 5983 CombinedInfo.DevicePointers.emplace_back( 5984 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5985 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5986 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2)); 5987 uint32_t temp; 5988 CombinedInfo.Names.emplace_back( 5989 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5990 return CombinedInfo; 5991 }; 5992 5993 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5994 /*RequiresDevicePointerInfo=*/false, 5995 /*SeparateBeginEndCalls=*/true); 5996 5997 OMPBuilder.Config.setIsGPU(true); 5998 5999 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper; 6000 ASSERT_EXPECTED_INIT( 6001 OpenMPIRBuilder::InsertPointTy, AfterIP, 6002 OMPBuilder.createTargetData( 6003 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 6004 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 6005 Builder.restoreIP(AfterIP); 6006 6007 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 6008 EXPECT_NE(TargetDataCall, nullptr); 6009 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 6010 
EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 6011 "__tgt_target_data_end_mapper"); 6012 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 6013 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 6014 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 6015 6016 Builder.CreateRetVoid(); 6017 EXPECT_FALSE(verifyModule(*M, &errs())); 6018 } 6019 6020 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { 6021 OpenMPIRBuilder OMPBuilder(*M); 6022 OMPBuilder.initialize(); 6023 F->setName("func"); 6024 IRBuilder<> Builder(BB); 6025 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6026 6027 int64_t DeviceID = 2; 6028 6029 AllocaInst *Val1 = 6030 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 6031 ASSERT_NE(Val1, nullptr); 6032 6033 AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy()); 6034 ASSERT_NE(Val2, nullptr); 6035 6036 AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy()); 6037 ASSERT_NE(Val3, nullptr); 6038 6039 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 6040 F->getEntryBlock().getFirstInsertionPt()); 6041 6042 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; 6043 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 6044 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6045 auto GenMapInfoCB = 6046 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 6047 // Get map clause information. 
6048 Builder.restoreIP(codeGenIP); 6049 uint32_t temp; 6050 6051 CombinedInfo.BasePointers.emplace_back(Val1); 6052 CombinedInfo.Pointers.emplace_back(Val1); 6053 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None); 6054 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 6055 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3)); 6056 CombinedInfo.Names.emplace_back( 6057 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 6058 6059 CombinedInfo.BasePointers.emplace_back(Val2); 6060 CombinedInfo.Pointers.emplace_back(Val2); 6061 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer); 6062 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 6063 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 6064 CombinedInfo.Names.emplace_back( 6065 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 6066 6067 CombinedInfo.BasePointers.emplace_back(Val3); 6068 CombinedInfo.Pointers.emplace_back(Val3); 6069 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address); 6070 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 6071 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 6072 CombinedInfo.Names.emplace_back( 6073 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 6074 return CombinedInfo; 6075 }; 6076 6077 llvm::OpenMPIRBuilder::TargetDataInfo Info( 6078 /*RequiresDevicePointerInfo=*/true, 6079 /*SeparateBeginEndCalls=*/true); 6080 6081 OMPBuilder.Config.setIsGPU(true); 6082 6083 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 6084 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 6085 if (BodyGenType == BodyGenTy::Priv) { 6086 EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u); 6087 Builder.restoreIP(CodeGenIP); 6088 CallInst *TargetDataCall = 6089 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 6090 EXPECT_NE(TargetDataCall, nullptr); 6091 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 6092 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 6093 
"__tgt_target_data_begin_mapper"); 6094 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 6095 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 6096 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 6097 6098 LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode()); 6099 EXPECT_NE(LI, nullptr); 6100 StoreInst *SI = dyn_cast<StoreInst>(&BB->back()); 6101 EXPECT_NE(SI, nullptr); 6102 EXPECT_EQ(SI->getValueOperand(), LI); 6103 EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second); 6104 EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second)); 6105 EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second)); 6106 Builder.CreateStore(Builder.getInt32(99), Val1); 6107 } 6108 return Builder.saveIP(); 6109 }; 6110 6111 ASSERT_EXPECTED_INIT( 6112 OpenMPIRBuilder::InsertPointTy, TargetDataIP1, 6113 OMPBuilder.createTargetData( 6114 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 6115 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB)); 6116 Builder.restoreIP(TargetDataIP1); 6117 6118 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 6119 EXPECT_NE(TargetDataCall, nullptr); 6120 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 6121 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 6122 "__tgt_target_data_end_mapper"); 6123 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 6124 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 6125 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 6126 6127 // Check that BodyGenCB is still made when IsTargetDevice is set to true. 
6128 OMPBuilder.Config.setIsTargetDevice(true); 6129 bool CheckDevicePassBodyGen = false; 6130 auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 6131 CheckDevicePassBodyGen = true; 6132 Builder.restoreIP(CodeGenIP); 6133 CallInst *TargetDataCall = 6134 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 6135 // Make sure no begin_mapper call is present for device pass. 6136 EXPECT_EQ(TargetDataCall, nullptr); 6137 return Builder.saveIP(); 6138 }; 6139 ASSERT_EXPECTED_INIT( 6140 OpenMPIRBuilder::InsertPointTy, TargetDataIP2, 6141 OMPBuilder.createTargetData( 6142 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 6143 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB)); 6144 Builder.restoreIP(TargetDataIP2); 6145 EXPECT_TRUE(CheckDevicePassBodyGen); 6146 6147 Builder.CreateRetVoid(); 6148 EXPECT_FALSE(verifyModule(*M, &errs())); 6149 } 6150 6151 namespace { 6152 // Some basic handling of argument mapping for the moment 6153 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder, 6154 llvm::SmallVectorImpl<llvm::Value *> &Args, 6155 llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) { 6156 for (auto Arg : Args) { 6157 CombinedInfo.BasePointers.emplace_back(Arg); 6158 CombinedInfo.Pointers.emplace_back(Arg); 6159 uint32_t SrcLocStrSize; 6160 CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr( 6161 "Unknown loc - stub implementation", SrcLocStrSize)); 6162 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags( 6163 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | 6164 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | 6165 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM)); 6166 CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64( 6167 OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType()))); 6168 } 6169 } 6170 } // namespace 6171 6172 TEST_F(OpenMPIRBuilderTest, TargetRegion) { 6173 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6174 
OpenMPIRBuilder OMPBuilder(*M); 6175 OMPBuilder.initialize(); 6176 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 6177 OMPBuilder.setConfig(Config); 6178 F->setName("func"); 6179 F->addFnAttr("target-cpu", "x86-64"); 6180 F->addFnAttr("target-features", "+mmx,+sse"); 6181 IRBuilder<> Builder(BB); 6182 auto *Int32Ty = Builder.getInt32Ty(); 6183 6184 AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr"); 6185 AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr"); 6186 AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr"); 6187 6188 Builder.CreateStore(Builder.getInt32(10), APtr); 6189 Builder.CreateStore(Builder.getInt32(20), BPtr); 6190 auto BodyGenCB = [&](InsertPointTy AllocaIP, 6191 InsertPointTy CodeGenIP) -> InsertPointTy { 6192 Builder.restoreIP(CodeGenIP); 6193 LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr); 6194 LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr); 6195 Value *Sum = Builder.CreateAdd(AVal, BVal); 6196 Builder.CreateStore(Sum, CPtr); 6197 return Builder.saveIP(); 6198 }; 6199 6200 llvm::SmallVector<llvm::Value *> Inputs; 6201 Inputs.push_back(APtr); 6202 Inputs.push_back(BPtr); 6203 Inputs.push_back(CPtr); 6204 6205 auto SimpleArgAccessorCB = 6206 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6207 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6208 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6209 if (!OMPBuilder.Config.isTargetDevice()) { 6210 RetVal = cast<llvm::Value>(&Arg); 6211 return CodeGenIP; 6212 } 6213 6214 Builder.restoreIP(AllocaIP); 6215 6216 llvm::Value *Addr = Builder.CreateAlloca( 6217 Arg.getType()->isPointerTy() 6218 ? 
Arg.getType() 6219 : Type::getInt64Ty(Builder.getContext()), 6220 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6221 llvm::Value *AddrAscast = 6222 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6223 Builder.CreateStore(&Arg, AddrAscast); 6224 6225 Builder.restoreIP(CodeGenIP); 6226 6227 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6228 6229 return Builder.saveIP(); 6230 }; 6231 6232 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6233 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6234 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6235 CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos); 6236 return CombinedInfos; 6237 }; 6238 6239 TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); 6240 OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); 6241 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6242 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6243 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC, 6244 /*MaxTeams=*/{10}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6245 RuntimeAttrs.TargetThreadLimit[0] = Builder.getInt32(20); 6246 RuntimeAttrs.TeamsThreadLimit[0] = Builder.getInt32(30); 6247 RuntimeAttrs.MaxThreads = Builder.getInt32(40); 6248 6249 ASSERT_EXPECTED_INIT( 6250 OpenMPIRBuilder::InsertPointTy, AfterIP, 6251 OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), 6252 Builder.saveIP(), EntryInfo, DefaultAttrs, 6253 RuntimeAttrs, /*IfCond=*/nullptr, Inputs, 6254 GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); 6255 Builder.restoreIP(AfterIP); 6256 6257 OMPBuilder.finalize(); 6258 Builder.CreateRetVoid(); 6259 6260 // Check the kernel launch sequence 6261 auto Iter = F->getEntryBlock().rbegin(); 6262 EXPECT_TRUE(isa<BranchInst>(&*(Iter))); 6263 BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter)); 6264 EXPECT_TRUE(isa<CmpInst>(&*(++Iter))); 6265 EXPECT_TRUE(isa<CallInst>(&*(++Iter))); 6266 CallInst *Call = 
dyn_cast<CallInst>(&*(Iter)); 6267 6268 // Check that the kernel launch function is called 6269 Function *KernelLaunchFunc = Call->getCalledFunction(); 6270 EXPECT_NE(KernelLaunchFunc, nullptr); 6271 StringRef FunctionName = KernelLaunchFunc->getName(); 6272 EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel")); 6273 6274 // Check num_teams and num_threads in call arguments 6275 EXPECT_TRUE(Call->arg_size() >= 4); 6276 Value *NumTeamsArg = Call->getArgOperand(2); 6277 EXPECT_TRUE(isa<ConstantInt>(NumTeamsArg)); 6278 EXPECT_EQ(10U, cast<ConstantInt>(NumTeamsArg)->getZExtValue()); 6279 Value *NumThreadsArg = Call->getArgOperand(3); 6280 EXPECT_TRUE(isa<ConstantInt>(NumThreadsArg)); 6281 EXPECT_EQ(20U, cast<ConstantInt>(NumThreadsArg)->getZExtValue()); 6282 6283 // Check num_teams and num_threads kernel arguments (use number 5 starting 6284 // from the end and counting the call to __tgt_target_kernel as the first use) 6285 Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1); 6286 EXPECT_TRUE(KernelArgs->getNumUses() >= 4); 6287 Value *NumTeamsGetElemPtr = *std::next(KernelArgs->user_begin(), 3); 6288 EXPECT_TRUE(isa<GetElementPtrInst>(NumTeamsGetElemPtr)); 6289 Value *NumTeamsStore = NumTeamsGetElemPtr->getUniqueUndroppableUser(); 6290 EXPECT_TRUE(isa<StoreInst>(NumTeamsStore)); 6291 Value *NumTeamsStoreArg = cast<StoreInst>(NumTeamsStore)->getValueOperand(); 6292 EXPECT_TRUE(isa<ConstantDataSequential>(NumTeamsStoreArg)); 6293 auto *NumTeamsStoreValue = cast<ConstantDataSequential>(NumTeamsStoreArg); 6294 EXPECT_EQ(3U, NumTeamsStoreValue->getNumElements()); 6295 EXPECT_EQ(10U, NumTeamsStoreValue->getElementAsInteger(0)); 6296 EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(1)); 6297 EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(2)); 6298 Value *NumThreadsGetElemPtr = *std::next(KernelArgs->user_begin(), 2); 6299 EXPECT_TRUE(isa<GetElementPtrInst>(NumThreadsGetElemPtr)); 6300 Value *NumThreadsStore = 
NumThreadsGetElemPtr->getUniqueUndroppableUser(); 6301 EXPECT_TRUE(isa<StoreInst>(NumThreadsStore)); 6302 Value *NumThreadsStoreArg = 6303 cast<StoreInst>(NumThreadsStore)->getValueOperand(); 6304 EXPECT_TRUE(isa<ConstantDataSequential>(NumThreadsStoreArg)); 6305 auto *NumThreadsStoreValue = cast<ConstantDataSequential>(NumThreadsStoreArg); 6306 EXPECT_EQ(3U, NumThreadsStoreValue->getNumElements()); 6307 EXPECT_EQ(20U, NumThreadsStoreValue->getElementAsInteger(0)); 6308 EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(1)); 6309 EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(2)); 6310 6311 // Check the fallback call 6312 BasicBlock *FallbackBlock = Branch->getSuccessor(0); 6313 Iter = FallbackBlock->rbegin(); 6314 CallInst *FCall = dyn_cast<CallInst>(&*(++Iter)); 6315 // 'F' has a dummy DISubprogram which causes OutlinedFunc to also 6316 // have a DISubprogram. In this case, the call to OutlinedFunc needs 6317 // to have a debug loc, otherwise verifier will complain. 6318 FCall->setDebugLoc(DL); 6319 EXPECT_NE(FCall, nullptr); 6320 6321 // Check that the correct aguments are passed in 6322 for (auto ArgInput : zip(FCall->args(), Inputs)) { 6323 EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput)); 6324 } 6325 6326 // Check that the outlined function exists with the expected prefix 6327 Function *OutlinedFunc = FCall->getCalledFunction(); 6328 EXPECT_NE(OutlinedFunc, nullptr); 6329 StringRef FunctionName2 = OutlinedFunc->getName(); 6330 EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading")); 6331 6332 // Check that target-cpu and target-features were propagated to the outlined 6333 // function 6334 EXPECT_EQ(OutlinedFunc->getFnAttribute("target-cpu"), 6335 F->getFnAttribute("target-cpu")); 6336 EXPECT_EQ(OutlinedFunc->getFnAttribute("target-features"), 6337 F->getFnAttribute("target-features")); 6338 6339 EXPECT_FALSE(verifyModule(*M, &errs())); 6340 } 6341 6342 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { 6343 OpenMPIRBuilder 
OMPBuilder(*M); 6344 OMPBuilder.setConfig( 6345 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6346 OMPBuilder.initialize(); 6347 6348 F->setName("func"); 6349 F->addFnAttr("target-cpu", "gfx90a"); 6350 F->addFnAttr("target-features", "+gfx9-insts,+wavefrontsize64"); 6351 IRBuilder<> Builder(BB); 6352 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6353 6354 LoadInst *Value = nullptr; 6355 StoreInst *TargetStore = nullptr; 6356 llvm::SmallVector<llvm::Value *, 2> CapturedArgs = { 6357 Constant::getNullValue(PointerType::get(Ctx, 0)), 6358 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6359 6360 auto SimpleArgAccessorCB = 6361 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6362 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6363 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6364 if (!OMPBuilder.Config.isTargetDevice()) { 6365 RetVal = cast<llvm::Value>(&Arg); 6366 return CodeGenIP; 6367 } 6368 6369 Builder.restoreIP(AllocaIP); 6370 6371 llvm::Value *Addr = Builder.CreateAlloca( 6372 Arg.getType()->isPointerTy() 6373 ? 
Arg.getType() 6374 : Type::getInt64Ty(Builder.getContext()), 6375 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6376 llvm::Value *AddrAscast = 6377 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6378 Builder.CreateStore(&Arg, AddrAscast); 6379 6380 Builder.restoreIP(CodeGenIP); 6381 6382 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6383 6384 return Builder.saveIP(); 6385 }; 6386 6387 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6388 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6389 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6390 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6391 return CombinedInfos; 6392 }; 6393 6394 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6395 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6396 -> OpenMPIRBuilder::InsertPointTy { 6397 Builder.restoreIP(CodeGenIP); 6398 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6399 TargetStore = Builder.CreateStore(Value, CapturedArgs[1]); 6400 return Builder.saveIP(); 6401 }; 6402 6403 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6404 F->getEntryBlock().getFirstInsertionPt()); 6405 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6406 /*Line=*/3, /*Count=*/0); 6407 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6408 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6409 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC, 6410 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6411 6412 ASSERT_EXPECTED_INIT( 6413 OpenMPIRBuilder::InsertPointTy, AfterIP, 6414 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6415 EntryInfo, DefaultAttrs, RuntimeAttrs, 6416 /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, 6417 BodyGenCB, SimpleArgAccessorCB)); 6418 Builder.restoreIP(AfterIP); 6419 6420 Builder.CreateRetVoid(); 6421 OMPBuilder.finalize(); 6422 6423 // Check outlined function 6424 
EXPECT_FALSE(verifyModule(*M, &errs())); 6425 EXPECT_NE(TargetStore, nullptr); 6426 Function *OutlinedFn = TargetStore->getFunction(); 6427 EXPECT_NE(F, OutlinedFn); 6428 6429 // Check that target-cpu and target-features were propagated to the outlined 6430 // function 6431 EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"), 6432 F->getFnAttribute("target-cpu")); 6433 EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"), 6434 F->getFnAttribute("target-features")); 6435 6436 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6437 // Account for the "implicit" first argument. 6438 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6439 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 6440 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6441 EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy()); 6442 6443 // Check entry block 6444 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6445 Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt(); 6446 EXPECT_NE(Alloca1, nullptr); 6447 6448 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6449 auto *Store1 = Alloca1->getNextNode(); 6450 EXPECT_TRUE(isa<StoreInst>(Store1)); 6451 auto *Alloca2 = Store1->getNextNode(); 6452 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6453 auto *Store2 = Alloca2->getNextNode(); 6454 EXPECT_TRUE(isa<StoreInst>(Store2)); 6455 6456 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6457 EXPECT_NE(InitCall, nullptr); 6458 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6459 EXPECT_EQ(InitCall->arg_size(), 2U); 6460 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6461 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6462 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6463 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6464 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6465 auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6466 
EXPECT_EQ(ConfigC->getAggregateElement(0U), 6467 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6468 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6469 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6470 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6471 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6472 6473 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6474 EXPECT_NE(EntryBlockBranch, nullptr); 6475 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6476 6477 // Check user code block 6478 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6479 EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6480 Instruction *Load1 = &*UserCodeBlock->getFirstNonPHIIt(); 6481 EXPECT_TRUE(isa<LoadInst>(Load1)); 6482 auto *Load2 = Load1->getNextNode(); 6483 EXPECT_TRUE(isa<LoadInst>(Load2)); 6484 6485 auto *OutlinedBlockBr = Load2->getNextNode(); 6486 EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr)); 6487 6488 auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0); 6489 EXPECT_EQ(OutlinedBlock->getName(), "outlined.body"); 6490 6491 Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt(); 6492 EXPECT_EQ(Value1, Value); 6493 EXPECT_EQ(Value1->getNextNode(), TargetStore); 6494 auto *Deinit = TargetStore->getNextNode(); 6495 EXPECT_NE(Deinit, nullptr); 6496 6497 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6498 EXPECT_NE(DeinitCall, nullptr); 6499 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6500 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6501 6502 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6503 6504 // Check exit block 6505 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6506 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6507 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt())); 6508 6509 // Check global exec_mode. 
6510 GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used"); 6511 EXPECT_NE(Used, nullptr); 6512 Constant *UsedInit = Used->getInitializer(); 6513 EXPECT_NE(UsedInit, nullptr); 6514 EXPECT_TRUE(isa<ConstantArray>(UsedInit)); 6515 auto *UsedInitData = cast<ConstantArray>(UsedInit); 6516 EXPECT_EQ(1U, UsedInitData->getNumOperands()); 6517 Constant *ExecMode = UsedInitData->getOperand(0); 6518 EXPECT_TRUE(isa<GlobalVariable>(ExecMode)); 6519 Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer(); 6520 EXPECT_NE(ExecModeValue, nullptr); 6521 EXPECT_TRUE(isa<ConstantInt>(ExecModeValue)); 6522 EXPECT_EQ(OMP_TGT_EXEC_MODE_GENERIC, 6523 cast<ConstantInt>(ExecModeValue)->getZExtValue()); 6524 } 6525 6526 TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) { 6527 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6528 OpenMPIRBuilder OMPBuilder(*M); 6529 OMPBuilder.initialize(); 6530 OpenMPIRBuilderConfig Config(/*IsTargetDevice=*/false, /*IsGPU=*/false, 6531 /*OpenMPOffloadMandatory=*/false, 6532 /*HasRequiresReverseOffload=*/false, 6533 /*HasRequiresUnifiedAddress=*/false, 6534 /*HasRequiresUnifiedSharedMemory=*/false, 6535 /*HasRequiresDynamicAllocators=*/false); 6536 OMPBuilder.setConfig(Config); 6537 F->setName("func"); 6538 IRBuilder<> Builder(BB); 6539 6540 auto BodyGenCB = [&](InsertPointTy, 6541 InsertPointTy CodeGenIP) -> InsertPointTy { 6542 Builder.restoreIP(CodeGenIP); 6543 return Builder.saveIP(); 6544 }; 6545 6546 auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&, 6547 OpenMPIRBuilder::InsertPointTy, 6548 OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6549 Builder.restoreIP(CodeGenIP); 6550 return Builder.saveIP(); 6551 }; 6552 6553 SmallVector<Value *> Inputs; 6554 OpenMPIRBuilder::MapInfosTy CombinedInfos; 6555 auto GenMapInfoCB = 6556 [&](OpenMPIRBuilder::InsertPointTy) -> OpenMPIRBuilder::MapInfosTy & { 6557 return CombinedInfos; 6558 }; 6559 6560 TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); 6561 
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); 6562 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6563 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6564 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD, 6565 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6566 RuntimeAttrs.LoopTripCount = Builder.getInt64(1000); 6567 6568 ASSERT_EXPECTED_INIT( 6569 OpenMPIRBuilder::InsertPointTy, AfterIP, 6570 OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), 6571 Builder.saveIP(), EntryInfo, DefaultAttrs, 6572 RuntimeAttrs, /*IfCond=*/nullptr, Inputs, 6573 GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); 6574 Builder.restoreIP(AfterIP); 6575 6576 OMPBuilder.finalize(); 6577 Builder.CreateRetVoid(); 6578 6579 // Check the kernel launch sequence 6580 auto Iter = F->getEntryBlock().rbegin(); 6581 EXPECT_TRUE(isa<BranchInst>(&*(Iter))); 6582 BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter)); 6583 EXPECT_TRUE(isa<CmpInst>(&*(++Iter))); 6584 EXPECT_TRUE(isa<CallInst>(&*(++Iter))); 6585 CallInst *Call = dyn_cast<CallInst>(&*(Iter)); 6586 6587 // Check that the kernel launch function is called 6588 Function *KernelLaunchFunc = Call->getCalledFunction(); 6589 EXPECT_NE(KernelLaunchFunc, nullptr); 6590 StringRef FunctionName = KernelLaunchFunc->getName(); 6591 EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel")); 6592 6593 // Check the trip count kernel argument (use number 5 starting from the end 6594 // and counting the call to __tgt_target_kernel as the first use) 6595 Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1); 6596 EXPECT_TRUE(KernelArgs->getNumUses() >= 6); 6597 Value *TripCountGetElemPtr = *std::next(KernelArgs->user_begin(), 5); 6598 EXPECT_TRUE(isa<GetElementPtrInst>(TripCountGetElemPtr)); 6599 Value *TripCountStore = TripCountGetElemPtr->getUniqueUndroppableUser(); 6600 EXPECT_TRUE(isa<StoreInst>(TripCountStore)); 6601 Value *TripCountStoreArg 
= cast<StoreInst>(TripCountStore)->getValueOperand(); 6602 EXPECT_TRUE(isa<ConstantInt>(TripCountStoreArg)); 6603 EXPECT_EQ(1000U, cast<ConstantInt>(TripCountStoreArg)->getZExtValue()); 6604 6605 // Check the fallback call 6606 BasicBlock *FallbackBlock = Branch->getSuccessor(0); 6607 Iter = FallbackBlock->rbegin(); 6608 CallInst *FCall = dyn_cast<CallInst>(&*(++Iter)); 6609 // 'F' has a dummy DISubprogram which causes OutlinedFunc to also 6610 // have a DISubprogram. In this case, the call to OutlinedFunc needs 6611 // to have a debug loc, otherwise verifier will complain. 6612 FCall->setDebugLoc(DL); 6613 EXPECT_NE(FCall, nullptr); 6614 6615 // Check that the outlined function exists with the expected prefix 6616 Function *OutlinedFunc = FCall->getCalledFunction(); 6617 EXPECT_NE(OutlinedFunc, nullptr); 6618 StringRef FunctionName2 = OutlinedFunc->getName(); 6619 EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading")); 6620 6621 EXPECT_FALSE(verifyModule(*M, &errs())); 6622 } 6623 6624 TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) { 6625 OpenMPIRBuilder OMPBuilder(*M); 6626 OMPBuilder.setConfig( 6627 OpenMPIRBuilderConfig(/*IsTargetDevice=*/true, /*IsGPU=*/false, 6628 /*OpenMPOffloadMandatory=*/false, 6629 /*HasRequiresReverseOffload=*/false, 6630 /*HasRequiresUnifiedAddress=*/false, 6631 /*HasRequiresUnifiedSharedMemory=*/false, 6632 /*HasRequiresDynamicAllocators=*/false)); 6633 OMPBuilder.initialize(); 6634 F->setName("func"); 6635 IRBuilder<> Builder(BB); 6636 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6637 6638 Function *OutlinedFn = nullptr; 6639 SmallVector<Value *> CapturedArgs; 6640 6641 auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&, 6642 OpenMPIRBuilder::InsertPointTy, 6643 OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6644 Builder.restoreIP(CodeGenIP); 6645 return Builder.saveIP(); 6646 }; 6647 6648 OpenMPIRBuilder::MapInfosTy CombinedInfos; 6649 auto GenMapInfoCB = 6650 [&](OpenMPIRBuilder::InsertPointTy) 
-> OpenMPIRBuilder::MapInfosTy & { 6651 return CombinedInfos; 6652 }; 6653 6654 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy, 6655 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6656 -> OpenMPIRBuilder::InsertPointTy { 6657 Builder.restoreIP(CodeGenIP); 6658 OutlinedFn = CodeGenIP.getBlock()->getParent(); 6659 return Builder.saveIP(); 6660 }; 6661 6662 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6663 F->getEntryBlock().getFirstInsertionPt()); 6664 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6665 /*Line=*/3, /*Count=*/0); 6666 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6667 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6668 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD, 6669 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6670 6671 ASSERT_EXPECTED_INIT( 6672 OpenMPIRBuilder::InsertPointTy, AfterIP, 6673 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6674 EntryInfo, DefaultAttrs, RuntimeAttrs, 6675 /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, 6676 BodyGenCB, SimpleArgAccessorCB)); 6677 Builder.restoreIP(AfterIP); 6678 6679 Builder.CreateRetVoid(); 6680 OMPBuilder.finalize(); 6681 6682 // Check outlined function 6683 EXPECT_FALSE(verifyModule(*M, &errs())); 6684 EXPECT_NE(OutlinedFn, nullptr); 6685 EXPECT_NE(F, OutlinedFn); 6686 6687 // Check that target-cpu and target-features were propagated to the outlined 6688 // function 6689 EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"), 6690 F->getFnAttribute("target-cpu")); 6691 EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"), 6692 F->getFnAttribute("target-features")); 6693 6694 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6695 // Account for the "implicit" first argument. 6696 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6697 EXPECT_EQ(OutlinedFn->arg_size(), 1U); 6698 6699 // Check global exec_mode. 
6700 GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used"); 6701 EXPECT_NE(Used, nullptr); 6702 Constant *UsedInit = Used->getInitializer(); 6703 EXPECT_NE(UsedInit, nullptr); 6704 EXPECT_TRUE(isa<ConstantArray>(UsedInit)); 6705 auto *UsedInitData = cast<ConstantArray>(UsedInit); 6706 EXPECT_EQ(1U, UsedInitData->getNumOperands()); 6707 Constant *ExecMode = UsedInitData->getOperand(0); 6708 EXPECT_TRUE(isa<GlobalVariable>(ExecMode)); 6709 Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer(); 6710 EXPECT_NE(ExecModeValue, nullptr); 6711 EXPECT_TRUE(isa<ConstantInt>(ExecModeValue)); 6712 EXPECT_EQ(OMP_TGT_EXEC_MODE_SPMD, 6713 cast<ConstantInt>(ExecModeValue)->getZExtValue()); 6714 } 6715 6716 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { 6717 OpenMPIRBuilder OMPBuilder(*M); 6718 OMPBuilder.setConfig( 6719 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6720 OMPBuilder.initialize(); 6721 6722 F->setName("func"); 6723 IRBuilder<> Builder(BB); 6724 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6725 6726 LoadInst *Value = nullptr; 6727 StoreInst *TargetStore = nullptr; 6728 llvm::SmallVector<llvm::Value *, 1> CapturedArgs = { 6729 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6730 6731 auto SimpleArgAccessorCB = 6732 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6733 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6734 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6735 if (!OMPBuilder.Config.isTargetDevice()) { 6736 RetVal = cast<llvm::Value>(&Arg); 6737 return CodeGenIP; 6738 } 6739 6740 Builder.restoreIP(AllocaIP); 6741 6742 llvm::Value *Addr = Builder.CreateAlloca( 6743 Arg.getType()->isPointerTy() 6744 ? 
Arg.getType() 6745 : Type::getInt64Ty(Builder.getContext()), 6746 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6747 llvm::Value *AddrAscast = 6748 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6749 Builder.CreateStore(&Arg, AddrAscast); 6750 6751 Builder.restoreIP(CodeGenIP); 6752 6753 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6754 6755 return Builder.saveIP(); 6756 }; 6757 6758 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6759 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6760 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6761 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6762 return CombinedInfos; 6763 }; 6764 6765 llvm::Value *RaiseAlloca = nullptr; 6766 6767 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6768 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6769 -> OpenMPIRBuilder::InsertPointTy { 6770 Builder.restoreIP(CodeGenIP); 6771 RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty()); 6772 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6773 TargetStore = Builder.CreateStore(Value, RaiseAlloca); 6774 return Builder.saveIP(); 6775 }; 6776 6777 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6778 F->getEntryBlock().getFirstInsertionPt()); 6779 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6780 /*Line=*/3, /*Count=*/0); 6781 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6782 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6783 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC, 6784 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6785 6786 ASSERT_EXPECTED_INIT( 6787 OpenMPIRBuilder::InsertPointTy, AfterIP, 6788 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6789 EntryInfo, DefaultAttrs, RuntimeAttrs, 6790 /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, 6791 BodyGenCB, SimpleArgAccessorCB)); 6792 Builder.restoreIP(AfterIP); 
6793 6794 Builder.CreateRetVoid(); 6795 OMPBuilder.finalize(); 6796 6797 // Check outlined function 6798 EXPECT_FALSE(verifyModule(*M, &errs())); 6799 EXPECT_NE(TargetStore, nullptr); 6800 Function *OutlinedFn = TargetStore->getFunction(); 6801 EXPECT_NE(F, OutlinedFn); 6802 6803 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6804 // Account for the "implicit" first argument. 6805 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6806 EXPECT_EQ(OutlinedFn->arg_size(), 2U); 6807 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6808 6809 // Check entry block, to see if we have raised our alloca 6810 // from the body to the entry block. 6811 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6812 6813 // Check that we have moved our alloca created in the 6814 // BodyGenCB function, to the top of the function. 6815 Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt(); 6816 EXPECT_NE(Alloca1, nullptr); 6817 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6818 EXPECT_EQ(Alloca1, RaiseAlloca); 6819 6820 // Verify we have not altered the rest of the function 6821 // inappropriately with our alloca movement. 
6822 auto *Alloca2 = Alloca1->getNextNode(); 6823 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6824 auto *Store2 = Alloca2->getNextNode(); 6825 EXPECT_TRUE(isa<StoreInst>(Store2)); 6826 6827 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6828 EXPECT_NE(InitCall, nullptr); 6829 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6830 EXPECT_EQ(InitCall->arg_size(), 2U); 6831 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6832 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6833 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6834 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6835 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6836 auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6837 EXPECT_EQ(ConfigC->getAggregateElement(0U), 6838 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6839 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6840 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6841 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6842 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6843 6844 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6845 EXPECT_NE(EntryBlockBranch, nullptr); 6846 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6847 6848 // Check user code block 6849 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6850 EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6851 BasicBlock::iterator Load1 = UserCodeBlock->getFirstNonPHIIt(); 6852 EXPECT_TRUE(isa<LoadInst>(Load1)); 6853 6854 auto *OutlinedBlockBr = Load1->getNextNode(); 6855 EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr)); 6856 6857 auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0); 6858 EXPECT_EQ(OutlinedBlock->getName(), "outlined.body"); 6859 6860 Instruction *Load2 = &*OutlinedBlock->getFirstNonPHIIt(); 6861 EXPECT_TRUE(isa<LoadInst>(Load2)); 6862 EXPECT_EQ(Load2, Value); 6863 
/// Checks the IR produced by OpenMPIRBuilder::createTask for a task whose body
/// uses values defined in the enclosing function: the operands of the
/// `__kmpc_omp_task_alloc` call, the shape of the outlined function, and the
/// `__kmpc_omp_task` call that launches the task.
TEST_F(OpenMPIRBuilderTest, CreateTask) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  // Values created outside the task body and used inside it.
  AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
  AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
  Value *Val128 =
      Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    Builder.restoreIP(AllocaIP);
    AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
                                                "bodygen.alloca128");

    Builder.restoreIP(CodeGenIP);
    // Loading and storing captured pointer and values
    Builder.CreateStore(Val128, Local128);
    Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
                                      "bodygen.load32");

    LoadInst *PrivLoad128 = Builder.CreateLoad(
        Local128->getAllocatedType(), Local128, "bodygen.local.load128");
    Value *Cmp = Builder.CreateICmpNE(
        Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
    // Branch on the comparison so the task body has control flow; the
    // `trunc` and `icmp` are looked up in the outlined function below.
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);
    return Error::success();
  };

  BasicBlock *AllocaBB = Builder.GetInsertBlock();
  BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
  OpenMPIRBuilder::LocationDescription Loc(
      InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
  ASSERT_EXPECTED_INIT(
      OpenMPIRBuilder::InsertPointTy, AfterIP,
      OMPBuilder.createTask(
          Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
          BodyGenCB));
  Builder.restoreIP(AfterIP);
  OMPBuilder.finalize();
  Builder.CreateRetVoid();

  EXPECT_FALSE(verifyModule(*M, &errs()));

  CallInst *TaskAllocCall = dyn_cast<CallInst>(
      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
          ->user_back());
  // Every check below dereferences the call, so a failed cast must be fatal.
  ASSERT_NE(TaskAllocCall, nullptr);

  // Verify the Ident argument. dyn_cast (rather than cast) is used so that
  // the null checks can actually fail.
  GlobalVariable *Ident =
      dyn_cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
  ASSERT_NE(Ident, nullptr);
  ASSERT_TRUE(Ident->hasInitializer());
  Constant *Initializer = Ident->getInitializer();
  GlobalVariable *SrcStrGlob =
      dyn_cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
  ASSERT_NE(SrcStrGlob, nullptr);
  ConstantDataArray *SrcSrc =
      dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
  ASSERT_NE(SrcSrc, nullptr);

  // Verify the global thread id argument: it must be the result of a
  // one-argument call to `__kmpc_global_thread_num`.
  CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
  ASSERT_NE(GTID, nullptr);
  EXPECT_EQ(GTID->arg_size(), 1U);
  EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");

  // Verify the flags
  // TODO: Check for others flags. Currently testing only for tiedness.
  ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
  ASSERT_NE(Flags, nullptr);
  EXPECT_EQ(Flags->getSExtValue(), 1);

  // Verify the data size
  ConstantInt *DataSize =
      dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
  ASSERT_NE(DataSize, nullptr);
  EXPECT_EQ(DataSize->getSExtValue(), 40);

  // Verify the size of the shared data
  ConstantInt *SharedsSize =
      dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(4));
  ASSERT_NE(SharedsSize, nullptr);
  EXPECT_EQ(SharedsSize->getSExtValue(),
            24); // 64-bit pointer + 128-bit integer

  // Verify Wrapper function
  Function *OutlinedFn =
      dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
  ASSERT_NE(OutlinedFn, nullptr);

  // The first instruction of the outlined function loads through its second
  // argument.
  LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin());
  ASSERT_NE(SharedsLoad, nullptr);
  EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1));

  EXPECT_FALSE(OutlinedFn->isDeclaration());
  EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty());

  // Verify that the data argument is used only once, and that too in the load
  // instruction that is then used for accessing shared data.
  Value *DataPtr = OutlinedFn->getArg(1);
  // Fatal: the use list is dereferenced right below.
  ASSERT_EQ(DataPtr->getNumUses(), 1U);
  EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser()));
  Value *Data = DataPtr->uses().begin()->getUser();
  EXPECT_TRUE(all_of(Data->uses(), [](Use &U) {
    return isa<GetElementPtrInst>(U.getUser());
  }));

  // Verify the presence of `trunc` and `icmp` instructions in Outlined function
  EXPECT_TRUE(any_of(instructions(OutlinedFn),
                     [](Instruction &inst) { return isa<TruncInst>(&inst); }));
  EXPECT_TRUE(any_of(instructions(OutlinedFn),
                     [](Instruction &inst) { return isa<ICmpInst>(&inst); }));

  // Verify the execution of the task
  CallInst *TaskCall = dyn_cast<CallInst>(
      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
          ->user_back());
  ASSERT_NE(TaskCall, nullptr);
  EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
  EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
  EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);

  // Verify that the argument data has been copied: any memcpy that uses the
  // allocation call directly must write into it. Note that this loop passes
  // trivially when no such memcpy is among the users.
  for (User *in : TaskAllocCall->users()) {
    if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
      EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
    }
  }
}
OMPBuilder.createTask( 7032 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7033 BodyGenCB)); 7034 Builder.restoreIP(AfterIP); 7035 OMPBuilder.finalize(); 7036 Builder.CreateRetVoid(); 7037 7038 EXPECT_FALSE(verifyModule(*M, &errs())); 7039 7040 // Check that the outlined function has only one argument. 7041 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7042 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 7043 ->user_back()); 7044 Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5)); 7045 ASSERT_NE(OutlinedFn, nullptr); 7046 ASSERT_EQ(OutlinedFn->arg_size(), 1U); 7047 } 7048 7049 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { 7050 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7051 OpenMPIRBuilder OMPBuilder(*M); 7052 OMPBuilder.Config.IsTargetDevice = false; 7053 OMPBuilder.initialize(); 7054 F->setName("func"); 7055 IRBuilder<> Builder(BB); 7056 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7057 return Error::success(); 7058 }; 7059 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7060 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7061 OpenMPIRBuilder::LocationDescription Loc( 7062 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7063 ASSERT_EXPECTED_INIT( 7064 OpenMPIRBuilder::InsertPointTy, AfterIP, 7065 OMPBuilder.createTask( 7066 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7067 BodyGenCB, 7068 /*Tied=*/false)); 7069 Builder.restoreIP(AfterIP); 7070 OMPBuilder.finalize(); 7071 Builder.CreateRetVoid(); 7072 7073 // Check for the `Tied` argument 7074 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7075 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 7076 ->user_back()); 7077 ASSERT_NE(TaskAllocCall, nullptr); 7078 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 7079 ASSERT_NE(Flags, nullptr); 7080 EXPECT_EQ(Flags->getZExtValue() & 1U, 0U); 7081 7082 
EXPECT_FALSE(verifyModule(*M, &errs())); 7083 } 7084 7085 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { 7086 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7087 OpenMPIRBuilder OMPBuilder(*M); 7088 OMPBuilder.Config.IsTargetDevice = false; 7089 OMPBuilder.initialize(); 7090 F->setName("func"); 7091 IRBuilder<> Builder(BB); 7092 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7093 return Error::success(); 7094 }; 7095 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7096 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7097 OpenMPIRBuilder::LocationDescription Loc( 7098 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7099 AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext())); 7100 SmallVector<OpenMPIRBuilder::DependData> DDS; 7101 { 7102 OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn, 7103 Type::getInt32Ty(M->getContext()), InDep); 7104 DDS.push_back(DDIn); 7105 } 7106 ASSERT_EXPECTED_INIT( 7107 OpenMPIRBuilder::InsertPointTy, AfterIP, 7108 OMPBuilder.createTask( 7109 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7110 BodyGenCB, 7111 /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); 7112 Builder.restoreIP(AfterIP); 7113 OMPBuilder.finalize(); 7114 Builder.CreateRetVoid(); 7115 7116 // Check for the `NumDeps` argument 7117 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7118 OMPBuilder 7119 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps) 7120 ->user_back()); 7121 ASSERT_NE(TaskAllocCall, nullptr); 7122 ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 7123 ASSERT_NE(NumDeps, nullptr); 7124 EXPECT_EQ(NumDeps->getZExtValue(), 1U); 7125 7126 // Check for the `DepInfo` array argument 7127 AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4)); 7128 ASSERT_NE(DepArray, nullptr); 7129 Value::user_iterator DepArrayI = DepArray->user_begin(); 7130 ++DepArrayI; 7131 
Value::user_iterator DepInfoI = DepArrayI->user_begin(); 7132 // Check for the `DependKind` flag in the `DepInfo` array 7133 Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI); 7134 ASSERT_NE(Flag, nullptr); 7135 ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag); 7136 ASSERT_NE(FlagInt, nullptr); 7137 EXPECT_EQ(FlagInt->getZExtValue(), 7138 static_cast<unsigned int>(RTLDependenceKindTy::DepIn)); 7139 ++DepInfoI; 7140 // Check for the size in the `DepInfo` array 7141 Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI); 7142 ASSERT_NE(Size, nullptr); 7143 ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size); 7144 ASSERT_NE(SizeInt, nullptr); 7145 EXPECT_EQ(SizeInt->getZExtValue(), 4U); 7146 ++DepInfoI; 7147 // Check for the variable address in the `DepInfo` array 7148 Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI); 7149 ASSERT_NE(AddrStored, nullptr); 7150 PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored); 7151 ASSERT_NE(AddrInt, nullptr); 7152 Value *Addr = AddrInt->getPointerOperand(); 7153 EXPECT_EQ(Addr, InDep); 7154 7155 ConstantInt *NumDepsNoAlias = 7156 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5)); 7157 ASSERT_NE(NumDepsNoAlias, nullptr); 7158 EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U); 7159 EXPECT_EQ(TaskAllocCall->getOperand(6), 7160 ConstantPointerNull::get(PointerType::getUnqual(M->getContext()))); 7161 7162 EXPECT_FALSE(verifyModule(*M, &errs())); 7163 } 7164 7165 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { 7166 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7167 OpenMPIRBuilder OMPBuilder(*M); 7168 OMPBuilder.Config.IsTargetDevice = false; 7169 OMPBuilder.initialize(); 7170 F->setName("func"); 7171 IRBuilder<> Builder(BB); 7172 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7173 return Error::success(); 7174 }; 7175 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7176 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 7177 
Builder.SetInsertPoint(BodyBB); 7178 Value *Final = Builder.CreateICmp( 7179 CmpInst::Predicate::ICMP_EQ, F->getArg(0), 7180 ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); 7181 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 7182 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 7183 OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, 7184 /*Tied=*/false, Final)); 7185 Builder.restoreIP(AfterIP); 7186 OMPBuilder.finalize(); 7187 Builder.CreateRetVoid(); 7188 7189 // Check for the `Tied` argument 7190 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7191 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 7192 ->user_back()); 7193 ASSERT_NE(TaskAllocCall, nullptr); 7194 BinaryOperator *OrInst = 7195 dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2)); 7196 ASSERT_NE(OrInst, nullptr); 7197 EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or); 7198 7199 // One of the arguments to `or` instruction is the tied flag, which is equal 7200 // to zero. 7201 EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) { 7202 if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op)) 7203 return TiedValue->getSExtValue() == 0; 7204 return false; 7205 })); 7206 7207 // One of the arguments to `or` instruction is the final condition. 
7208 EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) { 7209 if (SelectInst *Select = dyn_cast<SelectInst>(op)) { 7210 ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue()); 7211 ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue()); 7212 if (!TrueValue || !FalseValue) 7213 return false; 7214 return Select->getCondition() == Final && 7215 TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0; 7216 } 7217 return false; 7218 })); 7219 7220 EXPECT_FALSE(verifyModule(*M, &errs())); 7221 } 7222 7223 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { 7224 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7225 OpenMPIRBuilder OMPBuilder(*M); 7226 OMPBuilder.Config.IsTargetDevice = false; 7227 OMPBuilder.initialize(); 7228 F->setName("func"); 7229 IRBuilder<> Builder(BB); 7230 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7231 return Error::success(); 7232 }; 7233 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7234 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 7235 Builder.SetInsertPoint(BodyBB); 7236 Value *IfCondition = Builder.CreateICmp( 7237 CmpInst::Predicate::ICMP_EQ, F->getArg(0), 7238 ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); 7239 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 7240 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 7241 OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, 7242 /*Tied=*/false, /*Final=*/nullptr, 7243 IfCondition)); 7244 Builder.restoreIP(AfterIP); 7245 OMPBuilder.finalize(); 7246 Builder.CreateRetVoid(); 7247 7248 EXPECT_FALSE(verifyModule(*M, &errs())); 7249 7250 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7251 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 7252 ->user_back()); 7253 ASSERT_NE(TaskAllocCall, nullptr); 7254 7255 // Check the branching is based on the if condition argument. 
7256 BranchInst *IfConditionBranchInst = 7257 dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator()); 7258 ASSERT_NE(IfConditionBranchInst, nullptr); 7259 ASSERT_TRUE(IfConditionBranchInst->isConditional()); 7260 EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition); 7261 7262 // Check that the `__kmpc_omp_task` executes only in the then branch. 7263 CallInst *TaskCall = dyn_cast<CallInst>( 7264 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 7265 ->user_back()); 7266 ASSERT_NE(TaskCall, nullptr); 7267 EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0)); 7268 7269 // Check that the OpenMP Runtime Functions specific to `if` clause execute 7270 // only in the else branch. Also check that the function call is between the 7271 // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls. 7272 CallInst *TaskBeginIfCall = dyn_cast<CallInst>( 7273 OMPBuilder 7274 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0) 7275 ->user_back()); 7276 CallInst *TaskCompleteCall = dyn_cast<CallInst>( 7277 OMPBuilder 7278 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0) 7279 ->user_back()); 7280 ASSERT_NE(TaskBeginIfCall, nullptr); 7281 ASSERT_NE(TaskCompleteCall, nullptr); 7282 Function *OulinedFn = 7283 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 7284 ASSERT_NE(OulinedFn, nullptr); 7285 CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back()); 7286 ASSERT_NE(OulinedFnCall, nullptr); 7287 EXPECT_EQ(TaskBeginIfCall->getParent(), 7288 IfConditionBranchInst->getSuccessor(1)); 7289 7290 EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); 7291 EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); 7292 } 7293 7294 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { 7295 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7296 OpenMPIRBuilder OMPBuilder(*M); 7297 OMPBuilder.initialize(); 7298 F->setName("func"); 
7299 IRBuilder<> Builder(BB); 7300 7301 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 7302 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 7303 Value *Val128 = 7304 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 7305 Instruction *ThenTerm, *ElseTerm; 7306 7307 Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp; 7308 7309 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7310 Builder.restoreIP(AllocaIP); 7311 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 7312 "bodygen.alloca128"); 7313 7314 Builder.restoreIP(CodeGenIP); 7315 // Loading and storing captured pointer and values 7316 InternalStoreInst = Builder.CreateStore(Val128, Local128); 7317 InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 7318 "bodygen.load32"); 7319 7320 InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128, 7321 "bodygen.local.load128"); 7322 InternalIfCmp = Builder.CreateICmpNE( 7323 InternalLoad32, 7324 Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType())); 7325 SplitBlockAndInsertIfThenElse(InternalIfCmp, 7326 CodeGenIP.getBlock()->getTerminator(), 7327 &ThenTerm, &ElseTerm); 7328 return Error::success(); 7329 }; 7330 7331 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7332 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7333 OpenMPIRBuilder::LocationDescription Loc( 7334 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7335 ASSERT_EXPECTED_INIT( 7336 OpenMPIRBuilder::InsertPointTy, AfterIP, 7337 OMPBuilder.createTaskgroup( 7338 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7339 BodyGenCB)); 7340 Builder.restoreIP(AfterIP); 7341 OMPBuilder.finalize(); 7342 Builder.CreateRetVoid(); 7343 7344 EXPECT_FALSE(verifyModule(*M, &errs())); 7345 7346 CallInst *TaskgroupCall = dyn_cast<CallInst>( 7347 
OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 7348 ->user_back()); 7349 ASSERT_NE(TaskgroupCall, nullptr); 7350 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 7351 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 7352 ->user_back()); 7353 ASSERT_NE(EndTaskgroupCall, nullptr); 7354 7355 // Verify the Ident argument 7356 GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0)); 7357 ASSERT_NE(Ident, nullptr); 7358 EXPECT_TRUE(Ident->hasInitializer()); 7359 Constant *Initializer = Ident->getInitializer(); 7360 GlobalVariable *SrcStrGlob = 7361 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 7362 ASSERT_NE(SrcStrGlob, nullptr); 7363 ConstantDataArray *SrcSrc = 7364 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 7365 ASSERT_NE(SrcSrc, nullptr); 7366 7367 // Verify the num_threads argument. 7368 CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1)); 7369 ASSERT_NE(GTID, nullptr); 7370 EXPECT_EQ(GTID->arg_size(), 1U); 7371 EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr( 7372 OMPRTL___kmpc_global_thread_num)); 7373 7374 // Checking the general structure of the IR generated is same as expected. 7375 Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction(); 7376 EXPECT_EQ(GeneratedStoreInst, InternalStoreInst); 7377 Instruction *GeneratedLoad32 = 7378 GeneratedStoreInst->getNextNonDebugInstruction(); 7379 EXPECT_EQ(GeneratedLoad32, InternalLoad32); 7380 Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction(); 7381 EXPECT_EQ(GeneratedLoad128, InternalLoad128); 7382 7383 // Checking the ordering because of the if statements and that 7384 // `__kmp_end_taskgroup` call is after the if branching. 
7385 BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(), 7386 ThenTerm->getSuccessor(0), 7387 EndTaskgroupCall->getParent(), 7388 ElseTerm->getParent()}; 7389 verifyDFSOrder(F, RefOrder); 7390 } 7391 7392 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { 7393 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7394 OpenMPIRBuilder OMPBuilder(*M); 7395 OMPBuilder.Config.IsTargetDevice = false; 7396 OMPBuilder.initialize(); 7397 F->setName("func"); 7398 IRBuilder<> Builder(BB); 7399 7400 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7401 Builder.restoreIP(AllocaIP); 7402 AllocaInst *Alloca32 = 7403 Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32"); 7404 AllocaInst *Alloca64 = 7405 Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64"); 7406 Builder.restoreIP(CodeGenIP); 7407 auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7408 Builder.restoreIP(CodeGenIP); 7409 LoadInst *LoadValue = 7410 Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); 7411 Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64)); 7412 Builder.CreateStore(AddInst, Alloca64); 7413 return Error::success(); 7414 }; 7415 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 7416 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1, 7417 OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); 7418 Builder.restoreIP(TaskIP1); 7419 7420 auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7421 Builder.restoreIP(CodeGenIP); 7422 LoadInst *LoadValue = 7423 Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); 7424 Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32)); 7425 Builder.CreateStore(AddInst, Alloca32); 7426 return Error::success(); 7427 }; 7428 OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); 7429 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2, 7430 
OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); 7431 Builder.restoreIP(TaskIP2); 7432 }; 7433 7434 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7435 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7436 OpenMPIRBuilder::LocationDescription Loc( 7437 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7438 ASSERT_EXPECTED_INIT( 7439 OpenMPIRBuilder::InsertPointTy, AfterIP, 7440 OMPBuilder.createTaskgroup( 7441 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7442 BODYGENCB_WRAPPER(BodyGenCB))); 7443 Builder.restoreIP(AfterIP); 7444 OMPBuilder.finalize(); 7445 Builder.CreateRetVoid(); 7446 7447 EXPECT_FALSE(verifyModule(*M, &errs())); 7448 7449 CallInst *TaskgroupCall = dyn_cast<CallInst>( 7450 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 7451 ->user_back()); 7452 ASSERT_NE(TaskgroupCall, nullptr); 7453 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 7454 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 7455 ->user_back()); 7456 ASSERT_NE(EndTaskgroupCall, nullptr); 7457 7458 Function *TaskAllocFn = 7459 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); 7460 ASSERT_EQ(TaskAllocFn->getNumUses(), 2u); 7461 7462 CallInst *FirstTaskAllocCall = 7463 dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()); 7464 CallInst *SecondTaskAllocCall = 7465 dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++); 7466 ASSERT_NE(FirstTaskAllocCall, nullptr); 7467 ASSERT_NE(SecondTaskAllocCall, nullptr); 7468 7469 // Verify that the tasks have been generated in order and inside taskgroup 7470 // construct. 
7471 BasicBlock *RefOrder[] = { 7472 TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(), 7473 SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()}; 7474 verifyDFSOrder(F, RefOrder); 7475 } 7476 7477 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { 7478 OpenMPIRBuilder OMPBuilder(*M); 7479 OMPBuilder.initialize(); 7480 7481 IRBuilder<> Builder(BB); 7482 7483 OpenMPIRBuilder::TargetDataRTArgs RTArgs; 7484 OpenMPIRBuilder::TargetDataInfo Info(true, false); 7485 7486 auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext()); 7487 auto Int64PtrTy = PointerType::getUnqual(Builder.getContext()); 7488 7489 Info.RTArgs.BasePointersArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7490 Info.RTArgs.PointersArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7491 Info.RTArgs.SizesArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7492 Info.RTArgs.MapTypesArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7493 Info.RTArgs.MapNamesArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7494 Info.RTArgs.MappersArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7495 Info.NumberOfPtrs = 4; 7496 Info.EmitDebug = false; 7497 OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false); 7498 7499 EXPECT_NE(RTArgs.BasePointersArray, nullptr); 7500 EXPECT_NE(RTArgs.PointersArray, nullptr); 7501 EXPECT_NE(RTArgs.SizesArray, nullptr); 7502 EXPECT_NE(RTArgs.MapTypesArray, nullptr); 7503 EXPECT_NE(RTArgs.MappersArray, nullptr); 7504 EXPECT_NE(RTArgs.MapNamesArray, nullptr); 7505 EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr); 7506 7507 EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy); 7508 EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy); 7509 EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy); 7510 EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy); 7511 EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy); 7512 EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy); 7513 } 7514 7515 
TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { 7516 OpenMPIRBuilder OMPBuilder(*M); 7517 OMPBuilder.setConfig( 7518 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 7519 OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager; 7520 TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); 7521 InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); 7522 EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo)); 7523 InfoManager.initializeDeviceGlobalVarEntryInfo( 7524 "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0); 7525 InfoManager.registerTargetRegionEntryInfo( 7526 EntryInfo, nullptr, nullptr, 7527 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion); 7528 InfoManager.registerDeviceGlobalVarEntryInfo( 7529 "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 7530 GlobalValue::WeakAnyLinkage); 7531 EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar")); 7532 } 7533 7534 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they 7535 // call each other (recursively in some cases). 
The test case test these 7536 // functions by utilising them for host code generation for declare target 7537 // global variables 7538 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) { 7539 OpenMPIRBuilder OMPBuilder(*M); 7540 OMPBuilder.initialize(); 7541 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 7542 OMPBuilder.setConfig(Config); 7543 7544 std::vector<llvm::Triple> TargetTriple; 7545 TargetTriple.emplace_back("amdgcn-amd-amdhsa"); 7546 7547 TargetRegionEntryInfo EntryInfo("", 42, 4711, 17); 7548 std::vector<GlobalVariable *> RefsGathered; 7549 7550 std::vector<Constant *> Globals; 7551 auto *IntTy = Type::getInt32Ty(Ctx); 7552 for (int I = 0; I < 2; ++I) { 7553 Globals.push_back(M->getOrInsertGlobal( 7554 "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * { 7555 return new GlobalVariable( 7556 *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage, 7557 ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I)); 7558 })); 7559 } 7560 7561 OMPBuilder.registerTargetGlobalVariable( 7562 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 7563 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 7564 EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple, 7565 nullptr, nullptr, Globals[0]->getType(), Globals[0]); 7566 7567 OMPBuilder.registerTargetGlobalVariable( 7568 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink, 7569 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 7570 EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple, 7571 nullptr, nullptr, Globals[1]->getType(), Globals[1]); 7572 7573 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn = 7574 [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, 7575 const llvm::TargetRegionEntryInfo &EntryInfo) -> void { 7576 // If this is invoked, then we want to emit an error, even if it is not 7577 // neccesarily the most readable, as something has went 
wrong. The 7578 // test-suite unfortunately eats up all error output 7579 ASSERT_EQ(Kind, Kind); 7580 }; 7581 7582 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn); 7583 7584 // Clauses for data_int_0 with To + Any clauses for the host 7585 std::vector<GlobalVariable *> OffloadEntries; 7586 OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name")); 7587 OffloadEntries.push_back( 7588 M->getNamedGlobal(".offloading.entry.test_data_int_0")); 7589 7590 // Clauses for data_int_1 with Link + Any clauses for the host 7591 OffloadEntries.push_back( 7592 M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr")); 7593 OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1")); 7594 OffloadEntries.push_back( 7595 M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr")); 7596 7597 for (unsigned I = 0; I < OffloadEntries.size(); ++I) 7598 EXPECT_NE(OffloadEntries[I], nullptr); 7599 7600 // Metadata generated for the host offload module 7601 NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info"); 7602 ASSERT_THAT(OffloadMetadata, testing::NotNull()); 7603 StringRef Nodes[2] = { 7604 cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1)) 7605 ->getString(), 7606 cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1)) 7607 ->getString()}; 7608 EXPECT_THAT( 7609 Nodes, testing::UnorderedElementsAre("test_data_int_0", 7610 "test_data_int_1_decl_tgt_ref_ptr")); 7611 } 7612 7613 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) { 7614 OpenMPIRBuilder OMPBuilder(*M); 7615 OMPBuilder.initialize(); 7616 OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true, 7617 /* IsGPU = */ true, 7618 /* OpenMPOffloadMandatory = */ false, 7619 /* HasRequiresReverseOffload = */ false, 7620 /* HasRequiresUnifiedAddress = */ false, 7621 /* HasRequiresUnifiedSharedMemory = */ false, 7622 /* HasRequiresDynamicAllocators = */ false); 7623 OMPBuilder.setConfig(Config); 7624 7625 FunctionCallee FnTypeAndCallee = 7626 
M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx)); 7627 7628 auto *Fn = cast<Function>(FnTypeAndCallee.getCallee()); 7629 OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn, 7630 /* Size = */ 0, 7631 /* Flags = */ 0, GlobalValue::WeakAnyLinkage); 7632 7633 // Check kernel attributes 7634 EXPECT_TRUE(Fn->hasFnAttribute("kernel")); 7635 EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress)); 7636 } 7637 7638 } // namespace 7639