//===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Casting.h"
#include "llvm/Testing/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <optional>

using namespace llvm;
using namespace omp;

// Helper that intends to be functionally equivalent to `VarType VarName = Init`
// for an `Init` that returns an `Expected<VarType>` value. It produces an error
// message and returns if `Init` didn't produce a valid result.
//
// The macro expands to several statements and declares both `VarName` and a
// helper variable `__Expected<VarName>` in the enclosing scope, so it cannot be
// used as the sole body of an unbraced `if`/`for`, and a given `VarName` can
// only be used once per scope.
#define ASSERT_EXPECTED_INIT(VarType, VarName, Init)                           \
  auto __Expected##VarName = Init;                                             \
  ASSERT_THAT_EXPECTED(__Expected##VarName, Succeeded());                      \
  VarType VarName = *__Expected##VarName

// Similar to ASSERT_EXPECTED_INIT, but returns a given expression in case of
// error after printing the error message.
// `Return` is the expression handed to `return` when `Init` holds an error.
// Like ASSERT_EXPECTED_INIT, this expands to several statements and declares
// `VarName` plus a helper `__Expected<VarName>` in the enclosing scope.
#define ASSERT_EXPECTED_INIT_RETURN(VarType, VarName, Init, Return)            \
  auto __Expected##VarName = Init;                                             \
  EXPECT_THAT_EXPECTED(__Expected##VarName, Succeeded());                      \
  if (!__Expected##VarName)                                                    \
    return Return;                                                             \
  VarType VarName = *__Expected##VarName

// Wrapper lambdas to allow using EXPECT*() macros inside of error-returning
// callbacks.
//
// Each wrapper captures `cb` by reference, forwards its arguments to it and
// then unconditionally returns Error::success(). `cb` must therefore outlive
// the wrapper, and the unqualified name `InsertPointTy` must be visible at the
// point where the macro is expanded.

// Wraps a finalization callback taking a single insert point.
#define FINICB_WRAPPER(cb)                                                     \
  [&cb](InsertPointTy IP) -> Error {                                           \
    cb(IP);                                                                    \
    return Error::success();                                                   \
  }

// Wraps a body-generation callback taking (AllocaIP, CodeGenIP).
#define BODYGENCB_WRAPPER(cb)                                                  \
  [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error {            \
    cb(AllocaIP, CodeGenIP);                                                   \
    return Error::success();                                                   \
  }

// Wraps a loop-body callback taking (CodeGenIP, logical loop counter).
#define LOOP_BODYGENCB_WRAPPER(cb)                                             \
  [&cb](InsertPointTy CodeGenIP, Value *LC) -> Error {                         \
    cb(CodeGenIP, LC);                                                         \
    return Error::success();                                                   \
  }

namespace {

/// Create an instruction that uses the values in \p Values. We use "printf"
/// just because it is often used for this purpose in test code, but it is never
/// executed here.
73 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr, 74 ArrayRef<Value *> Values) { 75 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 76 77 GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M); 78 Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 79 Constant *Indices[] = {Zero, Zero}; 80 Constant *FormatStrConst = 81 ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices); 82 83 Function *PrintfDecl = M->getFunction("printf"); 84 if (!PrintfDecl) { 85 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 86 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true); 87 PrintfDecl = Function::Create(Ty, Linkage, "printf", M); 88 } 89 90 SmallVector<Value *, 4> Args; 91 Args.push_back(FormatStrConst); 92 Args.append(Values.begin(), Values.end()); 93 return Builder.CreateCall(PrintfDecl, Args); 94 } 95 96 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit 97 /// order the control flow of \p F. 98 /// 99 /// This is an easy way to verify the branching structure of the CFG without 100 /// checking every branch instruction individually. For the CFG of a 101 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering 102 /// the body, i.e. the DFS order corresponds to the execution order with one 103 /// loop iteration. 
104 static testing::AssertionResult 105 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 106 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 107 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 108 109 df_iterator_default_set<BasicBlock *, 16> Visited; 110 auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited); 111 112 BasicBlock *Prev = nullptr; 113 for (BasicBlock *BB : DFS) { 114 if (It != E && BB == *It) { 115 Prev = *It; 116 ++It; 117 } 118 } 119 120 if (It == E) 121 return testing::AssertionSuccess(); 122 if (!Prev) 123 return testing::AssertionFailure() 124 << "Did not find " << (*It)->getName() << " in control flow"; 125 return testing::AssertionFailure() 126 << "Expected " << Prev->getName() << " before " << (*It)->getName() 127 << " in control flow"; 128 } 129 130 /// Verify that blocks in \p RefOrder are in the same relative order in the 131 /// linked lists of blocks in \p F. The linked list may contain additional 132 /// blocks in-between. 133 /// 134 /// While the order in the linked list is not relevant for semantics, keeping 135 /// the order roughly in execution order makes its printout easier to read. 
136 static testing::AssertionResult 137 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { 138 ArrayRef<BasicBlock *>::iterator It = RefOrder.begin(); 139 ArrayRef<BasicBlock *>::iterator E = RefOrder.end(); 140 141 BasicBlock *Prev = nullptr; 142 for (BasicBlock &BB : *F) { 143 if (It != E && &BB == *It) { 144 Prev = *It; 145 ++It; 146 } 147 } 148 149 if (It == E) 150 return testing::AssertionSuccess(); 151 if (!Prev) 152 return testing::AssertionFailure() << "Did not find " << (*It)->getName() 153 << " in function " << F->getName(); 154 return testing::AssertionFailure() 155 << "Expected " << Prev->getName() << " before " << (*It)->getName() 156 << " in function " << F->getName(); 157 } 158 159 /// Populate Calls with call instructions calling the function with the given 160 /// FnID from the given function F. 161 static void findCalls(Function *F, omp::RuntimeFunction FnID, 162 OpenMPIRBuilder &OMPBuilder, 163 SmallVectorImpl<CallInst *> &Calls) { 164 Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); 165 for (BasicBlock &BB : *F) { 166 for (Instruction &I : BB) { 167 auto *Call = dyn_cast<CallInst>(&I); 168 if (Call && Call->getCalledFunction() == Fn) 169 Calls.push_back(Call); 170 } 171 } 172 } 173 174 /// Assuming \p F contains only one call to the function with the given \p FnID, 175 /// return that call. 
176 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID, 177 OpenMPIRBuilder &OMPBuilder) { 178 SmallVector<CallInst *, 1> Calls; 179 findCalls(F, FnID, OMPBuilder, Calls); 180 EXPECT_EQ(1u, Calls.size()); 181 if (Calls.size() != 1) 182 return nullptr; 183 return Calls.front(); 184 } 185 186 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) { 187 switch (SchedType & ~omp::OMPScheduleType::ModifierMask) { 188 case omp::OMPScheduleType::BaseDynamicChunked: 189 return omp::OMP_SCHEDULE_Dynamic; 190 case omp::OMPScheduleType::BaseGuidedChunked: 191 return omp::OMP_SCHEDULE_Guided; 192 case omp::OMPScheduleType::BaseAuto: 193 return omp::OMP_SCHEDULE_Auto; 194 case omp::OMPScheduleType::BaseRuntime: 195 return omp::OMP_SCHEDULE_Runtime; 196 default: 197 llvm_unreachable("unknown type for this test"); 198 } 199 } 200 201 class OpenMPIRBuilderTest : public testing::Test { 202 protected: 203 void SetUp() override { 204 M.reset(new Module("MyModule", Ctx)); 205 FunctionType *FTy = 206 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 207 /*isVarArg=*/false); 208 F = Function::Create(FTy, Function::ExternalLinkage, "", M.get()); 209 BB = BasicBlock::Create(Ctx, "", F); 210 211 DIBuilder DIB(*M); 212 auto File = DIB.createFile("test.dbg", "/src", std::nullopt, 213 std::optional<StringRef>("/src/test.dbg")); 214 auto CU = 215 DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0); 216 auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray({})); 217 auto SP = DIB.createFunction( 218 CU, "foo", "", File, 1, Type, 1, DINode::FlagZero, 219 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized); 220 F->setSubprogram(SP); 221 auto Scope = DIB.createLexicalBlockFile(SP, File, 0); 222 DIB.finalize(); 223 DL = DILocation::get(Ctx, 3, 7, Scope); 224 } 225 226 void TearDown() override { 227 BB = nullptr; 228 M.reset(); 229 } 230 231 /// Create a function with a simple loop that calls printf using the 
logical 232 /// loop counter for use with tests that need a CanonicalLoopInfo object. 233 CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL, 234 OpenMPIRBuilder &OMPBuilder, 235 int UseIVBits, 236 CallInst **Call = nullptr, 237 BasicBlock **BodyCode = nullptr) { 238 OMPBuilder.initialize(); 239 F->setName("func"); 240 241 IRBuilder<> Builder(BB); 242 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 243 Value *TripCount = F->getArg(0); 244 245 Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits); 246 Value *CastedTripCount = 247 Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount"); 248 249 auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, 250 llvm::Value *LC) { 251 Builder.restoreIP(CodeGenIP); 252 if (BodyCode) 253 *BodyCode = Builder.GetInsertBlock(); 254 255 // Add something that consumes the induction variable to the body. 256 CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC}); 257 if (Call) 258 *Call = CallInst; 259 260 return Error::success(); 261 }; 262 263 ASSERT_EXPECTED_INIT_RETURN( 264 CanonicalLoopInfo *, Loop, 265 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount), 266 nullptr); 267 268 // Finalize the function. 269 Builder.restoreIP(Loop->getAfterIP()); 270 Builder.CreateRetVoid(); 271 272 return Loop; 273 } 274 275 LLVMContext Ctx; 276 std::unique_ptr<Module> M; 277 Function *F; 278 BasicBlock *BB; 279 DebugLoc DL; 280 }; 281 282 class OpenMPIRBuilderTestWithParams 283 : public OpenMPIRBuilderTest, 284 public ::testing::WithParamInterface<omp::OMPScheduleType> {}; 285 286 class OpenMPIRBuilderTestWithIVBits 287 : public OpenMPIRBuilderTest, 288 public ::testing::WithParamInterface<int> {}; 289 290 // Returns the value stored in the given allocation. Returns null if the given 291 // value is not a result of an InstTy instruction, if no value is stored or if 292 // there is more than one store. 
293 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) { 294 Instruction *Inst = dyn_cast<InstTy>(AllocaValue); 295 if (!Inst) 296 return nullptr; 297 StoreInst *Store = nullptr; 298 for (Use &U : Inst->uses()) { 299 if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) { 300 EXPECT_EQ(Store, nullptr); 301 Store = CandidateStore; 302 } 303 } 304 if (!Store) 305 return nullptr; 306 return Store->getValueOperand(); 307 } 308 309 // Returns the value stored in the aggregate argument of an outlined function, 310 // or nullptr if it is not found. 311 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate, 312 unsigned Idx) { 313 GetElementPtrInst *GEPAtIdx = nullptr; 314 // Find GEP instruction at that index. 315 for (User *Usr : Aggregate->users()) { 316 GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr); 317 if (!GEP) 318 continue; 319 320 if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx)) 321 continue; 322 323 EXPECT_EQ(GEPAtIdx, nullptr); 324 GEPAtIdx = GEP; 325 } 326 327 EXPECT_NE(GEPAtIdx, nullptr); 328 EXPECT_EQ(GEPAtIdx->getNumUses(), 1U); 329 330 // Find the value stored to the aggregate. 331 StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin()); 332 Value *StoredAggValue = StoreToAgg->getValueOperand(); 333 334 Value *StoredValue = nullptr; 335 336 // Find the value stored to the value stored in the aggregate. 337 for (User *Usr : StoredAggValue->users()) { 338 StoreInst *Store = dyn_cast<StoreInst>(Usr); 339 if (!Store) 340 continue; 341 342 if (Store->getPointerOperand() != StoredAggValue) 343 continue; 344 345 EXPECT_EQ(StoredValue, nullptr); 346 StoredValue = Store->getValueOperand(); 347 } 348 349 return StoredValue; 350 } 351 352 // Returns the aggregate that the value is originating from. 353 static Value *findAggregateFromValue(Value *V) { 354 // Expects a load instruction that loads from the aggregate. 
355 LoadInst *Load = dyn_cast<LoadInst>(V); 356 EXPECT_NE(Load, nullptr); 357 // Find the GEP instruction used in the load instruction. 358 GetElementPtrInst *GEP = 359 dyn_cast<GetElementPtrInst>(Load->getPointerOperand()); 360 EXPECT_NE(GEP, nullptr); 361 // Find the aggregate used in the GEP instruction. 362 Value *Aggregate = GEP->getPointerOperand(); 363 364 return Aggregate; 365 } 366 367 TEST_F(OpenMPIRBuilderTest, CreateBarrier) { 368 OpenMPIRBuilder OMPBuilder(*M); 369 OMPBuilder.initialize(); 370 371 IRBuilder<> Builder(BB); 372 373 ASSERT_THAT_EXPECTED( 374 OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for), 375 Succeeded()); 376 EXPECT_TRUE(M->global_empty()); 377 EXPECT_EQ(M->size(), 1U); 378 EXPECT_EQ(F->size(), 1U); 379 EXPECT_EQ(BB->size(), 0U); 380 381 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 382 ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded()); 383 EXPECT_FALSE(M->global_empty()); 384 EXPECT_EQ(M->size(), 3U); 385 EXPECT_EQ(F->size(), 1U); 386 EXPECT_EQ(BB->size(), 2U); 387 388 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 389 EXPECT_NE(GTID, nullptr); 390 EXPECT_EQ(GTID->arg_size(), 1U); 391 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 392 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 393 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 394 395 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 396 EXPECT_NE(Barrier, nullptr); 397 EXPECT_EQ(Barrier->arg_size(), 2U); 398 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier"); 399 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 400 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 401 402 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 403 404 Builder.CreateUnreachable(); 405 EXPECT_FALSE(verifyModule(*M, &errs())); 406 } 407 408 TEST_F(OpenMPIRBuilderTest, CreateCancel) { 409 using InsertPointTy = 
OpenMPIRBuilder::InsertPointTy; 410 OpenMPIRBuilder OMPBuilder(*M); 411 OMPBuilder.initialize(); 412 413 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 414 new UnreachableInst(Ctx, CBB); 415 auto FiniCB = [&](InsertPointTy IP) { 416 ASSERT_NE(IP.getBlock(), nullptr); 417 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 418 BranchInst::Create(CBB, IP.getBlock()); 419 }; 420 OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); 421 422 IRBuilder<> Builder(BB); 423 424 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 425 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP, 426 OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel)); 427 Builder.restoreIP(NewIP); 428 EXPECT_FALSE(M->global_empty()); 429 EXPECT_EQ(M->size(), 4U); 430 EXPECT_EQ(F->size(), 4U); 431 EXPECT_EQ(BB->size(), 4U); 432 433 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 434 EXPECT_NE(GTID, nullptr); 435 EXPECT_EQ(GTID->arg_size(), 1U); 436 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 437 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 438 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 439 440 CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode()); 441 EXPECT_NE(Cancel, nullptr); 442 EXPECT_EQ(Cancel->arg_size(), 3U); 443 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 444 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 445 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 446 EXPECT_EQ(Cancel->getNumUses(), 1U); 447 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 448 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 449 EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); 450 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 451 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 452 EXPECT_NE(GTID1, nullptr); 453 EXPECT_EQ(GTID1->arg_size(), 1U); 454 
EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 455 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 456 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 457 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 458 EXPECT_NE(Barrier, nullptr); 459 EXPECT_EQ(Barrier->arg_size(), 2U); 460 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 461 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 462 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 463 EXPECT_EQ(Barrier->getNumUses(), 0U); 464 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 465 1U); 466 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 467 468 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 469 470 OMPBuilder.popFinalizationCB(); 471 472 Builder.CreateUnreachable(); 473 EXPECT_FALSE(verifyModule(*M, &errs())); 474 } 475 476 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { 477 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 478 OpenMPIRBuilder OMPBuilder(*M); 479 OMPBuilder.initialize(); 480 481 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 482 new UnreachableInst(Ctx, CBB); 483 auto FiniCB = [&](InsertPointTy IP) { 484 ASSERT_NE(IP.getBlock(), nullptr); 485 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 486 BranchInst::Create(CBB, IP.getBlock()); 487 }; 488 OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); 489 490 IRBuilder<> Builder(BB); 491 492 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 493 ASSERT_EXPECTED_INIT( 494 OpenMPIRBuilder::InsertPointTy, NewIP, 495 OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel)); 496 Builder.restoreIP(NewIP); 497 EXPECT_FALSE(M->global_empty()); 498 EXPECT_EQ(M->size(), 4U); 499 EXPECT_EQ(F->size(), 7U); 500 EXPECT_EQ(BB->size(), 1U); 501 ASSERT_TRUE(isa<BranchInst>(BB->getTerminator())); 502 
ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U); 503 BB = BB->getTerminator()->getSuccessor(0); 504 EXPECT_EQ(BB->size(), 4U); 505 506 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 507 EXPECT_NE(GTID, nullptr); 508 EXPECT_EQ(GTID->arg_size(), 1U); 509 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 510 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 511 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 512 513 CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode()); 514 EXPECT_NE(Cancel, nullptr); 515 EXPECT_EQ(Cancel->arg_size(), 3U); 516 EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel"); 517 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory()); 518 EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory()); 519 EXPECT_EQ(Cancel->getNumUses(), 1U); 520 Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); 521 EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); 522 EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); 523 EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), 524 NewIP.getBlock()); 525 EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); 526 CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front()); 527 EXPECT_NE(GTID1, nullptr); 528 EXPECT_EQ(GTID1->arg_size(), 1U); 529 EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 530 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory()); 531 EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory()); 532 CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode()); 533 EXPECT_NE(Barrier, nullptr); 534 EXPECT_EQ(Barrier->arg_size(), 2U); 535 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 536 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 537 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 538 EXPECT_EQ(Barrier->getNumUses(), 0U); 539 
EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 540 1U); 541 EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB); 542 543 EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID); 544 545 OMPBuilder.popFinalizationCB(); 546 547 Builder.CreateUnreachable(); 548 EXPECT_FALSE(verifyModule(*M, &errs())); 549 } 550 551 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { 552 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 553 OpenMPIRBuilder OMPBuilder(*M); 554 OMPBuilder.initialize(); 555 556 BasicBlock *CBB = BasicBlock::Create(Ctx, "", F); 557 new UnreachableInst(Ctx, CBB); 558 auto FiniCB = [&](InsertPointTy IP) { 559 ASSERT_NE(IP.getBlock(), nullptr); 560 ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); 561 BranchInst::Create(CBB, IP.getBlock()); 562 }; 563 OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); 564 565 IRBuilder<> Builder(BB); 566 567 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); 568 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP, 569 OMPBuilder.createBarrier(Loc, OMPD_for)); 570 Builder.restoreIP(NewIP); 571 EXPECT_FALSE(M->global_empty()); 572 EXPECT_EQ(M->size(), 3U); 573 EXPECT_EQ(F->size(), 4U); 574 EXPECT_EQ(BB->size(), 4U); 575 576 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 577 EXPECT_NE(GTID, nullptr); 578 EXPECT_EQ(GTID->arg_size(), 1U); 579 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 580 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 581 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 582 583 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 584 EXPECT_NE(Barrier, nullptr); 585 EXPECT_EQ(Barrier->arg_size(), 2U); 586 EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier"); 587 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory()); 588 EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory()); 589 
EXPECT_EQ(Barrier->getNumUses(), 1U); 590 Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); 591 EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); 592 EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); 593 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); 594 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 595 1U); 596 EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), 597 CBB); 598 599 EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID); 600 601 OMPBuilder.popFinalizationCB(); 602 603 Builder.CreateUnreachable(); 604 EXPECT_FALSE(verifyModule(*M, &errs())); 605 } 606 607 TEST_F(OpenMPIRBuilderTest, DbgLoc) { 608 OpenMPIRBuilder OMPBuilder(*M); 609 OMPBuilder.initialize(); 610 F->setName("func"); 611 612 IRBuilder<> Builder(BB); 613 614 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 615 ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded()); 616 CallInst *GTID = dyn_cast<CallInst>(&BB->front()); 617 CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode()); 618 EXPECT_EQ(GTID->getDebugLoc(), DL); 619 EXPECT_EQ(Barrier->getDebugLoc(), DL); 620 EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0))); 621 if (!isa<GlobalVariable>(Barrier->getOperand(0))) 622 return; 623 GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0)); 624 EXPECT_TRUE(Ident->hasInitializer()); 625 if (!Ident->hasInitializer()) 626 return; 627 Constant *Initializer = Ident->getInitializer(); 628 EXPECT_TRUE( 629 isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts())); 630 GlobalVariable *SrcStrGlob = 631 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 632 if (!SrcStrGlob) 633 return; 634 EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer())); 635 ConstantDataArray *SrcSrc = 636 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 637 if (!SrcSrc) 638 return; 639 EXPECT_EQ(SrcSrc->getAsCString(), 
";/src/test.dbg;foo;3;7;;"); 640 } 641 642 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { 643 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 644 std::string oldDLStr = M->getDataLayoutStr(); 645 M->setDataLayout( 646 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 647 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 648 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 649 OpenMPIRBuilder OMPBuilder(*M); 650 OMPBuilder.Config.IsTargetDevice = true; 651 OMPBuilder.initialize(); 652 F->setName("func"); 653 IRBuilder<> Builder(BB); 654 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 655 Builder.CreateBr(EnterBB); 656 Builder.SetInsertPoint(EnterBB); 657 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 658 659 AllocaInst *PrivAI = nullptr; 660 661 unsigned NumBodiesGenerated = 0; 662 unsigned NumPrivatizedVars = 0; 663 unsigned NumFinalizationPoints = 0; 664 665 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 666 ++NumBodiesGenerated; 667 668 Builder.restoreIP(AllocaIP); 669 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 670 Builder.CreateStore(F->arg_begin(), PrivAI); 671 672 Builder.restoreIP(CodeGenIP); 673 Value *PrivLoad = 674 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 675 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 676 Instruction *ThenTerm, *ElseTerm; 677 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 678 &ThenTerm, &ElseTerm); 679 return Error::success(); 680 }; 681 682 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 683 Value &Orig, Value &Inner, 684 Value *&ReplacementValue) -> InsertPointTy { 685 ++NumPrivatizedVars; 686 687 if (!isa<AllocaInst>(Orig)) { 688 EXPECT_EQ(&Orig, F->arg_begin()); 689 ReplacementValue = &Inner; 690 return CodeGenIP; 691 } 692 693 // Since the original value is an allocation, it has a pointer 
type and 694 // therefore no additional wrapping should happen. 695 EXPECT_EQ(&Orig, &Inner); 696 697 // Trivial copy (=firstprivate). 698 Builder.restoreIP(AllocaIP); 699 Type *VTy = ReplacementValue->getType(); 700 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 701 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 702 Builder.restoreIP(CodeGenIP); 703 Builder.CreateStore(V, ReplacementValue); 704 return CodeGenIP; 705 }; 706 707 auto FiniCB = [&](InsertPointTy CodeGenIP) { 708 ++NumFinalizationPoints; 709 return Error::success(); 710 }; 711 712 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 713 F->getEntryBlock().getFirstInsertionPt()); 714 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 715 OMPBuilder.createParallel( 716 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, 717 nullptr, OMP_PROC_BIND_default, false)); 718 719 EXPECT_EQ(NumBodiesGenerated, 1U); 720 EXPECT_EQ(NumPrivatizedVars, 1U); 721 EXPECT_EQ(NumFinalizationPoints, 1U); 722 723 Builder.restoreIP(AfterIP); 724 Builder.CreateRetVoid(); 725 726 OMPBuilder.finalize(); 727 Function *OutlinedFn = PrivAI->getFunction(); 728 EXPECT_FALSE(verifyModule(*M, &errs())); 729 EXPECT_NE(OutlinedFn, F); 730 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 731 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 732 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 733 734 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 735 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 736 // Make sure that arguments are pointers in 0 address address space 737 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), 738 PointerType::get(M->getContext(), 0)); 739 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), 740 PointerType::get(M->getContext(), 0)); 741 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 742 PointerType::get(M->getContext(), 0)); 743 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 744 
EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 745 User *Usr = OutlinedFn->user_back(); 746 ASSERT_TRUE(isa<CallInst>(Usr)); 747 CallInst *Parallel51CI = dyn_cast<CallInst>(Usr); 748 ASSERT_NE(Parallel51CI, nullptr); 749 750 EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51"); 751 EXPECT_EQ(Parallel51CI->arg_size(), 9U); 752 EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn); 753 EXPECT_TRUE( 754 isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts())); 755 EXPECT_EQ(Parallel51CI, Usr); 756 M->setDataLayout(oldDLStr); 757 } 758 759 TEST_F(OpenMPIRBuilderTest, ParallelSimple) { 760 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 761 OpenMPIRBuilder OMPBuilder(*M); 762 OMPBuilder.Config.IsTargetDevice = false; 763 OMPBuilder.initialize(); 764 F->setName("func"); 765 IRBuilder<> Builder(BB); 766 767 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 768 Builder.CreateBr(EnterBB); 769 Builder.SetInsertPoint(EnterBB); 770 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 771 772 AllocaInst *PrivAI = nullptr; 773 774 unsigned NumBodiesGenerated = 0; 775 unsigned NumPrivatizedVars = 0; 776 unsigned NumFinalizationPoints = 0; 777 778 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 779 ++NumBodiesGenerated; 780 781 Builder.restoreIP(AllocaIP); 782 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 783 Builder.CreateStore(F->arg_begin(), PrivAI); 784 785 Builder.restoreIP(CodeGenIP); 786 Value *PrivLoad = 787 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 788 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 789 Instruction *ThenTerm, *ElseTerm; 790 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 791 &ThenTerm, &ElseTerm); 792 return Error::success(); 793 }; 794 795 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 796 Value &Orig, Value &Inner, 797 Value *&ReplacementValue) -> InsertPointTy 
{ 798 ++NumPrivatizedVars; 799 800 if (!isa<AllocaInst>(Orig)) { 801 EXPECT_EQ(&Orig, F->arg_begin()); 802 ReplacementValue = &Inner; 803 return CodeGenIP; 804 } 805 806 // Since the original value is an allocation, it has a pointer type and 807 // therefore no additional wrapping should happen. 808 EXPECT_EQ(&Orig, &Inner); 809 810 // Trivial copy (=firstprivate). 811 Builder.restoreIP(AllocaIP); 812 Type *VTy = ReplacementValue->getType(); 813 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 814 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 815 Builder.restoreIP(CodeGenIP); 816 Builder.CreateStore(V, ReplacementValue); 817 return CodeGenIP; 818 }; 819 820 auto FiniCB = [&](InsertPointTy CodeGenIP) { 821 ++NumFinalizationPoints; 822 return Error::success(); 823 }; 824 825 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 826 F->getEntryBlock().getFirstInsertionPt()); 827 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 828 OMPBuilder.createParallel( 829 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, 830 nullptr, OMP_PROC_BIND_default, false)); 831 EXPECT_EQ(NumBodiesGenerated, 1U); 832 EXPECT_EQ(NumPrivatizedVars, 1U); 833 EXPECT_EQ(NumFinalizationPoints, 1U); 834 835 Builder.restoreIP(AfterIP); 836 Builder.CreateRetVoid(); 837 838 OMPBuilder.finalize(); 839 840 EXPECT_NE(PrivAI, nullptr); 841 Function *OutlinedFn = PrivAI->getFunction(); 842 EXPECT_NE(F, OutlinedFn); 843 EXPECT_FALSE(verifyModule(*M, &errs())); 844 EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind)); 845 EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias)); 846 EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias)); 847 848 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 849 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 850 851 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 852 EXPECT_EQ(OutlinedFn->getNumUses(), 1U); 853 User *Usr = OutlinedFn->user_back(); 854 
ASSERT_TRUE(isa<CallInst>(Usr)); 855 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 856 ASSERT_NE(ForkCI, nullptr); 857 858 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 859 EXPECT_EQ(ForkCI->arg_size(), 4U); 860 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 861 EXPECT_EQ(ForkCI->getArgOperand(1), 862 ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); 863 EXPECT_EQ(ForkCI, Usr); 864 Value *StoredValue = 865 findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0); 866 EXPECT_EQ(StoredValue, F->arg_begin()); 867 } 868 869 TEST_F(OpenMPIRBuilderTest, ParallelNested) { 870 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 871 OpenMPIRBuilder OMPBuilder(*M); 872 OMPBuilder.Config.IsTargetDevice = false; 873 OMPBuilder.initialize(); 874 F->setName("func"); 875 IRBuilder<> Builder(BB); 876 877 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 878 Builder.CreateBr(EnterBB); 879 Builder.SetInsertPoint(EnterBB); 880 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 881 882 unsigned NumInnerBodiesGenerated = 0; 883 unsigned NumOuterBodiesGenerated = 0; 884 unsigned NumFinalizationPoints = 0; 885 886 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 887 ++NumInnerBodiesGenerated; 888 return Error::success(); 889 }; 890 891 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 892 Value &Orig, Value &Inner, 893 Value *&ReplacementValue) -> InsertPointTy { 894 // Trivial copy (=firstprivate). 
895 Builder.restoreIP(AllocaIP); 896 Type *VTy = ReplacementValue->getType(); 897 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 898 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 899 Builder.restoreIP(CodeGenIP); 900 Builder.CreateStore(V, ReplacementValue); 901 return CodeGenIP; 902 }; 903 904 auto FiniCB = [&](InsertPointTy CodeGenIP) { 905 ++NumFinalizationPoints; 906 return Error::success(); 907 }; 908 909 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 910 ++NumOuterBodiesGenerated; 911 Builder.restoreIP(CodeGenIP); 912 BasicBlock *CGBB = CodeGenIP.getBlock(); 913 BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 914 CGBB->getTerminator()->eraseFromParent(); 915 916 ASSERT_EXPECTED_INIT( 917 OpenMPIRBuilder::InsertPointTy, AfterIP, 918 OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, 919 InnerBodyGenCB, PrivCB, FiniCB, nullptr, 920 nullptr, OMP_PROC_BIND_default, false)); 921 922 Builder.restoreIP(AfterIP); 923 Builder.CreateBr(NewBB); 924 }; 925 926 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 927 F->getEntryBlock().getFirstInsertionPt()); 928 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 929 OMPBuilder.createParallel( 930 Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), 931 PrivCB, FiniCB, nullptr, nullptr, 932 OMP_PROC_BIND_default, false)); 933 934 EXPECT_EQ(NumInnerBodiesGenerated, 1U); 935 EXPECT_EQ(NumOuterBodiesGenerated, 1U); 936 EXPECT_EQ(NumFinalizationPoints, 2U); 937 938 Builder.restoreIP(AfterIP); 939 Builder.CreateRetVoid(); 940 941 OMPBuilder.finalize(); 942 943 EXPECT_EQ(M->size(), 5U); 944 for (Function &OutlinedFn : *M) { 945 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 946 continue; 947 EXPECT_FALSE(verifyModule(*M, &errs())); 948 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 949 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 950 
EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 951 952 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 953 EXPECT_EQ(OutlinedFn.arg_size(), 2U); 954 955 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 956 User *Usr = OutlinedFn.user_back(); 957 ASSERT_TRUE(isa<CallInst>(Usr)); 958 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 959 ASSERT_NE(ForkCI, nullptr); 960 961 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 962 EXPECT_EQ(ForkCI->arg_size(), 3U); 963 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 964 EXPECT_EQ(ForkCI->getArgOperand(1), 965 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 966 EXPECT_EQ(ForkCI, Usr); 967 } 968 } 969 970 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { 971 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 972 OpenMPIRBuilder OMPBuilder(*M); 973 OMPBuilder.Config.IsTargetDevice = false; 974 OMPBuilder.initialize(); 975 F->setName("func"); 976 IRBuilder<> Builder(BB); 977 978 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 979 Builder.CreateBr(EnterBB); 980 Builder.SetInsertPoint(EnterBB); 981 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 982 983 unsigned NumInnerBodiesGenerated = 0; 984 unsigned NumOuterBodiesGenerated = 0; 985 unsigned NumFinalizationPoints = 0; 986 987 auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 988 ++NumInnerBodiesGenerated; 989 return Error::success(); 990 }; 991 992 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 993 Value &Orig, Value &Inner, 994 Value *&ReplacementValue) -> InsertPointTy { 995 // Trivial copy (=firstprivate). 
996 Builder.restoreIP(AllocaIP); 997 Type *VTy = ReplacementValue->getType(); 998 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 999 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 1000 Builder.restoreIP(CodeGenIP); 1001 Builder.CreateStore(V, ReplacementValue); 1002 return CodeGenIP; 1003 }; 1004 1005 auto FiniCB = [&](InsertPointTy CodeGenIP) { 1006 ++NumFinalizationPoints; 1007 return Error::success(); 1008 }; 1009 1010 auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1011 ++NumOuterBodiesGenerated; 1012 Builder.restoreIP(CodeGenIP); 1013 BasicBlock *CGBB = CodeGenIP.getBlock(); 1014 BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint()); 1015 BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt()); 1016 CGBB->getTerminator()->eraseFromParent(); 1017 ; 1018 NewBB1->getTerminator()->eraseFromParent(); 1019 ; 1020 1021 ASSERT_EXPECTED_INIT( 1022 OpenMPIRBuilder::InsertPointTy, AfterIP1, 1023 OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, 1024 InnerBodyGenCB, PrivCB, FiniCB, nullptr, 1025 nullptr, OMP_PROC_BIND_default, false)); 1026 1027 Builder.restoreIP(AfterIP1); 1028 Builder.CreateBr(NewBB1); 1029 1030 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2, 1031 OMPBuilder.createParallel( 1032 InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, 1033 InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, 1034 OMP_PROC_BIND_default, false)); 1035 1036 Builder.restoreIP(AfterIP2); 1037 Builder.CreateBr(NewBB2); 1038 }; 1039 1040 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1041 F->getEntryBlock().getFirstInsertionPt()); 1042 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 1043 OMPBuilder.createParallel( 1044 Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), 1045 PrivCB, FiniCB, nullptr, nullptr, 1046 OMP_PROC_BIND_default, false)); 1047 1048 EXPECT_EQ(NumInnerBodiesGenerated, 2U); 1049 
EXPECT_EQ(NumOuterBodiesGenerated, 1U); 1050 EXPECT_EQ(NumFinalizationPoints, 3U); 1051 1052 Builder.restoreIP(AfterIP); 1053 Builder.CreateRetVoid(); 1054 1055 OMPBuilder.finalize(); 1056 1057 EXPECT_EQ(M->size(), 6U); 1058 for (Function &OutlinedFn : *M) { 1059 if (F == &OutlinedFn || OutlinedFn.isDeclaration()) 1060 continue; 1061 EXPECT_FALSE(verifyModule(*M, &errs())); 1062 EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind)); 1063 EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias)); 1064 EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias)); 1065 1066 EXPECT_TRUE(OutlinedFn.hasInternalLinkage()); 1067 EXPECT_EQ(OutlinedFn.arg_size(), 2U); 1068 1069 unsigned NumAllocas = 0; 1070 for (Instruction &I : instructions(OutlinedFn)) 1071 NumAllocas += isa<AllocaInst>(I); 1072 EXPECT_EQ(NumAllocas, 1U); 1073 1074 EXPECT_EQ(OutlinedFn.getNumUses(), 1U); 1075 User *Usr = OutlinedFn.user_back(); 1076 ASSERT_TRUE(isa<CallInst>(Usr)); 1077 CallInst *ForkCI = dyn_cast<CallInst>(Usr); 1078 ASSERT_NE(ForkCI, nullptr); 1079 1080 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call"); 1081 EXPECT_EQ(ForkCI->arg_size(), 3U); 1082 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1083 EXPECT_EQ(ForkCI->getArgOperand(1), 1084 ConstantInt::get(Type::getInt32Ty(Ctx), 0U)); 1085 EXPECT_EQ(ForkCI, Usr); 1086 } 1087 } 1088 1089 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { 1090 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1091 OpenMPIRBuilder OMPBuilder(*M); 1092 OMPBuilder.Config.IsTargetDevice = false; 1093 OMPBuilder.initialize(); 1094 F->setName("func"); 1095 IRBuilder<> Builder(BB); 1096 1097 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1098 Builder.CreateBr(EnterBB); 1099 Builder.SetInsertPoint(EnterBB); 1100 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1101 1102 AllocaInst *PrivAI = nullptr; 1103 1104 unsigned NumBodiesGenerated = 0; 1105 unsigned NumPrivatizedVars = 0; 
1106 unsigned NumFinalizationPoints = 0; 1107 1108 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1109 ++NumBodiesGenerated; 1110 1111 Builder.restoreIP(AllocaIP); 1112 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 1113 Builder.CreateStore(F->arg_begin(), PrivAI); 1114 1115 Builder.restoreIP(CodeGenIP); 1116 Value *PrivLoad = 1117 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 1118 Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 1119 Instruction *ThenTerm, *ElseTerm; 1120 SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm, 1121 &ElseTerm); 1122 return Error::success(); 1123 }; 1124 1125 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1126 Value &Orig, Value &Inner, 1127 Value *&ReplacementValue) -> InsertPointTy { 1128 ++NumPrivatizedVars; 1129 1130 if (!isa<AllocaInst>(Orig)) { 1131 EXPECT_EQ(&Orig, F->arg_begin()); 1132 ReplacementValue = &Inner; 1133 return CodeGenIP; 1134 } 1135 1136 // Since the original value is an allocation, it has a pointer type and 1137 // therefore no additional wrapping should happen. 1138 EXPECT_EQ(&Orig, &Inner); 1139 1140 // Trivial copy (=firstprivate). 1141 Builder.restoreIP(AllocaIP); 1142 Type *VTy = ReplacementValue->getType(); 1143 Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); 1144 ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); 1145 Builder.restoreIP(CodeGenIP); 1146 Builder.CreateStore(V, ReplacementValue); 1147 return CodeGenIP; 1148 }; 1149 1150 auto FiniCB = [&](InsertPointTy CodeGenIP) { 1151 ++NumFinalizationPoints; 1152 // No destructors. 
1153 return Error::success(); 1154 }; 1155 1156 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1157 F->getEntryBlock().getFirstInsertionPt()); 1158 ASSERT_EXPECTED_INIT( 1159 OpenMPIRBuilder::InsertPointTy, AfterIP, 1160 OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, 1161 Builder.CreateIsNotNull(F->arg_begin()), 1162 nullptr, OMP_PROC_BIND_default, false)); 1163 1164 EXPECT_EQ(NumBodiesGenerated, 1U); 1165 EXPECT_EQ(NumPrivatizedVars, 1U); 1166 EXPECT_EQ(NumFinalizationPoints, 1U); 1167 1168 Builder.restoreIP(AfterIP); 1169 Builder.CreateRetVoid(); 1170 OMPBuilder.finalize(); 1171 1172 EXPECT_NE(PrivAI, nullptr); 1173 Function *OutlinedFn = PrivAI->getFunction(); 1174 EXPECT_NE(F, OutlinedFn); 1175 EXPECT_FALSE(verifyModule(*M, &errs())); 1176 1177 EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); 1178 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 1179 1180 EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent()); 1181 ASSERT_EQ(OutlinedFn->getNumUses(), 1U); 1182 1183 CallInst *ForkCI = nullptr; 1184 for (User *Usr : OutlinedFn->users()) { 1185 ASSERT_TRUE(isa<CallInst>(Usr)); 1186 ForkCI = cast<CallInst>(Usr); 1187 } 1188 1189 EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if"); 1190 EXPECT_EQ(ForkCI->arg_size(), 5U); 1191 EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0))); 1192 EXPECT_EQ(ForkCI->getArgOperand(1), 1193 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 1194 EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx)); 1195 } 1196 1197 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { 1198 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1199 OpenMPIRBuilder OMPBuilder(*M); 1200 OMPBuilder.Config.IsTargetDevice = false; 1201 OMPBuilder.initialize(); 1202 F->setName("func"); 1203 IRBuilder<> Builder(BB); 1204 1205 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 1206 Builder.CreateBr(EnterBB); 1207 Builder.SetInsertPoint(EnterBB); 1208 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1209 1210 unsigned NumBodiesGenerated = 0; 1211 unsigned NumPrivatizedVars = 0; 1212 unsigned NumFinalizationPoints = 0; 1213 1214 CallInst *CheckedBarrier = nullptr; 1215 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1216 ++NumBodiesGenerated; 1217 1218 Builder.restoreIP(CodeGenIP); 1219 1220 // Create three barriers, two cancel barriers but only one checked. 1221 Function *CBFn, *BFn; 1222 1223 ASSERT_EXPECTED_INIT( 1224 OpenMPIRBuilder::InsertPointTy, BarrierIP1, 1225 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); 1226 Builder.restoreIP(BarrierIP1); 1227 1228 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1229 BFn = M->getFunction("__kmpc_barrier"); 1230 ASSERT_NE(CBFn, nullptr); 1231 ASSERT_EQ(BFn, nullptr); 1232 ASSERT_EQ(CBFn->getNumUses(), 1U); 1233 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1234 ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); 1235 CheckedBarrier = cast<CallInst>(CBFn->user_back()); 1236 1237 ASSERT_EXPECTED_INIT( 1238 OpenMPIRBuilder::InsertPointTy, BarrierIP2, 1239 OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); 1240 Builder.restoreIP(BarrierIP2); 1241 CBFn = M->getFunction("__kmpc_cancel_barrier"); 1242 BFn = M->getFunction("__kmpc_barrier"); 1243 ASSERT_NE(CBFn, nullptr); 1244 ASSERT_NE(BFn, nullptr); 1245 ASSERT_EQ(CBFn->getNumUses(), 1U); 1246 ASSERT_EQ(BFn->getNumUses(), 1U); 1247 ASSERT_TRUE(isa<CallInst>(BFn->user_back())); 1248 ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); 1249 1250 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, BarrierIP3, 1251 OMPBuilder.createBarrier(Builder.saveIP(), 1252 OMPD_parallel, false, false)); 1253 Builder.restoreIP(BarrierIP3); 1254 ASSERT_EQ(CBFn->getNumUses(), 2U); 1255 ASSERT_EQ(BFn->getNumUses(), 1U); 1256 ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); 1257 ASSERT_TRUE(isa<CallInst>(CBFn->user_back())); 1258 ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); 1259 
}; 1260 1261 auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, 1262 Value *&) -> InsertPointTy { 1263 ++NumPrivatizedVars; 1264 llvm_unreachable("No privatization callback call expected!"); 1265 }; 1266 1267 FunctionType *FakeDestructorTy = 1268 FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, 1269 /*isVarArg=*/false); 1270 auto *FakeDestructor = Function::Create( 1271 FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get()); 1272 1273 auto FiniCB = [&](InsertPointTy IP) { 1274 ++NumFinalizationPoints; 1275 Builder.restoreIP(IP); 1276 Builder.CreateCall(FakeDestructor, 1277 {Builder.getInt32(NumFinalizationPoints)}); 1278 return Error::success(); 1279 }; 1280 1281 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1282 F->getEntryBlock().getFirstInsertionPt()); 1283 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 1284 OMPBuilder.createParallel( 1285 Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB, 1286 FiniCB, Builder.CreateIsNotNull(F->arg_begin()), 1287 nullptr, OMP_PROC_BIND_default, true)); 1288 1289 EXPECT_EQ(NumBodiesGenerated, 1U); 1290 EXPECT_EQ(NumPrivatizedVars, 0U); 1291 EXPECT_EQ(NumFinalizationPoints, 2U); 1292 EXPECT_EQ(FakeDestructor->getNumUses(), 2U); 1293 1294 Builder.restoreIP(AfterIP); 1295 Builder.CreateRetVoid(); 1296 OMPBuilder.finalize(); 1297 1298 EXPECT_FALSE(verifyModule(*M, &errs())); 1299 1300 BasicBlock *ExitBB = nullptr; 1301 for (const User *Usr : FakeDestructor->users()) { 1302 const CallInst *CI = dyn_cast<CallInst>(Usr); 1303 ASSERT_EQ(CI->getCalledFunction(), FakeDestructor); 1304 ASSERT_TRUE(isa<BranchInst>(CI->getNextNode())); 1305 ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U); 1306 if (ExitBB) 1307 ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB); 1308 else 1309 ExitBB = CI->getNextNode()->getSuccessor(0); 1310 ASSERT_EQ(ExitBB->size(), 1U); 1311 if (!isa<ReturnInst>(ExitBB->front())) { 1312 ASSERT_TRUE(isa<BranchInst>(ExitBB->front())); 1313 
ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U); 1314 ASSERT_TRUE(isa<ReturnInst>( 1315 cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front())); 1316 } 1317 } 1318 } 1319 1320 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { 1321 OpenMPIRBuilder OMPBuilder(*M); 1322 OMPBuilder.Config.IsTargetDevice = false; 1323 OMPBuilder.initialize(); 1324 F->setName("func"); 1325 IRBuilder<> Builder(BB); 1326 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1327 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1328 1329 Type *I32Ty = Type::getInt32Ty(M->getContext()); 1330 Type *PtrTy = PointerType::get(M->getContext(), 0); 1331 Type *StructTy = StructType::get(I32Ty, PtrTy); 1332 Type *VoidTy = Type::getVoidTy(M->getContext()); 1333 FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty); 1334 FunctionCallee TakeI32Func = 1335 M->getOrInsertFunction("take_i32", VoidTy, I32Ty); 1336 FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy); 1337 FunctionCallee TakeI32PtrFunc = 1338 M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy); 1339 FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy); 1340 FunctionCallee TakeStructFunc = 1341 M->getOrInsertFunction("take_struct", VoidTy, StructTy); 1342 FunctionCallee RetStructPtrFunc = 1343 M->getOrInsertFunction("ret_structptr", PtrTy); 1344 FunctionCallee TakeStructPtrFunc = 1345 M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy); 1346 Value *I32Val = Builder.CreateCall(RetI32Func); 1347 Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc); 1348 Value *StructVal = Builder.CreateCall(RetStructFunc); 1349 Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); 1350 1351 Instruction *Internal; 1352 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1353 IRBuilder<>::InsertPointGuard Guard(Builder); 1354 Builder.restoreIP(CodeGenIP); 1355 Internal = Builder.CreateCall(TakeI32Func, I32Val); 1356 
Builder.CreateCall(TakeI32PtrFunc, I32PtrVal); 1357 Builder.CreateCall(TakeStructFunc, StructVal); 1358 Builder.CreateCall(TakeStructPtrFunc, StructPtrVal); 1359 return Error::success(); 1360 }; 1361 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, 1362 Value &Inner, Value *&ReplacementValue) { 1363 ReplacementValue = &Inner; 1364 return CodeGenIP; 1365 }; 1366 auto FiniCB = [](InsertPointTy) { return Error::success(); }; 1367 1368 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 1369 F->getEntryBlock().getFirstInsertionPt()); 1370 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 1371 OMPBuilder.createParallel( 1372 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, 1373 nullptr, OMP_PROC_BIND_default, false)); 1374 Builder.restoreIP(AfterIP); 1375 Builder.CreateRetVoid(); 1376 1377 OMPBuilder.finalize(); 1378 1379 EXPECT_FALSE(verifyModule(*M, &errs())); 1380 Function *OutlinedFn = Internal->getFunction(); 1381 1382 Type *Arg2Type = OutlinedFn->getArg(2)->getType(); 1383 EXPECT_TRUE(Arg2Type->isPointerTy()); 1384 } 1385 1386 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { 1387 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1388 OpenMPIRBuilder OMPBuilder(*M); 1389 OMPBuilder.initialize(); 1390 IRBuilder<> Builder(BB); 1391 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1392 Value *TripCount = F->getArg(0); 1393 1394 unsigned NumBodiesGenerated = 0; 1395 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { 1396 NumBodiesGenerated += 1; 1397 1398 Builder.restoreIP(CodeGenIP); 1399 1400 Value *Cmp = Builder.CreateICmpEQ(LC, TripCount); 1401 Instruction *ThenTerm, *ElseTerm; 1402 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 1403 &ThenTerm, &ElseTerm); 1404 return Error::success(); 1405 }; 1406 1407 ASSERT_EXPECTED_INIT( 1408 CanonicalLoopInfo *, Loop, 1409 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount)); 1410 1411 
Builder.restoreIP(Loop->getAfterIP()); 1412 ReturnInst *RetInst = Builder.CreateRetVoid(); 1413 OMPBuilder.finalize(); 1414 1415 Loop->assertOK(); 1416 EXPECT_FALSE(verifyModule(*M, &errs())); 1417 1418 EXPECT_EQ(NumBodiesGenerated, 1U); 1419 1420 // Verify control flow structure (in addition to Loop->assertOK()). 1421 EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock()); 1422 EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock()); 1423 1424 Instruction *IndVar = Loop->getIndVar(); 1425 EXPECT_TRUE(isa<PHINode>(IndVar)); 1426 EXPECT_EQ(IndVar->getType(), TripCount->getType()); 1427 EXPECT_EQ(IndVar->getParent(), Loop->getHeader()); 1428 1429 EXPECT_EQ(Loop->getTripCount(), TripCount); 1430 1431 BasicBlock *Body = Loop->getBody(); 1432 Instruction *CmpInst = &Body->front(); 1433 EXPECT_TRUE(isa<ICmpInst>(CmpInst)); 1434 EXPECT_EQ(CmpInst->getOperand(0), IndVar); 1435 1436 BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor(); 1437 EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) { 1438 return SuccBB->getSingleSuccessor() == LatchPred; 1439 })); 1440 1441 EXPECT_EQ(&Loop->getAfter()->front(), RetInst); 1442 } 1443 1444 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) { 1445 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1446 OpenMPIRBuilder OMPBuilder(*M); 1447 OMPBuilder.initialize(); 1448 IRBuilder<> Builder(BB); 1449 1450 // Check the trip count is computed correctly. We generate the canonical loop 1451 // but rely on the IRBuilder's constant folder to compute the final result 1452 // since all inputs are constant. To verify overflow situations, limit the 1453 // trip count / loop counter widths to 16 bits. 
1454 auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step, 1455 bool IsSigned, bool InclusiveStop) -> int64_t { 1456 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1457 Type *LCTy = Type::getInt16Ty(Ctx); 1458 Value *StartVal = ConstantInt::get(LCTy, Start); 1459 Value *StopVal = ConstantInt::get(LCTy, Stop); 1460 Value *StepVal = ConstantInt::get(LCTy, Step); 1461 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { 1462 return Error::success(); 1463 }; 1464 ASSERT_EXPECTED_INIT_RETURN( 1465 CanonicalLoopInfo *, Loop, 1466 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, 1467 StepVal, IsSigned, InclusiveStop), 1468 -1); 1469 Loop->assertOK(); 1470 Builder.restoreIP(Loop->getAfterIP()); 1471 Value *TripCount = Loop->getTripCount(); 1472 return cast<ConstantInt>(TripCount)->getValue().getZExtValue(); 1473 }; 1474 1475 EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0); 1476 EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1); 1477 EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42); 1478 EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21); 1479 EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21); 1480 EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1); 1481 EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2); 1482 EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3); 1483 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF); 1484 EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0); 1485 EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1); 1486 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100); 1487 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1); 1488 1489 EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2); 1490 EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2); 1491 EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1); 1492 EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1); 1493 EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 
0xFFFF); 1494 EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000); 1495 1496 EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0); 1497 EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0); 1498 EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3); 1499 EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4); 1500 EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1); 1501 EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1); 1502 EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2); 1503 1504 EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000); 1505 EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001); 1506 EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF); 1507 EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF); 1508 EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2); 1509 EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF); 1510 EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000); 1511 EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800); 1512 EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF); 1513 EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1); 1514 EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2); 1515 1516 // Finalize the function and verify it. 
1517 Builder.CreateRetVoid(); 1518 OMPBuilder.finalize(); 1519 EXPECT_FALSE(verifyModule(*M, &errs())); 1520 } 1521 1522 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) { 1523 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1524 OpenMPIRBuilder OMPBuilder(*M); 1525 OMPBuilder.initialize(); 1526 F->setName("func"); 1527 1528 IRBuilder<> Builder(BB); 1529 1530 Type *LCTy = F->getArg(0)->getType(); 1531 Constant *One = ConstantInt::get(LCTy, 1); 1532 Constant *Two = ConstantInt::get(LCTy, 2); 1533 Value *OuterTripCount = 1534 Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer"); 1535 Value *InnerTripCount = 1536 Builder.CreateAdd(F->getArg(0), One, "tripcount.inner"); 1537 1538 // Fix an insertion point for ComputeIP. 1539 BasicBlock *LoopNextEnter = 1540 BasicBlock::Create(M->getContext(), "loopnest.enter", F, 1541 Builder.GetInsertBlock()->getNextNode()); 1542 BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter); 1543 InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()}; 1544 1545 Builder.SetInsertPoint(LoopNextEnter); 1546 OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL); 1547 1548 CanonicalLoopInfo *InnerLoop = nullptr; 1549 CallInst *InbetweenLead = nullptr; 1550 CallInst *InbetweenTrail = nullptr; 1551 CallInst *Call = nullptr; 1552 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) { 1553 Builder.restoreIP(OuterCodeGenIP); 1554 InbetweenLead = 1555 createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC}); 1556 1557 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1558 Value *InnerLC) { 1559 Builder.restoreIP(InnerCodeGenIP); 1560 Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC}); 1561 return Error::success(); 1562 }; 1563 ASSERT_EXPECTED_INIT( 1564 CanonicalLoopInfo *, InnerLoopResult, 1565 OMPBuilder.createCanonicalLoop(Builder.saveIP(), InnerLoopBodyGenCB, 1566 InnerTripCount, "inner")); 1567 InnerLoop = InnerLoopResult; 1568 1569 
Builder.restoreIP(InnerLoop->getAfterIP()); 1570 InbetweenTrail = 1571 createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC}); 1572 }; 1573 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop, 1574 OMPBuilder.createCanonicalLoop( 1575 OuterLoc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), 1576 OuterTripCount, "outer")); 1577 1578 // Finish the function. 1579 Builder.restoreIP(OuterLoop->getAfterIP()); 1580 Builder.CreateRetVoid(); 1581 1582 CanonicalLoopInfo *Collapsed = 1583 OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP); 1584 1585 OMPBuilder.finalize(); 1586 EXPECT_FALSE(verifyModule(*M, &errs())); 1587 1588 // Verify control flow and BB order. 1589 BasicBlock *RefOrder[] = { 1590 Collapsed->getPreheader(), Collapsed->getHeader(), 1591 Collapsed->getCond(), Collapsed->getBody(), 1592 InbetweenLead->getParent(), Call->getParent(), 1593 InbetweenTrail->getParent(), Collapsed->getLatch(), 1594 Collapsed->getExit(), Collapsed->getAfter(), 1595 }; 1596 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1597 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1598 1599 // Verify the total trip count. 1600 auto *TripCount = cast<MulOperator>(Collapsed->getTripCount()); 1601 EXPECT_EQ(TripCount->getOperand(0), OuterTripCount); 1602 EXPECT_EQ(TripCount->getOperand(1), InnerTripCount); 1603 1604 // Verify the changed indvar. 
1605 auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1)); 1606 EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv); 1607 EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody()); 1608 EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount); 1609 EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar()); 1610 1611 auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2)); 1612 EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem); 1613 EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody()); 1614 EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar()); 1615 EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount); 1616 1617 EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV); 1618 EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV); 1619 } 1620 1621 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) { 1622 OpenMPIRBuilder OMPBuilder(*M); 1623 CallInst *Call; 1624 BasicBlock *BodyCode; 1625 CanonicalLoopInfo *Loop = 1626 buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode); 1627 ASSERT_NE(Loop, nullptr); 1628 1629 Instruction *OrigIndVar = Loop->getIndVar(); 1630 EXPECT_EQ(Call->getOperand(1), OrigIndVar); 1631 1632 // Tile the loop. 
1633 Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7)); 1634 std::vector<CanonicalLoopInfo *> GenLoops = 1635 OMPBuilder.tileLoops(DL, {Loop}, {TileSize}); 1636 1637 OMPBuilder.finalize(); 1638 EXPECT_FALSE(verifyModule(*M, &errs())); 1639 1640 EXPECT_EQ(GenLoops.size(), 2u); 1641 CanonicalLoopInfo *Floor = GenLoops[0]; 1642 CanonicalLoopInfo *Tile = GenLoops[1]; 1643 1644 BasicBlock *RefOrder[] = { 1645 Floor->getPreheader(), Floor->getHeader(), Floor->getCond(), 1646 Floor->getBody(), Tile->getPreheader(), Tile->getHeader(), 1647 Tile->getCond(), Tile->getBody(), BodyCode, 1648 Tile->getLatch(), Tile->getExit(), Tile->getAfter(), 1649 Floor->getLatch(), Floor->getExit(), Floor->getAfter(), 1650 }; 1651 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1652 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1653 1654 // Check the induction variable. 1655 EXPECT_EQ(Call->getParent(), BodyCode); 1656 auto *Shift = cast<AddOperator>(Call->getOperand(1)); 1657 EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody()); 1658 EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar()); 1659 auto *Scale = cast<MulOperator>(Shift->getOperand(0)); 1660 EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody()); 1661 EXPECT_EQ(Scale->getOperand(0), TileSize); 1662 EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar()); 1663 } 1664 1665 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) { 1666 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1667 OpenMPIRBuilder OMPBuilder(*M); 1668 OMPBuilder.initialize(); 1669 F->setName("func"); 1670 1671 IRBuilder<> Builder(BB); 1672 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 1673 Value *TripCount = F->getArg(0); 1674 Type *LCTy = TripCount->getType(); 1675 1676 BasicBlock *BodyCode = nullptr; 1677 CanonicalLoopInfo *InnerLoop = nullptr; 1678 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, 1679 llvm::Value *OuterLC) { 1680 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1681 
llvm::Value *InnerLC) { 1682 Builder.restoreIP(InnerCodeGenIP); 1683 BodyCode = Builder.GetInsertBlock(); 1684 1685 // Add something that consumes the induction variables to the body. 1686 createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); 1687 return Error::success(); 1688 }; 1689 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, InnerLoopResult, 1690 OMPBuilder.createCanonicalLoop(OuterCodeGenIP, 1691 InnerLoopBodyGenCB, 1692 TripCount, "inner")); 1693 InnerLoop = InnerLoopResult; 1694 }; 1695 ASSERT_EXPECTED_INIT( 1696 CanonicalLoopInfo *, OuterLoop, 1697 OMPBuilder.createCanonicalLoop( 1698 Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), TripCount, "outer")); 1699 1700 // Finalize the function. 1701 Builder.restoreIP(OuterLoop->getAfterIP()); 1702 Builder.CreateRetVoid(); 1703 1704 // Tile to loop nest. 1705 Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11)); 1706 Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7)); 1707 std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops( 1708 DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize}); 1709 1710 OMPBuilder.finalize(); 1711 EXPECT_FALSE(verifyModule(*M, &errs())); 1712 1713 EXPECT_EQ(GenLoops.size(), 4u); 1714 CanonicalLoopInfo *Floor1 = GenLoops[0]; 1715 CanonicalLoopInfo *Floor2 = GenLoops[1]; 1716 CanonicalLoopInfo *Tile1 = GenLoops[2]; 1717 CanonicalLoopInfo *Tile2 = GenLoops[3]; 1718 1719 BasicBlock *RefOrder[] = { 1720 Floor1->getPreheader(), 1721 Floor1->getHeader(), 1722 Floor1->getCond(), 1723 Floor1->getBody(), 1724 Floor2->getPreheader(), 1725 Floor2->getHeader(), 1726 Floor2->getCond(), 1727 Floor2->getBody(), 1728 Tile1->getPreheader(), 1729 Tile1->getHeader(), 1730 Tile1->getCond(), 1731 Tile1->getBody(), 1732 Tile2->getPreheader(), 1733 Tile2->getHeader(), 1734 Tile2->getCond(), 1735 Tile2->getBody(), 1736 BodyCode, 1737 Tile2->getLatch(), 1738 Tile2->getExit(), 1739 Tile2->getAfter(), 1740 Tile1->getLatch(), 1741 Tile1->getExit(), 1742 
Tile1->getAfter(), 1743 Floor2->getLatch(), 1744 Floor2->getExit(), 1745 Floor2->getAfter(), 1746 Floor1->getLatch(), 1747 Floor1->getExit(), 1748 Floor1->getAfter(), 1749 }; 1750 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1751 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1752 } 1753 1754 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) { 1755 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1756 OpenMPIRBuilder OMPBuilder(*M); 1757 OMPBuilder.initialize(); 1758 F->setName("func"); 1759 1760 IRBuilder<> Builder(BB); 1761 Value *TripCount = F->getArg(0); 1762 Type *LCTy = TripCount->getType(); 1763 1764 Value *OuterStartVal = ConstantInt::get(LCTy, 2); 1765 Value *OuterStopVal = TripCount; 1766 Value *OuterStep = ConstantInt::get(LCTy, 5); 1767 Value *InnerStartVal = ConstantInt::get(LCTy, 13); 1768 Value *InnerStopVal = TripCount; 1769 Value *InnerStep = ConstantInt::get(LCTy, 3); 1770 1771 // Fix an insertion point for ComputeIP. 1772 BasicBlock *LoopNextEnter = 1773 BasicBlock::Create(M->getContext(), "loopnest.enter", F, 1774 Builder.GetInsertBlock()->getNextNode()); 1775 BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter); 1776 InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()}; 1777 1778 InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()}; 1779 OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL}); 1780 1781 BasicBlock *BodyCode = nullptr; 1782 CanonicalLoopInfo *InnerLoop = nullptr; 1783 CallInst *Call = nullptr; 1784 auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, 1785 llvm::Value *OuterLC) { 1786 auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP, 1787 llvm::Value *InnerLC) { 1788 Builder.restoreIP(InnerCodeGenIP); 1789 BodyCode = Builder.GetInsertBlock(); 1790 1791 // Add something that consumes the induction variable to the body. 
1792 Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); 1793 return Error::success(); 1794 }; 1795 ASSERT_EXPECTED_INIT( 1796 CanonicalLoopInfo *, InnerLoopResult, 1797 OMPBuilder.createCanonicalLoop(OuterCodeGenIP, InnerLoopBodyGenCB, 1798 InnerStartVal, InnerStopVal, InnerStep, 1799 false, false, ComputeIP, "inner")); 1800 InnerLoop = InnerLoopResult; 1801 }; 1802 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop, 1803 OMPBuilder.createCanonicalLoop( 1804 Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), 1805 OuterStartVal, OuterStopVal, OuterStep, false, false, 1806 ComputeIP, "outer")); 1807 1808 // Finalize the function 1809 Builder.restoreIP(OuterLoop->getAfterIP()); 1810 Builder.CreateRetVoid(); 1811 1812 // Tile the loop nest. 1813 Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11)); 1814 Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7)); 1815 std::vector<CanonicalLoopInfo *> GenLoops = 1816 OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1}); 1817 1818 OMPBuilder.finalize(); 1819 EXPECT_FALSE(verifyModule(*M, &errs())); 1820 1821 EXPECT_EQ(GenLoops.size(), 4u); 1822 CanonicalLoopInfo *Floor0 = GenLoops[0]; 1823 CanonicalLoopInfo *Floor1 = GenLoops[1]; 1824 CanonicalLoopInfo *Tile0 = GenLoops[2]; 1825 CanonicalLoopInfo *Tile1 = GenLoops[3]; 1826 1827 BasicBlock *RefOrder[] = { 1828 Floor0->getPreheader(), 1829 Floor0->getHeader(), 1830 Floor0->getCond(), 1831 Floor0->getBody(), 1832 Floor1->getPreheader(), 1833 Floor1->getHeader(), 1834 Floor1->getCond(), 1835 Floor1->getBody(), 1836 Tile0->getPreheader(), 1837 Tile0->getHeader(), 1838 Tile0->getCond(), 1839 Tile0->getBody(), 1840 Tile1->getPreheader(), 1841 Tile1->getHeader(), 1842 Tile1->getCond(), 1843 Tile1->getBody(), 1844 BodyCode, 1845 Tile1->getLatch(), 1846 Tile1->getExit(), 1847 Tile1->getAfter(), 1848 Tile0->getLatch(), 1849 Tile0->getExit(), 1850 Tile0->getAfter(), 1851 Floor1->getLatch(), 1852 Floor1->getExit(), 1853 Floor1->getAfter(), 
1854 Floor0->getLatch(), 1855 Floor0->getExit(), 1856 Floor0->getAfter(), 1857 }; 1858 EXPECT_TRUE(verifyDFSOrder(F, RefOrder)); 1859 EXPECT_TRUE(verifyListOrder(F, RefOrder)); 1860 1861 EXPECT_EQ(Call->getParent(), BodyCode); 1862 1863 auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1)); 1864 EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal); 1865 auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0)); 1866 EXPECT_EQ(RangeScale0->getOperand(1), OuterStep); 1867 auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0)); 1868 EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody()); 1869 EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar()); 1870 auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0)); 1871 EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody()); 1872 EXPECT_EQ(TileScale0->getOperand(0), TileSize0); 1873 EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar()); 1874 1875 auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2)); 1876 EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode); 1877 EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal); 1878 auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0)); 1879 EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode); 1880 EXPECT_EQ(RangeScale1->getOperand(1), InnerStep); 1881 auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0)); 1882 EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody()); 1883 EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar()); 1884 auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0)); 1885 EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody()); 1886 EXPECT_EQ(TileScale1->getOperand(0), TileSize1); 1887 EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar()); 1888 } 1889 1890 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { 1891 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1892 OpenMPIRBuilder 
OMPBuilder(*M); 1893 OMPBuilder.initialize(); 1894 IRBuilder<> Builder(BB); 1895 1896 // Create a loop, tile it, and extract its trip count. All input values are 1897 // constant and IRBuilder evaluates all-constant arithmetic inplace, such that 1898 // the floor trip count itself will be a ConstantInt. Unfortunately we cannot 1899 // do the same for the tile loop. 1900 auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step, 1901 bool IsSigned, bool InclusiveStop, 1902 int64_t TileSize) -> uint64_t { 1903 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 1904 Type *LCTy = Type::getInt16Ty(Ctx); 1905 Value *StartVal = ConstantInt::get(LCTy, Start); 1906 Value *StopVal = ConstantInt::get(LCTy, Stop); 1907 Value *StepVal = ConstantInt::get(LCTy, Step); 1908 1909 // Generate a loop. 1910 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { 1911 return Error::success(); 1912 }; 1913 ASSERT_EXPECTED_INIT_RETURN( 1914 CanonicalLoopInfo *, Loop, 1915 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, 1916 StepVal, IsSigned, InclusiveStop), 1917 (unsigned)-1); 1918 InsertPointTy AfterIP = Loop->getAfterIP(); 1919 1920 // Tile the loop. 1921 Value *TileSizeVal = ConstantInt::get(LCTy, TileSize); 1922 std::vector<CanonicalLoopInfo *> GenLoops = 1923 OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal}); 1924 1925 // Set the insertion pointer to after loop, where the next loop will be 1926 // emitted. 1927 Builder.restoreIP(AfterIP); 1928 1929 // Extract the trip count. 1930 CanonicalLoopInfo *FloorLoop = GenLoops[0]; 1931 Value *FloorTripCount = FloorLoop->getTripCount(); 1932 return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue(); 1933 }; 1934 1935 // Empty iteration domain. 
1936 EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u); 1937 EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u); 1938 EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u); 1939 EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u); 1940 EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u); 1941 1942 // Only complete tiles. 1943 EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u); 1944 EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u); 1945 EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u); 1946 EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u); 1947 EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u); 1948 EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u); 1949 1950 // Only a partial tile. 1951 EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u); 1952 EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u); 1953 EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u); 1954 EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u); 1955 EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u); 1956 1957 // Complete and partial tiles. 1958 EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u); 1959 EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u); 1960 EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u); 1961 EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u); 1962 EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u); 1963 1964 // Close to 16-bit integer range. 
1965 EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu); 1966 EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1); 1967 EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1); 1968 EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1); 1969 EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1); 1970 EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u); 1971 EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u); 1972 1973 // Finalize the function. 1974 Builder.CreateRetVoid(); 1975 OMPBuilder.finalize(); 1976 1977 EXPECT_FALSE(verifyModule(*M, &errs())); 1978 } 1979 1980 TEST_F(OpenMPIRBuilderTest, ApplySimd) { 1981 OpenMPIRBuilder OMPBuilder(*M); 1982 MapVector<Value *, Value *> AlignedVars; 1983 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 1984 ASSERT_NE(CLI, nullptr); 1985 1986 // Simd-ize the loop. 1987 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 1988 OrderKind::OMP_ORDER_unknown, 1989 /* Simdlen */ nullptr, 1990 /* Safelen */ nullptr); 1991 1992 OMPBuilder.finalize(); 1993 EXPECT_FALSE(verifyModule(*M, &errs())); 1994 1995 PassBuilder PB; 1996 FunctionAnalysisManager FAM; 1997 PB.registerFunctionAnalyses(FAM); 1998 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 1999 2000 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2001 EXPECT_EQ(TopLvl.size(), 1u); 2002 2003 Loop *L = TopLvl.front(); 2004 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2005 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2006 2007 // Check for llvm.access.group metadata attached to the printf 2008 // function in the loop body. 
2009 BasicBlock *LoopBody = CLI->getBody(); 2010 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2011 return I.getMetadata("llvm.access.group") != nullptr; 2012 })); 2013 } 2014 2015 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) { 2016 OpenMPIRBuilder OMPBuilder(*M); 2017 IRBuilder<> Builder(BB); 2018 const int AlignmentValue = 32; 2019 llvm::BasicBlock *sourceBlock = Builder.GetInsertBlock(); 2020 AllocaInst *Alloc1 = 2021 Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1)); 2022 LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); 2023 MapVector<Value *, Value *> AlignedVars; 2024 AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)}); 2025 2026 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2027 ASSERT_NE(CLI, nullptr); 2028 2029 // Simd-ize the loop. 2030 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 2031 OrderKind::OMP_ORDER_unknown, 2032 /* Simdlen */ nullptr, 2033 /* Safelen */ nullptr); 2034 2035 OMPBuilder.finalize(); 2036 EXPECT_FALSE(verifyModule(*M, &errs())); 2037 2038 PassBuilder PB; 2039 FunctionAnalysisManager FAM; 2040 PB.registerFunctionAnalyses(FAM); 2041 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2042 2043 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2044 EXPECT_EQ(TopLvl.size(), 1u); 2045 2046 Loop *L = TopLvl.front(); 2047 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2048 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2049 2050 // Check for llvm.access.group metadata attached to the printf 2051 // function in the loop body. 
2052 BasicBlock *LoopBody = CLI->getBody(); 2053 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2054 return I.getMetadata("llvm.access.group") != nullptr; 2055 })); 2056 2057 // Check if number of assumption instructions is equal to number of aligned 2058 // variables 2059 size_t NumAssummptionCallsInPreheader = 2060 count_if(*sourceBlock, [](Instruction &I) { return isa<AssumeInst>(I); }); 2061 EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); 2062 2063 // Check if variables are correctly aligned 2064 for (Instruction &Instr : *sourceBlock) { 2065 if (!isa<AssumeInst>(Instr)) 2066 continue; 2067 AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr); 2068 if (AssumeInstruction->getNumTotalBundleOperands()) { 2069 auto Bundle = AssumeInstruction->getOperandBundleAt(0); 2070 if (Bundle.getTagName() == "align") { 2071 EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1])); 2072 auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]); 2073 EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); 2074 } 2075 } 2076 } 2077 } 2078 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { 2079 OpenMPIRBuilder OMPBuilder(*M); 2080 MapVector<Value *, Value *> AlignedVars; 2081 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2082 ASSERT_NE(CLI, nullptr); 2083 2084 // Simd-ize the loop. 
2085 OMPBuilder.applySimd(CLI, AlignedVars, 2086 /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, 2087 ConstantInt::get(Type::getInt32Ty(Ctx), 3), 2088 /* Safelen */ nullptr); 2089 2090 OMPBuilder.finalize(); 2091 EXPECT_FALSE(verifyModule(*M, &errs())); 2092 2093 PassBuilder PB; 2094 FunctionAnalysisManager FAM; 2095 PB.registerFunctionAnalyses(FAM); 2096 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2097 2098 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2099 EXPECT_EQ(TopLvl.size(), 1u); 2100 2101 Loop *L = TopLvl.front(); 2102 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2103 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2104 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2105 2106 // Check for llvm.access.group metadata attached to the printf 2107 // function in the loop body. 2108 BasicBlock *LoopBody = CLI->getBody(); 2109 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2110 return I.getMetadata("llvm.access.group") != nullptr; 2111 })); 2112 } 2113 2114 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { 2115 OpenMPIRBuilder OMPBuilder(*M); 2116 MapVector<Value *, Value *> AlignedVars; 2117 2118 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2119 ASSERT_NE(CLI, nullptr); 2120 2121 // Simd-ize the loop. 
2122 OMPBuilder.applySimd( 2123 CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, 2124 /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2125 2126 OMPBuilder.finalize(); 2127 EXPECT_FALSE(verifyModule(*M, &errs())); 2128 2129 PassBuilder PB; 2130 FunctionAnalysisManager FAM; 2131 PB.registerFunctionAnalyses(FAM); 2132 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2133 2134 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2135 EXPECT_EQ(TopLvl.size(), 1u); 2136 2137 Loop *L = TopLvl.front(); 2138 // Parallel metadata shoudl be attached because of presence of 2139 // the order(concurrent) OpenMP clause 2140 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2141 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2142 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2143 2144 // Check for llvm.access.group metadata attached to the printf 2145 // function in the loop body. 2146 BasicBlock *LoopBody = CLI->getBody(); 2147 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2148 return I.getMetadata("llvm.access.group") != nullptr; 2149 })); 2150 } 2151 2152 TEST_F(OpenMPIRBuilderTest, ApplySafelen) { 2153 OpenMPIRBuilder OMPBuilder(*M); 2154 MapVector<Value *, Value *> AlignedVars; 2155 2156 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2157 ASSERT_NE(CLI, nullptr); 2158 2159 OMPBuilder.applySimd( 2160 CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, 2161 /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2162 2163 OMPBuilder.finalize(); 2164 EXPECT_FALSE(verifyModule(*M, &errs())); 2165 2166 PassBuilder PB; 2167 FunctionAnalysisManager FAM; 2168 PB.registerFunctionAnalyses(FAM); 2169 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2170 2171 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2172 EXPECT_EQ(TopLvl.size(), 1u); 2173 2174 Loop *L = TopLvl.front(); 2175 
EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2176 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2177 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2178 2179 // Check for llvm.access.group metadata attached to the printf 2180 // function in the loop body. 2181 BasicBlock *LoopBody = CLI->getBody(); 2182 EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) { 2183 return I.getMetadata("llvm.access.group") != nullptr; 2184 })); 2185 } 2186 2187 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { 2188 OpenMPIRBuilder OMPBuilder(*M); 2189 MapVector<Value *, Value *> AlignedVars; 2190 2191 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2192 ASSERT_NE(CLI, nullptr); 2193 2194 OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, 2195 OrderKind::OMP_ORDER_unknown, 2196 ConstantInt::get(Type::getInt32Ty(Ctx), 2), 2197 ConstantInt::get(Type::getInt32Ty(Ctx), 3)); 2198 2199 OMPBuilder.finalize(); 2200 EXPECT_FALSE(verifyModule(*M, &errs())); 2201 2202 PassBuilder PB; 2203 FunctionAnalysisManager FAM; 2204 PB.registerFunctionAnalyses(FAM); 2205 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2206 2207 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2208 EXPECT_EQ(TopLvl.size(), 1u); 2209 2210 Loop *L = TopLvl.front(); 2211 EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2212 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2213 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2); 2214 2215 // Check for llvm.access.group metadata attached to the printf 2216 // function in the loop body. 
2217 BasicBlock *LoopBody = CLI->getBody(); 2218 EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) { 2219 return I.getMetadata("llvm.access.group") != nullptr; 2220 })); 2221 } 2222 2223 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) { 2224 OpenMPIRBuilder OMPBuilder(*M); 2225 IRBuilder<> Builder(BB); 2226 MapVector<Value *, Value *> AlignedVars; 2227 AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); 2228 AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); 2229 2230 // Generation of if condition 2231 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1); 2232 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2); 2233 LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); 2234 LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2); 2235 2236 Value *IfCmp = Builder.CreateICmpNE(Load1, Load2); 2237 2238 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2239 ASSERT_NE(CLI, nullptr); 2240 2241 // Simd-ize the loop with if condition 2242 OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown, 2243 ConstantInt::get(Type::getInt32Ty(Ctx), 3), 2244 /* Safelen */ nullptr); 2245 2246 OMPBuilder.finalize(); 2247 EXPECT_FALSE(verifyModule(*M, &errs())); 2248 2249 PassBuilder PB; 2250 FunctionAnalysisManager FAM; 2251 PB.registerFunctionAnalyses(FAM); 2252 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2253 2254 // Check if there are two loops (one with enabled vectorization) 2255 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2256 EXPECT_EQ(TopLvl.size(), 2u); 2257 2258 Loop *L = TopLvl[0]; 2259 EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2260 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2261 EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3); 2262 2263 // The second loop should have disabled vectorization 2264 L = TopLvl[1]; 2265 
EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); 2266 EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); 2267 // Check for llvm.access.group metadata attached to the printf 2268 // function in the loop body. 2269 BasicBlock *LoopBody = CLI->getBody(); 2270 EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { 2271 return I.getMetadata("llvm.access.group") != nullptr; 2272 })); 2273 } 2274 2275 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { 2276 OpenMPIRBuilder OMPBuilder(*M); 2277 2278 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2279 ASSERT_NE(CLI, nullptr); 2280 2281 // Unroll the loop. 2282 OMPBuilder.unrollLoopFull(DL, CLI); 2283 2284 OMPBuilder.finalize(); 2285 EXPECT_FALSE(verifyModule(*M, &errs())); 2286 2287 PassBuilder PB; 2288 FunctionAnalysisManager FAM; 2289 PB.registerFunctionAnalyses(FAM); 2290 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2291 2292 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2293 EXPECT_EQ(TopLvl.size(), 1u); 2294 2295 Loop *L = TopLvl.front(); 2296 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); 2297 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full")); 2298 } 2299 2300 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) { 2301 OpenMPIRBuilder OMPBuilder(*M); 2302 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2303 ASSERT_NE(CLI, nullptr); 2304 2305 // Unroll the loop. 
2306 CanonicalLoopInfo *UnrolledLoop = nullptr; 2307 OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop); 2308 ASSERT_NE(UnrolledLoop, nullptr); 2309 2310 OMPBuilder.finalize(); 2311 EXPECT_FALSE(verifyModule(*M, &errs())); 2312 UnrolledLoop->assertOK(); 2313 2314 PassBuilder PB; 2315 FunctionAnalysisManager FAM; 2316 PB.registerFunctionAnalyses(FAM); 2317 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2318 2319 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2320 EXPECT_EQ(TopLvl.size(), 1u); 2321 Loop *Outer = TopLvl.front(); 2322 EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader()); 2323 EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch()); 2324 EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond()); 2325 EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit()); 2326 2327 EXPECT_EQ(Outer->getSubLoops().size(), 1u); 2328 Loop *Inner = Outer->getSubLoops().front(); 2329 2330 EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable")); 2331 EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5); 2332 } 2333 2334 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) { 2335 OpenMPIRBuilder OMPBuilder(*M); 2336 2337 CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); 2338 ASSERT_NE(CLI, nullptr); 2339 2340 // Unroll the loop. 
2341 OMPBuilder.unrollLoopHeuristic(DL, CLI); 2342 2343 OMPBuilder.finalize(); 2344 EXPECT_FALSE(verifyModule(*M, &errs())); 2345 2346 PassBuilder PB; 2347 FunctionAnalysisManager FAM; 2348 PB.registerFunctionAnalyses(FAM); 2349 LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F); 2350 2351 const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops(); 2352 EXPECT_EQ(TopLvl.size(), 1u); 2353 2354 Loop *L = TopLvl.front(); 2355 EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")); 2356 } 2357 2358 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) { 2359 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2360 std::string oldDLStr = M->getDataLayoutStr(); 2361 M->setDataLayout( 2362 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:" 2363 "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:" 2364 "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"); 2365 OpenMPIRBuilder OMPBuilder(*M); 2366 OMPBuilder.Config.IsTargetDevice = true; 2367 OMPBuilder.initialize(); 2368 IRBuilder<> Builder(BB); 2369 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2370 InsertPointTy AllocaIP = Builder.saveIP(); 2371 2372 Type *LCTy = Type::getInt32Ty(Ctx); 2373 Value *StartVal = ConstantInt::get(LCTy, 10); 2374 Value *StopVal = ConstantInt::get(LCTy, 52); 2375 Value *StepVal = ConstantInt::get(LCTy, 2); 2376 auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); }; 2377 2378 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2379 OMPBuilder.createCanonicalLoop(Loc, LoopBodyGen, 2380 StartVal, StopVal, 2381 StepVal, false, false)); 2382 BasicBlock *Preheader = CLI->getPreheader(); 2383 Value *TripCount = CLI->getTripCount(); 2384 2385 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2386 2387 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 2388 OMPBuilder.applyWorkshareLoop( 2389 DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, 2390 nullptr, false, false, false, false, 
2391 WorksharingLoopType::ForStaticLoop)); 2392 Builder.restoreIP(AfterIP); 2393 Builder.CreateRetVoid(); 2394 2395 OMPBuilder.finalize(); 2396 EXPECT_FALSE(verifyModule(*M, &errs())); 2397 2398 CallInst *WorkshareLoopRuntimeCall = nullptr; 2399 int WorkshareLoopRuntimeCallCnt = 0; 2400 for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) { 2401 CallInst *Call = dyn_cast<CallInst>(Inst); 2402 if (!Call) 2403 continue; 2404 if (!Call->getCalledFunction()) 2405 continue; 2406 2407 if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") { 2408 WorkshareLoopRuntimeCall = Call; 2409 WorkshareLoopRuntimeCallCnt++; 2410 } 2411 } 2412 EXPECT_NE(WorkshareLoopRuntimeCall, nullptr); 2413 // Verify that there is only one call to workshare loop function 2414 EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1); 2415 // Check that pointer to loop body function is passed as second argument 2416 Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1); 2417 EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType()); 2418 Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg); 2419 EXPECT_NE(ArgFunction, nullptr); 2420 EXPECT_EQ(ArgFunction->arg_size(), 1u); 2421 EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType()); 2422 // Check that no variables except for loop counter are used in loop body 2423 EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()), 2424 WorkshareLoopRuntimeCall->getArgOperand(2)); 2425 // Check loop trip count argument 2426 EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3)); 2427 } 2428 2429 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { 2430 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2431 OpenMPIRBuilder OMPBuilder(*M); 2432 OMPBuilder.Config.IsTargetDevice = false; 2433 OMPBuilder.initialize(); 2434 IRBuilder<> Builder(BB); 2435 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2436 2437 Type *LCTy = Type::getInt32Ty(Ctx); 2438 Value *StartVal = 
ConstantInt::get(LCTy, 10); 2439 Value *StopVal = ConstantInt::get(LCTy, 52); 2440 Value *StepVal = ConstantInt::get(LCTy, 2); 2441 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { 2442 return Error::success(); 2443 }; 2444 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2445 OMPBuilder.createCanonicalLoop( 2446 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2447 /*IsSigned=*/false, /*InclusiveStop=*/false)); 2448 BasicBlock *Preheader = CLI->getPreheader(); 2449 BasicBlock *Body = CLI->getBody(); 2450 Value *IV = CLI->getIndVar(); 2451 BasicBlock *ExitBlock = CLI->getExit(); 2452 2453 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2454 InsertPointTy AllocaIP = Builder.saveIP(); 2455 2456 ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, 2457 /*NeedsBarrier=*/true, 2458 OMP_SCHEDULE_Static), 2459 Succeeded()); 2460 2461 BasicBlock *Cond = Body->getSinglePredecessor(); 2462 Instruction *Cmp = &*Cond->begin(); 2463 Value *TripCount = Cmp->getOperand(1); 2464 2465 auto AllocaIter = BB->begin(); 2466 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2467 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2468 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2469 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2470 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2471 EXPECT_NE(PLastIter, nullptr); 2472 EXPECT_NE(PLowerBound, nullptr); 2473 EXPECT_NE(PUpperBound, nullptr); 2474 EXPECT_NE(PStride, nullptr); 2475 2476 auto PreheaderIter = Preheader->begin(); 2477 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7); 2478 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2479 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2480 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2481 ASSERT_NE(LowerBoundStore, nullptr); 2482 ASSERT_NE(UpperBoundStore, nullptr); 2483 ASSERT_NE(StrideStore, nullptr); 2484 2485 
auto *OrigLowerBound = 2486 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2487 auto *OrigUpperBound = 2488 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2489 auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2490 ASSERT_NE(OrigLowerBound, nullptr); 2491 ASSERT_NE(OrigUpperBound, nullptr); 2492 ASSERT_NE(OrigStride, nullptr); 2493 EXPECT_EQ(OrigLowerBound->getValue(), 0); 2494 EXPECT_EQ(OrigUpperBound->getValue(), 20); 2495 EXPECT_EQ(OrigStride->getValue(), 1); 2496 2497 // Check that the loop IV is updated to account for the lower bound returned 2498 // by the OpenMP runtime call. 2499 BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front()); 2500 EXPECT_EQ(Add->getOperand(0), IV); 2501 auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1)); 2502 ASSERT_NE(LoadedLowerBound, nullptr); 2503 EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound); 2504 2505 // Check that the trip count is updated to account for the lower and upper 2506 // bounds return by the OpenMP runtime call. 2507 auto *AddOne = dyn_cast<Instruction>(TripCount); 2508 ASSERT_NE(AddOne, nullptr); 2509 ASSERT_TRUE(AddOne->isBinaryOp()); 2510 auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1)); 2511 ASSERT_NE(One, nullptr); 2512 EXPECT_EQ(One->getValue(), 1); 2513 auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0)); 2514 ASSERT_NE(Difference, nullptr); 2515 ASSERT_TRUE(Difference->isBinaryOp()); 2516 EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound); 2517 auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0)); 2518 ASSERT_NE(LoadedUpperBound, nullptr); 2519 EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound); 2520 2521 // The original loop iterator should only be used in the condition, in the 2522 // increment and in the statement that adds the lower bound to it. 
2523 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2524 2525 // The exit block should contain the "fini" call and the barrier call, 2526 // plus the call to obtain the thread ID. 2527 size_t NumCallsInExitBlock = 2528 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2529 EXPECT_EQ(NumCallsInExitBlock, 3u); 2530 } 2531 2532 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) { 2533 unsigned IVBits = GetParam(); 2534 2535 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2536 OpenMPIRBuilder OMPBuilder(*M); 2537 OMPBuilder.Config.IsTargetDevice = false; 2538 2539 BasicBlock *Body; 2540 CallInst *Call; 2541 CanonicalLoopInfo *CLI = 2542 buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body); 2543 ASSERT_NE(CLI, nullptr); 2544 2545 Instruction *OrigIndVar = CLI->getIndVar(); 2546 EXPECT_EQ(Call->getOperand(1), OrigIndVar); 2547 2548 Type *LCTy = Type::getInt32Ty(Ctx); 2549 Value *ChunkSize = ConstantInt::get(LCTy, 5); 2550 InsertPointTy AllocaIP{&F->getEntryBlock(), 2551 F->getEntryBlock().getFirstInsertionPt()}; 2552 ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, 2553 /*NeedsBarrier=*/true, 2554 OMP_SCHEDULE_Static, 2555 ChunkSize), 2556 Succeeded()); 2557 2558 OMPBuilder.finalize(); 2559 EXPECT_FALSE(verifyModule(*M, &errs())); 2560 2561 BasicBlock *Entry = &F->getEntryBlock(); 2562 BasicBlock *Preheader = Entry->getSingleSuccessor(); 2563 2564 BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor(); 2565 BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor(); 2566 BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor(); 2567 BasicBlock *DispatchBody = succ_begin(DispatchCond)[0]; 2568 BasicBlock *DispatchExit = succ_begin(DispatchCond)[1]; 2569 BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor(); 2570 BasicBlock *Return = DispatchAfter->getSingleSuccessor(); 2571 2572 BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor(); 2573 
BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor(); 2574 BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor(); 2575 BasicBlock *ChunkBody = succ_begin(ChunkCond)[0]; 2576 BasicBlock *ChunkExit = succ_begin(ChunkCond)[1]; 2577 BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor(); 2578 BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor(); 2579 2580 BasicBlock *DispatchInc = ChunkAfter; 2581 2582 EXPECT_EQ(ChunkBody, Body); 2583 EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader); 2584 EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader); 2585 2586 EXPECT_TRUE(isa<ReturnInst>(Return->front())); 2587 2588 Value *NewIV = Call->getOperand(1); 2589 EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits); 2590 2591 CallInst *InitCall = findSingleCall( 2592 F, 2593 (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u 2594 : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u, 2595 OMPBuilder); 2596 EXPECT_EQ(InitCall->getParent(), Preheader); 2597 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33); 2598 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1); 2599 EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5); 2600 2601 CallInst *FiniCall = findSingleCall( 2602 F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder); 2603 EXPECT_EQ(FiniCall->getParent(), DispatchExit); 2604 2605 CallInst *BarrierCall = findSingleCall( 2606 F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder); 2607 EXPECT_EQ(BarrierCall->getParent(), DispatchExit); 2608 } 2609 2610 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits, 2611 ::testing::Values(8, 16, 32, 64)); 2612 2613 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { 2614 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2615 OpenMPIRBuilder OMPBuilder(*M); 2616 OMPBuilder.Config.IsTargetDevice = false; 2617 OMPBuilder.initialize(); 2618 IRBuilder<> Builder(BB); 2619 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2620 2621 omp::OMPScheduleType SchedType = GetParam(); 2622 uint32_t ChunkSize = 1; 2623 switch (SchedType & ~OMPScheduleType::ModifierMask) { 2624 case omp::OMPScheduleType::BaseDynamicChunked: 2625 case omp::OMPScheduleType::BaseGuidedChunked: 2626 ChunkSize = 7; 2627 break; 2628 case omp::OMPScheduleType::BaseAuto: 2629 case omp::OMPScheduleType::BaseRuntime: 2630 ChunkSize = 1; 2631 break; 2632 default: 2633 assert(0 && "unknown type for this test"); 2634 break; 2635 } 2636 2637 Type *LCTy = Type::getInt32Ty(Ctx); 2638 Value *StartVal = ConstantInt::get(LCTy, 10); 2639 Value *StopVal = ConstantInt::get(LCTy, 52); 2640 Value *StepVal = ConstantInt::get(LCTy, 2); 2641 Value *ChunkVal = 2642 (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize); 2643 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { 2644 return Error::success(); 2645 }; 2646 2647 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2648 OMPBuilder.createCanonicalLoop( 2649 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2650 /*IsSigned=*/false, /*InclusiveStop=*/false)); 2651 2652 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2653 InsertPointTy AllocaIP = Builder.saveIP(); 2654 2655 // Collect all the info from CLI, as it isn't usable after the call to 2656 // createDynamicWorkshareLoop. 
2657 InsertPointTy AfterIP = CLI->getAfterIP(); 2658 BasicBlock *Preheader = CLI->getPreheader(); 2659 BasicBlock *ExitBlock = CLI->getExit(); 2660 BasicBlock *LatchBlock = CLI->getLatch(); 2661 Value *IV = CLI->getIndVar(); 2662 2663 ASSERT_EXPECTED_INIT( 2664 OpenMPIRBuilder::InsertPointTy, EndIP, 2665 OMPBuilder.applyWorkshareLoop( 2666 DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType), 2667 ChunkVal, /*Simd=*/false, 2668 (SchedType & omp::OMPScheduleType::ModifierMonotonic) == 2669 omp::OMPScheduleType::ModifierMonotonic, 2670 (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) == 2671 omp::OMPScheduleType::ModifierNonmonotonic, 2672 /*Ordered=*/false)); 2673 2674 // The returned value should be the "after" point. 2675 ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock()); 2676 ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint()); 2677 2678 auto AllocaIter = BB->begin(); 2679 ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); 2680 AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2681 AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2682 AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2683 AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++)); 2684 EXPECT_NE(PLastIter, nullptr); 2685 EXPECT_NE(PLowerBound, nullptr); 2686 EXPECT_NE(PUpperBound, nullptr); 2687 EXPECT_NE(PStride, nullptr); 2688 2689 auto PreheaderIter = Preheader->begin(); 2690 ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6); 2691 StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2692 StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2693 StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++)); 2694 ASSERT_NE(LowerBoundStore, nullptr); 2695 ASSERT_NE(UpperBoundStore, nullptr); 2696 ASSERT_NE(StrideStore, nullptr); 2697 2698 CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++)); 2699 ASSERT_NE(ThreadIdCall, nullptr); 2700 
EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(), 2701 "__kmpc_global_thread_num"); 2702 2703 CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter); 2704 2705 ASSERT_NE(InitCall, nullptr); 2706 EXPECT_EQ(InitCall->getCalledFunction()->getName(), 2707 "__kmpc_dispatch_init_4u"); 2708 EXPECT_EQ(InitCall->arg_size(), 7U); 2709 EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize)); 2710 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2711 if ((SchedType & OMPScheduleType::MonotonicityMask) == 2712 OMPScheduleType::None) { 2713 // Implementation is allowed to add default nonmonotonicity flag 2714 EXPECT_EQ( 2715 static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) | 2716 OMPScheduleType::ModifierNonmonotonic, 2717 SchedType | OMPScheduleType::ModifierNonmonotonic); 2718 } else { 2719 EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()), 2720 SchedType); 2721 } 2722 2723 ConstantInt *OrigLowerBound = 2724 dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); 2725 ConstantInt *OrigUpperBound = 2726 dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand()); 2727 ConstantInt *OrigStride = 2728 dyn_cast<ConstantInt>(StrideStore->getValueOperand()); 2729 ASSERT_NE(OrigLowerBound, nullptr); 2730 ASSERT_NE(OrigUpperBound, nullptr); 2731 ASSERT_NE(OrigStride, nullptr); 2732 EXPECT_EQ(OrigLowerBound->getValue(), 1); 2733 EXPECT_EQ(OrigUpperBound->getValue(), 21); 2734 EXPECT_EQ(OrigStride->getValue(), 1); 2735 2736 CallInst *FiniCall = dyn_cast<CallInst>( 2737 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2738 EXPECT_EQ(FiniCall, nullptr); 2739 2740 // The original loop iterator should only be used in the condition, in the 2741 // increment and in the statement that adds the lower bound to it. 2742 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2743 2744 // The exit block should contain the barrier call, plus the call to obtain 2745 // the thread ID. 
2746 size_t NumCallsInExitBlock = 2747 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2748 EXPECT_EQ(NumCallsInExitBlock, 2u); 2749 2750 // Add a termination to our block and check that it is internally consistent. 2751 Builder.restoreIP(EndIP); 2752 Builder.CreateRetVoid(); 2753 OMPBuilder.finalize(); 2754 EXPECT_FALSE(verifyModule(*M, &errs())); 2755 } 2756 2757 INSTANTIATE_TEST_SUITE_P( 2758 OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, 2759 ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked, 2760 omp::OMPScheduleType::UnorderedGuidedChunked, 2761 omp::OMPScheduleType::UnorderedAuto, 2762 omp::OMPScheduleType::UnorderedRuntime, 2763 omp::OMPScheduleType::UnorderedDynamicChunked | 2764 omp::OMPScheduleType::ModifierMonotonic, 2765 omp::OMPScheduleType::UnorderedDynamicChunked | 2766 omp::OMPScheduleType::ModifierNonmonotonic, 2767 omp::OMPScheduleType::UnorderedGuidedChunked | 2768 omp::OMPScheduleType::ModifierMonotonic, 2769 omp::OMPScheduleType::UnorderedGuidedChunked | 2770 omp::OMPScheduleType::ModifierNonmonotonic, 2771 omp::OMPScheduleType::UnorderedAuto | 2772 omp::OMPScheduleType::ModifierMonotonic, 2773 omp::OMPScheduleType::UnorderedRuntime | 2774 omp::OMPScheduleType::ModifierMonotonic)); 2775 2776 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) { 2777 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2778 OpenMPIRBuilder OMPBuilder(*M); 2779 OMPBuilder.Config.IsTargetDevice = false; 2780 OMPBuilder.initialize(); 2781 IRBuilder<> Builder(BB); 2782 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2783 2784 uint32_t ChunkSize = 1; 2785 Type *LCTy = Type::getInt32Ty(Ctx); 2786 Value *StartVal = ConstantInt::get(LCTy, 10); 2787 Value *StopVal = ConstantInt::get(LCTy, 52); 2788 Value *StepVal = ConstantInt::get(LCTy, 2); 2789 Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize); 2790 auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { 2791 return llvm::Error::success(); 2792 
}; 2793 2794 ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI, 2795 OMPBuilder.createCanonicalLoop( 2796 Loc, LoopBodyGen, StartVal, StopVal, StepVal, 2797 /*IsSigned=*/false, /*InclusiveStop=*/false)); 2798 2799 Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); 2800 InsertPointTy AllocaIP = Builder.saveIP(); 2801 2802 // Collect all the info from CLI, as it isn't usable after the call to 2803 // createDynamicWorkshareLoop. 2804 BasicBlock *Preheader = CLI->getPreheader(); 2805 BasicBlock *ExitBlock = CLI->getExit(); 2806 BasicBlock *LatchBlock = CLI->getLatch(); 2807 Value *IV = CLI->getIndVar(); 2808 2809 ASSERT_EXPECTED_INIT( 2810 OpenMPIRBuilder::InsertPointTy, EndIP, 2811 OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true, 2812 OMP_SCHEDULE_Static, ChunkVal, 2813 /*HasSimdModifier=*/false, 2814 /*HasMonotonicModifier=*/false, 2815 /*HasNonmonotonicModifier=*/false, 2816 /*HasOrderedClause=*/true)); 2817 2818 // Add a termination to our block and check that it is internally consistent. 
2819 Builder.restoreIP(EndIP); 2820 Builder.CreateRetVoid(); 2821 OMPBuilder.finalize(); 2822 EXPECT_FALSE(verifyModule(*M, &errs())); 2823 2824 CallInst *InitCall = nullptr; 2825 for (Instruction &EI : *Preheader) { 2826 Instruction *Cur = &EI; 2827 if (isa<CallInst>(Cur)) { 2828 InitCall = cast<CallInst>(Cur); 2829 if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u") 2830 break; 2831 InitCall = nullptr; 2832 } 2833 } 2834 EXPECT_NE(InitCall, nullptr); 2835 EXPECT_EQ(InitCall->arg_size(), 7U); 2836 ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2)); 2837 EXPECT_EQ(SchedVal->getValue(), 2838 static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked)); 2839 2840 CallInst *FiniCall = dyn_cast<CallInst>( 2841 &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true))); 2842 ASSERT_NE(FiniCall, nullptr); 2843 EXPECT_EQ(FiniCall->getCalledFunction()->getName(), 2844 "__kmpc_dispatch_fini_4u"); 2845 EXPECT_EQ(FiniCall->arg_size(), 2U); 2846 EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0)); 2847 EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1)); 2848 2849 // The original loop iterator should only be used in the condition, in the 2850 // increment and in the statement that adds the lower bound to it. 2851 EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3); 2852 2853 // The exit block should contain the barrier call, plus the call to obtain 2854 // the thread ID. 
2855 size_t NumCallsInExitBlock = 2856 count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); }); 2857 EXPECT_EQ(NumCallsInExitBlock, 2u); 2858 } 2859 2860 TEST_F(OpenMPIRBuilderTest, MasterDirective) { 2861 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2862 OpenMPIRBuilder OMPBuilder(*M); 2863 OMPBuilder.initialize(); 2864 F->setName("func"); 2865 IRBuilder<> Builder(BB); 2866 2867 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2868 2869 AllocaInst *PrivAI = nullptr; 2870 2871 BasicBlock *EntryBB = nullptr; 2872 BasicBlock *ThenBB = nullptr; 2873 2874 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2875 if (AllocaIP.isSet()) 2876 Builder.restoreIP(AllocaIP); 2877 else 2878 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2879 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2880 Builder.CreateStore(F->arg_begin(), PrivAI); 2881 2882 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 2883 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2884 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2885 2886 Builder.restoreIP(CodeGenIP); 2887 2888 // collect some info for checks later 2889 ThenBB = Builder.GetInsertBlock(); 2890 EntryBB = ThenBB->getUniquePredecessor(); 2891 2892 // simple instructions for body 2893 Value *PrivLoad = 2894 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2895 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2896 }; 2897 2898 auto FiniCB = [&](InsertPointTy IP) { 2899 BasicBlock *IPBB = IP.getBlock(); 2900 EXPECT_NE(IPBB->end(), IP.getPoint()); 2901 }; 2902 2903 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 2904 OMPBuilder.createMaster(Builder, 2905 BODYGENCB_WRAPPER(BodyGenCB), 2906 FINICB_WRAPPER(FiniCB))); 2907 Builder.restoreIP(AfterIP); 2908 Value *EntryBBTI = EntryBB->getTerminator(); 2909 EXPECT_NE(EntryBBTI, nullptr); 2910 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2911 BranchInst *EntryBr = 
cast<BranchInst>(EntryBB->getTerminator()); 2912 EXPECT_TRUE(EntryBr->isConditional()); 2913 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2914 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2915 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2916 2917 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 2918 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 2919 2920 CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0)); 2921 EXPECT_EQ(MasterEntryCI->arg_size(), 2U); 2922 EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master"); 2923 EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0))); 2924 2925 CallInst *MasterEndCI = nullptr; 2926 for (auto &FI : *ThenBB) { 2927 Instruction *cur = &FI; 2928 if (isa<CallInst>(cur)) { 2929 MasterEndCI = cast<CallInst>(cur); 2930 if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master") 2931 break; 2932 MasterEndCI = nullptr; 2933 } 2934 } 2935 EXPECT_NE(MasterEndCI, nullptr); 2936 EXPECT_EQ(MasterEndCI->arg_size(), 2U); 2937 EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0))); 2938 EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1)); 2939 } 2940 2941 TEST_F(OpenMPIRBuilderTest, MaskedDirective) { 2942 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 2943 OpenMPIRBuilder OMPBuilder(*M); 2944 OMPBuilder.initialize(); 2945 F->setName("func"); 2946 IRBuilder<> Builder(BB); 2947 2948 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 2949 2950 AllocaInst *PrivAI = nullptr; 2951 2952 BasicBlock *EntryBB = nullptr; 2953 BasicBlock *ThenBB = nullptr; 2954 2955 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 2956 if (AllocaIP.isSet()) 2957 Builder.restoreIP(AllocaIP); 2958 else 2959 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 2960 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 2961 Builder.CreateStore(F->arg_begin(), PrivAI); 2962 2963 llvm::BasicBlock 
*CodeGenIPBB = CodeGenIP.getBlock(); 2964 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 2965 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 2966 2967 Builder.restoreIP(CodeGenIP); 2968 2969 // collect some info for checks later 2970 ThenBB = Builder.GetInsertBlock(); 2971 EntryBB = ThenBB->getUniquePredecessor(); 2972 2973 // simple instructions for body 2974 Value *PrivLoad = 2975 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 2976 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 2977 }; 2978 2979 auto FiniCB = [&](InsertPointTy IP) { 2980 BasicBlock *IPBB = IP.getBlock(); 2981 EXPECT_NE(IPBB->end(), IP.getPoint()); 2982 }; 2983 2984 Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); 2985 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 2986 OMPBuilder.createMasked(Builder, 2987 BODYGENCB_WRAPPER(BodyGenCB), 2988 FINICB_WRAPPER(FiniCB), Filter)); 2989 Builder.restoreIP(AfterIP); 2990 Value *EntryBBTI = EntryBB->getTerminator(); 2991 EXPECT_NE(EntryBBTI, nullptr); 2992 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 2993 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 2994 EXPECT_TRUE(EntryBr->isConditional()); 2995 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 2996 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 2997 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 2998 2999 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3000 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3001 3002 CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3003 EXPECT_EQ(MaskedEntryCI->arg_size(), 3U); 3004 EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked"); 3005 EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0))); 3006 3007 CallInst *MaskedEndCI = nullptr; 3008 for (auto &FI : *ThenBB) { 3009 Instruction *cur = &FI; 3010 if (isa<CallInst>(cur)) { 3011 MaskedEndCI = cast<CallInst>(cur); 3012 if 
(MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked") 3013 break; 3014 MaskedEndCI = nullptr; 3015 } 3016 } 3017 EXPECT_NE(MaskedEndCI, nullptr); 3018 EXPECT_EQ(MaskedEndCI->arg_size(), 2U); 3019 EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0))); 3020 EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1)); 3021 } 3022 3023 TEST_F(OpenMPIRBuilderTest, CriticalDirective) { 3024 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3025 OpenMPIRBuilder OMPBuilder(*M); 3026 OMPBuilder.initialize(); 3027 F->setName("func"); 3028 IRBuilder<> Builder(BB); 3029 3030 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3031 3032 AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3033 3034 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3035 // actual start for bodyCB 3036 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3037 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3038 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3039 3040 // body begin 3041 Builder.restoreIP(CodeGenIP); 3042 Builder.CreateStore(F->arg_begin(), PrivAI); 3043 Value *PrivLoad = 3044 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3045 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3046 }; 3047 3048 auto FiniCB = [&](InsertPointTy IP) { 3049 BasicBlock *IPBB = IP.getBlock(); 3050 EXPECT_NE(IPBB->end(), IP.getPoint()); 3051 }; 3052 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3053 3054 ASSERT_EXPECTED_INIT( 3055 OpenMPIRBuilder::InsertPointTy, AfterIP, 3056 OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3057 FINICB_WRAPPER(FiniCB), "testCRT", nullptr)); 3058 Builder.restoreIP(AfterIP); 3059 3060 CallInst *CriticalEntryCI = nullptr; 3061 for (auto &EI : *EntryBB) { 3062 Instruction *cur = &EI; 3063 if (isa<CallInst>(cur)) { 3064 CriticalEntryCI = cast<CallInst>(cur); 3065 if (CriticalEntryCI->getCalledFunction()->getName() == 
"__kmpc_critical") 3066 break; 3067 CriticalEntryCI = nullptr; 3068 } 3069 } 3070 EXPECT_NE(CriticalEntryCI, nullptr); 3071 EXPECT_EQ(CriticalEntryCI->arg_size(), 3U); 3072 EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical"); 3073 EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0))); 3074 3075 CallInst *CriticalEndCI = nullptr; 3076 for (auto &FI : *EntryBB) { 3077 Instruction *cur = &FI; 3078 if (isa<CallInst>(cur)) { 3079 CriticalEndCI = cast<CallInst>(cur); 3080 if (CriticalEndCI->getCalledFunction()->getName() == 3081 "__kmpc_end_critical") 3082 break; 3083 CriticalEndCI = nullptr; 3084 } 3085 } 3086 EXPECT_NE(CriticalEndCI, nullptr); 3087 EXPECT_EQ(CriticalEndCI->arg_size(), 3U); 3088 EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0))); 3089 EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1)); 3090 PointerType *CriticalNamePtrTy = PointerType::getUnqual(Ctx); 3091 EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2)); 3092 GlobalVariable *GV = 3093 dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2)); 3094 ASSERT_NE(GV, nullptr); 3095 EXPECT_EQ(GV->getType(), CriticalNamePtrTy); 3096 const DataLayout &DL = M->getDataLayout(); 3097 const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy); 3098 const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace()); 3099 if (const llvm::MaybeAlign Alignment = GV->getAlign()) 3100 EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign)); 3101 } 3102 3103 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) { 3104 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3105 OpenMPIRBuilder OMPBuilder(*M); 3106 OMPBuilder.initialize(); 3107 F->setName("func"); 3108 IRBuilder<> Builder(BB); 3109 LLVMContext &Ctx = M->getContext(); 3110 3111 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3112 3113 InsertPointTy AllocaIP(&F->getEntryBlock(), 3114 
F->getEntryBlock().getFirstInsertionPt()); 3115 3116 unsigned NumLoops = 2; 3117 SmallVector<Value *, 2> StoreValues; 3118 Type *LCTy = Type::getInt64Ty(Ctx); 3119 StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 3120 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 3121 3122 // Test for "#omp ordered depend(source)" 3123 Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 3124 StoreValues, ".cnt.addr", 3125 /*IsDependSource=*/true)); 3126 3127 Builder.CreateRetVoid(); 3128 OMPBuilder.finalize(); 3129 EXPECT_FALSE(verifyModule(*M, &errs())); 3130 3131 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 3132 ASSERT_NE(AllocInst, nullptr); 3133 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 3134 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 3135 EXPECT_TRUE( 3136 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 3137 3138 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 3139 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 3140 GetElementPtrInst *DependAddrGEPIter = 3141 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3142 ASSERT_NE(DependAddrGEPIter, nullptr); 3143 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 3144 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 3145 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 3146 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 3147 ASSERT_NE(FirstIdx, nullptr); 3148 ASSERT_NE(SecondIdx, nullptr); 3149 EXPECT_EQ(FirstIdx->getValue(), 0); 3150 EXPECT_EQ(SecondIdx->getValue(), Iter); 3151 StoreInst *StoreValue = 3152 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 3153 ASSERT_NE(StoreValue, nullptr); 3154 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 3155 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 3156 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 3157 IterInst = dyn_cast<Instruction>(StoreValue); 3158 
} 3159 3160 GetElementPtrInst *DependBaseAddrGEP = 3161 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3162 ASSERT_NE(DependBaseAddrGEP, nullptr); 3163 EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3164 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3165 auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3166 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3167 ASSERT_NE(FirstIdx, nullptr); 3168 ASSERT_NE(SecondIdx, nullptr); 3169 EXPECT_EQ(FirstIdx->getValue(), 0); 3170 EXPECT_EQ(SecondIdx->getValue(), 0); 3171 3172 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3173 ASSERT_NE(GTID, nullptr); 3174 EXPECT_EQ(GTID->arg_size(), 1U); 3175 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3176 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3177 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3178 3179 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3180 ASSERT_NE(Depend, nullptr); 3181 EXPECT_EQ(Depend->arg_size(), 3U); 3182 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post"); 3183 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3184 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3185 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3186 } 3187 3188 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) { 3189 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3190 OpenMPIRBuilder OMPBuilder(*M); 3191 OMPBuilder.initialize(); 3192 F->setName("func"); 3193 IRBuilder<> Builder(BB); 3194 LLVMContext &Ctx = M->getContext(); 3195 3196 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3197 3198 InsertPointTy AllocaIP(&F->getEntryBlock(), 3199 F->getEntryBlock().getFirstInsertionPt()); 3200 3201 unsigned NumLoops = 2; 3202 SmallVector<Value *, 2> StoreValues; 3203 Type *LCTy = Type::getInt64Ty(Ctx); 3204 
StoreValues.emplace_back(ConstantInt::get(LCTy, 1)); 3205 StoreValues.emplace_back(ConstantInt::get(LCTy, 2)); 3206 3207 // Test for "#omp ordered depend(sink: vec)" 3208 Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops, 3209 StoreValues, ".cnt.addr", 3210 /*IsDependSource=*/false)); 3211 3212 Builder.CreateRetVoid(); 3213 OMPBuilder.finalize(); 3214 EXPECT_FALSE(verifyModule(*M, &errs())); 3215 3216 AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front()); 3217 ASSERT_NE(AllocInst, nullptr); 3218 ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType()); 3219 EXPECT_EQ(ArrType->getNumElements(), NumLoops); 3220 EXPECT_TRUE( 3221 AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64)); 3222 3223 Instruction *IterInst = dyn_cast<Instruction>(AllocInst); 3224 for (unsigned Iter = 0; Iter < NumLoops; Iter++) { 3225 GetElementPtrInst *DependAddrGEPIter = 3226 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3227 ASSERT_NE(DependAddrGEPIter, nullptr); 3228 EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst); 3229 EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2); 3230 auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1)); 3231 auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2)); 3232 ASSERT_NE(FirstIdx, nullptr); 3233 ASSERT_NE(SecondIdx, nullptr); 3234 EXPECT_EQ(FirstIdx->getValue(), 0); 3235 EXPECT_EQ(SecondIdx->getValue(), Iter); 3236 StoreInst *StoreValue = 3237 dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode()); 3238 ASSERT_NE(StoreValue, nullptr); 3239 EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]); 3240 EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter); 3241 EXPECT_EQ(StoreValue->getAlign(), Align(8)); 3242 IterInst = dyn_cast<Instruction>(StoreValue); 3243 } 3244 3245 GetElementPtrInst *DependBaseAddrGEP = 3246 dyn_cast<GetElementPtrInst>(IterInst->getNextNode()); 3247 ASSERT_NE(DependBaseAddrGEP, nullptr); 3248 
EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst); 3249 EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2); 3250 auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1)); 3251 auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2)); 3252 ASSERT_NE(FirstIdx, nullptr); 3253 ASSERT_NE(SecondIdx, nullptr); 3254 EXPECT_EQ(FirstIdx->getValue(), 0); 3255 EXPECT_EQ(SecondIdx->getValue(), 0); 3256 3257 CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode()); 3258 ASSERT_NE(GTID, nullptr); 3259 EXPECT_EQ(GTID->arg_size(), 1U); 3260 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 3261 EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory()); 3262 EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory()); 3263 3264 CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode()); 3265 ASSERT_NE(Depend, nullptr); 3266 EXPECT_EQ(Depend->arg_size(), 3U); 3267 EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait"); 3268 EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0))); 3269 EXPECT_EQ(Depend->getArgOperand(1), GTID); 3270 EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP); 3271 } 3272 3273 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { 3274 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3275 OpenMPIRBuilder OMPBuilder(*M); 3276 OMPBuilder.initialize(); 3277 F->setName("func"); 3278 IRBuilder<> Builder(BB); 3279 3280 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3281 3282 AllocaInst *PrivAI = 3283 Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3284 3285 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3286 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3287 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3288 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3289 3290 Builder.restoreIP(CodeGenIP); 3291 Builder.CreateStore(F->arg_begin(), PrivAI); 
3292 Value *PrivLoad = 3293 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3294 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3295 }; 3296 3297 auto FiniCB = [&](InsertPointTy IP) { 3298 BasicBlock *IPBB = IP.getBlock(); 3299 EXPECT_NE(IPBB->end(), IP.getPoint()); 3300 }; 3301 3302 // Test for "#omp ordered [threads]" 3303 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3304 ASSERT_EXPECTED_INIT( 3305 OpenMPIRBuilder::InsertPointTy, AfterIP, 3306 OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3307 FINICB_WRAPPER(FiniCB), true)); 3308 Builder.restoreIP(AfterIP); 3309 3310 Builder.CreateRetVoid(); 3311 OMPBuilder.finalize(); 3312 EXPECT_FALSE(verifyModule(*M, &errs())); 3313 3314 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3315 3316 CallInst *OrderedEntryCI = nullptr; 3317 for (auto &EI : *EntryBB) { 3318 Instruction *Cur = &EI; 3319 if (isa<CallInst>(Cur)) { 3320 OrderedEntryCI = cast<CallInst>(Cur); 3321 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3322 break; 3323 OrderedEntryCI = nullptr; 3324 } 3325 } 3326 EXPECT_NE(OrderedEntryCI, nullptr); 3327 EXPECT_EQ(OrderedEntryCI->arg_size(), 2U); 3328 EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered"); 3329 EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0))); 3330 3331 CallInst *OrderedEndCI = nullptr; 3332 for (auto &FI : *EntryBB) { 3333 Instruction *Cur = &FI; 3334 if (isa<CallInst>(Cur)) { 3335 OrderedEndCI = cast<CallInst>(Cur); 3336 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3337 break; 3338 OrderedEndCI = nullptr; 3339 } 3340 } 3341 EXPECT_NE(OrderedEndCI, nullptr); 3342 EXPECT_EQ(OrderedEndCI->arg_size(), 2U); 3343 EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0))); 3344 EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1)); 3345 } 3346 3347 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { 3348 using InsertPointTy = 
OpenMPIRBuilder::InsertPointTy; 3349 OpenMPIRBuilder OMPBuilder(*M); 3350 OMPBuilder.initialize(); 3351 F->setName("func"); 3352 IRBuilder<> Builder(BB); 3353 3354 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3355 3356 AllocaInst *PrivAI = 3357 Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); 3358 3359 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3360 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3361 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3362 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3363 3364 Builder.restoreIP(CodeGenIP); 3365 Builder.CreateStore(F->arg_begin(), PrivAI); 3366 Value *PrivLoad = 3367 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3368 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3369 }; 3370 3371 auto FiniCB = [&](InsertPointTy IP) { 3372 BasicBlock *IPBB = IP.getBlock(); 3373 EXPECT_NE(IPBB->end(), IP.getPoint()); 3374 }; 3375 3376 // Test for "#omp ordered simd" 3377 BasicBlock *EntryBB = Builder.GetInsertBlock(); 3378 ASSERT_EXPECTED_INIT( 3379 OpenMPIRBuilder::InsertPointTy, AfterIP, 3380 OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3381 FINICB_WRAPPER(FiniCB), false)); 3382 Builder.restoreIP(AfterIP); 3383 3384 Builder.CreateRetVoid(); 3385 OMPBuilder.finalize(); 3386 EXPECT_FALSE(verifyModule(*M, &errs())); 3387 3388 EXPECT_NE(EntryBB->getTerminator(), nullptr); 3389 3390 CallInst *OrderedEntryCI = nullptr; 3391 for (auto &EI : *EntryBB) { 3392 Instruction *Cur = &EI; 3393 if (isa<CallInst>(Cur)) { 3394 OrderedEntryCI = cast<CallInst>(Cur); 3395 if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered") 3396 break; 3397 OrderedEntryCI = nullptr; 3398 } 3399 } 3400 EXPECT_EQ(OrderedEntryCI, nullptr); 3401 3402 CallInst *OrderedEndCI = nullptr; 3403 for (auto &FI : *EntryBB) { 3404 Instruction *Cur = &FI; 3405 if (isa<CallInst>(Cur)) { 3406 OrderedEndCI = 
cast<CallInst>(Cur); 3407 if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered") 3408 break; 3409 OrderedEndCI = nullptr; 3410 } 3411 } 3412 EXPECT_EQ(OrderedEndCI, nullptr); 3413 } 3414 3415 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) { 3416 OpenMPIRBuilder OMPBuilder(*M); 3417 OMPBuilder.initialize(); 3418 F->setName("func"); 3419 IRBuilder<> Builder(BB); 3420 3421 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3422 3423 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3424 AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3425 AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy()); 3426 3427 BasicBlock *EntryBB = BB; 3428 3429 OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress, 3430 PrivAddress, Int32, /*BranchtoEnd*/ true); 3431 3432 BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator()); 3433 3434 EXPECT_NE(EntryBr, nullptr); 3435 EXPECT_TRUE(EntryBr->isConditional()); 3436 3437 BasicBlock *NotMasterBB = EntryBr->getSuccessor(0); 3438 BasicBlock *CopyinEnd = EntryBr->getSuccessor(1); 3439 CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition()); 3440 3441 EXPECT_NE(CMP, nullptr); 3442 EXPECT_NE(NotMasterBB, nullptr); 3443 EXPECT_NE(CopyinEnd, nullptr); 3444 3445 BranchInst *NotMasterBr = 3446 dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator()); 3447 EXPECT_NE(NotMasterBr, nullptr); 3448 EXPECT_FALSE(NotMasterBr->isConditional()); 3449 EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0)); 3450 } 3451 3452 TEST_F(OpenMPIRBuilderTest, SingleDirective) { 3453 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3454 OpenMPIRBuilder OMPBuilder(*M); 3455 OMPBuilder.initialize(); 3456 F->setName("func"); 3457 IRBuilder<> Builder(BB); 3458 3459 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3460 3461 AllocaInst *PrivAI = nullptr; 3462 3463 BasicBlock *EntryBB = nullptr; 3464 BasicBlock *ThenBB = nullptr; 3465 3466 auto 
BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3467 if (AllocaIP.isSet()) 3468 Builder.restoreIP(AllocaIP); 3469 else 3470 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3471 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3472 Builder.CreateStore(F->arg_begin(), PrivAI); 3473 3474 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3475 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3476 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3477 3478 Builder.restoreIP(CodeGenIP); 3479 3480 // collect some info for checks later 3481 ThenBB = Builder.GetInsertBlock(); 3482 EntryBB = ThenBB->getUniquePredecessor(); 3483 3484 // simple instructions for body 3485 Value *PrivLoad = 3486 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3487 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3488 }; 3489 3490 auto FiniCB = [&](InsertPointTy IP) { 3491 BasicBlock *IPBB = IP.getBlock(); 3492 EXPECT_NE(IPBB->end(), IP.getPoint()); 3493 }; 3494 3495 ASSERT_EXPECTED_INIT( 3496 OpenMPIRBuilder::InsertPointTy, AfterIP, 3497 OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3498 FINICB_WRAPPER(FiniCB), /*IsNowait*/ false)); 3499 Builder.restoreIP(AfterIP); 3500 Value *EntryBBTI = EntryBB->getTerminator(); 3501 EXPECT_NE(EntryBBTI, nullptr); 3502 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3503 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3504 EXPECT_TRUE(EntryBr->isConditional()); 3505 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3506 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3507 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3508 3509 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3510 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3511 3512 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3513 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3514 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3515 
EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3516 3517 CallInst *SingleEndCI = nullptr; 3518 for (auto &FI : *ThenBB) { 3519 Instruction *cur = &FI; 3520 if (isa<CallInst>(cur)) { 3521 SingleEndCI = cast<CallInst>(cur); 3522 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3523 break; 3524 SingleEndCI = nullptr; 3525 } 3526 } 3527 EXPECT_NE(SingleEndCI, nullptr); 3528 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3529 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3530 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3531 3532 bool FoundBarrier = false; 3533 for (auto &FI : *ExitBB) { 3534 Instruction *cur = &FI; 3535 if (auto CI = dyn_cast<CallInst>(cur)) { 3536 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3537 FoundBarrier = true; 3538 break; 3539 } 3540 } 3541 } 3542 EXPECT_TRUE(FoundBarrier); 3543 } 3544 3545 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { 3546 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3547 OpenMPIRBuilder OMPBuilder(*M); 3548 OMPBuilder.initialize(); 3549 F->setName("func"); 3550 IRBuilder<> Builder(BB); 3551 3552 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3553 3554 AllocaInst *PrivAI = nullptr; 3555 3556 BasicBlock *EntryBB = nullptr; 3557 BasicBlock *ThenBB = nullptr; 3558 3559 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3560 if (AllocaIP.isSet()) 3561 Builder.restoreIP(AllocaIP); 3562 else 3563 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3564 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3565 Builder.CreateStore(F->arg_begin(), PrivAI); 3566 3567 llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); 3568 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3569 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3570 3571 Builder.restoreIP(CodeGenIP); 3572 3573 // collect some info for checks later 3574 ThenBB = 
Builder.GetInsertBlock(); 3575 EntryBB = ThenBB->getUniquePredecessor(); 3576 3577 // simple instructions for body 3578 Value *PrivLoad = 3579 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3580 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3581 }; 3582 3583 auto FiniCB = [&](InsertPointTy IP) { 3584 BasicBlock *IPBB = IP.getBlock(); 3585 EXPECT_NE(IPBB->end(), IP.getPoint()); 3586 }; 3587 3588 ASSERT_EXPECTED_INIT( 3589 OpenMPIRBuilder::InsertPointTy, AfterIP, 3590 OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3591 FINICB_WRAPPER(FiniCB), /*IsNowait*/ true)); 3592 Builder.restoreIP(AfterIP); 3593 Value *EntryBBTI = EntryBB->getTerminator(); 3594 EXPECT_NE(EntryBBTI, nullptr); 3595 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3596 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3597 EXPECT_TRUE(EntryBr->isConditional()); 3598 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3599 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3600 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3601 3602 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3603 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3604 3605 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3606 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3607 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3608 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3609 3610 CallInst *SingleEndCI = nullptr; 3611 for (auto &FI : *ThenBB) { 3612 Instruction *cur = &FI; 3613 if (isa<CallInst>(cur)) { 3614 SingleEndCI = cast<CallInst>(cur); 3615 if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single") 3616 break; 3617 SingleEndCI = nullptr; 3618 } 3619 } 3620 EXPECT_NE(SingleEndCI, nullptr); 3621 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3622 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3623 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3624 3625 
CallInst *ExitBarrier = nullptr; 3626 for (auto &FI : *ExitBB) { 3627 Instruction *cur = &FI; 3628 if (auto CI = dyn_cast<CallInst>(cur)) { 3629 if (CI->getCalledFunction()->getName() == "__kmpc_barrier") { 3630 ExitBarrier = CI; 3631 break; 3632 } 3633 } 3634 } 3635 EXPECT_EQ(ExitBarrier, nullptr); 3636 } 3637 3638 // Helper class to check each instruction of a BB. 3639 class BBInstIter { 3640 BasicBlock *BB; 3641 BasicBlock::iterator BBI; 3642 3643 public: 3644 BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {} 3645 3646 bool hasNext() const { return BBI != BB->end(); } 3647 3648 template <typename InstTy> InstTy *next() { 3649 if (!hasNext()) 3650 return nullptr; 3651 Instruction *Cur = &*BBI++; 3652 if (!isa<InstTy>(Cur)) 3653 return nullptr; 3654 return cast<InstTy>(Cur); 3655 } 3656 }; 3657 3658 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { 3659 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3660 OpenMPIRBuilder OMPBuilder(*M); 3661 OMPBuilder.initialize(); 3662 F->setName("func"); 3663 IRBuilder<> Builder(BB); 3664 3665 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3666 3667 AllocaInst *PrivAI = nullptr; 3668 3669 BasicBlock *EntryBB = nullptr; 3670 BasicBlock *ThenBB = nullptr; 3671 3672 Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType()); 3673 Builder.CreateStore(F->arg_begin(), CPVar); 3674 3675 FunctionType *CopyFuncTy = FunctionType::get( 3676 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false); 3677 Function *CopyFunc = 3678 Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); 3679 3680 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 3681 if (AllocaIP.isSet()) 3682 Builder.restoreIP(AllocaIP); 3683 else 3684 Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); 3685 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 3686 Builder.CreateStore(F->arg_begin(), PrivAI); 3687 3688 llvm::BasicBlock *CodeGenIPBB = 
CodeGenIP.getBlock(); 3689 llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); 3690 EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); 3691 3692 Builder.restoreIP(CodeGenIP); 3693 3694 // collect some info for checks later 3695 ThenBB = Builder.GetInsertBlock(); 3696 EntryBB = ThenBB->getUniquePredecessor(); 3697 3698 // simple instructions for body 3699 Value *PrivLoad = 3700 Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use"); 3701 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 3702 }; 3703 3704 auto FiniCB = [&](InsertPointTy IP) { 3705 BasicBlock *IPBB = IP.getBlock(); 3706 // IP must be before the unconditional branch to ExitBB 3707 EXPECT_NE(IPBB->end(), IP.getPoint()); 3708 }; 3709 3710 ASSERT_EXPECTED_INIT( 3711 OpenMPIRBuilder::InsertPointTy, AfterIP, 3712 OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), 3713 FINICB_WRAPPER(FiniCB), 3714 /*IsNowait*/ false, {CPVar}, {CopyFunc})); 3715 Builder.restoreIP(AfterIP); 3716 Value *EntryBBTI = EntryBB->getTerminator(); 3717 EXPECT_NE(EntryBBTI, nullptr); 3718 EXPECT_TRUE(isa<BranchInst>(EntryBBTI)); 3719 BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator()); 3720 EXPECT_TRUE(EntryBr->isConditional()); 3721 EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB); 3722 BasicBlock *ExitBB = ThenBB->getUniqueSuccessor(); 3723 EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB); 3724 3725 CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition()); 3726 EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0))); 3727 3728 CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0)); 3729 EXPECT_EQ(SingleEntryCI->arg_size(), 2U); 3730 EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single"); 3731 EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0))); 3732 3733 // check ThenBB 3734 BBInstIter ThenBBI(ThenBB); 3735 // load PrivAI 3736 auto *PrivLI = ThenBBI.next<LoadInst>(); 3737 EXPECT_NE(PrivLI, nullptr); 3738 EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI); 
3739 // icmp 3740 EXPECT_TRUE(ThenBBI.next<ICmpInst>()); 3741 // store 1, DidIt 3742 auto *DidItSI = ThenBBI.next<StoreInst>(); 3743 EXPECT_NE(DidItSI, nullptr); 3744 EXPECT_EQ(DidItSI->getValueOperand(), 3745 ConstantInt::get(Type::getInt32Ty(Ctx), 1)); 3746 Value *DidIt = DidItSI->getPointerOperand(); 3747 // call __kmpc_end_single 3748 auto *SingleEndCI = ThenBBI.next<CallInst>(); 3749 EXPECT_NE(SingleEndCI, nullptr); 3750 EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single"); 3751 EXPECT_EQ(SingleEndCI->arg_size(), 2U); 3752 EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0))); 3753 EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); 3754 // br ExitBB 3755 auto *ExitBBBI = ThenBBI.next<BranchInst>(); 3756 EXPECT_NE(ExitBBBI, nullptr); 3757 EXPECT_TRUE(ExitBBBI->isUnconditional()); 3758 EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB); 3759 EXPECT_FALSE(ThenBBI.hasNext()); 3760 3761 // check ExitBB 3762 BBInstIter ExitBBI(ExitBB); 3763 // call __kmpc_global_thread_num 3764 auto *ThreadNumCI = ExitBBI.next<CallInst>(); 3765 EXPECT_NE(ThreadNumCI, nullptr); 3766 EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(), 3767 "__kmpc_global_thread_num"); 3768 // load DidIt 3769 auto *DidItLI = ExitBBI.next<LoadInst>(); 3770 EXPECT_NE(DidItLI, nullptr); 3771 EXPECT_EQ(DidItLI->getPointerOperand(), DidIt); 3772 // call __kmpc_copyprivate 3773 auto *CopyPrivateCI = ExitBBI.next<CallInst>(); 3774 EXPECT_NE(CopyPrivateCI, nullptr); 3775 EXPECT_EQ(CopyPrivateCI->arg_size(), 6U); 3776 EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3))); 3777 EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar); 3778 EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4))); 3779 EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc); 3780 EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5))); 3781 DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5)); 3782 EXPECT_EQ(DidItLI->getOperand(0), DidIt); 3783 
EXPECT_FALSE(ExitBBI.hasNext()); 3784 } 3785 3786 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) { 3787 OpenMPIRBuilder OMPBuilder(*M); 3788 OMPBuilder.initialize(); 3789 F->setName("func"); 3790 IRBuilder<> Builder(BB); 3791 3792 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3793 3794 Type *Float32 = Type::getFloatTy(M->getContext()); 3795 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3796 XVal->setName("AtomicVar"); 3797 AllocaInst *VVal = Builder.CreateAlloca(Float32); 3798 VVal->setName("AtomicRead"); 3799 AtomicOrdering AO = AtomicOrdering::Monotonic; 3800 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3801 OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false}; 3802 3803 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3804 3805 IntegerType *IntCastTy = 3806 IntegerType::get(M->getContext(), Float32->getScalarSizeInBits()); 3807 3808 LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode()); 3809 EXPECT_TRUE(AtomicLoad->isAtomic()); 3810 EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal); 3811 3812 BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode()); 3813 EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy); 3814 EXPECT_EQ(CastToFlt->getDestTy(), Float32); 3815 EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad); 3816 3817 StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode()); 3818 EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt); 3819 EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal); 3820 3821 Builder.CreateRetVoid(); 3822 OMPBuilder.finalize(); 3823 EXPECT_FALSE(verifyModule(*M, &errs())); 3824 } 3825 3826 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) { 3827 OpenMPIRBuilder OMPBuilder(*M); 3828 OMPBuilder.initialize(); 3829 F->setName("func"); 3830 IRBuilder<> Builder(BB); 3831 3832 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3833 3834 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3835 AllocaInst *XVal = Builder.CreateAlloca(Int32); 
3836 XVal->setName("AtomicVar"); 3837 AllocaInst *VVal = Builder.CreateAlloca(Int32); 3838 VVal->setName("AtomicRead"); 3839 AtomicOrdering AO = AtomicOrdering::Monotonic; 3840 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3841 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 3842 3843 BasicBlock *EntryBB = BB; 3844 3845 Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO)); 3846 LoadInst *AtomicLoad = nullptr; 3847 StoreInst *StoreofAtomic = nullptr; 3848 3849 for (Instruction &Cur : *EntryBB) { 3850 if (isa<LoadInst>(Cur)) { 3851 AtomicLoad = cast<LoadInst>(&Cur); 3852 if (AtomicLoad->getPointerOperand() == XVal) 3853 continue; 3854 AtomicLoad = nullptr; 3855 } else if (isa<StoreInst>(Cur)) { 3856 StoreofAtomic = cast<StoreInst>(&Cur); 3857 if (StoreofAtomic->getPointerOperand() == VVal) 3858 continue; 3859 StoreofAtomic = nullptr; 3860 } 3861 } 3862 3863 EXPECT_NE(AtomicLoad, nullptr); 3864 EXPECT_TRUE(AtomicLoad->isAtomic()); 3865 3866 EXPECT_NE(StoreofAtomic, nullptr); 3867 EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad); 3868 3869 Builder.CreateRetVoid(); 3870 OMPBuilder.finalize(); 3871 3872 EXPECT_FALSE(verifyModule(*M, &errs())); 3873 } 3874 3875 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) { 3876 OpenMPIRBuilder OMPBuilder(*M); 3877 OMPBuilder.initialize(); 3878 F->setName("func"); 3879 IRBuilder<> Builder(BB); 3880 3881 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3882 3883 LLVMContext &Ctx = M->getContext(); 3884 Type *Float32 = Type::getFloatTy(Ctx); 3885 AllocaInst *XVal = Builder.CreateAlloca(Float32); 3886 XVal->setName("AtomicVar"); 3887 OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false}; 3888 AtomicOrdering AO = AtomicOrdering::Monotonic; 3889 Constant *ValToWrite = ConstantFP::get(Float32, 1.0); 3890 3891 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3892 3893 IntegerType *IntCastTy = 3894 IntegerType::get(M->getContext(), 
Float32->getScalarSizeInBits()); 3895 3896 Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy); 3897 3898 StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode()); 3899 EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast); 3900 EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal); 3901 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3902 3903 Builder.CreateRetVoid(); 3904 OMPBuilder.finalize(); 3905 EXPECT_FALSE(verifyModule(*M, &errs())); 3906 } 3907 3908 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) { 3909 OpenMPIRBuilder OMPBuilder(*M); 3910 OMPBuilder.initialize(); 3911 F->setName("func"); 3912 IRBuilder<> Builder(BB); 3913 3914 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 3915 3916 LLVMContext &Ctx = M->getContext(); 3917 IntegerType *Int32 = Type::getInt32Ty(Ctx); 3918 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3919 XVal->setName("AtomicVar"); 3920 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3921 AtomicOrdering AO = AtomicOrdering::Monotonic; 3922 ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3923 3924 BasicBlock *EntryBB = BB; 3925 3926 Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO)); 3927 3928 StoreInst *StoreofAtomic = nullptr; 3929 3930 for (Instruction &Cur : *EntryBB) { 3931 if (isa<StoreInst>(Cur)) { 3932 StoreofAtomic = cast<StoreInst>(&Cur); 3933 if (StoreofAtomic->getPointerOperand() == XVal) 3934 continue; 3935 StoreofAtomic = nullptr; 3936 } 3937 } 3938 3939 EXPECT_NE(StoreofAtomic, nullptr); 3940 EXPECT_TRUE(StoreofAtomic->isAtomic()); 3941 EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite); 3942 3943 Builder.CreateRetVoid(); 3944 OMPBuilder.finalize(); 3945 EXPECT_FALSE(verifyModule(*M, &errs())); 3946 } 3947 3948 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) { 3949 OpenMPIRBuilder OMPBuilder(*M); 3950 OMPBuilder.initialize(); 3951 F->setName("func"); 3952 IRBuilder<> Builder(BB); 3953 3954 OpenMPIRBuilder::LocationDescription 
Loc({Builder.saveIP(), DL}); 3955 3956 IntegerType *Int32 = Type::getInt32Ty(M->getContext()); 3957 AllocaInst *XVal = Builder.CreateAlloca(Int32); 3958 XVal->setName("AtomicVar"); 3959 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 3960 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 3961 AtomicOrdering AO = AtomicOrdering::Monotonic; 3962 ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 3963 Value *Expr = nullptr; 3964 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub; 3965 bool IsXLHSInRHSPart = false; 3966 3967 BasicBlock *EntryBB = BB; 3968 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 3969 EntryBB->getFirstInsertionPt()); 3970 Value *Sub = nullptr; 3971 3972 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 3973 Sub = IRB.CreateSub(ConstVal, Atomic); 3974 return Sub; 3975 }; 3976 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 3977 OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, 3978 AO, RMWOp, UpdateOp, 3979 IsXLHSInRHSPart)); 3980 Builder.restoreIP(AfterIP); 3981 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 3982 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 3983 EXPECT_NE(ContTI, nullptr); 3984 BasicBlock *EndBB = ContTI->getSuccessor(0); 3985 EXPECT_TRUE(ContTI->isConditional()); 3986 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 3987 EXPECT_NE(EndBB, nullptr); 3988 3989 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 3990 EXPECT_NE(Phi, nullptr); 3991 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 3992 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 3993 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 3994 3995 EXPECT_EQ(Sub->getNumUses(), 1U); 3996 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 3997 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 3998 3999 ExtractValueInst *ExVI1 = 4000 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 4001 EXPECT_NE(ExVI1, nullptr); 4002 AtomicCmpXchgInst 
*CmpExchg = 4003 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 4004 EXPECT_NE(CmpExchg, nullptr); 4005 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 4006 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 4007 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 4008 4009 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 4010 EXPECT_NE(Ld, nullptr); 4011 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 4012 4013 Builder.CreateRetVoid(); 4014 OMPBuilder.finalize(); 4015 EXPECT_FALSE(verifyModule(*M, &errs())); 4016 } 4017 4018 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) { 4019 OpenMPIRBuilder OMPBuilder(*M); 4020 OMPBuilder.initialize(); 4021 F->setName("func"); 4022 IRBuilder<> Builder(BB); 4023 4024 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4025 4026 Type *FloatTy = Type::getFloatTy(M->getContext()); 4027 AllocaInst *XVal = Builder.CreateAlloca(FloatTy); 4028 XVal->setName("AtomicVar"); 4029 Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal); 4030 OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false}; 4031 AtomicOrdering AO = AtomicOrdering::Monotonic; 4032 Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0); 4033 Value *Expr = nullptr; 4034 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub; 4035 bool IsXLHSInRHSPart = false; 4036 4037 BasicBlock *EntryBB = BB; 4038 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 4039 EntryBB->getFirstInsertionPt()); 4040 Value *Sub = nullptr; 4041 4042 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 4043 Sub = IRB.CreateFSub(ConstVal, Atomic); 4044 return Sub; 4045 }; 4046 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4047 OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, 4048 AO, RMWOp, UpdateOp, 4049 IsXLHSInRHSPart)); 4050 Builder.restoreIP(AfterIP); 4051 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 4052 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 4053 
EXPECT_NE(ContTI, nullptr); 4054 BasicBlock *EndBB = ContTI->getSuccessor(0); 4055 EXPECT_TRUE(ContTI->isConditional()); 4056 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 4057 EXPECT_NE(EndBB, nullptr); 4058 4059 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 4060 EXPECT_NE(Phi, nullptr); 4061 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 4062 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 4063 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 4064 4065 EXPECT_EQ(Sub->getNumUses(), 1U); 4066 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 4067 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 4068 4069 ExtractValueInst *ExVI1 = 4070 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 4071 EXPECT_NE(ExVI1, nullptr); 4072 AtomicCmpXchgInst *CmpExchg = 4073 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 4074 EXPECT_NE(CmpExchg, nullptr); 4075 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 4076 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 4077 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 4078 4079 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 4080 EXPECT_NE(Ld, nullptr); 4081 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 4082 Builder.CreateRetVoid(); 4083 OMPBuilder.finalize(); 4084 EXPECT_FALSE(verifyModule(*M, &errs())); 4085 } 4086 4087 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) { 4088 OpenMPIRBuilder OMPBuilder(*M); 4089 OMPBuilder.initialize(); 4090 F->setName("func"); 4091 IRBuilder<> Builder(BB); 4092 4093 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4094 4095 Type *IntTy = Type::getInt32Ty(M->getContext()); 4096 AllocaInst *XVal = Builder.CreateAlloca(IntTy); 4097 XVal->setName("AtomicVar"); 4098 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal); 4099 OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false}; 4100 AtomicOrdering AO = AtomicOrdering::Monotonic; 4101 Constant *ConstVal = 
ConstantInt::get(Type::getInt32Ty(Ctx), 1); 4102 Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1); 4103 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax; 4104 bool IsXLHSInRHSPart = false; 4105 4106 BasicBlock *EntryBB = BB; 4107 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 4108 EntryBB->getFirstInsertionPt()); 4109 Value *Sub = nullptr; 4110 4111 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { 4112 Sub = IRB.CreateSub(ConstVal, Atomic); 4113 return Sub; 4114 }; 4115 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4116 OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr, 4117 AO, RMWOp, UpdateOp, 4118 IsXLHSInRHSPart)); 4119 Builder.restoreIP(AfterIP); 4120 BasicBlock *ContBB = EntryBB->getSingleSuccessor(); 4121 BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator()); 4122 EXPECT_NE(ContTI, nullptr); 4123 BasicBlock *EndBB = ContTI->getSuccessor(0); 4124 EXPECT_TRUE(ContTI->isConditional()); 4125 EXPECT_EQ(ContTI->getSuccessor(1), ContBB); 4126 EXPECT_NE(EndBB, nullptr); 4127 4128 PHINode *Phi = dyn_cast<PHINode>(&ContBB->front()); 4129 EXPECT_NE(Phi, nullptr); 4130 EXPECT_EQ(Phi->getNumIncomingValues(), 2U); 4131 EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB); 4132 EXPECT_EQ(Phi->getIncomingBlock(1), ContBB); 4133 4134 EXPECT_EQ(Sub->getNumUses(), 1U); 4135 StoreInst *St = dyn_cast<StoreInst>(Sub->user_back()); 4136 AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand()); 4137 4138 ExtractValueInst *ExVI1 = 4139 dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB)); 4140 EXPECT_NE(ExVI1, nullptr); 4141 AtomicCmpXchgInst *CmpExchg = 4142 dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand()); 4143 EXPECT_NE(CmpExchg, nullptr); 4144 EXPECT_EQ(CmpExchg->getPointerOperand(), XVal); 4145 EXPECT_EQ(CmpExchg->getCompareOperand(), Phi); 4146 EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic); 4147 4148 LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand()); 4149 
EXPECT_NE(Ld, nullptr); 4150 EXPECT_EQ(UpdateTemp, Ld->getPointerOperand()); 4151 4152 Builder.CreateRetVoid(); 4153 OMPBuilder.finalize(); 4154 EXPECT_FALSE(verifyModule(*M, &errs())); 4155 } 4156 4157 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) { 4158 OpenMPIRBuilder OMPBuilder(*M); 4159 OMPBuilder.initialize(); 4160 F->setName("func"); 4161 IRBuilder<> Builder(BB); 4162 4163 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4164 4165 LLVMContext &Ctx = M->getContext(); 4166 IntegerType *Int32 = Type::getInt32Ty(Ctx); 4167 AllocaInst *XVal = Builder.CreateAlloca(Int32); 4168 XVal->setName("AtomicVar"); 4169 AllocaInst *VVal = Builder.CreateAlloca(Int32); 4170 VVal->setName("AtomicCapTar"); 4171 StoreInst *Init = 4172 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 4173 4174 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false}; 4175 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 4176 AtomicOrdering AO = AtomicOrdering::Monotonic; 4177 ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4178 AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add; 4179 bool IsXLHSInRHSPart = true; 4180 bool IsPostfixUpdate = true; 4181 bool UpdateExpr = true; 4182 4183 BasicBlock *EntryBB = BB; 4184 OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB, 4185 EntryBB->getFirstInsertionPt()); 4186 4187 // integer update - not used 4188 auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; }; 4189 4190 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4191 OMPBuilder.createAtomicCapture( 4192 Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, 4193 UpdateExpr, IsPostfixUpdate, IsXLHSInRHSPart)); 4194 Builder.restoreIP(AfterIP); 4195 EXPECT_EQ(EntryBB->getParent()->size(), 1U); 4196 AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode()); 4197 EXPECT_NE(ARWM, nullptr); 4198 EXPECT_EQ(ARWM->getPointerOperand(), XVal); 4199 EXPECT_EQ(ARWM->getOperation(), RMWOp); 4200 StoreInst *St 
= dyn_cast<StoreInst>(ARWM->user_back()); 4201 EXPECT_NE(St, nullptr); 4202 EXPECT_EQ(St->getPointerOperand(), VVal); 4203 4204 Builder.CreateRetVoid(); 4205 OMPBuilder.finalize(); 4206 EXPECT_FALSE(verifyModule(*M, &errs())); 4207 } 4208 4209 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) { 4210 OpenMPIRBuilder OMPBuilder(*M); 4211 OMPBuilder.initialize(); 4212 F->setName("func"); 4213 IRBuilder<> Builder(BB); 4214 4215 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4216 4217 LLVMContext &Ctx = M->getContext(); 4218 IntegerType *Int32 = Type::getInt32Ty(Ctx); 4219 AllocaInst *XVal = Builder.CreateAlloca(Int32); 4220 XVal->setName("x"); 4221 StoreInst *Init = 4222 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 4223 4224 OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false}; 4225 OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false}; 4226 // V and R are not used in atomic compare 4227 OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false}; 4228 OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false}; 4229 AtomicOrdering AO = AtomicOrdering::Monotonic; 4230 ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4231 ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4232 OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX; 4233 OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ; 4234 4235 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4236 Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false)); 4237 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4238 Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false)); 4239 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4240 Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false)); 4241 4242 BasicBlock *EntryBB = BB; 4243 EXPECT_EQ(EntryBB->getParent()->size(), 1U); 4244 EXPECT_EQ(EntryBB->size(), 5U); 4245 4246 AtomicRMWInst *ARWM1 = 
dyn_cast<AtomicRMWInst>(Init->getNextNode()); 4247 EXPECT_NE(ARWM1, nullptr); 4248 EXPECT_EQ(ARWM1->getPointerOperand(), XVal); 4249 EXPECT_EQ(ARWM1->getValOperand(), Expr); 4250 EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min); 4251 4252 AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode()); 4253 EXPECT_NE(ARWM2, nullptr); 4254 EXPECT_EQ(ARWM2->getPointerOperand(), XVal); 4255 EXPECT_EQ(ARWM2->getValOperand(), Expr); 4256 EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax); 4257 4258 AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode()); 4259 EXPECT_NE(AXCHG, nullptr); 4260 EXPECT_EQ(AXCHG->getPointerOperand(), XVal); 4261 EXPECT_EQ(AXCHG->getCompareOperand(), Expr); 4262 EXPECT_EQ(AXCHG->getNewValOperand(), D); 4263 4264 Builder.CreateRetVoid(); 4265 OMPBuilder.finalize(); 4266 EXPECT_FALSE(verifyModule(*M, &errs())); 4267 } 4268 4269 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) { 4270 OpenMPIRBuilder OMPBuilder(*M); 4271 OMPBuilder.initialize(); 4272 F->setName("func"); 4273 IRBuilder<> Builder(BB); 4274 4275 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4276 4277 LLVMContext &Ctx = M->getContext(); 4278 IntegerType *Int32 = Type::getInt32Ty(Ctx); 4279 AllocaInst *XVal = Builder.CreateAlloca(Int32); 4280 XVal->setName("x"); 4281 AllocaInst *VVal = Builder.CreateAlloca(Int32); 4282 VVal->setName("v"); 4283 AllocaInst *RVal = Builder.CreateAlloca(Int32); 4284 RVal->setName("r"); 4285 4286 StoreInst *Init = 4287 Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal); 4288 4289 OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false}; 4290 OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false}; 4291 OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false}; 4292 OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false}; 4293 OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false}; 4294 4295 AtomicOrdering AO = AtomicOrdering::Monotonic; 
4296 ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4297 ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U); 4298 OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX; 4299 OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ; 4300 4301 // { cond-update-stmt v = x; } 4302 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4303 Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4304 /* IsPostfixUpdate */ false, 4305 /* IsFailOnly */ false)); 4306 // { v = x; cond-update-stmt } 4307 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4308 Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4309 /* IsPostfixUpdate */ true, 4310 /* IsFailOnly */ false)); 4311 // if(x == e) { x = d; } else { v = x; } 4312 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4313 Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4314 /* IsPostfixUpdate */ false, 4315 /* IsFailOnly */ true)); 4316 // { r = x == e; if(r) { x = d; } } 4317 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4318 Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4319 /* IsPostfixUpdate */ false, 4320 /* IsFailOnly */ false)); 4321 // { r = x == e; if(r) { x = d; } else { v = x; } } 4322 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4323 Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true, 4324 /* IsPostfixUpdate */ false, 4325 /* IsFailOnly */ true)); 4326 4327 // { v = x; cond-update-stmt } 4328 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4329 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true, 4330 /* IsPostfixUpdate */ true, 4331 /* IsFailOnly */ false)); 4332 // { cond-update-stmt v = x; } 4333 Builder.restoreIP(OMPBuilder.createAtomicCompare( 4334 Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false, 4335 /* IsPostfixUpdate */ false, 4336 /* IsFailOnly */ false)); 4337 4338 BasicBlock *EntryBB = BB; 4339 EXPECT_EQ(EntryBB->getParent()->size(), 5U); 4340 BasicBlock *Cont1 = 
dyn_cast<BasicBlock>(EntryBB->getNextNode()); 4341 EXPECT_NE(Cont1, nullptr); 4342 BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode()); 4343 EXPECT_NE(Exit1, nullptr); 4344 BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode()); 4345 EXPECT_NE(Cont2, nullptr); 4346 BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode()); 4347 EXPECT_NE(Exit2, nullptr); 4348 4349 AtomicCmpXchgInst *CmpXchg1 = 4350 dyn_cast<AtomicCmpXchgInst>(Init->getNextNode()); 4351 EXPECT_NE(CmpXchg1, nullptr); 4352 EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal); 4353 EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr); 4354 EXPECT_EQ(CmpXchg1->getNewValOperand(), D); 4355 ExtractValueInst *ExtVal1 = 4356 dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode()); 4357 EXPECT_NE(ExtVal1, nullptr); 4358 EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1); 4359 EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U)); 4360 ExtractValueInst *ExtVal2 = 4361 dyn_cast<ExtractValueInst>(ExtVal1->getNextNode()); 4362 EXPECT_NE(ExtVal2, nullptr); 4363 EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1); 4364 EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U)); 4365 SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode()); 4366 EXPECT_NE(Sel1, nullptr); 4367 EXPECT_EQ(Sel1->getCondition(), ExtVal2); 4368 EXPECT_EQ(Sel1->getTrueValue(), Expr); 4369 EXPECT_EQ(Sel1->getFalseValue(), ExtVal1); 4370 StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode()); 4371 EXPECT_NE(Store1, nullptr); 4372 EXPECT_EQ(Store1->getPointerOperand(), VVal); 4373 EXPECT_EQ(Store1->getValueOperand(), Sel1); 4374 4375 AtomicCmpXchgInst *CmpXchg2 = 4376 dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode()); 4377 EXPECT_NE(CmpXchg2, nullptr); 4378 EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal); 4379 EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr); 4380 EXPECT_EQ(CmpXchg2->getNewValOperand(), D); 4381 ExtractValueInst *ExtVal3 = 4382 dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode()); 4383 
EXPECT_NE(ExtVal3, nullptr); 4384 EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2); 4385 EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U)); 4386 StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode()); 4387 EXPECT_NE(Store2, nullptr); 4388 EXPECT_EQ(Store2->getPointerOperand(), VVal); 4389 EXPECT_EQ(Store2->getValueOperand(), ExtVal3); 4390 4391 AtomicCmpXchgInst *CmpXchg3 = 4392 dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode()); 4393 EXPECT_NE(CmpXchg3, nullptr); 4394 EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal); 4395 EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr); 4396 EXPECT_EQ(CmpXchg3->getNewValOperand(), D); 4397 ExtractValueInst *ExtVal4 = 4398 dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode()); 4399 EXPECT_NE(ExtVal4, nullptr); 4400 EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3); 4401 EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U)); 4402 ExtractValueInst *ExtVal5 = 4403 dyn_cast<ExtractValueInst>(ExtVal4->getNextNode()); 4404 EXPECT_NE(ExtVal5, nullptr); 4405 EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3); 4406 EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U)); 4407 BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode()); 4408 EXPECT_NE(Br1, nullptr); 4409 EXPECT_EQ(Br1->isConditional(), true); 4410 EXPECT_EQ(Br1->getCondition(), ExtVal5); 4411 EXPECT_EQ(Br1->getSuccessor(0), Exit1); 4412 EXPECT_EQ(Br1->getSuccessor(1), Cont1); 4413 4414 StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front()); 4415 EXPECT_NE(Store3, nullptr); 4416 EXPECT_EQ(Store3->getPointerOperand(), VVal); 4417 EXPECT_EQ(Store3->getValueOperand(), ExtVal4); 4418 BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode()); 4419 EXPECT_NE(Br2, nullptr); 4420 EXPECT_EQ(Br2->isUnconditional(), true); 4421 EXPECT_EQ(Br2->getSuccessor(0), Exit1); 4422 4423 AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front()); 4424 EXPECT_NE(CmpXchg4, nullptr); 4425 EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal); 4426 
EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr); 4427 EXPECT_EQ(CmpXchg4->getNewValOperand(), D); 4428 ExtractValueInst *ExtVal6 = 4429 dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode()); 4430 EXPECT_NE(ExtVal6, nullptr); 4431 EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4); 4432 EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U)); 4433 ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode()); 4434 EXPECT_NE(ZExt1, nullptr); 4435 EXPECT_EQ(ZExt1->getDestTy(), Int32); 4436 StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode()); 4437 EXPECT_NE(Store4, nullptr); 4438 EXPECT_EQ(Store4->getPointerOperand(), RVal); 4439 EXPECT_EQ(Store4->getValueOperand(), ZExt1); 4440 4441 AtomicCmpXchgInst *CmpXchg5 = 4442 dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode()); 4443 EXPECT_NE(CmpXchg5, nullptr); 4444 EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal); 4445 EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr); 4446 EXPECT_EQ(CmpXchg5->getNewValOperand(), D); 4447 ExtractValueInst *ExtVal7 = 4448 dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode()); 4449 EXPECT_NE(ExtVal7, nullptr); 4450 EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5); 4451 EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U)); 4452 ExtractValueInst *ExtVal8 = 4453 dyn_cast<ExtractValueInst>(ExtVal7->getNextNode()); 4454 EXPECT_NE(ExtVal8, nullptr); 4455 EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5); 4456 EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U)); 4457 BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode()); 4458 EXPECT_NE(Br3, nullptr); 4459 EXPECT_EQ(Br3->isConditional(), true); 4460 EXPECT_EQ(Br3->getCondition(), ExtVal8); 4461 EXPECT_EQ(Br3->getSuccessor(0), Exit2); 4462 EXPECT_EQ(Br3->getSuccessor(1), Cont2); 4463 4464 StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front()); 4465 EXPECT_NE(Store5, nullptr); 4466 EXPECT_EQ(Store5->getPointerOperand(), VVal); 4467 EXPECT_EQ(Store5->getValueOperand(), ExtVal7); 4468 BranchInst *Br4 = 
dyn_cast<BranchInst>(Store5->getNextNode()); 4469 EXPECT_NE(Br4, nullptr); 4470 EXPECT_EQ(Br4->isUnconditional(), true); 4471 EXPECT_EQ(Br4->getSuccessor(0), Exit2); 4472 4473 ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front()); 4474 EXPECT_NE(ExtVal9, nullptr); 4475 EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5); 4476 EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U)); 4477 ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode()); 4478 EXPECT_NE(ZExt2, nullptr); 4479 EXPECT_EQ(ZExt2->getDestTy(), Int32); 4480 StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode()); 4481 EXPECT_NE(Store6, nullptr); 4482 EXPECT_EQ(Store6->getPointerOperand(), RVal); 4483 EXPECT_EQ(Store6->getValueOperand(), ZExt2); 4484 4485 AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode()); 4486 EXPECT_NE(ARWM1, nullptr); 4487 EXPECT_EQ(ARWM1->getPointerOperand(), XVal); 4488 EXPECT_EQ(ARWM1->getValOperand(), Expr); 4489 EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min); 4490 StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode()); 4491 EXPECT_NE(Store7, nullptr); 4492 EXPECT_EQ(Store7->getPointerOperand(), VVal); 4493 EXPECT_EQ(Store7->getValueOperand(), ARWM1); 4494 4495 AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode()); 4496 EXPECT_NE(ARWM2, nullptr); 4497 EXPECT_EQ(ARWM2->getPointerOperand(), XVal); 4498 EXPECT_EQ(ARWM2->getValOperand(), Expr); 4499 EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max); 4500 CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode()); 4501 EXPECT_NE(Cmp1, nullptr); 4502 EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT); 4503 EXPECT_EQ(Cmp1->getOperand(0), ARWM2); 4504 EXPECT_EQ(Cmp1->getOperand(1), Expr); 4505 SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode()); 4506 EXPECT_NE(Sel2, nullptr); 4507 EXPECT_EQ(Sel2->getCondition(), Cmp1); 4508 EXPECT_EQ(Sel2->getTrueValue(), Expr); 4509 EXPECT_EQ(Sel2->getFalseValue(), ARWM2); 4510 StoreInst *Store8 = 
dyn_cast<StoreInst>(Sel2->getNextNode()); 4511 EXPECT_NE(Store8, nullptr); 4512 EXPECT_EQ(Store8->getPointerOperand(), VVal); 4513 EXPECT_EQ(Store8->getValueOperand(), Sel2); 4514 4515 Builder.CreateRetVoid(); 4516 OMPBuilder.finalize(); 4517 EXPECT_FALSE(verifyModule(*M, &errs())); 4518 } 4519 4520 TEST_F(OpenMPIRBuilderTest, CreateTeams) { 4521 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4522 OpenMPIRBuilder OMPBuilder(*M); 4523 OMPBuilder.Config.IsTargetDevice = false; 4524 OMPBuilder.initialize(); 4525 F->setName("func"); 4526 IRBuilder<> Builder(BB); 4527 4528 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 4529 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 4530 Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load"); 4531 4532 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4533 Builder.restoreIP(AllocaIP); 4534 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 4535 "bodygen.alloca128"); 4536 4537 Builder.restoreIP(CodeGenIP); 4538 // Loading and storing captured pointer and values 4539 Builder.CreateStore(Val128, Local128); 4540 Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 4541 "bodygen.load32"); 4542 4543 LoadInst *PrivLoad128 = Builder.CreateLoad( 4544 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 4545 Value *Cmp = Builder.CreateICmpNE( 4546 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 4547 Instruction *ThenTerm, *ElseTerm; 4548 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 4549 &ThenTerm, &ElseTerm); 4550 return Error::success(); 4551 }; 4552 4553 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4554 ASSERT_EXPECTED_INIT( 4555 OpenMPIRBuilder::InsertPointTy, AfterIP, 4556 OMPBuilder.createTeams(Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, 4557 /*NumTeamsUpper=*/nullptr, 4558 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4559 
Builder.restoreIP(AfterIP); 4560 4561 OMPBuilder.finalize(); 4562 Builder.CreateRetVoid(); 4563 4564 EXPECT_FALSE(verifyModule(*M, &errs())); 4565 4566 CallInst *TeamsForkCall = dyn_cast<CallInst>( 4567 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams) 4568 ->user_back()); 4569 4570 // Verify the Ident argument 4571 GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0)); 4572 ASSERT_NE(Ident, nullptr); 4573 EXPECT_TRUE(Ident->hasInitializer()); 4574 Constant *Initializer = Ident->getInitializer(); 4575 GlobalVariable *SrcStrGlob = 4576 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 4577 ASSERT_NE(SrcStrGlob, nullptr); 4578 ConstantDataArray *SrcSrc = 4579 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 4580 ASSERT_NE(SrcSrc, nullptr); 4581 4582 // Verify the outlined function signature. 4583 Function *OutlinedFn = 4584 dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts()); 4585 ASSERT_NE(OutlinedFn, nullptr); 4586 EXPECT_FALSE(OutlinedFn->isDeclaration()); 4587 EXPECT_TRUE(OutlinedFn->arg_size() >= 3); 4588 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid 4589 EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid 4590 EXPECT_EQ(OutlinedFn->getArg(2)->getType(), 4591 Builder.getPtrTy()); // captured args 4592 4593 // Check for TruncInst and ICmpInst in the outlined function. 
4594 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4595 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 4596 EXPECT_TRUE(any_of(instructions(OutlinedFn), 4597 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 4598 } 4599 4600 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { 4601 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4602 OpenMPIRBuilder OMPBuilder(*M); 4603 OMPBuilder.Config.IsTargetDevice = false; 4604 OMPBuilder.initialize(); 4605 F->setName("func"); 4606 IRBuilder<> &Builder = OMPBuilder.Builder; 4607 Builder.SetInsertPoint(BB); 4608 4609 Function *FakeFunction = 4610 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4611 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4612 4613 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4614 Builder.restoreIP(CodeGenIP); 4615 Builder.CreateCall(FakeFunction, {}); 4616 return Error::success(); 4617 }; 4618 4619 // `F` has an argument - an integer, so we use that as the thread limit. 
4620 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4621 OMPBuilder.createTeams( 4622 /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, 4623 /*NumTeamsUpper=*/nullptr, 4624 /*ThreadLimit=*/F->arg_begin(), 4625 /*IfExpr=*/nullptr)); 4626 Builder.restoreIP(AfterIP); 4627 4628 Builder.CreateRetVoid(); 4629 OMPBuilder.finalize(); 4630 4631 ASSERT_FALSE(verifyModule(*M)); 4632 4633 CallInst *PushNumTeamsCallInst = 4634 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4635 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4636 4637 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0)); 4638 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0)); 4639 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin()); 4640 4641 // Verifying that the next instruction to execute is kmpc_fork_teams 4642 BranchInst *BrInst = 4643 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4644 ASSERT_NE(BrInst, nullptr); 4645 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4646 Instruction *NextInstruction = 4647 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4648 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4649 ASSERT_NE(ForkTeamsCI, nullptr); 4650 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4651 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4652 } 4653 4654 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { 4655 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4656 OpenMPIRBuilder OMPBuilder(*M); 4657 OMPBuilder.Config.IsTargetDevice = false; 4658 OMPBuilder.initialize(); 4659 F->setName("func"); 4660 IRBuilder<> &Builder = OMPBuilder.Builder; 4661 Builder.SetInsertPoint(BB); 4662 4663 Function *FakeFunction = 4664 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4665 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4666 4667 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4668 
Builder.restoreIP(CodeGenIP); 4669 Builder.CreateCall(FakeFunction, {}); 4670 return Error::success(); 4671 }; 4672 4673 // `F` already has an integer argument, so we use that as upper bound to 4674 // `num_teams` 4675 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4676 OMPBuilder.createTeams(Builder, BodyGenCB, 4677 /*NumTeamsLower=*/nullptr, 4678 /*NumTeamsUpper=*/F->arg_begin(), 4679 /*ThreadLimit=*/nullptr, 4680 /*IfExpr=*/nullptr)); 4681 Builder.restoreIP(AfterIP); 4682 4683 Builder.CreateRetVoid(); 4684 OMPBuilder.finalize(); 4685 4686 ASSERT_FALSE(verifyModule(*M)); 4687 4688 CallInst *PushNumTeamsCallInst = 4689 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4690 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4691 4692 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin()); 4693 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin()); 4694 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0)); 4695 4696 // Verifying that the next instruction to execute is kmpc_fork_teams 4697 BranchInst *BrInst = 4698 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4699 ASSERT_NE(BrInst, nullptr); 4700 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4701 Instruction *NextInstruction = 4702 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4703 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4704 ASSERT_NE(ForkTeamsCI, nullptr); 4705 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4706 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4707 } 4708 4709 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { 4710 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4711 OpenMPIRBuilder OMPBuilder(*M); 4712 OMPBuilder.Config.IsTargetDevice = false; 4713 OMPBuilder.initialize(); 4714 F->setName("func"); 4715 IRBuilder<> &Builder = OMPBuilder.Builder; 4716 Builder.SetInsertPoint(BB); 4717 4718 Function *FakeFunction = 4719 
Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4720 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4721 4722 Value *NumTeamsLower = 4723 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower"); 4724 Value *NumTeamsUpper = 4725 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); 4726 4727 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4728 Builder.restoreIP(CodeGenIP); 4729 Builder.CreateCall(FakeFunction, {}); 4730 return Error::success(); 4731 }; 4732 4733 // `F` already has an integer argument, so we use that as upper bound to 4734 // `num_teams` 4735 ASSERT_EXPECTED_INIT( 4736 OpenMPIRBuilder::InsertPointTy, AfterIP, 4737 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, 4738 /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); 4739 Builder.restoreIP(AfterIP); 4740 4741 Builder.CreateRetVoid(); 4742 OMPBuilder.finalize(); 4743 4744 ASSERT_FALSE(verifyModule(*M)); 4745 4746 CallInst *PushNumTeamsCallInst = 4747 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4748 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4749 4750 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower); 4751 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper); 4752 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0)); 4753 4754 // Verifying that the next instruction to execute is kmpc_fork_teams 4755 BranchInst *BrInst = 4756 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4757 ASSERT_NE(BrInst, nullptr); 4758 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4759 Instruction *NextInstruction = 4760 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4761 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4762 ASSERT_NE(ForkTeamsCI, nullptr); 4763 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4764 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4765 } 4766 4767 
TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { 4768 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4769 OpenMPIRBuilder OMPBuilder(*M); 4770 OMPBuilder.Config.IsTargetDevice = false; 4771 OMPBuilder.initialize(); 4772 F->setName("func"); 4773 IRBuilder<> &Builder = OMPBuilder.Builder; 4774 Builder.SetInsertPoint(BB); 4775 4776 BasicBlock *CodegenBB = splitBB(Builder, true); 4777 Builder.SetInsertPoint(CodegenBB); 4778 4779 // Generate values for `num_teams` and `thread_limit` using the first argument 4780 // of the testing function. 4781 Value *NumTeamsLower = 4782 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower"); 4783 Value *NumTeamsUpper = 4784 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); 4785 Value *ThreadLimit = 4786 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit"); 4787 4788 Function *FakeFunction = 4789 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4790 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4791 4792 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4793 Builder.restoreIP(CodeGenIP); 4794 Builder.CreateCall(FakeFunction, {}); 4795 return Error::success(); 4796 }; 4797 4798 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 4799 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4800 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, 4801 NumTeamsUpper, ThreadLimit, 4802 nullptr)); 4803 Builder.restoreIP(AfterIP); 4804 4805 Builder.CreateRetVoid(); 4806 OMPBuilder.finalize(); 4807 4808 ASSERT_FALSE(verifyModule(*M)); 4809 4810 CallInst *PushNumTeamsCallInst = 4811 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4812 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4813 4814 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower); 4815 EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper); 4816 
EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit); 4817 4818 // Verifying that the next instruction to execute is kmpc_fork_teams 4819 BranchInst *BrInst = 4820 dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction()); 4821 ASSERT_NE(BrInst, nullptr); 4822 ASSERT_EQ(BrInst->getNumSuccessors(), 1U); 4823 Instruction *NextInstruction = 4824 BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); 4825 CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction); 4826 ASSERT_NE(ForkTeamsCI, nullptr); 4827 EXPECT_EQ(ForkTeamsCI->getCalledFunction(), 4828 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)); 4829 } 4830 4831 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { 4832 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4833 OpenMPIRBuilder OMPBuilder(*M); 4834 OMPBuilder.Config.IsTargetDevice = false; 4835 OMPBuilder.initialize(); 4836 F->setName("func"); 4837 IRBuilder<> &Builder = OMPBuilder.Builder; 4838 Builder.SetInsertPoint(BB); 4839 4840 Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(), 4841 Builder.CreateAlloca(Builder.getInt1Ty())); 4842 4843 Function *FakeFunction = 4844 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4845 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4846 4847 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4848 Builder.restoreIP(CodeGenIP); 4849 Builder.CreateCall(FakeFunction, {}); 4850 return Error::success(); 4851 }; 4852 4853 // `F` already has an integer argument, so we use that as upper bound to 4854 // `num_teams` 4855 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4856 OMPBuilder.createTeams(Builder, BodyGenCB, 4857 /*NumTeamsLower=*/nullptr, 4858 /*NumTeamsUpper=*/nullptr, 4859 /*ThreadLimit=*/nullptr, IfExpr)); 4860 Builder.restoreIP(AfterIP); 4861 4862 Builder.CreateRetVoid(); 4863 OMPBuilder.finalize(); 4864 4865 ASSERT_FALSE(verifyModule(*M)); 4866 4867 CallInst *PushNumTeamsCallInst 
= 4868 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4869 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4870 Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2); 4871 Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3); 4872 Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4); 4873 4874 // Check the lower_bound 4875 ASSERT_NE(NumTeamsLower, nullptr); 4876 SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower); 4877 ASSERT_NE(NumTeamsLowerSelectInst, nullptr); 4878 EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr); 4879 EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0)); 4880 EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); 4881 4882 // Check the upper_bound 4883 ASSERT_NE(NumTeamsUpper, nullptr); 4884 SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper); 4885 ASSERT_NE(NumTeamsUpperSelectInst, nullptr); 4886 EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr); 4887 EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0)); 4888 EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); 4889 4890 // Check thread_limit 4891 EXPECT_EQ(ThreadLimit, Builder.getInt32(0)); 4892 } 4893 4894 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { 4895 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 4896 OpenMPIRBuilder OMPBuilder(*M); 4897 OMPBuilder.Config.IsTargetDevice = false; 4898 OMPBuilder.initialize(); 4899 F->setName("func"); 4900 IRBuilder<> &Builder = OMPBuilder.Builder; 4901 Builder.SetInsertPoint(BB); 4902 4903 Value *IfExpr = Builder.CreateLoad( 4904 Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty())); 4905 Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5)); 4906 Value *NumTeamsUpper = 4907 Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10)); 4908 Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20)); 
4909 4910 Function *FakeFunction = 4911 Function::Create(FunctionType::get(Builder.getVoidTy(), false), 4912 GlobalValue::ExternalLinkage, "fakeFunction", M.get()); 4913 4914 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 4915 Builder.restoreIP(CodeGenIP); 4916 Builder.CreateCall(FakeFunction, {}); 4917 return Error::success(); 4918 }; 4919 4920 // `F` already has an integer argument, so we use that as upper bound to 4921 // `num_teams` 4922 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 4923 OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, 4924 NumTeamsUpper, ThreadLimit, 4925 IfExpr)); 4926 Builder.restoreIP(AfterIP); 4927 4928 Builder.CreateRetVoid(); 4929 OMPBuilder.finalize(); 4930 4931 ASSERT_FALSE(verifyModule(*M)); 4932 4933 CallInst *PushNumTeamsCallInst = 4934 findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder); 4935 ASSERT_NE(PushNumTeamsCallInst, nullptr); 4936 Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2); 4937 Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3); 4938 Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4); 4939 4940 // Get the boolean conversion of if expression 4941 ASSERT_EQ(IfExpr->getNumUses(), 1U); 4942 User *IfExprInst = IfExpr->user_back(); 4943 ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst); 4944 ASSERT_NE(IfExprCmpInst, nullptr); 4945 EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE); 4946 EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr); 4947 EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0)); 4948 4949 // Check the lower_bound 4950 ASSERT_NE(NumTeamsLowerArg, nullptr); 4951 SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg); 4952 ASSERT_NE(NumTeamsLowerSelectInst, nullptr); 4953 EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst); 4954 EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower); 4955 
EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1)); 4956 4957 // Check the upper_bound 4958 ASSERT_NE(NumTeamsUpperArg, nullptr); 4959 SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg); 4960 ASSERT_NE(NumTeamsUpperSelectInst, nullptr); 4961 EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst); 4962 EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper); 4963 EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1)); 4964 4965 // Check thread_limit 4966 EXPECT_EQ(ThreadLimitArg, ThreadLimit); 4967 } 4968 4969 /// Returns the single instruction of InstTy type in BB that uses the value V. 4970 /// If there is more than one such instruction, returns null. 4971 template <typename InstTy> 4972 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) { 4973 InstTy *Result = nullptr; 4974 for (User *U : V->users()) { 4975 auto *Inst = dyn_cast<InstTy>(U); 4976 if (!Inst || Inst->getParent() != BB) 4977 continue; 4978 if (Result) { 4979 if (auto *SI = dyn_cast<StoreInst>(Inst)) { 4980 if (V == SI->getValueOperand()) 4981 continue; 4982 } else { 4983 return nullptr; 4984 } 4985 } 4986 Result = Inst; 4987 } 4988 return Result; 4989 } 4990 4991 /// Returns true if BB contains a simple binary reduction that loads a value 4992 /// from Accum, performs some binary operation with it, and stores it back to 4993 /// Accum. 
4994 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB, 4995 Instruction::BinaryOps *OpCode = nullptr) { 4996 StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB); 4997 if (!Store) 4998 return false; 4999 auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0)); 5000 if (!Stored) 5001 return false; 5002 if (OpCode && *OpCode != Stored->getOpcode()) 5003 return false; 5004 auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0)); 5005 return Load && Load->getOperand(0) == Accum; 5006 } 5007 5008 /// Returns true if BB contains a binary reduction that reduces V using a binary 5009 /// operator into an accumulator that is a function argument. 5010 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) { 5011 auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB); 5012 if (!ReductionOp) 5013 return false; 5014 5015 auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0)); 5016 if (!GlobalLoad) 5017 return false; 5018 5019 auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB); 5020 if (!Store) 5021 return false; 5022 5023 return Store->getPointerOperand() == GlobalLoad->getPointerOperand() && 5024 isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand())); 5025 } 5026 5027 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and 5028 /// [0, 1], respectively, and assigns results of these instructions to Zero and 5029 /// One. Returns true on success, false on failure or if such instructions are 5030 /// not unique among the users of Ptr. 
5031 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) { 5032 Zero = nullptr; 5033 One = nullptr; 5034 for (User *U : Ptr->users()) { 5035 if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { 5036 if (GEP->getNumIndices() != 2) 5037 continue; 5038 auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); 5039 auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2)); 5040 EXPECT_NE(FirstIdx, nullptr); 5041 EXPECT_NE(SecondIdx, nullptr); 5042 5043 EXPECT_TRUE(FirstIdx->isZero()); 5044 if (SecondIdx->isZero()) { 5045 if (Zero) 5046 return false; 5047 Zero = GEP; 5048 } else if (SecondIdx->isOne()) { 5049 if (One) 5050 return false; 5051 One = GEP; 5052 } else { 5053 return false; 5054 } 5055 } 5056 } 5057 return Zero != nullptr && One != nullptr; 5058 } 5059 5060 static OpenMPIRBuilder::InsertPointTy 5061 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 5062 Value *&Result) { 5063 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5064 Result = Builder.CreateFAdd(LHS, RHS, "red.add"); 5065 return Builder.saveIP(); 5066 } 5067 5068 static OpenMPIRBuilder::InsertPointTy 5069 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 5070 Value *RHS) { 5071 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5072 Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 5073 Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt, 5074 AtomicOrdering::Monotonic); 5075 return Builder.saveIP(); 5076 } 5077 5078 static OpenMPIRBuilder::InsertPointTy 5079 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS, 5080 Value *&Result) { 5081 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5082 Result = Builder.CreateXor(LHS, RHS, "red.xor"); 5083 return Builder.saveIP(); 5084 } 5085 5086 static OpenMPIRBuilder::InsertPointTy 5087 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, 5088 Value *RHS) { 5089 IRBuilder<> Builder(IP.getBlock(), IP.getPoint()); 5090 
Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial"); 5091 Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt, 5092 AtomicOrdering::Monotonic); 5093 return Builder.saveIP(); 5094 } 5095 5096 TEST_F(OpenMPIRBuilderTest, CreateReductions) { 5097 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5098 OpenMPIRBuilder OMPBuilder(*M); 5099 OMPBuilder.Config.IsTargetDevice = false; 5100 OMPBuilder.initialize(); 5101 F->setName("func"); 5102 IRBuilder<> Builder(BB); 5103 5104 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 5105 Builder.CreateBr(EnterBB); 5106 Builder.SetInsertPoint(EnterBB); 5107 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5108 5109 // Create variables to be reduced. 5110 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 5111 F->getEntryBlock().getFirstInsertionPt()); 5112 Type *SumType = Builder.getFloatTy(); 5113 Type *XorType = Builder.getInt32Ty(); 5114 Value *SumReduced; 5115 Value *XorReduced; 5116 { 5117 IRBuilderBase::InsertPointGuard Guard(Builder); 5118 Builder.restoreIP(OuterAllocaIP); 5119 SumReduced = Builder.CreateAlloca(SumType); 5120 XorReduced = Builder.CreateAlloca(XorType); 5121 } 5122 5123 // Store initial values of reductions into global variables. 5124 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 5125 Builder.CreateStore(Builder.getInt32(1), XorReduced); 5126 5127 // The loop body computes two reductions: 5128 // sum of (float) thread-id; 5129 // xor of thread-id; 5130 // and store the result in global variables. 
5131 InsertPointTy BodyIP, BodyAllocaIP; 5132 auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { 5133 IRBuilderBase::InsertPointGuard Guard(Builder); 5134 Builder.restoreIP(CodeGenIP); 5135 5136 uint32_t StrSize; 5137 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5138 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5139 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5140 Value *SumLocal = 5141 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 5142 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 5143 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 5144 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 5145 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 5146 Builder.CreateStore(Sum, SumReduced); 5147 Builder.CreateStore(Xor, XorReduced); 5148 5149 BodyIP = Builder.saveIP(); 5150 BodyAllocaIP = InnerAllocaIP; 5151 return Error::success(); 5152 }; 5153 5154 // Privatization for reduction creates local copies of reduction variables and 5155 // initializes them to reduction-neutral values. 
5156 Value *SumPrivatized; 5157 Value *XorPrivatized; 5158 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 5159 Value &Original, Value &Inner, Value *&ReplVal) { 5160 IRBuilderBase::InsertPointGuard Guard(Builder); 5161 Builder.restoreIP(InnerAllocaIP); 5162 if (&Original == SumReduced) { 5163 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 5164 ReplVal = SumPrivatized; 5165 } else if (&Original == XorReduced) { 5166 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 5167 ReplVal = XorPrivatized; 5168 } else { 5169 ReplVal = &Inner; 5170 return CodeGenIP; 5171 } 5172 5173 Builder.restoreIP(CodeGenIP); 5174 if (&Original == SumReduced) 5175 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 5176 SumPrivatized); 5177 else if (&Original == XorReduced) 5178 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 5179 5180 return Builder.saveIP(); 5181 }; 5182 5183 // Do nothing in finalization. 5184 auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; 5185 5186 ASSERT_EXPECTED_INIT( 5187 OpenMPIRBuilder::InsertPointTy, AfterIP, 5188 OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, 5189 /* IfCondition */ nullptr, 5190 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5191 /* IsCancellable */ false)); 5192 Builder.restoreIP(AfterIP); 5193 5194 OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { 5195 {SumType, SumReduced, SumPrivatized, 5196 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, 5197 /*ReductionGenClang=*/nullptr, sumAtomicReduction}, 5198 {XorType, XorReduced, XorPrivatized, 5199 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, 5200 /*ReductionGenClang=*/nullptr, xorAtomicReduction}}; 5201 OMPBuilder.Config.setIsGPU(false); 5202 5203 bool ReduceVariableByRef[] = {false, false}; 5204 ASSERT_THAT_EXPECTED(OMPBuilder.createReductions(BodyIP, BodyAllocaIP, 5205 ReductionInfos, 5206 ReduceVariableByRef), 5207 Succeeded()); 5208 
5209 Builder.restoreIP(AfterIP); 5210 Builder.CreateRetVoid(); 5211 5212 OMPBuilder.finalize(F); 5213 5214 // The IR must be valid. 5215 EXPECT_FALSE(verifyModule(*M)); 5216 5217 // Outlining must have happened. 5218 SmallVector<CallInst *> ForkCalls; 5219 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5220 ForkCalls); 5221 ASSERT_EQ(ForkCalls.size(), 1u); 5222 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5223 Function *Outlined = dyn_cast<Function>(CalleeVal); 5224 EXPECT_NE(Outlined, nullptr); 5225 5226 // Check that the lock variable was created with the expected name. 5227 GlobalVariable *LockVar = 5228 M->getGlobalVariable(".gomp_critical_user_.reduction.var"); 5229 EXPECT_NE(LockVar, nullptr); 5230 5231 // Find the allocation of a local array that will be used to call the runtime 5232 // reduciton function. 5233 BasicBlock &AllocBlock = Outlined->getEntryBlock(); 5234 Value *LocalArray = nullptr; 5235 for (Instruction &I : AllocBlock) { 5236 if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) { 5237 if (!Alloc->getAllocatedType()->isArrayTy() || 5238 !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy()) 5239 continue; 5240 LocalArray = Alloc; 5241 break; 5242 } 5243 } 5244 ASSERT_NE(LocalArray, nullptr); 5245 5246 // Find the call to the runtime reduction function. 5247 BasicBlock *BB = AllocBlock.getUniqueSuccessor(); 5248 Value *LocalArrayPtr = nullptr; 5249 Value *ReductionFnVal = nullptr; 5250 Value *SwitchArg = nullptr; 5251 for (Instruction &I : *BB) { 5252 if (CallInst *Call = dyn_cast<CallInst>(&I)) { 5253 if (Call->getCalledFunction() != 5254 OMPBuilder.getOrCreateRuntimeFunctionPtr( 5255 RuntimeFunction::OMPRTL___kmpc_reduce)) 5256 continue; 5257 LocalArrayPtr = Call->getOperand(4); 5258 ReductionFnVal = Call->getOperand(5); 5259 SwitchArg = Call; 5260 break; 5261 } 5262 } 5263 5264 // Check that the local array is passed to the function. 
5265 ASSERT_NE(LocalArrayPtr, nullptr); 5266 EXPECT_EQ(LocalArrayPtr, LocalArray); 5267 5268 // Find the GEP instructions preceding stores to the local array. 5269 Value *FirstArrayElemPtr = nullptr; 5270 Value *SecondArrayElemPtr = nullptr; 5271 EXPECT_EQ(LocalArray->getNumUses(), 3u); 5272 ASSERT_TRUE( 5273 findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr)); 5274 5275 // Check that the values stored into the local array are privatized reduction 5276 // variables. 5277 auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>( 5278 findStoredValue<GetElementPtrInst>(FirstArrayElemPtr)); 5279 auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>( 5280 findStoredValue<GetElementPtrInst>(SecondArrayElemPtr)); 5281 ASSERT_NE(FirstPrivatized, nullptr); 5282 ASSERT_NE(SecondPrivatized, nullptr); 5283 ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr)); 5284 EXPECT_TRUE(isSimpleBinaryReduction( 5285 FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5286 EXPECT_TRUE(isSimpleBinaryReduction( 5287 SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent())); 5288 5289 // Check that the result of the runtime reduction call is used for further 5290 // dispatch. 5291 ASSERT_EQ(SwitchArg->getNumUses(), 1u); 5292 SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin()); 5293 ASSERT_NE(Switch, nullptr); 5294 EXPECT_EQ(Switch->getNumSuccessors(), 3u); 5295 BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor(); 5296 BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor(); 5297 5298 // Non-atomic block contains reductions to the global reduction variable, 5299 // which is passed into the outlined function as an argument. 
5300 Value *FirstLoad = 5301 findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB); 5302 Value *SecondLoad = 5303 findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB); 5304 EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB)); 5305 EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB)); 5306 5307 // Atomic block also constains reductions to the global reduction variable. 5308 FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB); 5309 SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB); 5310 auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB); 5311 auto *SecondAtomic = 5312 findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB); 5313 ASSERT_NE(FirstAtomic, nullptr); 5314 Value *AtomicStorePointer = FirstAtomic->getPointerOperand(); 5315 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5316 ASSERT_NE(SecondAtomic, nullptr); 5317 AtomicStorePointer = SecondAtomic->getPointerOperand(); 5318 EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer))); 5319 5320 // Check that the separate reduction function also performs (non-atomic) 5321 // reductions after extracting reduction variables from its arguments. 
5322 Function *ReductionFn = cast<Function>(ReductionFnVal); 5323 BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock(); 5324 Value *FirstLHSPtr; 5325 Value *SecondLHSPtr; 5326 ASSERT_TRUE( 5327 findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr)); 5328 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5329 ASSERT_NE(Opaque, nullptr); 5330 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5331 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5332 ASSERT_NE(Opaque, nullptr); 5333 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB)); 5334 5335 Value *FirstRHS; 5336 Value *SecondRHS; 5337 EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS)); 5338 } 5339 5340 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { 5341 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5342 OpenMPIRBuilder OMPBuilder(*M); 5343 OMPBuilder.Config.IsTargetDevice = false; 5344 OMPBuilder.initialize(); 5345 F->setName("func"); 5346 IRBuilder<> Builder(BB); 5347 5348 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); 5349 Builder.CreateBr(EnterBB); 5350 Builder.SetInsertPoint(EnterBB); 5351 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5352 5353 // Create variables to be reduced. 5354 InsertPointTy OuterAllocaIP(&F->getEntryBlock(), 5355 F->getEntryBlock().getFirstInsertionPt()); 5356 Type *SumType = Builder.getFloatTy(); 5357 Type *XorType = Builder.getInt32Ty(); 5358 Value *SumReduced; 5359 Value *XorReduced; 5360 { 5361 IRBuilderBase::InsertPointGuard Guard(Builder); 5362 Builder.restoreIP(OuterAllocaIP); 5363 SumReduced = Builder.CreateAlloca(SumType); 5364 XorReduced = Builder.CreateAlloca(XorType); 5365 } 5366 5367 // Store initial values of reductions into global variables. 
5368 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced); 5369 Builder.CreateStore(Builder.getInt32(1), XorReduced); 5370 5371 InsertPointTy FirstBodyIP, FirstBodyAllocaIP; 5372 auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5373 InsertPointTy CodeGenIP) { 5374 IRBuilderBase::InsertPointGuard Guard(Builder); 5375 Builder.restoreIP(CodeGenIP); 5376 5377 uint32_t StrSize; 5378 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5379 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5380 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5381 Value *SumLocal = 5382 Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local"); 5383 Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial"); 5384 Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum"); 5385 Builder.CreateStore(Sum, SumReduced); 5386 5387 FirstBodyIP = Builder.saveIP(); 5388 FirstBodyAllocaIP = InnerAllocaIP; 5389 return Error::success(); 5390 }; 5391 5392 InsertPointTy SecondBodyIP, SecondBodyAllocaIP; 5393 auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, 5394 InsertPointTy CodeGenIP) { 5395 IRBuilderBase::InsertPointGuard Guard(Builder); 5396 Builder.restoreIP(CodeGenIP); 5397 5398 uint32_t StrSize; 5399 Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize); 5400 Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize); 5401 Value *TID = OMPBuilder.getOrCreateThreadID(Ident); 5402 Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial"); 5403 Value *Xor = Builder.CreateXor(XorPartial, TID, "xor"); 5404 Builder.CreateStore(Xor, XorReduced); 5405 5406 SecondBodyIP = Builder.saveIP(); 5407 SecondBodyAllocaIP = InnerAllocaIP; 5408 return Error::success(); 5409 }; 5410 5411 // Privatization for reduction creates local copies of reduction variables and 5412 // initializes them to reduction-neutral values. 
The same privatization 5413 // callback is used for both loops, with dispatch based on the value being 5414 // privatized. 5415 Value *SumPrivatized; 5416 Value *XorPrivatized; 5417 auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP, 5418 Value &Original, Value &Inner, Value *&ReplVal) { 5419 IRBuilderBase::InsertPointGuard Guard(Builder); 5420 Builder.restoreIP(InnerAllocaIP); 5421 if (&Original == SumReduced) { 5422 SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy()); 5423 ReplVal = SumPrivatized; 5424 } else if (&Original == XorReduced) { 5425 XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty()); 5426 ReplVal = XorPrivatized; 5427 } else { 5428 ReplVal = &Inner; 5429 return CodeGenIP; 5430 } 5431 5432 Builder.restoreIP(CodeGenIP); 5433 if (&Original == SumReduced) 5434 Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), 5435 SumPrivatized); 5436 else if (&Original == XorReduced) 5437 Builder.CreateStore(Builder.getInt32(0), XorPrivatized); 5438 5439 return Builder.saveIP(); 5440 }; 5441 5442 // Do nothing in finalization. 
5443 auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; 5444 5445 ASSERT_EXPECTED_INIT( 5446 OpenMPIRBuilder::InsertPointTy, AfterIP1, 5447 OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, 5448 FiniCB, /* IfCondition */ nullptr, 5449 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5450 /* IsCancellable */ false)); 5451 Builder.restoreIP(AfterIP1); 5452 ASSERT_EXPECTED_INIT( 5453 OpenMPIRBuilder::InsertPointTy, AfterIP2, 5454 OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, 5455 SecondBodyGenCB, PrivCB, FiniCB, 5456 /* IfCondition */ nullptr, 5457 /* NumThreads */ nullptr, OMP_PROC_BIND_default, 5458 /* IsCancellable */ false)); 5459 Builder.restoreIP(AfterIP2); 5460 5461 OMPBuilder.Config.setIsGPU(false); 5462 bool ReduceVariableByRef[] = {false}; 5463 5464 ASSERT_THAT_EXPECTED( 5465 OMPBuilder.createReductions( 5466 FirstBodyIP, FirstBodyAllocaIP, 5467 {{SumType, SumReduced, SumPrivatized, 5468 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, 5469 /*ReductionGenClang=*/nullptr, sumAtomicReduction}}, 5470 ReduceVariableByRef), 5471 Succeeded()); 5472 ASSERT_THAT_EXPECTED( 5473 OMPBuilder.createReductions( 5474 SecondBodyIP, SecondBodyAllocaIP, 5475 {{XorType, XorReduced, XorPrivatized, 5476 /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, 5477 /*ReductionGenClang=*/nullptr, xorAtomicReduction}}, 5478 ReduceVariableByRef), 5479 Succeeded()); 5480 5481 Builder.restoreIP(AfterIP2); 5482 Builder.CreateRetVoid(); 5483 5484 OMPBuilder.finalize(F); 5485 5486 // The IR must be valid. 5487 EXPECT_FALSE(verifyModule(*M)); 5488 5489 // Two different outlined functions must have been created. 
5490 SmallVector<CallInst *> ForkCalls; 5491 findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder, 5492 ForkCalls); 5493 ASSERT_EQ(ForkCalls.size(), 2u); 5494 Value *CalleeVal = ForkCalls[0]->getOperand(2); 5495 Function *FirstCallee = cast<Function>(CalleeVal); 5496 CalleeVal = ForkCalls[1]->getOperand(2); 5497 Function *SecondCallee = cast<Function>(CalleeVal); 5498 EXPECT_NE(FirstCallee, SecondCallee); 5499 5500 // Two different reduction functions must have been created. 5501 SmallVector<CallInst *> ReduceCalls; 5502 findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder, 5503 ReduceCalls); 5504 ASSERT_EQ(ReduceCalls.size(), 1u); 5505 auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5506 ReduceCalls.clear(); 5507 findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, 5508 OMPBuilder, ReduceCalls); 5509 auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5)); 5510 EXPECT_NE(AddReduction, XorReduction); 5511 5512 // Each reduction function does its own kind of reduction. 
5513 BasicBlock *FnReductionBB = &AddReduction->getEntryBlock(); 5514 Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5515 AddReduction->getArg(0), FnReductionBB); 5516 ASSERT_NE(FirstLHSPtr, nullptr); 5517 Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB); 5518 ASSERT_NE(Opaque, nullptr); 5519 Instruction::BinaryOps Opcode = Instruction::FAdd; 5520 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5521 5522 FnReductionBB = &XorReduction->getEntryBlock(); 5523 Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>( 5524 XorReduction->getArg(0), FnReductionBB); 5525 ASSERT_NE(FirstLHSPtr, nullptr); 5526 Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB); 5527 ASSERT_NE(Opaque, nullptr); 5528 Opcode = Instruction::Xor; 5529 EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode)); 5530 } 5531 5532 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { 5533 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5534 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5535 OpenMPIRBuilder OMPBuilder(*M); 5536 OMPBuilder.initialize(); 5537 F->setName("func"); 5538 IRBuilder<> Builder(BB); 5539 5540 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5541 Builder.CreateBr(EnterBB); 5542 Builder.SetInsertPoint(EnterBB); 5543 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5544 5545 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5546 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5547 5548 auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; 5549 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 5550 return Error::success(); 5551 }; 5552 SectionCBVector.push_back(SectionCB); 5553 5554 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5555 llvm::Value &, llvm::Value &Val, 5556 llvm::Value *&ReplVal) { return CodeGenIP; }; 5557 IRBuilder<>::InsertPoint 
AllocaIP(&F->getEntryBlock(), 5558 F->getEntryBlock().getFirstInsertionPt()); 5559 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 5560 OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5561 PrivCB, FiniCB, false, false)); 5562 Builder.restoreIP(AfterIP); 5563 Builder.CreateRetVoid(); // Required at the end of the function 5564 EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); 5565 EXPECT_FALSE(verifyModule(*M, &errs())); 5566 } 5567 5568 TEST_F(OpenMPIRBuilderTest, CreateSections) { 5569 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5570 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5571 OpenMPIRBuilder OMPBuilder(*M); 5572 OMPBuilder.initialize(); 5573 F->setName("func"); 5574 IRBuilder<> Builder(BB); 5575 5576 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5577 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5578 llvm::SmallVector<BasicBlock *, 4> CaseBBs; 5579 5580 BasicBlock *SwitchBB = nullptr; 5581 AllocaInst *PrivAI = nullptr; 5582 SwitchInst *Switch = nullptr; 5583 5584 unsigned NumBodiesGenerated = 0; 5585 unsigned NumFiniCBCalls = 0; 5586 PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); 5587 5588 auto FiniCB = [&](InsertPointTy IP) { 5589 ++NumFiniCBCalls; 5590 BasicBlock *IPBB = IP.getBlock(); 5591 EXPECT_NE(IPBB->end(), IP.getPoint()); 5592 }; 5593 5594 auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 5595 ++NumBodiesGenerated; 5596 CaseBBs.push_back(CodeGenIP.getBlock()); 5597 SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor(); 5598 Builder.restoreIP(CodeGenIP); 5599 Builder.CreateStore(F->arg_begin(), PrivAI); 5600 Value *PrivLoad = 5601 Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca"); 5602 Builder.CreateICmpNE(F->arg_begin(), PrivLoad); 5603 return Error::success(); 5604 }; 5605 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5606 llvm::Value &, llvm::Value &Val, llvm::Value 
*&ReplVal) { 5607 // TODO: Privatization not implemented yet 5608 return CodeGenIP; 5609 }; 5610 5611 SectionCBVector.push_back(SectionCB); 5612 SectionCBVector.push_back(SectionCB); 5613 5614 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5615 F->getEntryBlock().getFirstInsertionPt()); 5616 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 5617 OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5618 PrivCB, FINICB_WRAPPER(FiniCB), 5619 false, false)); 5620 Builder.restoreIP(AfterIP); 5621 Builder.CreateRetVoid(); // Required at the end of the function 5622 5623 // Switch BB's predecessor is loop condition BB, whose successor at index 1 is 5624 // loop's exit BB 5625 BasicBlock *ForExitBB = 5626 SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1); 5627 EXPECT_NE(ForExitBB, nullptr); 5628 5629 EXPECT_NE(PrivAI, nullptr); 5630 Function *OutlinedFn = PrivAI->getFunction(); 5631 EXPECT_EQ(F, OutlinedFn); 5632 EXPECT_FALSE(verifyModule(*M, &errs())); 5633 EXPECT_EQ(OutlinedFn->arg_size(), 1U); 5634 5635 BasicBlock *LoopPreheaderBB = 5636 OutlinedFn->getEntryBlock().getSingleSuccessor(); 5637 // loop variables are 5 - lower bound, upper bound, stride, islastiter, and 5638 // iterator/counter 5639 bool FoundForInit = false; 5640 for (Instruction &Inst : *LoopPreheaderBB) { 5641 if (isa<CallInst>(Inst)) { 5642 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5643 "__kmpc_for_static_init_4u") { 5644 FoundForInit = true; 5645 } 5646 } 5647 } 5648 EXPECT_EQ(FoundForInit, true); 5649 5650 bool FoundForExit = false; 5651 bool FoundBarrier = false; 5652 for (Instruction &Inst : *ForExitBB) { 5653 if (isa<CallInst>(Inst)) { 5654 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5655 "__kmpc_for_static_fini") { 5656 FoundForExit = true; 5657 } 5658 if (cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5659 "__kmpc_barrier") { 5660 FoundBarrier = true; 5661 } 5662 if (FoundForExit && FoundBarrier) 5663 break; 
5664 } 5665 } 5666 EXPECT_EQ(FoundForExit, true); 5667 EXPECT_EQ(FoundBarrier, true); 5668 5669 EXPECT_NE(SwitchBB, nullptr); 5670 EXPECT_NE(SwitchBB->getTerminator(), nullptr); 5671 EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true); 5672 Switch = cast<SwitchInst>(SwitchBB->getTerminator()); 5673 EXPECT_EQ(Switch->getNumCases(), 2U); 5674 5675 EXPECT_EQ(CaseBBs.size(), 2U); 5676 for (auto *&CaseBB : CaseBBs) { 5677 EXPECT_EQ(CaseBB->getParent(), OutlinedFn); 5678 } 5679 5680 ASSERT_EQ(NumBodiesGenerated, 2U); 5681 ASSERT_EQ(NumFiniCBCalls, 1U); 5682 EXPECT_FALSE(verifyModule(*M, &errs())); 5683 } 5684 5685 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { 5686 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5687 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; 5688 OpenMPIRBuilder OMPBuilder(*M); 5689 OMPBuilder.initialize(); 5690 F->setName("func"); 5691 IRBuilder<> Builder(BB); 5692 5693 BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F); 5694 Builder.CreateBr(EnterBB); 5695 Builder.SetInsertPoint(EnterBB); 5696 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5697 5698 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5699 F->getEntryBlock().getFirstInsertionPt()); 5700 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; 5701 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 5702 llvm::Value &, llvm::Value &Val, 5703 llvm::Value *&ReplVal) { return CodeGenIP; }; 5704 auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; 5705 5706 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 5707 OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, 5708 PrivCB, FiniCB, false, true)); 5709 Builder.restoreIP(AfterIP); 5710 Builder.CreateRetVoid(); // Required at the end of the function 5711 for (auto &Inst : instructions(*F)) { 5712 EXPECT_FALSE(isa<CallInst>(Inst) && 5713 cast<CallInst>(&Inst)->getCalledFunction()->getName() == 5714 
"__kmpc_barrier" && 5715 "call to function __kmpc_barrier found with nowait"); 5716 } 5717 } 5718 5719 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) { 5720 OpenMPIRBuilder OMPBuilder(*M); 5721 OMPBuilder.initialize(); 5722 5723 IRBuilder<> Builder(BB); 5724 5725 SmallVector<uint64_t> Mappings = {0, 1}; 5726 GlobalVariable *OffloadMaptypesGlobal = 5727 OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes"); 5728 EXPECT_FALSE(M->global_empty()); 5729 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes"); 5730 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5731 EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5732 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5733 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5734 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5735 EXPECT_TRUE(isa<ConstantDataArray>(Initializer)); 5736 ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer); 5737 EXPECT_EQ(MappingInit->getNumElements(), Mappings.size()); 5738 EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64)); 5739 Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings); 5740 EXPECT_EQ(MappingInit, CA); 5741 } 5742 5743 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) { 5744 OpenMPIRBuilder OMPBuilder(*M); 5745 OMPBuilder.initialize(); 5746 5747 IRBuilder<> Builder(BB); 5748 5749 uint32_t StrSize; 5750 Constant *Cst1 = 5751 OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5752 Constant *Cst2 = 5753 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5754 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5755 5756 GlobalVariable *OffloadMaptypesGlobal = 5757 OMPBuilder.createOffloadMapnames(Names, "offload_mapnames"); 5758 EXPECT_FALSE(M->global_empty()); 5759 EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames"); 5760 EXPECT_TRUE(OffloadMaptypesGlobal->isConstant()); 5761 
EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr()); 5762 EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage()); 5763 EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer()); 5764 Constant *Initializer = OffloadMaptypesGlobal->getInitializer(); 5765 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts())); 5766 EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts())); 5767 5768 GlobalVariable *Name1Gbl = 5769 cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts()); 5770 EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer())); 5771 ConstantDataArray *Name1GblCA = 5772 dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer()); 5773 EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;"); 5774 5775 GlobalVariable *Name2Gbl = 5776 cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts()); 5777 EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer())); 5778 ConstantDataArray *Name2GblCA = 5779 dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer()); 5780 EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;"); 5781 5782 EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy()); 5783 EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size()); 5784 } 5785 5786 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) { 5787 OpenMPIRBuilder OMPBuilder(*M); 5788 OMPBuilder.initialize(); 5789 F->setName("func"); 5790 IRBuilder<> Builder(BB); 5791 5792 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5793 5794 unsigned TotalNbOperand = 2; 5795 5796 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5797 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5798 F->getEntryBlock().getFirstInsertionPt()); 5799 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5800 EXPECT_NE(MapperAllocas.ArgsBase, nullptr); 5801 EXPECT_NE(MapperAllocas.Args, nullptr); 5802 EXPECT_NE(MapperAllocas.ArgSizes, nullptr); 5803 
EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy()); 5804 ArrayType *ArrType = 5805 dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType()); 5806 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5807 EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType() 5808 ->getArrayElementType() 5809 ->isPointerTy()); 5810 5811 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy()); 5812 ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType()); 5813 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5814 EXPECT_TRUE(MapperAllocas.Args->getAllocatedType() 5815 ->getArrayElementType() 5816 ->isPointerTy()); 5817 5818 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy()); 5819 ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType()); 5820 EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand); 5821 EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType() 5822 ->getArrayElementType() 5823 ->isIntegerTy(64)); 5824 } 5825 5826 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { 5827 OpenMPIRBuilder OMPBuilder(*M); 5828 OMPBuilder.initialize(); 5829 F->setName("func"); 5830 IRBuilder<> Builder(BB); 5831 LLVMContext &Ctx = M->getContext(); 5832 5833 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5834 5835 unsigned TotalNbOperand = 2; 5836 5837 OpenMPIRBuilder::MapperAllocas MapperAllocas; 5838 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5839 F->getEntryBlock().getFirstInsertionPt()); 5840 OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas); 5841 5842 auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr( 5843 omp::OMPRTL___tgt_target_data_begin_mapper); 5844 5845 SmallVector<uint64_t> Flags = {0, 2}; 5846 5847 uint32_t StrSize; 5848 Constant *SrcLocCst = 5849 OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize); 5850 Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize); 5851 5852 Constant *Cst1 = 5853 
OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize); 5854 Constant *Cst2 = 5855 OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize); 5856 SmallVector<llvm::Constant *> Names = {Cst1, Cst2}; 5857 5858 GlobalVariable *Maptypes = 5859 OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes"); 5860 Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32( 5861 ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes, 5862 /*Idx0=*/0, /*Idx1=*/0); 5863 5864 GlobalVariable *Mapnames = 5865 OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames"); 5866 Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32( 5867 ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames, 5868 /*Idx0=*/0, /*Idx1=*/0); 5869 5870 OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo, 5871 MaptypesArg, MapnamesArg, MapperAllocas, -1, 5872 TotalNbOperand); 5873 5874 CallInst *MapperCall = dyn_cast<CallInst>(&BB->back()); 5875 EXPECT_NE(MapperCall, nullptr); 5876 EXPECT_EQ(MapperCall->arg_size(), 9U); 5877 EXPECT_EQ(MapperCall->getCalledFunction()->getName(), 5878 "__tgt_target_data_begin_mapper"); 5879 EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo); 5880 EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64)); 5881 EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32)); 5882 5883 EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg); 5884 EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg); 5885 EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy()); 5886 } 5887 5888 TEST_F(OpenMPIRBuilderTest, TargetEnterData) { 5889 OpenMPIRBuilder OMPBuilder(*M); 5890 OMPBuilder.initialize(); 5891 F->setName("func"); 5892 IRBuilder<> Builder(BB); 5893 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5894 5895 int64_t DeviceID = 2; 5896 5897 AllocaInst *Val1 = 5898 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5899 ASSERT_NE(Val1, nullptr); 5900 5901 IRBuilder<>::InsertPoint 
AllocaIP(&F->getEntryBlock(), 5902 F->getEntryBlock().getFirstInsertionPt()); 5903 5904 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5905 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5906 auto GenMapInfoCB = 5907 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5908 // Get map clause information. 5909 Builder.restoreIP(codeGenIP); 5910 5911 CombinedInfo.BasePointers.emplace_back(Val1); 5912 CombinedInfo.Pointers.emplace_back(Val1); 5913 CombinedInfo.DevicePointers.emplace_back( 5914 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5915 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5916 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1)); 5917 uint32_t temp; 5918 CombinedInfo.Names.emplace_back( 5919 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5920 return CombinedInfo; 5921 }; 5922 5923 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5924 /*RequiresDevicePointerInfo=*/false, 5925 /*SeparateBeginEndCalls=*/true); 5926 5927 OMPBuilder.Config.setIsGPU(true); 5928 5929 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper; 5930 ASSERT_EXPECTED_INIT( 5931 OpenMPIRBuilder::InsertPointTy, AfterIP, 5932 OMPBuilder.createTargetData( 5933 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5934 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5935 Builder.restoreIP(AfterIP); 5936 5937 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 5938 EXPECT_NE(TargetDataCall, nullptr); 5939 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 5940 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 5941 "__tgt_target_data_begin_mapper"); 5942 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 5943 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 5944 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 5945 5946 Builder.CreateRetVoid(); 5947 EXPECT_FALSE(verifyModule(*M, &errs())); 5948 } 5949 5950 TEST_F(OpenMPIRBuilderTest, 
TargetExitData) { 5951 OpenMPIRBuilder OMPBuilder(*M); 5952 OMPBuilder.initialize(); 5953 F->setName("func"); 5954 IRBuilder<> Builder(BB); 5955 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 5956 5957 int64_t DeviceID = 2; 5958 5959 AllocaInst *Val1 = 5960 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 5961 ASSERT_NE(Val1, nullptr); 5962 5963 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 5964 F->getEntryBlock().getFirstInsertionPt()); 5965 5966 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 5967 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 5968 auto GenMapInfoCB = 5969 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 5970 // Get map clause information. 5971 Builder.restoreIP(codeGenIP); 5972 5973 CombinedInfo.BasePointers.emplace_back(Val1); 5974 CombinedInfo.Pointers.emplace_back(Val1); 5975 CombinedInfo.DevicePointers.emplace_back( 5976 llvm::OpenMPIRBuilder::DeviceInfoTy::None); 5977 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 5978 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2)); 5979 uint32_t temp; 5980 CombinedInfo.Names.emplace_back( 5981 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 5982 return CombinedInfo; 5983 }; 5984 5985 llvm::OpenMPIRBuilder::TargetDataInfo Info( 5986 /*RequiresDevicePointerInfo=*/false, 5987 /*SeparateBeginEndCalls=*/true); 5988 5989 OMPBuilder.Config.setIsGPU(true); 5990 5991 llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper; 5992 ASSERT_EXPECTED_INIT( 5993 OpenMPIRBuilder::InsertPointTy, AfterIP, 5994 OMPBuilder.createTargetData( 5995 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 5996 /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); 5997 Builder.restoreIP(AfterIP); 5998 5999 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 6000 EXPECT_NE(TargetDataCall, nullptr); 6001 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 6002 
EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 6003 "__tgt_target_data_end_mapper"); 6004 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 6005 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 6006 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 6007 6008 Builder.CreateRetVoid(); 6009 EXPECT_FALSE(verifyModule(*M, &errs())); 6010 } 6011 6012 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { 6013 OpenMPIRBuilder OMPBuilder(*M); 6014 OMPBuilder.initialize(); 6015 F->setName("func"); 6016 IRBuilder<> Builder(BB); 6017 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6018 6019 int64_t DeviceID = 2; 6020 6021 AllocaInst *Val1 = 6022 Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); 6023 ASSERT_NE(Val1, nullptr); 6024 6025 AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy()); 6026 ASSERT_NE(Val2, nullptr); 6027 6028 AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy()); 6029 ASSERT_NE(Val3, nullptr); 6030 6031 IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), 6032 F->getEntryBlock().getFirstInsertionPt()); 6033 6034 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; 6035 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo; 6036 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6037 auto GenMapInfoCB = 6038 [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 6039 // Get map clause information. 
6040 Builder.restoreIP(codeGenIP); 6041 uint32_t temp; 6042 6043 CombinedInfo.BasePointers.emplace_back(Val1); 6044 CombinedInfo.Pointers.emplace_back(Val1); 6045 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None); 6046 CombinedInfo.Sizes.emplace_back(Builder.getInt64(4)); 6047 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3)); 6048 CombinedInfo.Names.emplace_back( 6049 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 6050 6051 CombinedInfo.BasePointers.emplace_back(Val2); 6052 CombinedInfo.Pointers.emplace_back(Val2); 6053 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer); 6054 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 6055 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 6056 CombinedInfo.Names.emplace_back( 6057 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 6058 6059 CombinedInfo.BasePointers.emplace_back(Val3); 6060 CombinedInfo.Pointers.emplace_back(Val3); 6061 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address); 6062 CombinedInfo.Sizes.emplace_back(Builder.getInt64(8)); 6063 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67)); 6064 CombinedInfo.Names.emplace_back( 6065 OMPBuilder.getOrCreateSrcLocStr("unknown", temp)); 6066 return CombinedInfo; 6067 }; 6068 6069 llvm::OpenMPIRBuilder::TargetDataInfo Info( 6070 /*RequiresDevicePointerInfo=*/true, 6071 /*SeparateBeginEndCalls=*/true); 6072 6073 OMPBuilder.Config.setIsGPU(true); 6074 6075 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 6076 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 6077 if (BodyGenType == BodyGenTy::Priv) { 6078 EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u); 6079 Builder.restoreIP(CodeGenIP); 6080 CallInst *TargetDataCall = 6081 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 6082 EXPECT_NE(TargetDataCall, nullptr); 6083 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 6084 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 6085 
"__tgt_target_data_begin_mapper"); 6086 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 6087 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 6088 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 6089 6090 LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode()); 6091 EXPECT_NE(LI, nullptr); 6092 StoreInst *SI = dyn_cast<StoreInst>(&BB->back()); 6093 EXPECT_NE(SI, nullptr); 6094 EXPECT_EQ(SI->getValueOperand(), LI); 6095 EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second); 6096 EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second)); 6097 EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second)); 6098 Builder.CreateStore(Builder.getInt32(99), Val1); 6099 } 6100 return Builder.saveIP(); 6101 }; 6102 6103 ASSERT_EXPECTED_INIT( 6104 OpenMPIRBuilder::InsertPointTy, TargetDataIP1, 6105 OMPBuilder.createTargetData( 6106 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 6107 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB)); 6108 Builder.restoreIP(TargetDataIP1); 6109 6110 CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back()); 6111 EXPECT_NE(TargetDataCall, nullptr); 6112 EXPECT_EQ(TargetDataCall->arg_size(), 9U); 6113 EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), 6114 "__tgt_target_data_end_mapper"); 6115 EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); 6116 EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); 6117 EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); 6118 6119 // Check that BodyGenCB is still made when IsTargetDevice is set to true. 
6120 OMPBuilder.Config.setIsTargetDevice(true); 6121 bool CheckDevicePassBodyGen = false; 6122 auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 6123 CheckDevicePassBodyGen = true; 6124 Builder.restoreIP(CodeGenIP); 6125 CallInst *TargetDataCall = 6126 dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode()); 6127 // Make sure no begin_mapper call is present for device pass. 6128 EXPECT_EQ(TargetDataCall, nullptr); 6129 return Builder.saveIP(); 6130 }; 6131 ASSERT_EXPECTED_INIT( 6132 OpenMPIRBuilder::InsertPointTy, TargetDataIP2, 6133 OMPBuilder.createTargetData( 6134 Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), 6135 /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB)); 6136 Builder.restoreIP(TargetDataIP2); 6137 EXPECT_TRUE(CheckDevicePassBodyGen); 6138 6139 Builder.CreateRetVoid(); 6140 EXPECT_FALSE(verifyModule(*M, &errs())); 6141 } 6142 6143 namespace { 6144 // Some basic handling of argument mapping for the moment 6145 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder, 6146 llvm::SmallVectorImpl<llvm::Value *> &Args, 6147 llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) { 6148 for (auto Arg : Args) { 6149 CombinedInfo.BasePointers.emplace_back(Arg); 6150 CombinedInfo.Pointers.emplace_back(Arg); 6151 uint32_t SrcLocStrSize; 6152 CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr( 6153 "Unknown loc - stub implementation", SrcLocStrSize)); 6154 CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags( 6155 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | 6156 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM | 6157 llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM)); 6158 CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64( 6159 OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType()))); 6160 } 6161 } 6162 } // namespace 6163 6164 TEST_F(OpenMPIRBuilderTest, TargetRegion) { 6165 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6166 
OpenMPIRBuilder OMPBuilder(*M); 6167 OMPBuilder.initialize(); 6168 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 6169 OMPBuilder.setConfig(Config); 6170 F->setName("func"); 6171 F->addFnAttr("target-cpu", "x86-64"); 6172 F->addFnAttr("target-features", "+mmx,+sse"); 6173 IRBuilder<> Builder(BB); 6174 auto *Int32Ty = Builder.getInt32Ty(); 6175 6176 AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr"); 6177 AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr"); 6178 AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr"); 6179 6180 Builder.CreateStore(Builder.getInt32(10), APtr); 6181 Builder.CreateStore(Builder.getInt32(20), BPtr); 6182 auto BodyGenCB = [&](InsertPointTy AllocaIP, 6183 InsertPointTy CodeGenIP) -> InsertPointTy { 6184 Builder.restoreIP(CodeGenIP); 6185 LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr); 6186 LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr); 6187 Value *Sum = Builder.CreateAdd(AVal, BVal); 6188 Builder.CreateStore(Sum, CPtr); 6189 return Builder.saveIP(); 6190 }; 6191 6192 llvm::SmallVector<llvm::Value *> Inputs; 6193 Inputs.push_back(APtr); 6194 Inputs.push_back(BPtr); 6195 Inputs.push_back(CPtr); 6196 6197 auto SimpleArgAccessorCB = 6198 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6199 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6200 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6201 if (!OMPBuilder.Config.isTargetDevice()) { 6202 RetVal = cast<llvm::Value>(&Arg); 6203 return CodeGenIP; 6204 } 6205 6206 Builder.restoreIP(AllocaIP); 6207 6208 llvm::Value *Addr = Builder.CreateAlloca( 6209 Arg.getType()->isPointerTy() 6210 ? 
Arg.getType() 6211 : Type::getInt64Ty(Builder.getContext()), 6212 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6213 llvm::Value *AddrAscast = 6214 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6215 Builder.CreateStore(&Arg, AddrAscast); 6216 6217 Builder.restoreIP(CodeGenIP); 6218 6219 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6220 6221 return Builder.saveIP(); 6222 }; 6223 6224 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6225 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6226 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6227 CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos); 6228 return CombinedInfos; 6229 }; 6230 6231 TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); 6232 OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); 6233 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6234 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6235 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC, 6236 /*MaxTeams=*/{10}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6237 RuntimeAttrs.TargetThreadLimit[0] = Builder.getInt32(20); 6238 RuntimeAttrs.TeamsThreadLimit[0] = Builder.getInt32(30); 6239 RuntimeAttrs.MaxThreads = Builder.getInt32(40); 6240 6241 ASSERT_EXPECTED_INIT( 6242 OpenMPIRBuilder::InsertPointTy, AfterIP, 6243 OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), 6244 Builder.saveIP(), EntryInfo, DefaultAttrs, 6245 RuntimeAttrs, /*IfCond=*/nullptr, Inputs, 6246 GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); 6247 Builder.restoreIP(AfterIP); 6248 6249 OMPBuilder.finalize(); 6250 Builder.CreateRetVoid(); 6251 6252 // Check the kernel launch sequence 6253 auto Iter = F->getEntryBlock().rbegin(); 6254 EXPECT_TRUE(isa<BranchInst>(&*(Iter))); 6255 BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter)); 6256 EXPECT_TRUE(isa<CmpInst>(&*(++Iter))); 6257 EXPECT_TRUE(isa<CallInst>(&*(++Iter))); 6258 CallInst *Call = 
dyn_cast<CallInst>(&*(Iter)); 6259 6260 // Check that the kernel launch function is called 6261 Function *KernelLaunchFunc = Call->getCalledFunction(); 6262 EXPECT_NE(KernelLaunchFunc, nullptr); 6263 StringRef FunctionName = KernelLaunchFunc->getName(); 6264 EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel")); 6265 6266 // Check num_teams and num_threads in call arguments 6267 EXPECT_TRUE(Call->arg_size() >= 4); 6268 Value *NumTeamsArg = Call->getArgOperand(2); 6269 EXPECT_TRUE(isa<ConstantInt>(NumTeamsArg)); 6270 EXPECT_EQ(10U, cast<ConstantInt>(NumTeamsArg)->getZExtValue()); 6271 Value *NumThreadsArg = Call->getArgOperand(3); 6272 EXPECT_TRUE(isa<ConstantInt>(NumThreadsArg)); 6273 EXPECT_EQ(20U, cast<ConstantInt>(NumThreadsArg)->getZExtValue()); 6274 6275 // Check num_teams and num_threads kernel arguments (use number 5 starting 6276 // from the end and counting the call to __tgt_target_kernel as the first use) 6277 Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1); 6278 EXPECT_TRUE(KernelArgs->getNumUses() >= 4); 6279 Value *NumTeamsGetElemPtr = *std::next(KernelArgs->user_begin(), 3); 6280 EXPECT_TRUE(isa<GetElementPtrInst>(NumTeamsGetElemPtr)); 6281 Value *NumTeamsStore = NumTeamsGetElemPtr->getUniqueUndroppableUser(); 6282 EXPECT_TRUE(isa<StoreInst>(NumTeamsStore)); 6283 Value *NumTeamsStoreArg = cast<StoreInst>(NumTeamsStore)->getValueOperand(); 6284 EXPECT_TRUE(isa<ConstantDataSequential>(NumTeamsStoreArg)); 6285 auto *NumTeamsStoreValue = cast<ConstantDataSequential>(NumTeamsStoreArg); 6286 EXPECT_EQ(3U, NumTeamsStoreValue->getNumElements()); 6287 EXPECT_EQ(10U, NumTeamsStoreValue->getElementAsInteger(0)); 6288 EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(1)); 6289 EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(2)); 6290 Value *NumThreadsGetElemPtr = *std::next(KernelArgs->user_begin(), 2); 6291 EXPECT_TRUE(isa<GetElementPtrInst>(NumThreadsGetElemPtr)); 6292 Value *NumThreadsStore = 
NumThreadsGetElemPtr->getUniqueUndroppableUser(); 6293 EXPECT_TRUE(isa<StoreInst>(NumThreadsStore)); 6294 Value *NumThreadsStoreArg = 6295 cast<StoreInst>(NumThreadsStore)->getValueOperand(); 6296 EXPECT_TRUE(isa<ConstantDataSequential>(NumThreadsStoreArg)); 6297 auto *NumThreadsStoreValue = cast<ConstantDataSequential>(NumThreadsStoreArg); 6298 EXPECT_EQ(3U, NumThreadsStoreValue->getNumElements()); 6299 EXPECT_EQ(20U, NumThreadsStoreValue->getElementAsInteger(0)); 6300 EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(1)); 6301 EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(2)); 6302 6303 // Check the fallback call 6304 BasicBlock *FallbackBlock = Branch->getSuccessor(0); 6305 Iter = FallbackBlock->rbegin(); 6306 CallInst *FCall = dyn_cast<CallInst>(&*(++Iter)); 6307 // 'F' has a dummy DISubprogram which causes OutlinedFunc to also 6308 // have a DISubprogram. In this case, the call to OutlinedFunc needs 6309 // to have a debug loc, otherwise verifier will complain. 6310 FCall->setDebugLoc(DL); 6311 EXPECT_NE(FCall, nullptr); 6312 6313 // Check that the correct aguments are passed in 6314 for (auto ArgInput : zip(FCall->args(), Inputs)) { 6315 EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput)); 6316 } 6317 6318 // Check that the outlined function exists with the expected prefix 6319 Function *OutlinedFunc = FCall->getCalledFunction(); 6320 EXPECT_NE(OutlinedFunc, nullptr); 6321 StringRef FunctionName2 = OutlinedFunc->getName(); 6322 EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading")); 6323 6324 // Check that target-cpu and target-features were propagated to the outlined 6325 // function 6326 EXPECT_EQ(OutlinedFunc->getFnAttribute("target-cpu"), 6327 F->getFnAttribute("target-cpu")); 6328 EXPECT_EQ(OutlinedFunc->getFnAttribute("target-features"), 6329 F->getFnAttribute("target-features")); 6330 6331 EXPECT_FALSE(verifyModule(*M, &errs())); 6332 } 6333 6334 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { 6335 OpenMPIRBuilder 
OMPBuilder(*M); 6336 OMPBuilder.setConfig( 6337 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6338 OMPBuilder.initialize(); 6339 6340 F->setName("func"); 6341 F->addFnAttr("target-cpu", "gfx90a"); 6342 F->addFnAttr("target-features", "+gfx9-insts,+wavefrontsize64"); 6343 IRBuilder<> Builder(BB); 6344 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6345 6346 LoadInst *Value = nullptr; 6347 StoreInst *TargetStore = nullptr; 6348 llvm::SmallVector<llvm::Value *, 2> CapturedArgs = { 6349 Constant::getNullValue(PointerType::get(Ctx, 0)), 6350 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6351 6352 auto SimpleArgAccessorCB = 6353 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6354 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6355 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6356 if (!OMPBuilder.Config.isTargetDevice()) { 6357 RetVal = cast<llvm::Value>(&Arg); 6358 return CodeGenIP; 6359 } 6360 6361 Builder.restoreIP(AllocaIP); 6362 6363 llvm::Value *Addr = Builder.CreateAlloca( 6364 Arg.getType()->isPointerTy() 6365 ? 
Arg.getType() 6366 : Type::getInt64Ty(Builder.getContext()), 6367 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6368 llvm::Value *AddrAscast = 6369 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6370 Builder.CreateStore(&Arg, AddrAscast); 6371 6372 Builder.restoreIP(CodeGenIP); 6373 6374 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6375 6376 return Builder.saveIP(); 6377 }; 6378 6379 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6380 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6381 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6382 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6383 return CombinedInfos; 6384 }; 6385 6386 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6387 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6388 -> OpenMPIRBuilder::InsertPointTy { 6389 Builder.restoreIP(CodeGenIP); 6390 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6391 TargetStore = Builder.CreateStore(Value, CapturedArgs[1]); 6392 return Builder.saveIP(); 6393 }; 6394 6395 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6396 F->getEntryBlock().getFirstInsertionPt()); 6397 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6398 /*Line=*/3, /*Count=*/0); 6399 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6400 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6401 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC, 6402 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6403 6404 ASSERT_EXPECTED_INIT( 6405 OpenMPIRBuilder::InsertPointTy, AfterIP, 6406 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6407 EntryInfo, DefaultAttrs, RuntimeAttrs, 6408 /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, 6409 BodyGenCB, SimpleArgAccessorCB)); 6410 Builder.restoreIP(AfterIP); 6411 6412 Builder.CreateRetVoid(); 6413 OMPBuilder.finalize(); 6414 6415 // Check outlined function 6416 
EXPECT_FALSE(verifyModule(*M, &errs())); 6417 EXPECT_NE(TargetStore, nullptr); 6418 Function *OutlinedFn = TargetStore->getFunction(); 6419 EXPECT_NE(F, OutlinedFn); 6420 6421 // Check that target-cpu and target-features were propagated to the outlined 6422 // function 6423 EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"), 6424 F->getFnAttribute("target-cpu")); 6425 EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"), 6426 F->getFnAttribute("target-features")); 6427 6428 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6429 // Account for the "implicit" first argument. 6430 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6431 EXPECT_EQ(OutlinedFn->arg_size(), 3U); 6432 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6433 EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy()); 6434 6435 // Check entry block 6436 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6437 Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt(); 6438 EXPECT_NE(Alloca1, nullptr); 6439 6440 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6441 auto *Store1 = Alloca1->getNextNode(); 6442 EXPECT_TRUE(isa<StoreInst>(Store1)); 6443 auto *Alloca2 = Store1->getNextNode(); 6444 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6445 auto *Store2 = Alloca2->getNextNode(); 6446 EXPECT_TRUE(isa<StoreInst>(Store2)); 6447 6448 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6449 EXPECT_NE(InitCall, nullptr); 6450 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6451 EXPECT_EQ(InitCall->arg_size(), 2U); 6452 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6453 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6454 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6455 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6456 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6457 auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6458 
EXPECT_EQ(ConfigC->getAggregateElement(0U), 6459 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6460 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6461 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6462 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6463 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6464 6465 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6466 EXPECT_NE(EntryBlockBranch, nullptr); 6467 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6468 6469 // Check user code block 6470 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6471 EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6472 Instruction *Load1 = &*UserCodeBlock->getFirstNonPHIIt(); 6473 EXPECT_TRUE(isa<LoadInst>(Load1)); 6474 auto *Load2 = Load1->getNextNode(); 6475 EXPECT_TRUE(isa<LoadInst>(Load2)); 6476 6477 auto *OutlinedBlockBr = Load2->getNextNode(); 6478 EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr)); 6479 6480 auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0); 6481 EXPECT_EQ(OutlinedBlock->getName(), "outlined.body"); 6482 6483 Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt(); 6484 EXPECT_EQ(Value1, Value); 6485 EXPECT_EQ(Value1->getNextNode(), TargetStore); 6486 auto *Deinit = TargetStore->getNextNode(); 6487 EXPECT_NE(Deinit, nullptr); 6488 6489 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6490 EXPECT_NE(DeinitCall, nullptr); 6491 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6492 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6493 6494 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6495 6496 // Check exit block 6497 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6498 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6499 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt())); 6500 6501 // Check global exec_mode. 
6502 GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used"); 6503 EXPECT_NE(Used, nullptr); 6504 Constant *UsedInit = Used->getInitializer(); 6505 EXPECT_NE(UsedInit, nullptr); 6506 EXPECT_TRUE(isa<ConstantArray>(UsedInit)); 6507 auto *UsedInitData = cast<ConstantArray>(UsedInit); 6508 EXPECT_EQ(1U, UsedInitData->getNumOperands()); 6509 Constant *ExecMode = UsedInitData->getOperand(0); 6510 EXPECT_TRUE(isa<GlobalVariable>(ExecMode)); 6511 Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer(); 6512 EXPECT_NE(ExecModeValue, nullptr); 6513 EXPECT_TRUE(isa<ConstantInt>(ExecModeValue)); 6514 EXPECT_EQ(OMP_TGT_EXEC_MODE_GENERIC, 6515 cast<ConstantInt>(ExecModeValue)->getZExtValue()); 6516 } 6517 6518 TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) { 6519 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6520 OpenMPIRBuilder OMPBuilder(*M); 6521 OMPBuilder.initialize(); 6522 OpenMPIRBuilderConfig Config(/*IsTargetDevice=*/false, /*IsGPU=*/false, 6523 /*OpenMPOffloadMandatory=*/false, 6524 /*HasRequiresReverseOffload=*/false, 6525 /*HasRequiresUnifiedAddress=*/false, 6526 /*HasRequiresUnifiedSharedMemory=*/false, 6527 /*HasRequiresDynamicAllocators=*/false); 6528 OMPBuilder.setConfig(Config); 6529 F->setName("func"); 6530 IRBuilder<> Builder(BB); 6531 6532 auto BodyGenCB = [&](InsertPointTy, 6533 InsertPointTy CodeGenIP) -> InsertPointTy { 6534 Builder.restoreIP(CodeGenIP); 6535 return Builder.saveIP(); 6536 }; 6537 6538 auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&, 6539 OpenMPIRBuilder::InsertPointTy, 6540 OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6541 Builder.restoreIP(CodeGenIP); 6542 return Builder.saveIP(); 6543 }; 6544 6545 SmallVector<Value *> Inputs; 6546 OpenMPIRBuilder::MapInfosTy CombinedInfos; 6547 auto GenMapInfoCB = 6548 [&](OpenMPIRBuilder::InsertPointTy) -> OpenMPIRBuilder::MapInfosTy & { 6549 return CombinedInfos; 6550 }; 6551 6552 TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); 6553 
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); 6554 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6555 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6556 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD, 6557 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6558 RuntimeAttrs.LoopTripCount = Builder.getInt64(1000); 6559 6560 ASSERT_EXPECTED_INIT( 6561 OpenMPIRBuilder::InsertPointTy, AfterIP, 6562 OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), 6563 Builder.saveIP(), EntryInfo, DefaultAttrs, 6564 RuntimeAttrs, /*IfCond=*/nullptr, Inputs, 6565 GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); 6566 Builder.restoreIP(AfterIP); 6567 6568 OMPBuilder.finalize(); 6569 Builder.CreateRetVoid(); 6570 6571 // Check the kernel launch sequence 6572 auto Iter = F->getEntryBlock().rbegin(); 6573 EXPECT_TRUE(isa<BranchInst>(&*(Iter))); 6574 BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter)); 6575 EXPECT_TRUE(isa<CmpInst>(&*(++Iter))); 6576 EXPECT_TRUE(isa<CallInst>(&*(++Iter))); 6577 CallInst *Call = dyn_cast<CallInst>(&*(Iter)); 6578 6579 // Check that the kernel launch function is called 6580 Function *KernelLaunchFunc = Call->getCalledFunction(); 6581 EXPECT_NE(KernelLaunchFunc, nullptr); 6582 StringRef FunctionName = KernelLaunchFunc->getName(); 6583 EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel")); 6584 6585 // Check the trip count kernel argument (use number 5 starting from the end 6586 // and counting the call to __tgt_target_kernel as the first use) 6587 Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1); 6588 EXPECT_TRUE(KernelArgs->getNumUses() >= 6); 6589 Value *TripCountGetElemPtr = *std::next(KernelArgs->user_begin(), 5); 6590 EXPECT_TRUE(isa<GetElementPtrInst>(TripCountGetElemPtr)); 6591 Value *TripCountStore = TripCountGetElemPtr->getUniqueUndroppableUser(); 6592 EXPECT_TRUE(isa<StoreInst>(TripCountStore)); 6593 Value *TripCountStoreArg 
= cast<StoreInst>(TripCountStore)->getValueOperand(); 6594 EXPECT_TRUE(isa<ConstantInt>(TripCountStoreArg)); 6595 EXPECT_EQ(1000U, cast<ConstantInt>(TripCountStoreArg)->getZExtValue()); 6596 6597 // Check the fallback call 6598 BasicBlock *FallbackBlock = Branch->getSuccessor(0); 6599 Iter = FallbackBlock->rbegin(); 6600 CallInst *FCall = dyn_cast<CallInst>(&*(++Iter)); 6601 // 'F' has a dummy DISubprogram which causes OutlinedFunc to also 6602 // have a DISubprogram. In this case, the call to OutlinedFunc needs 6603 // to have a debug loc, otherwise verifier will complain. 6604 FCall->setDebugLoc(DL); 6605 EXPECT_NE(FCall, nullptr); 6606 6607 // Check that the outlined function exists with the expected prefix 6608 Function *OutlinedFunc = FCall->getCalledFunction(); 6609 EXPECT_NE(OutlinedFunc, nullptr); 6610 StringRef FunctionName2 = OutlinedFunc->getName(); 6611 EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading")); 6612 6613 EXPECT_FALSE(verifyModule(*M, &errs())); 6614 } 6615 6616 TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) { 6617 OpenMPIRBuilder OMPBuilder(*M); 6618 OMPBuilder.setConfig( 6619 OpenMPIRBuilderConfig(/*IsTargetDevice=*/true, /*IsGPU=*/false, 6620 /*OpenMPOffloadMandatory=*/false, 6621 /*HasRequiresReverseOffload=*/false, 6622 /*HasRequiresUnifiedAddress=*/false, 6623 /*HasRequiresUnifiedSharedMemory=*/false, 6624 /*HasRequiresDynamicAllocators=*/false)); 6625 OMPBuilder.initialize(); 6626 F->setName("func"); 6627 IRBuilder<> Builder(BB); 6628 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6629 6630 Function *OutlinedFn = nullptr; 6631 SmallVector<Value *> CapturedArgs; 6632 6633 auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&, 6634 OpenMPIRBuilder::InsertPointTy, 6635 OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6636 Builder.restoreIP(CodeGenIP); 6637 return Builder.saveIP(); 6638 }; 6639 6640 OpenMPIRBuilder::MapInfosTy CombinedInfos; 6641 auto GenMapInfoCB = 6642 [&](OpenMPIRBuilder::InsertPointTy) 
-> OpenMPIRBuilder::MapInfosTy & { 6643 return CombinedInfos; 6644 }; 6645 6646 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy, 6647 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6648 -> OpenMPIRBuilder::InsertPointTy { 6649 Builder.restoreIP(CodeGenIP); 6650 OutlinedFn = CodeGenIP.getBlock()->getParent(); 6651 return Builder.saveIP(); 6652 }; 6653 6654 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6655 F->getEntryBlock().getFirstInsertionPt()); 6656 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6657 /*Line=*/3, /*Count=*/0); 6658 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6659 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6660 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD, 6661 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6662 6663 ASSERT_EXPECTED_INIT( 6664 OpenMPIRBuilder::InsertPointTy, AfterIP, 6665 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6666 EntryInfo, DefaultAttrs, RuntimeAttrs, 6667 /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, 6668 BodyGenCB, SimpleArgAccessorCB)); 6669 Builder.restoreIP(AfterIP); 6670 6671 Builder.CreateRetVoid(); 6672 OMPBuilder.finalize(); 6673 6674 // Check outlined function 6675 EXPECT_FALSE(verifyModule(*M, &errs())); 6676 EXPECT_NE(OutlinedFn, nullptr); 6677 EXPECT_NE(F, OutlinedFn); 6678 6679 // Check that target-cpu and target-features were propagated to the outlined 6680 // function 6681 EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"), 6682 F->getFnAttribute("target-cpu")); 6683 EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"), 6684 F->getFnAttribute("target-features")); 6685 6686 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6687 // Account for the "implicit" first argument. 6688 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6689 EXPECT_EQ(OutlinedFn->arg_size(), 1U); 6690 6691 // Check global exec_mode. 
6692 GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used"); 6693 EXPECT_NE(Used, nullptr); 6694 Constant *UsedInit = Used->getInitializer(); 6695 EXPECT_NE(UsedInit, nullptr); 6696 EXPECT_TRUE(isa<ConstantArray>(UsedInit)); 6697 auto *UsedInitData = cast<ConstantArray>(UsedInit); 6698 EXPECT_EQ(1U, UsedInitData->getNumOperands()); 6699 Constant *ExecMode = UsedInitData->getOperand(0); 6700 EXPECT_TRUE(isa<GlobalVariable>(ExecMode)); 6701 Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer(); 6702 EXPECT_NE(ExecModeValue, nullptr); 6703 EXPECT_TRUE(isa<ConstantInt>(ExecModeValue)); 6704 EXPECT_EQ(OMP_TGT_EXEC_MODE_SPMD, 6705 cast<ConstantInt>(ExecModeValue)->getZExtValue()); 6706 } 6707 6708 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { 6709 OpenMPIRBuilder OMPBuilder(*M); 6710 OMPBuilder.setConfig( 6711 OpenMPIRBuilderConfig(true, false, false, false, false, false, false)); 6712 OMPBuilder.initialize(); 6713 6714 F->setName("func"); 6715 IRBuilder<> Builder(BB); 6716 OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); 6717 6718 LoadInst *Value = nullptr; 6719 StoreInst *TargetStore = nullptr; 6720 llvm::SmallVector<llvm::Value *, 1> CapturedArgs = { 6721 Constant::getNullValue(PointerType::get(Ctx, 0))}; 6722 6723 auto SimpleArgAccessorCB = 6724 [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal, 6725 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, 6726 llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) { 6727 if (!OMPBuilder.Config.isTargetDevice()) { 6728 RetVal = cast<llvm::Value>(&Arg); 6729 return CodeGenIP; 6730 } 6731 6732 Builder.restoreIP(AllocaIP); 6733 6734 llvm::Value *Addr = Builder.CreateAlloca( 6735 Arg.getType()->isPointerTy() 6736 ? 
Arg.getType() 6737 : Type::getInt64Ty(Builder.getContext()), 6738 OMPBuilder.M.getDataLayout().getAllocaAddrSpace()); 6739 llvm::Value *AddrAscast = 6740 Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType()); 6741 Builder.CreateStore(&Arg, AddrAscast); 6742 6743 Builder.restoreIP(CodeGenIP); 6744 6745 RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast); 6746 6747 return Builder.saveIP(); 6748 }; 6749 6750 llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos; 6751 auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP) 6752 -> llvm::OpenMPIRBuilder::MapInfosTy & { 6753 CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos); 6754 return CombinedInfos; 6755 }; 6756 6757 llvm::Value *RaiseAlloca = nullptr; 6758 6759 auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, 6760 OpenMPIRBuilder::InsertPointTy CodeGenIP) 6761 -> OpenMPIRBuilder::InsertPointTy { 6762 Builder.restoreIP(CodeGenIP); 6763 RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty()); 6764 Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]); 6765 TargetStore = Builder.CreateStore(Value, RaiseAlloca); 6766 return Builder.saveIP(); 6767 }; 6768 6769 IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(), 6770 F->getEntryBlock().getFirstInsertionPt()); 6771 TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, 6772 /*Line=*/3, /*Count=*/0); 6773 OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs; 6774 OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = { 6775 /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC, 6776 /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0}; 6777 6778 ASSERT_EXPECTED_INIT( 6779 OpenMPIRBuilder::InsertPointTy, AfterIP, 6780 OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, 6781 EntryInfo, DefaultAttrs, RuntimeAttrs, 6782 /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, 6783 BodyGenCB, SimpleArgAccessorCB)); 6784 Builder.restoreIP(AfterIP); 
6785 6786 Builder.CreateRetVoid(); 6787 OMPBuilder.finalize(); 6788 6789 // Check outlined function 6790 EXPECT_FALSE(verifyModule(*M, &errs())); 6791 EXPECT_NE(TargetStore, nullptr); 6792 Function *OutlinedFn = TargetStore->getFunction(); 6793 EXPECT_NE(F, OutlinedFn); 6794 6795 EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage()); 6796 // Account for the "implicit" first argument. 6797 EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3"); 6798 EXPECT_EQ(OutlinedFn->arg_size(), 2U); 6799 EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy()); 6800 6801 // Check entry block, to see if we have raised our alloca 6802 // from the body to the entry block. 6803 auto &EntryBlock = OutlinedFn->getEntryBlock(); 6804 6805 // Check that we have moved our alloca created in the 6806 // BodyGenCB function, to the top of the function. 6807 Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt(); 6808 EXPECT_NE(Alloca1, nullptr); 6809 EXPECT_TRUE(isa<AllocaInst>(Alloca1)); 6810 EXPECT_EQ(Alloca1, RaiseAlloca); 6811 6812 // Verify we have not altered the rest of the function 6813 // inappropriately with our alloca movement. 
6814 auto *Alloca2 = Alloca1->getNextNode(); 6815 EXPECT_TRUE(isa<AllocaInst>(Alloca2)); 6816 auto *Store2 = Alloca2->getNextNode(); 6817 EXPECT_TRUE(isa<StoreInst>(Store2)); 6818 6819 auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode()); 6820 EXPECT_NE(InitCall, nullptr); 6821 EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init"); 6822 EXPECT_EQ(InitCall->arg_size(), 2U); 6823 EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0))); 6824 auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0)); 6825 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer())); 6826 auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer()); 6827 EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U))); 6828 auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U)); 6829 EXPECT_EQ(ConfigC->getAggregateElement(0U), 6830 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6831 EXPECT_EQ(ConfigC->getAggregateElement(1U), 6832 ConstantInt::get(Type::getInt8Ty(Ctx), true)); 6833 EXPECT_EQ(ConfigC->getAggregateElement(2U), 6834 ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC)); 6835 6836 auto *EntryBlockBranch = EntryBlock.getTerminator(); 6837 EXPECT_NE(EntryBlockBranch, nullptr); 6838 EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U); 6839 6840 // Check user code block 6841 auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0); 6842 EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry"); 6843 BasicBlock::iterator Load1 = UserCodeBlock->getFirstNonPHIIt(); 6844 EXPECT_TRUE(isa<LoadInst>(Load1)); 6845 6846 auto *OutlinedBlockBr = Load1->getNextNode(); 6847 EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr)); 6848 6849 auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0); 6850 EXPECT_EQ(OutlinedBlock->getName(), "outlined.body"); 6851 6852 Instruction *Load2 = &*OutlinedBlock->getFirstNonPHIIt(); 6853 EXPECT_TRUE(isa<LoadInst>(Load2)); 6854 EXPECT_EQ(Load2, Value); 6855 
EXPECT_EQ(Load2->getNextNode(), TargetStore); 6856 auto *Deinit = TargetStore->getNextNode(); 6857 EXPECT_NE(Deinit, nullptr); 6858 6859 auto *DeinitCall = dyn_cast<CallInst>(Deinit); 6860 EXPECT_NE(DeinitCall, nullptr); 6861 EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit"); 6862 EXPECT_EQ(DeinitCall->arg_size(), 0U); 6863 6864 EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode())); 6865 6866 // Check exit block 6867 auto *ExitBlock = EntryBlockBranch->getSuccessor(1); 6868 EXPECT_EQ(ExitBlock->getName(), "worker.exit"); 6869 EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt())); 6870 } 6871 6872 TEST_F(OpenMPIRBuilderTest, CreateTask) { 6873 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 6874 OpenMPIRBuilder OMPBuilder(*M); 6875 OMPBuilder.Config.IsTargetDevice = false; 6876 OMPBuilder.initialize(); 6877 F->setName("func"); 6878 IRBuilder<> Builder(BB); 6879 6880 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 6881 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 6882 Value *Val128 = 6883 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 6884 6885 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 6886 Builder.restoreIP(AllocaIP); 6887 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 6888 "bodygen.alloca128"); 6889 6890 Builder.restoreIP(CodeGenIP); 6891 // Loading and storing captured pointer and values 6892 Builder.CreateStore(Val128, Local128); 6893 Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 6894 "bodygen.load32"); 6895 6896 LoadInst *PrivLoad128 = Builder.CreateLoad( 6897 Local128->getAllocatedType(), Local128, "bodygen.local.load128"); 6898 Value *Cmp = Builder.CreateICmpNE( 6899 Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType())); 6900 Instruction *ThenTerm, *ElseTerm; 6901 SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), 6902 &ThenTerm, &ElseTerm); 
6903 return Error::success(); 6904 }; 6905 6906 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 6907 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 6908 OpenMPIRBuilder::LocationDescription Loc( 6909 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 6910 ASSERT_EXPECTED_INIT( 6911 OpenMPIRBuilder::InsertPointTy, AfterIP, 6912 OMPBuilder.createTask( 6913 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 6914 BodyGenCB)); 6915 Builder.restoreIP(AfterIP); 6916 OMPBuilder.finalize(); 6917 Builder.CreateRetVoid(); 6918 6919 EXPECT_FALSE(verifyModule(*M, &errs())); 6920 6921 CallInst *TaskAllocCall = dyn_cast<CallInst>( 6922 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 6923 ->user_back()); 6924 6925 // Verify the Ident argument 6926 GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0)); 6927 ASSERT_NE(Ident, nullptr); 6928 EXPECT_TRUE(Ident->hasInitializer()); 6929 Constant *Initializer = Ident->getInitializer(); 6930 GlobalVariable *SrcStrGlob = 6931 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 6932 ASSERT_NE(SrcStrGlob, nullptr); 6933 ConstantDataArray *SrcSrc = 6934 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 6935 ASSERT_NE(SrcSrc, nullptr); 6936 6937 // Verify the num_threads argument. 6938 CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1)); 6939 ASSERT_NE(GTID, nullptr); 6940 EXPECT_EQ(GTID->arg_size(), 1U); 6941 EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num"); 6942 6943 // Verify the flags 6944 // TODO: Check for others flags. Currently testing only for tiedness. 
6945 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 6946 ASSERT_NE(Flags, nullptr); 6947 EXPECT_EQ(Flags->getSExtValue(), 1); 6948 6949 // Verify the data size 6950 ConstantInt *DataSize = 6951 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 6952 ASSERT_NE(DataSize, nullptr); 6953 EXPECT_EQ(DataSize->getSExtValue(), 40); 6954 6955 ConstantInt *SharedsSize = 6956 dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4)); 6957 EXPECT_EQ(SharedsSize->getSExtValue(), 6958 24); // 64-bit pointer + 128-bit integer 6959 6960 // Verify Wrapper function 6961 Function *OutlinedFn = 6962 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 6963 ASSERT_NE(OutlinedFn, nullptr); 6964 6965 LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin()); 6966 ASSERT_NE(SharedsLoad, nullptr); 6967 EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1)); 6968 6969 EXPECT_FALSE(OutlinedFn->isDeclaration()); 6970 EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty()); 6971 6972 // Verify that the data argument is used only once, and that too in the load 6973 // instruction that is then used for accessing shared data. 
6974 Value *DataPtr = OutlinedFn->getArg(1); 6975 EXPECT_EQ(DataPtr->getNumUses(), 1U); 6976 EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser())); 6977 Value *Data = DataPtr->uses().begin()->getUser(); 6978 EXPECT_TRUE(all_of(Data->uses(), [](Use &U) { 6979 return isa<GetElementPtrInst>(U.getUser()); 6980 })); 6981 6982 // Verify the presence of `trunc` and `icmp` instructions in Outlined function 6983 EXPECT_TRUE(any_of(instructions(OutlinedFn), 6984 [](Instruction &inst) { return isa<TruncInst>(&inst); })); 6985 EXPECT_TRUE(any_of(instructions(OutlinedFn), 6986 [](Instruction &inst) { return isa<ICmpInst>(&inst); })); 6987 6988 // Verify the execution of the task 6989 CallInst *TaskCall = dyn_cast<CallInst>( 6990 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 6991 ->user_back()); 6992 ASSERT_NE(TaskCall, nullptr); 6993 EXPECT_EQ(TaskCall->getArgOperand(0), Ident); 6994 EXPECT_EQ(TaskCall->getArgOperand(1), GTID); 6995 EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall); 6996 6997 // Verify that the argument data has been copied 6998 for (User *in : TaskAllocCall->users()) { 6999 if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) { 7000 EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall); 7001 } 7002 } 7003 } 7004 7005 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { 7006 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7007 OpenMPIRBuilder OMPBuilder(*M); 7008 OMPBuilder.Config.IsTargetDevice = false; 7009 OMPBuilder.initialize(); 7010 F->setName("func"); 7011 IRBuilder<> Builder(BB); 7012 7013 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7014 return Error::success(); 7015 }; 7016 7017 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7018 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7019 OpenMPIRBuilder::LocationDescription Loc( 7020 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7021 ASSERT_EXPECTED_INIT( 7022 OpenMPIRBuilder::InsertPointTy, AfterIP, 7023 
OMPBuilder.createTask( 7024 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7025 BodyGenCB)); 7026 Builder.restoreIP(AfterIP); 7027 OMPBuilder.finalize(); 7028 Builder.CreateRetVoid(); 7029 7030 EXPECT_FALSE(verifyModule(*M, &errs())); 7031 7032 // Check that the outlined function has only one argument. 7033 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7034 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 7035 ->user_back()); 7036 Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5)); 7037 ASSERT_NE(OutlinedFn, nullptr); 7038 ASSERT_EQ(OutlinedFn->arg_size(), 1U); 7039 } 7040 7041 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { 7042 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7043 OpenMPIRBuilder OMPBuilder(*M); 7044 OMPBuilder.Config.IsTargetDevice = false; 7045 OMPBuilder.initialize(); 7046 F->setName("func"); 7047 IRBuilder<> Builder(BB); 7048 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7049 return Error::success(); 7050 }; 7051 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7052 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7053 OpenMPIRBuilder::LocationDescription Loc( 7054 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7055 ASSERT_EXPECTED_INIT( 7056 OpenMPIRBuilder::InsertPointTy, AfterIP, 7057 OMPBuilder.createTask( 7058 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7059 BodyGenCB, 7060 /*Tied=*/false)); 7061 Builder.restoreIP(AfterIP); 7062 OMPBuilder.finalize(); 7063 Builder.CreateRetVoid(); 7064 7065 // Check for the `Tied` argument 7066 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7067 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc) 7068 ->user_back()); 7069 ASSERT_NE(TaskAllocCall, nullptr); 7070 ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2)); 7071 ASSERT_NE(Flags, nullptr); 7072 EXPECT_EQ(Flags->getZExtValue() & 1U, 0U); 7073 7074 
EXPECT_FALSE(verifyModule(*M, &errs())); 7075 } 7076 7077 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { 7078 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7079 OpenMPIRBuilder OMPBuilder(*M); 7080 OMPBuilder.Config.IsTargetDevice = false; 7081 OMPBuilder.initialize(); 7082 F->setName("func"); 7083 IRBuilder<> Builder(BB); 7084 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7085 return Error::success(); 7086 }; 7087 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7088 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7089 OpenMPIRBuilder::LocationDescription Loc( 7090 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7091 AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext())); 7092 SmallVector<OpenMPIRBuilder::DependData> DDS; 7093 { 7094 OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn, 7095 Type::getInt32Ty(M->getContext()), InDep); 7096 DDS.push_back(DDIn); 7097 } 7098 ASSERT_EXPECTED_INIT( 7099 OpenMPIRBuilder::InsertPointTy, AfterIP, 7100 OMPBuilder.createTask( 7101 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7102 BodyGenCB, 7103 /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); 7104 Builder.restoreIP(AfterIP); 7105 OMPBuilder.finalize(); 7106 Builder.CreateRetVoid(); 7107 7108 // Check for the `NumDeps` argument 7109 CallInst *TaskAllocCall = dyn_cast<CallInst>( 7110 OMPBuilder 7111 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps) 7112 ->user_back()); 7113 ASSERT_NE(TaskAllocCall, nullptr); 7114 ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3)); 7115 ASSERT_NE(NumDeps, nullptr); 7116 EXPECT_EQ(NumDeps->getZExtValue(), 1U); 7117 7118 // Check for the `DepInfo` array argument 7119 AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4)); 7120 ASSERT_NE(DepArray, nullptr); 7121 Value::user_iterator DepArrayI = DepArray->user_begin(); 7122 ++DepArrayI; 7123 
Value::user_iterator DepInfoI = DepArrayI->user_begin(); 7124 // Check for the `DependKind` flag in the `DepInfo` array 7125 Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI); 7126 ASSERT_NE(Flag, nullptr); 7127 ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag); 7128 ASSERT_NE(FlagInt, nullptr); 7129 EXPECT_EQ(FlagInt->getZExtValue(), 7130 static_cast<unsigned int>(RTLDependenceKindTy::DepIn)); 7131 ++DepInfoI; 7132 // Check for the size in the `DepInfo` array 7133 Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI); 7134 ASSERT_NE(Size, nullptr); 7135 ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size); 7136 ASSERT_NE(SizeInt, nullptr); 7137 EXPECT_EQ(SizeInt->getZExtValue(), 4U); 7138 ++DepInfoI; 7139 // Check for the variable address in the `DepInfo` array 7140 Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI); 7141 ASSERT_NE(AddrStored, nullptr); 7142 PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored); 7143 ASSERT_NE(AddrInt, nullptr); 7144 Value *Addr = AddrInt->getPointerOperand(); 7145 EXPECT_EQ(Addr, InDep); 7146 7147 ConstantInt *NumDepsNoAlias = 7148 dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5)); 7149 ASSERT_NE(NumDepsNoAlias, nullptr); 7150 EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U); 7151 EXPECT_EQ(TaskAllocCall->getOperand(6), 7152 ConstantPointerNull::get(PointerType::getUnqual(M->getContext()))); 7153 7154 EXPECT_FALSE(verifyModule(*M, &errs())); 7155 } 7156 7157 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { 7158 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7159 OpenMPIRBuilder OMPBuilder(*M); 7160 OMPBuilder.Config.IsTargetDevice = false; 7161 OMPBuilder.initialize(); 7162 F->setName("func"); 7163 IRBuilder<> Builder(BB); 7164 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7165 return Error::success(); 7166 }; 7167 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7168 IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); 7169 
  Builder.SetInsertPoint(BodyBB);
  // The `final` condition: first function argument == 0.
  Value *Final = Builder.CreateICmp(
      CmpInst::Predicate::ICMP_EQ, F->getArg(0),
      ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
  OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
  ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
                       OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
                                             /*Tied=*/false, Final));
  Builder.restoreIP(AfterIP);
  OMPBuilder.finalize();
  Builder.CreateRetVoid();

  // Check the flags argument: it is expected to be an `or` that combines the
  // tied flag with a value selected by the final condition.
  CallInst *TaskAllocCall = dyn_cast<CallInst>(
      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
          ->user_back());
  ASSERT_NE(TaskAllocCall, nullptr);
  BinaryOperator *OrInst =
      dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
  ASSERT_NE(OrInst, nullptr);
  EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);

  // One of the arguments to `or` instruction is the tied flag, which is equal
  // to zero.
  EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
    if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
      return TiedValue->getSExtValue() == 0;
    return false;
  }));

  // One of the arguments to `or` instruction is the final condition: a select
  // on `Final` that yields 2 when true and 0 when false.
  EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
    if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
      ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
      ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
      if (!TrueValue || !FalseValue)
        return false;
      return Select->getCondition() == Final &&
             TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
    }
    return false;
  }));

  EXPECT_FALSE(verifyModule(*M, &errs()));
}

// Creates an untied task with an `if` condition and checks the branch on that
// condition and which runtime calls end up in each successor.
TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    return Error::success();
  };
  BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
  IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
  Builder.SetInsertPoint(BodyBB);
  // The `if` condition: first function argument == 0.
  Value *IfCondition = Builder.CreateICmp(
      CmpInst::Predicate::ICMP_EQ, F->getArg(0),
      ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
  OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
  ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
                       OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
                                             /*Tied=*/false, /*Final=*/nullptr,
                                             IfCondition));
  Builder.restoreIP(AfterIP);
  OMPBuilder.finalize();
  Builder.CreateRetVoid();

  EXPECT_FALSE(verifyModule(*M, &errs()));

  CallInst *TaskAllocCall = dyn_cast<CallInst>(
      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
          ->user_back());
  ASSERT_NE(TaskAllocCall, nullptr);

  // Check the branching is based on the if condition argument.
7248 BranchInst *IfConditionBranchInst = 7249 dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator()); 7250 ASSERT_NE(IfConditionBranchInst, nullptr); 7251 ASSERT_TRUE(IfConditionBranchInst->isConditional()); 7252 EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition); 7253 7254 // Check that the `__kmpc_omp_task` executes only in the then branch. 7255 CallInst *TaskCall = dyn_cast<CallInst>( 7256 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task) 7257 ->user_back()); 7258 ASSERT_NE(TaskCall, nullptr); 7259 EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0)); 7260 7261 // Check that the OpenMP Runtime Functions specific to `if` clause execute 7262 // only in the else branch. Also check that the function call is between the 7263 // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls. 7264 CallInst *TaskBeginIfCall = dyn_cast<CallInst>( 7265 OMPBuilder 7266 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0) 7267 ->user_back()); 7268 CallInst *TaskCompleteCall = dyn_cast<CallInst>( 7269 OMPBuilder 7270 .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0) 7271 ->user_back()); 7272 ASSERT_NE(TaskBeginIfCall, nullptr); 7273 ASSERT_NE(TaskCompleteCall, nullptr); 7274 Function *OulinedFn = 7275 dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts()); 7276 ASSERT_NE(OulinedFn, nullptr); 7277 CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back()); 7278 ASSERT_NE(OulinedFnCall, nullptr); 7279 EXPECT_EQ(TaskBeginIfCall->getParent(), 7280 IfConditionBranchInst->getSuccessor(1)); 7281 7282 EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall); 7283 EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall); 7284 } 7285 7286 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { 7287 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7288 OpenMPIRBuilder OMPBuilder(*M); 7289 OMPBuilder.initialize(); 7290 F->setName("func"); 
7291 IRBuilder<> Builder(BB); 7292 7293 AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty()); 7294 AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); 7295 Value *Val128 = 7296 Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); 7297 Instruction *ThenTerm, *ElseTerm; 7298 7299 Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp; 7300 7301 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7302 Builder.restoreIP(AllocaIP); 7303 AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, 7304 "bodygen.alloca128"); 7305 7306 Builder.restoreIP(CodeGenIP); 7307 // Loading and storing captured pointer and values 7308 InternalStoreInst = Builder.CreateStore(Val128, Local128); 7309 InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32, 7310 "bodygen.load32"); 7311 7312 InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128, 7313 "bodygen.local.load128"); 7314 InternalIfCmp = Builder.CreateICmpNE( 7315 InternalLoad32, 7316 Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType())); 7317 SplitBlockAndInsertIfThenElse(InternalIfCmp, 7318 CodeGenIP.getBlock()->getTerminator(), 7319 &ThenTerm, &ElseTerm); 7320 return Error::success(); 7321 }; 7322 7323 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7324 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7325 OpenMPIRBuilder::LocationDescription Loc( 7326 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7327 ASSERT_EXPECTED_INIT( 7328 OpenMPIRBuilder::InsertPointTy, AfterIP, 7329 OMPBuilder.createTaskgroup( 7330 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7331 BodyGenCB)); 7332 Builder.restoreIP(AfterIP); 7333 OMPBuilder.finalize(); 7334 Builder.CreateRetVoid(); 7335 7336 EXPECT_FALSE(verifyModule(*M, &errs())); 7337 7338 CallInst *TaskgroupCall = dyn_cast<CallInst>( 7339 
OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 7340 ->user_back()); 7341 ASSERT_NE(TaskgroupCall, nullptr); 7342 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 7343 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 7344 ->user_back()); 7345 ASSERT_NE(EndTaskgroupCall, nullptr); 7346 7347 // Verify the Ident argument 7348 GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0)); 7349 ASSERT_NE(Ident, nullptr); 7350 EXPECT_TRUE(Ident->hasInitializer()); 7351 Constant *Initializer = Ident->getInitializer(); 7352 GlobalVariable *SrcStrGlob = 7353 cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()); 7354 ASSERT_NE(SrcStrGlob, nullptr); 7355 ConstantDataArray *SrcSrc = 7356 dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer()); 7357 ASSERT_NE(SrcSrc, nullptr); 7358 7359 // Verify the num_threads argument. 7360 CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1)); 7361 ASSERT_NE(GTID, nullptr); 7362 EXPECT_EQ(GTID->arg_size(), 1U); 7363 EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr( 7364 OMPRTL___kmpc_global_thread_num)); 7365 7366 // Checking the general structure of the IR generated is same as expected. 7367 Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction(); 7368 EXPECT_EQ(GeneratedStoreInst, InternalStoreInst); 7369 Instruction *GeneratedLoad32 = 7370 GeneratedStoreInst->getNextNonDebugInstruction(); 7371 EXPECT_EQ(GeneratedLoad32, InternalLoad32); 7372 Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction(); 7373 EXPECT_EQ(GeneratedLoad128, InternalLoad128); 7374 7375 // Checking the ordering because of the if statements and that 7376 // `__kmp_end_taskgroup` call is after the if branching. 
7377 BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(), 7378 ThenTerm->getSuccessor(0), 7379 EndTaskgroupCall->getParent(), 7380 ElseTerm->getParent()}; 7381 verifyDFSOrder(F, RefOrder); 7382 } 7383 7384 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { 7385 using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 7386 OpenMPIRBuilder OMPBuilder(*M); 7387 OMPBuilder.Config.IsTargetDevice = false; 7388 OMPBuilder.initialize(); 7389 F->setName("func"); 7390 IRBuilder<> Builder(BB); 7391 7392 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7393 Builder.restoreIP(AllocaIP); 7394 AllocaInst *Alloca32 = 7395 Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32"); 7396 AllocaInst *Alloca64 = 7397 Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64"); 7398 Builder.restoreIP(CodeGenIP); 7399 auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7400 Builder.restoreIP(CodeGenIP); 7401 LoadInst *LoadValue = 7402 Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); 7403 Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64)); 7404 Builder.CreateStore(AddInst, Alloca64); 7405 return Error::success(); 7406 }; 7407 OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); 7408 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1, 7409 OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); 7410 Builder.restoreIP(TaskIP1); 7411 7412 auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 7413 Builder.restoreIP(CodeGenIP); 7414 LoadInst *LoadValue = 7415 Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); 7416 Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32)); 7417 Builder.CreateStore(AddInst, Alloca32); 7418 return Error::success(); 7419 }; 7420 OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); 7421 ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2, 7422 
OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); 7423 Builder.restoreIP(TaskIP2); 7424 }; 7425 7426 BasicBlock *AllocaBB = Builder.GetInsertBlock(); 7427 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); 7428 OpenMPIRBuilder::LocationDescription Loc( 7429 InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); 7430 ASSERT_EXPECTED_INIT( 7431 OpenMPIRBuilder::InsertPointTy, AfterIP, 7432 OMPBuilder.createTaskgroup( 7433 Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), 7434 BODYGENCB_WRAPPER(BodyGenCB))); 7435 Builder.restoreIP(AfterIP); 7436 OMPBuilder.finalize(); 7437 Builder.CreateRetVoid(); 7438 7439 EXPECT_FALSE(verifyModule(*M, &errs())); 7440 7441 CallInst *TaskgroupCall = dyn_cast<CallInst>( 7442 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup) 7443 ->user_back()); 7444 ASSERT_NE(TaskgroupCall, nullptr); 7445 CallInst *EndTaskgroupCall = dyn_cast<CallInst>( 7446 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup) 7447 ->user_back()); 7448 ASSERT_NE(EndTaskgroupCall, nullptr); 7449 7450 Function *TaskAllocFn = 7451 OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); 7452 ASSERT_EQ(TaskAllocFn->getNumUses(), 2u); 7453 7454 CallInst *FirstTaskAllocCall = 7455 dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()); 7456 CallInst *SecondTaskAllocCall = 7457 dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++); 7458 ASSERT_NE(FirstTaskAllocCall, nullptr); 7459 ASSERT_NE(SecondTaskAllocCall, nullptr); 7460 7461 // Verify that the tasks have been generated in order and inside taskgroup 7462 // construct. 
7463 BasicBlock *RefOrder[] = { 7464 TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(), 7465 SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()}; 7466 verifyDFSOrder(F, RefOrder); 7467 } 7468 7469 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { 7470 OpenMPIRBuilder OMPBuilder(*M); 7471 OMPBuilder.initialize(); 7472 7473 IRBuilder<> Builder(BB); 7474 7475 OpenMPIRBuilder::TargetDataRTArgs RTArgs; 7476 OpenMPIRBuilder::TargetDataInfo Info(true, false); 7477 7478 auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext()); 7479 auto Int64PtrTy = PointerType::getUnqual(Builder.getContext()); 7480 7481 Info.RTArgs.BasePointersArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7482 Info.RTArgs.PointersArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7483 Info.RTArgs.SizesArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7484 Info.RTArgs.MapTypesArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7485 Info.RTArgs.MapNamesArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7486 Info.RTArgs.MappersArray = ConstantPointerNull::get(Builder.getPtrTy(0)); 7487 Info.NumberOfPtrs = 4; 7488 Info.EmitDebug = false; 7489 OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false); 7490 7491 EXPECT_NE(RTArgs.BasePointersArray, nullptr); 7492 EXPECT_NE(RTArgs.PointersArray, nullptr); 7493 EXPECT_NE(RTArgs.SizesArray, nullptr); 7494 EXPECT_NE(RTArgs.MapTypesArray, nullptr); 7495 EXPECT_NE(RTArgs.MappersArray, nullptr); 7496 EXPECT_NE(RTArgs.MapNamesArray, nullptr); 7497 EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr); 7498 7499 EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy); 7500 EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy); 7501 EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy); 7502 EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy); 7503 EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy); 7504 EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy); 7505 } 7506 7507 
// Initializes and then registers one target-region entry and one device
// global-variable entry, checking that the manager reports each of them.
TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
  OpenMPIRBuilder OMPBuilder(*M);
  OpenMPIRBuilderConfig DeviceConfig(
      /* IsTargetDevice = */ true,
      /* IsGPU = */ false,
      /* OpenMPOffloadMandatory = */ false,
      /* HasRequiresReverseOffload = */ false,
      /* HasRequiresUnifiedAddress = */ false,
      /* HasRequiresUnifiedSharedMemory = */ false,
      /* HasRequiresDynamicAllocators = */ false);
  OMPBuilder.setConfig(DeviceConfig);

  OffloadEntriesInfoManager &Entries = OMPBuilder.OffloadInfoManager;
  const char *const GlobalName = "gvar";
  const auto ToKind = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;

  // The target region entry is visible as soon as it has been initialized.
  TargetRegionEntryInfo RegionInfo("parent", 1, 2, 4, 0);
  Entries.initializeTargetRegionEntryInfo(RegionInfo, 0);
  EXPECT_TRUE(Entries.hasTargetRegionEntryInfo(RegionInfo));

  // The global variable entry is initialized first and registered last, with
  // the target region registration in between.
  Entries.initializeDeviceGlobalVarEntryInfo(GlobalName, ToKind, 0);
  Entries.registerTargetRegionEntryInfo(
      RegionInfo, nullptr, nullptr,
      OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
  Entries.registerDeviceGlobalVarEntryInfo(GlobalName, 0x0, 8, ToKind,
                                           GlobalValue::WeakAnyLinkage);
  EXPECT_TRUE(Entries.hasDeviceGlobalVarEntryInfo(GlobalName));
}

// Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they
// call each other (recursively in some cases).
The test case test these 7528 // functions by utilising them for host code generation for declare target 7529 // global variables 7530 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) { 7531 OpenMPIRBuilder OMPBuilder(*M); 7532 OMPBuilder.initialize(); 7533 OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false); 7534 OMPBuilder.setConfig(Config); 7535 7536 std::vector<llvm::Triple> TargetTriple; 7537 TargetTriple.emplace_back("amdgcn-amd-amdhsa"); 7538 7539 TargetRegionEntryInfo EntryInfo("", 42, 4711, 17); 7540 std::vector<GlobalVariable *> RefsGathered; 7541 7542 std::vector<Constant *> Globals; 7543 auto *IntTy = Type::getInt32Ty(Ctx); 7544 for (int I = 0; I < 2; ++I) { 7545 Globals.push_back(M->getOrInsertGlobal( 7546 "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * { 7547 return new GlobalVariable( 7548 *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage, 7549 ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I)); 7550 })); 7551 } 7552 7553 OMPBuilder.registerTargetGlobalVariable( 7554 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 7555 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 7556 EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple, 7557 nullptr, nullptr, Globals[0]->getType(), Globals[0]); 7558 7559 OMPBuilder.registerTargetGlobalVariable( 7560 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink, 7561 OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true, 7562 EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple, 7563 nullptr, nullptr, Globals[1]->getType(), Globals[1]); 7564 7565 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn = 7566 [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, 7567 const llvm::TargetRegionEntryInfo &EntryInfo) -> void { 7568 // If this is invoked, then we want to emit an error, even if it is not 7569 // neccesarily the most readable, as something has went 
wrong. The 7570 // test-suite unfortunately eats up all error output 7571 ASSERT_EQ(Kind, Kind); 7572 }; 7573 7574 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn); 7575 7576 // Clauses for data_int_0 with To + Any clauses for the host 7577 std::vector<GlobalVariable *> OffloadEntries; 7578 OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name")); 7579 OffloadEntries.push_back( 7580 M->getNamedGlobal(".offloading.entry.test_data_int_0")); 7581 7582 // Clauses for data_int_1 with Link + Any clauses for the host 7583 OffloadEntries.push_back( 7584 M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr")); 7585 OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1")); 7586 OffloadEntries.push_back( 7587 M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr")); 7588 7589 for (unsigned I = 0; I < OffloadEntries.size(); ++I) 7590 EXPECT_NE(OffloadEntries[I], nullptr); 7591 7592 // Metadata generated for the host offload module 7593 NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info"); 7594 ASSERT_THAT(OffloadMetadata, testing::NotNull()); 7595 StringRef Nodes[2] = { 7596 cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1)) 7597 ->getString(), 7598 cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1)) 7599 ->getString()}; 7600 EXPECT_THAT( 7601 Nodes, testing::UnorderedElementsAre("test_data_int_0", 7602 "test_data_int_1_decl_tgt_ref_ptr")); 7603 } 7604 7605 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) { 7606 OpenMPIRBuilder OMPBuilder(*M); 7607 OMPBuilder.initialize(); 7608 OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true, 7609 /* IsGPU = */ true, 7610 /* OpenMPOffloadMandatory = */ false, 7611 /* HasRequiresReverseOffload = */ false, 7612 /* HasRequiresUnifiedAddress = */ false, 7613 /* HasRequiresUnifiedSharedMemory = */ false, 7614 /* HasRequiresDynamicAllocators = */ false); 7615 OMPBuilder.setConfig(Config); 7616 7617 FunctionCallee FnTypeAndCallee = 7618 
M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx)); 7619 7620 auto *Fn = cast<Function>(FnTypeAndCallee.getCallee()); 7621 OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn, 7622 /* Size = */ 0, 7623 /* Flags = */ 0, GlobalValue::WeakAnyLinkage); 7624 7625 // Check kernel attributes 7626 EXPECT_TRUE(Fn->hasFnAttribute("kernel")); 7627 EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress)); 7628 } 7629 7630 } // namespace 7631