xref: /llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
12 #include "llvm/IR/BasicBlock.h"
13 #include "llvm/IR/DIBuilder.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/InstIterator.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/Verifier.h"
20 #include "llvm/Passes/PassBuilder.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Testing/Support/Error.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 #include "gmock/gmock.h"
25 #include "gtest/gtest.h"
26 #include <optional>
27 
28 using namespace llvm;
29 using namespace omp;
30 
// Helper that is intended to be functionally equivalent to
// `VarType VarName = Init` for an `Init` expression that yields an
// `Expected<VarType>` value. It produces an error message and returns from the
// enclosing function if `Init` didn't produce a valid result.
//
// The expansion consists of several statements and additionally declares a
// variable named `__Expected<VarName>` in the enclosing scope. The final
// declaration has no trailing semicolon; it is supplied at the point of use.
#define ASSERT_EXPECTED_INIT(VarType, VarName, Init)                           \
  auto __Expected##VarName = Init;                                             \
  ASSERT_THAT_EXPECTED(__Expected##VarName, Succeeded());                      \
  VarType VarName = *__Expected##VarName
38 
// Similar to ASSERT_EXPECTED_INIT, but returns the given `Return` expression in
// case of error after printing the error message. The failure is reported with
// the non-fatal EXPECT_THAT_EXPECTED and the early return is spelled out
// explicitly, so `Return` can be any expression.
//
// Like ASSERT_EXPECTED_INIT, the expansion declares `__Expected<VarName>` in
// the enclosing scope and leaves the final semicolon to the user.
#define ASSERT_EXPECTED_INIT_RETURN(VarType, VarName, Init, Return)            \
  auto __Expected##VarName = Init;                                             \
  EXPECT_THAT_EXPECTED(__Expected##VarName, Succeeded());                      \
  if (!__Expected##VarName)                                                    \
    return Return;                                                             \
  VarType VarName = *__Expected##VarName
47 
// Wrapper lambdas to allow using EXPECT*() macros inside of error-returning
// callbacks.
//
// FINICB_WRAPPER adapts a callable `cb` taking an InsertPointTy into a lambda
// that calls it and then always returns Error::success(). `cb` is captured by
// reference, so it must stay alive for as long as the wrapper may be invoked.
// `InsertPointTy` must be a visible name at the point of use.
#define FINICB_WRAPPER(cb)                                                     \
  [&cb](InsertPointTy IP) -> Error {                                           \
    cb(IP);                                                                    \
    return Error::success();                                                   \
  }
55 
// Adapts a callable `cb` taking (AllocaIP, CodeGenIP) into a lambda that calls
// it and then always returns Error::success(). `cb` is captured by reference,
// so it must stay alive for as long as the wrapper may be invoked.
#define BODYGENCB_WRAPPER(cb)                                                  \
  [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error {            \
    cb(AllocaIP, CodeGenIP);                                                   \
    return Error::success();                                                   \
  }
61 
// Adapts a callable `cb` taking (CodeGenIP, LC) into a lambda that calls it
// and then always returns Error::success(). `cb` is captured by reference, so
// it must stay alive for as long as the wrapper may be invoked.
#define LOOP_BODYGENCB_WRAPPER(cb)                                             \
  [&cb](InsertPointTy CodeGenIP, Value *LC) -> Error {                         \
    cb(CodeGenIP, LC);                                                         \
    return Error::success();                                                   \
  }
67 
68 namespace {
69 
70 /// Create an instruction that uses the values in \p Values. We use "printf"
71 /// just because it is often used for this purpose in test code, but it is never
72 /// executed here.
73 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
74                                   ArrayRef<Value *> Values) {
75   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
76 
77   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
78   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
79   Constant *Indices[] = {Zero, Zero};
80   Constant *FormatStrConst =
81       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
82 
83   Function *PrintfDecl = M->getFunction("printf");
84   if (!PrintfDecl) {
85     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
86     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
87     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
88   }
89 
90   SmallVector<Value *, 4> Args;
91   Args.push_back(FormatStrConst);
92   Args.append(Values.begin(), Values.end());
93   return Builder.CreateCall(PrintfDecl, Args);
94 }
95 
96 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
97 /// order the control flow of \p F.
98 ///
99 /// This is an easy way to verify the branching structure of the CFG without
100 /// checking every branch instruction individually. For the CFG of a
101 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
102 /// the body, i.e. the DFS order corresponds to the execution order with one
103 /// loop iteration.
104 static testing::AssertionResult
105 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
106   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
107   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
108 
109   df_iterator_default_set<BasicBlock *, 16> Visited;
110   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
111 
112   BasicBlock *Prev = nullptr;
113   for (BasicBlock *BB : DFS) {
114     if (It != E && BB == *It) {
115       Prev = *It;
116       ++It;
117     }
118   }
119 
120   if (It == E)
121     return testing::AssertionSuccess();
122   if (!Prev)
123     return testing::AssertionFailure()
124            << "Did not find " << (*It)->getName() << " in control flow";
125   return testing::AssertionFailure()
126          << "Expected " << Prev->getName() << " before " << (*It)->getName()
127          << " in control flow";
128 }
129 
130 /// Verify that blocks in \p RefOrder are in the same relative order in the
131 /// linked lists of blocks in \p F. The linked list may contain additional
132 /// blocks in-between.
133 ///
134 /// While the order in the linked list is not relevant for semantics, keeping
135 /// the order roughly in execution order makes its printout easier to read.
136 static testing::AssertionResult
137 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
138   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
139   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
140 
141   BasicBlock *Prev = nullptr;
142   for (BasicBlock &BB : *F) {
143     if (It != E && &BB == *It) {
144       Prev = *It;
145       ++It;
146     }
147   }
148 
149   if (It == E)
150     return testing::AssertionSuccess();
151   if (!Prev)
152     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
153                                        << " in function " << F->getName();
154   return testing::AssertionFailure()
155          << "Expected " << Prev->getName() << " before " << (*It)->getName()
156          << " in function " << F->getName();
157 }
158 
159 /// Populate Calls with call instructions calling the function with the given
160 /// FnID from the given function F.
161 static void findCalls(Function *F, omp::RuntimeFunction FnID,
162                       OpenMPIRBuilder &OMPBuilder,
163                       SmallVectorImpl<CallInst *> &Calls) {
164   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
165   for (BasicBlock &BB : *F) {
166     for (Instruction &I : BB) {
167       auto *Call = dyn_cast<CallInst>(&I);
168       if (Call && Call->getCalledFunction() == Fn)
169         Calls.push_back(Call);
170     }
171   }
172 }
173 
174 /// Assuming \p F contains only one call to the function with the given \p FnID,
175 /// return that call.
176 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
177                                 OpenMPIRBuilder &OMPBuilder) {
178   SmallVector<CallInst *, 1> Calls;
179   findCalls(F, FnID, OMPBuilder, Calls);
180   EXPECT_EQ(1u, Calls.size());
181   if (Calls.size() != 1)
182     return nullptr;
183   return Calls.front();
184 }
185 
186 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
187   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
188   case omp::OMPScheduleType::BaseDynamicChunked:
189     return omp::OMP_SCHEDULE_Dynamic;
190   case omp::OMPScheduleType::BaseGuidedChunked:
191     return omp::OMP_SCHEDULE_Guided;
192   case omp::OMPScheduleType::BaseAuto:
193     return omp::OMP_SCHEDULE_Auto;
194   case omp::OMPScheduleType::BaseRuntime:
195     return omp::OMP_SCHEDULE_Runtime;
196   default:
197     llvm_unreachable("unknown type for this test");
198   }
199 }
200 
201 class OpenMPIRBuilderTest : public testing::Test {
202 protected:
203   void SetUp() override {
204     M.reset(new Module("MyModule", Ctx));
205     FunctionType *FTy =
206         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
207                           /*isVarArg=*/false);
208     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
209     BB = BasicBlock::Create(Ctx, "", F);
210 
211     DIBuilder DIB(*M);
212     auto File = DIB.createFile("test.dbg", "/src", std::nullopt,
213                                std::optional<StringRef>("/src/test.dbg"));
214     auto CU =
215         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
216     auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray({}));
217     auto SP = DIB.createFunction(
218         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
219         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
220     F->setSubprogram(SP);
221     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
222     DIB.finalize();
223     DL = DILocation::get(Ctx, 3, 7, Scope);
224   }
225 
226   void TearDown() override {
227     BB = nullptr;
228     M.reset();
229   }
230 
231   /// Create a function with a simple loop that calls printf using the logical
232   /// loop counter for use with tests that need a CanonicalLoopInfo object.
233   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
234                                              OpenMPIRBuilder &OMPBuilder,
235                                              int UseIVBits,
236                                              CallInst **Call = nullptr,
237                                              BasicBlock **BodyCode = nullptr) {
238     OMPBuilder.initialize();
239     F->setName("func");
240 
241     IRBuilder<> Builder(BB);
242     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
243     Value *TripCount = F->getArg(0);
244 
245     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
246     Value *CastedTripCount =
247         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
248 
249     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
250                              llvm::Value *LC) {
251       Builder.restoreIP(CodeGenIP);
252       if (BodyCode)
253         *BodyCode = Builder.GetInsertBlock();
254 
255       // Add something that consumes the induction variable to the body.
256       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
257       if (Call)
258         *Call = CallInst;
259 
260       return Error::success();
261     };
262 
263     ASSERT_EXPECTED_INIT_RETURN(
264         CanonicalLoopInfo *, Loop,
265         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount),
266         nullptr);
267 
268     // Finalize the function.
269     Builder.restoreIP(Loop->getAfterIP());
270     Builder.CreateRetVoid();
271 
272     return Loop;
273   }
274 
275   LLVMContext Ctx;
276   std::unique_ptr<Module> M;
277   Function *F;
278   BasicBlock *BB;
279   DebugLoc DL;
280 };
281 
/// Variant of the OpenMPIRBuilderTest fixture for value-parameterized tests
/// whose parameter is an omp::OMPScheduleType.
class OpenMPIRBuilderTestWithParams
    : public OpenMPIRBuilderTest,
      public ::testing::WithParamInterface<omp::OMPScheduleType> {};
285 
/// Variant of the OpenMPIRBuilderTest fixture for value-parameterized tests
/// whose parameter is an int.
/// NOTE(review): judging by the name, the parameter is presumably the bit width
/// of the induction variable; confirm against the tests that instantiate it.
class OpenMPIRBuilderTestWithIVBits
    : public OpenMPIRBuilderTest,
      public ::testing::WithParamInterface<int> {};
289 
290 // Returns the value stored in the given allocation. Returns null if the given
291 // value is not a result of an InstTy instruction, if no value is stored or if
292 // there is more than one store.
293 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
294   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
295   if (!Inst)
296     return nullptr;
297   StoreInst *Store = nullptr;
298   for (Use &U : Inst->uses()) {
299     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
300       EXPECT_EQ(Store, nullptr);
301       Store = CandidateStore;
302     }
303   }
304   if (!Store)
305     return nullptr;
306   return Store->getValueOperand();
307 }
308 
309 // Returns the value stored in the aggregate argument of an outlined function,
310 // or nullptr if it is not found.
311 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
312                                            unsigned Idx) {
313   GetElementPtrInst *GEPAtIdx = nullptr;
314   // Find GEP instruction at that index.
315   for (User *Usr : Aggregate->users()) {
316     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
317     if (!GEP)
318       continue;
319 
320     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
321       continue;
322 
323     EXPECT_EQ(GEPAtIdx, nullptr);
324     GEPAtIdx = GEP;
325   }
326 
327   EXPECT_NE(GEPAtIdx, nullptr);
328   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
329 
330   // Find the value stored to the aggregate.
331   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
332   Value *StoredAggValue = StoreToAgg->getValueOperand();
333 
334   Value *StoredValue = nullptr;
335 
336   // Find the value stored to the value stored in the aggregate.
337   for (User *Usr : StoredAggValue->users()) {
338     StoreInst *Store = dyn_cast<StoreInst>(Usr);
339     if (!Store)
340       continue;
341 
342     if (Store->getPointerOperand() != StoredAggValue)
343       continue;
344 
345     EXPECT_EQ(StoredValue, nullptr);
346     StoredValue = Store->getValueOperand();
347   }
348 
349   return StoredValue;
350 }
351 
352 // Returns the aggregate that the value is originating from.
353 static Value *findAggregateFromValue(Value *V) {
354   // Expects a load instruction that loads from the aggregate.
355   LoadInst *Load = dyn_cast<LoadInst>(V);
356   EXPECT_NE(Load, nullptr);
357   // Find the GEP instruction used in the load instruction.
358   GetElementPtrInst *GEP =
359       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
360   EXPECT_NE(GEP, nullptr);
361   // Find the aggregate used in the GEP instruction.
362   Value *Aggregate = GEP->getPointerOperand();
363 
364   return Aggregate;
365 }
366 
367 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
368   OpenMPIRBuilder OMPBuilder(*M);
369   OMPBuilder.initialize();
370 
371   IRBuilder<> Builder(BB);
372 
373   ASSERT_THAT_EXPECTED(
374       OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for),
375       Succeeded());
376   EXPECT_TRUE(M->global_empty());
377   EXPECT_EQ(M->size(), 1U);
378   EXPECT_EQ(F->size(), 1U);
379   EXPECT_EQ(BB->size(), 0U);
380 
381   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
382   ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded());
383   EXPECT_FALSE(M->global_empty());
384   EXPECT_EQ(M->size(), 3U);
385   EXPECT_EQ(F->size(), 1U);
386   EXPECT_EQ(BB->size(), 2U);
387 
388   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
389   EXPECT_NE(GTID, nullptr);
390   EXPECT_EQ(GTID->arg_size(), 1U);
391   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
392   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
393   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
394 
395   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
396   EXPECT_NE(Barrier, nullptr);
397   EXPECT_EQ(Barrier->arg_size(), 2U);
398   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
399   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
400   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
401 
402   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
403 
404   Builder.CreateUnreachable();
405   EXPECT_FALSE(verifyModule(*M, &errs()));
406 }
407 
408 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
409   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
410   OpenMPIRBuilder OMPBuilder(*M);
411   OMPBuilder.initialize();
412 
413   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
414   new UnreachableInst(Ctx, CBB);
415   auto FiniCB = [&](InsertPointTy IP) {
416     ASSERT_NE(IP.getBlock(), nullptr);
417     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
418     BranchInst::Create(CBB, IP.getBlock());
419   };
420   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
421 
422   IRBuilder<> Builder(BB);
423 
424   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
425   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP,
426                        OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel));
427   Builder.restoreIP(NewIP);
428   EXPECT_FALSE(M->global_empty());
429   EXPECT_EQ(M->size(), 4U);
430   EXPECT_EQ(F->size(), 4U);
431   EXPECT_EQ(BB->size(), 4U);
432 
433   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
434   EXPECT_NE(GTID, nullptr);
435   EXPECT_EQ(GTID->arg_size(), 1U);
436   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
437   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
438   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
439 
440   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
441   EXPECT_NE(Cancel, nullptr);
442   EXPECT_EQ(Cancel->arg_size(), 3U);
443   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
444   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
445   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
446   EXPECT_EQ(Cancel->getNumUses(), 1U);
447   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
448   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
449   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
450   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
451   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
452   EXPECT_NE(GTID1, nullptr);
453   EXPECT_EQ(GTID1->arg_size(), 1U);
454   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
455   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
456   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
457   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
458   EXPECT_NE(Barrier, nullptr);
459   EXPECT_EQ(Barrier->arg_size(), 2U);
460   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
461   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
462   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
463   EXPECT_EQ(Barrier->getNumUses(), 0U);
464   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
465             1U);
466   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
467 
468   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
469 
470   OMPBuilder.popFinalizationCB();
471 
472   Builder.CreateUnreachable();
473   EXPECT_FALSE(verifyModule(*M, &errs()));
474 }
475 
476 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
477   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
478   OpenMPIRBuilder OMPBuilder(*M);
479   OMPBuilder.initialize();
480 
481   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
482   new UnreachableInst(Ctx, CBB);
483   auto FiniCB = [&](InsertPointTy IP) {
484     ASSERT_NE(IP.getBlock(), nullptr);
485     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
486     BranchInst::Create(CBB, IP.getBlock());
487   };
488   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
489 
490   IRBuilder<> Builder(BB);
491 
492   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
493   ASSERT_EXPECTED_INIT(
494       OpenMPIRBuilder::InsertPointTy, NewIP,
495       OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel));
496   Builder.restoreIP(NewIP);
497   EXPECT_FALSE(M->global_empty());
498   EXPECT_EQ(M->size(), 4U);
499   EXPECT_EQ(F->size(), 7U);
500   EXPECT_EQ(BB->size(), 1U);
501   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
502   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
503   BB = BB->getTerminator()->getSuccessor(0);
504   EXPECT_EQ(BB->size(), 4U);
505 
506   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
507   EXPECT_NE(GTID, nullptr);
508   EXPECT_EQ(GTID->arg_size(), 1U);
509   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
510   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
511   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
512 
513   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
514   EXPECT_NE(Cancel, nullptr);
515   EXPECT_EQ(Cancel->arg_size(), 3U);
516   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
517   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
518   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
519   EXPECT_EQ(Cancel->getNumUses(), 1U);
520   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
521   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
522   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
523   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
524             NewIP.getBlock());
525   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
526   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
527   EXPECT_NE(GTID1, nullptr);
528   EXPECT_EQ(GTID1->arg_size(), 1U);
529   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
530   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
531   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
532   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
533   EXPECT_NE(Barrier, nullptr);
534   EXPECT_EQ(Barrier->arg_size(), 2U);
535   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
536   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
537   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
538   EXPECT_EQ(Barrier->getNumUses(), 0U);
539   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
540             1U);
541   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
542 
543   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
544 
545   OMPBuilder.popFinalizationCB();
546 
547   Builder.CreateUnreachable();
548   EXPECT_FALSE(verifyModule(*M, &errs()));
549 }
550 
551 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
552   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
553   OpenMPIRBuilder OMPBuilder(*M);
554   OMPBuilder.initialize();
555 
556   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
557   new UnreachableInst(Ctx, CBB);
558   auto FiniCB = [&](InsertPointTy IP) {
559     ASSERT_NE(IP.getBlock(), nullptr);
560     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
561     BranchInst::Create(CBB, IP.getBlock());
562   };
563   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
564 
565   IRBuilder<> Builder(BB);
566 
567   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
568   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP,
569                        OMPBuilder.createBarrier(Loc, OMPD_for));
570   Builder.restoreIP(NewIP);
571   EXPECT_FALSE(M->global_empty());
572   EXPECT_EQ(M->size(), 3U);
573   EXPECT_EQ(F->size(), 4U);
574   EXPECT_EQ(BB->size(), 4U);
575 
576   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
577   EXPECT_NE(GTID, nullptr);
578   EXPECT_EQ(GTID->arg_size(), 1U);
579   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
580   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
581   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
582 
583   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
584   EXPECT_NE(Barrier, nullptr);
585   EXPECT_EQ(Barrier->arg_size(), 2U);
586   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
587   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
588   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
589   EXPECT_EQ(Barrier->getNumUses(), 1U);
590   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
591   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
592   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
593   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
594   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
595             1U);
596   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
597             CBB);
598 
599   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
600 
601   OMPBuilder.popFinalizationCB();
602 
603   Builder.CreateUnreachable();
604   EXPECT_FALSE(verifyModule(*M, &errs()));
605 }
606 
607 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
608   OpenMPIRBuilder OMPBuilder(*M);
609   OMPBuilder.initialize();
610   F->setName("func");
611 
612   IRBuilder<> Builder(BB);
613 
614   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
615   ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded());
616   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
617   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
618   EXPECT_EQ(GTID->getDebugLoc(), DL);
619   EXPECT_EQ(Barrier->getDebugLoc(), DL);
620   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
621   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
622     return;
623   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
624   EXPECT_TRUE(Ident->hasInitializer());
625   if (!Ident->hasInitializer())
626     return;
627   Constant *Initializer = Ident->getInitializer();
628   EXPECT_TRUE(
629       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
630   GlobalVariable *SrcStrGlob =
631       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
632   if (!SrcStrGlob)
633     return;
634   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
635   ConstantDataArray *SrcSrc =
636       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
637   if (!SrcSrc)
638     return;
639   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
640 }
641 
// Exercises createParallel with Config.IsTargetDevice set to true and checks
// the resulting outlined function as well as the call that receives it
// (expected to be a call to __kmpc_parallel_51).
TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  // Install a different data layout for this test; the previous layout string
  // is restored at the end of the test body.
  // NOTE(review): the layout string looks like a GPU target layout (it selects
  // address space 5 for allocas via "A5") - confirm which target is intended.
  std::string oldDLStr = M->getDataLayoutStr();
  M->setDataLayout(
      "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
      "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
      "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = true;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);
  // Start the parallel region in a dedicated block following the entry block.
  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
  Builder.CreateBr(EnterBB);
  Builder.SetInsertPoint(EnterBB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  // Allocation created by the body callback; used after finalization to find
  // the function that the body ended up in.
  AllocaInst *PrivAI = nullptr;

  // Counters recording how often each callback was invoked.
  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  // Body callback: stores the function argument into a fresh allocation at
  // AllocaIP, reloads and compares it at CodeGenIP, and splits the block into
  // an if-then-else on the comparison result.
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    ++NumBodiesGenerated;

    Builder.restoreIP(AllocaIP);
    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
    Builder.CreateStore(F->arg_begin(), PrivAI);

    Builder.restoreIP(CodeGenIP);
    Value *PrivLoad =
        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);
    return Error::success();
  };

  // Privatization callback: a value that is not an allocation is expected to
  // be the function argument and is replaced by the inner value as-is;
  // allocations are copied into a new allocation.
  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    Value &Orig, Value &Inner,
                    Value *&ReplacementValue) -> InsertPointTy {
    ++NumPrivatizedVars;

    if (!isa<AllocaInst>(Orig)) {
      EXPECT_EQ(&Orig, F->arg_begin());
      ReplacementValue = &Inner;
      return CodeGenIP;
    }

    // Since the original value is an allocation, it has a pointer type and
    // therefore no additional wrapping should happen.
    EXPECT_EQ(&Orig, &Inner);

    // Trivial copy (=firstprivate).
    Builder.restoreIP(AllocaIP);
    Type *VTy = ReplacementValue->getType();
    Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
    ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(V, ReplacementValue);
    return CodeGenIP;
  };

  // Finalization callback: only counts its invocations.
  auto FiniCB = [&](InsertPointTy CodeGenIP) {
    ++NumFinalizationPoints;
    return Error::success();
  };

  IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                    F->getEntryBlock().getFirstInsertionPt());
  ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
                       OMPBuilder.createParallel(
                           Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
                           nullptr, OMP_PROC_BIND_default, false));

  // Each callback is expected to have been invoked exactly once.
  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 1U);
  EXPECT_EQ(NumFinalizationPoints, 1U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  OMPBuilder.finalize();
  // After finalization, the function containing PrivAI is taken to be the
  // outlined function; it must differ from the original function.
  Function *OutlinedFn = PrivAI->getFunction();
  EXPECT_FALSE(verifyModule(*M, &errs()));
  EXPECT_NE(OutlinedFn, F);
  EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
  EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
  EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));

  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
  EXPECT_EQ(OutlinedFn->arg_size(), 3U);
  // Make sure that the arguments are pointers in address space 0.
  EXPECT_EQ(OutlinedFn->getArg(0)->getType(),
            PointerType::get(M->getContext(), 0));
  EXPECT_EQ(OutlinedFn->getArg(1)->getType(),
            PointerType::get(M->getContext(), 0));
  EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
            PointerType::get(M->getContext(), 0));
  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
  // The outlined function must have exactly one use, which has to be a call.
  EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
  User *Usr = OutlinedFn->user_back();
  ASSERT_TRUE(isa<CallInst>(Usr));
  CallInst *Parallel51CI = dyn_cast<CallInst>(Usr);
  ASSERT_NE(Parallel51CI, nullptr);

  // The outlined function is passed as argument 5 of the runtime call.
  EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51");
  EXPECT_EQ(Parallel51CI->arg_size(), 9U);
  EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn);
  EXPECT_TRUE(
      isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts()));
  EXPECT_EQ(Parallel51CI, Usr);
  // Restore the data layout saved at the beginning of the test.
  M->setDataLayout(oldDLStr);
}
758 
759 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
760   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
761   OpenMPIRBuilder OMPBuilder(*M);
762   OMPBuilder.Config.IsTargetDevice = false;
763   OMPBuilder.initialize();
764   F->setName("func");
765   IRBuilder<> Builder(BB);
766 
767   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
768   Builder.CreateBr(EnterBB);
769   Builder.SetInsertPoint(EnterBB);
770   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
771 
772   AllocaInst *PrivAI = nullptr;
773 
774   unsigned NumBodiesGenerated = 0;
775   unsigned NumPrivatizedVars = 0;
776   unsigned NumFinalizationPoints = 0;
777 
778   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
779     ++NumBodiesGenerated;
780 
781     Builder.restoreIP(AllocaIP);
782     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
783     Builder.CreateStore(F->arg_begin(), PrivAI);
784 
785     Builder.restoreIP(CodeGenIP);
786     Value *PrivLoad =
787         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
788     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
789     Instruction *ThenTerm, *ElseTerm;
790     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
791                                   &ThenTerm, &ElseTerm);
792     return Error::success();
793   };
794 
795   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
796                     Value &Orig, Value &Inner,
797                     Value *&ReplacementValue) -> InsertPointTy {
798     ++NumPrivatizedVars;
799 
800     if (!isa<AllocaInst>(Orig)) {
801       EXPECT_EQ(&Orig, F->arg_begin());
802       ReplacementValue = &Inner;
803       return CodeGenIP;
804     }
805 
806     // Since the original value is an allocation, it has a pointer type and
807     // therefore no additional wrapping should happen.
808     EXPECT_EQ(&Orig, &Inner);
809 
810     // Trivial copy (=firstprivate).
811     Builder.restoreIP(AllocaIP);
812     Type *VTy = ReplacementValue->getType();
813     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
814     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
815     Builder.restoreIP(CodeGenIP);
816     Builder.CreateStore(V, ReplacementValue);
817     return CodeGenIP;
818   };
819 
820   auto FiniCB = [&](InsertPointTy CodeGenIP) {
821     ++NumFinalizationPoints;
822     return Error::success();
823   };
824 
825   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
826                                     F->getEntryBlock().getFirstInsertionPt());
827   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
828                        OMPBuilder.createParallel(
829                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
830                            nullptr, OMP_PROC_BIND_default, false));
831   EXPECT_EQ(NumBodiesGenerated, 1U);
832   EXPECT_EQ(NumPrivatizedVars, 1U);
833   EXPECT_EQ(NumFinalizationPoints, 1U);
834 
835   Builder.restoreIP(AfterIP);
836   Builder.CreateRetVoid();
837 
838   OMPBuilder.finalize();
839 
840   EXPECT_NE(PrivAI, nullptr);
841   Function *OutlinedFn = PrivAI->getFunction();
842   EXPECT_NE(F, OutlinedFn);
843   EXPECT_FALSE(verifyModule(*M, &errs()));
844   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
845   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
846   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
847 
848   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
849   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
850 
851   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
852   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
853   User *Usr = OutlinedFn->user_back();
854   ASSERT_TRUE(isa<CallInst>(Usr));
855   CallInst *ForkCI = dyn_cast<CallInst>(Usr);
856   ASSERT_NE(ForkCI, nullptr);
857 
858   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
859   EXPECT_EQ(ForkCI->arg_size(), 4U);
860   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
861   EXPECT_EQ(ForkCI->getArgOperand(1),
862             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
863   EXPECT_EQ(ForkCI, Usr);
864   Value *StoredValue =
865       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
866   EXPECT_EQ(StoredValue, F->arg_begin());
867 }
868 
869 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
870   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
871   OpenMPIRBuilder OMPBuilder(*M);
872   OMPBuilder.Config.IsTargetDevice = false;
873   OMPBuilder.initialize();
874   F->setName("func");
875   IRBuilder<> Builder(BB);
876 
877   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
878   Builder.CreateBr(EnterBB);
879   Builder.SetInsertPoint(EnterBB);
880   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
881 
882   unsigned NumInnerBodiesGenerated = 0;
883   unsigned NumOuterBodiesGenerated = 0;
884   unsigned NumFinalizationPoints = 0;
885 
886   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
887     ++NumInnerBodiesGenerated;
888     return Error::success();
889   };
890 
891   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
892                     Value &Orig, Value &Inner,
893                     Value *&ReplacementValue) -> InsertPointTy {
894     // Trivial copy (=firstprivate).
895     Builder.restoreIP(AllocaIP);
896     Type *VTy = ReplacementValue->getType();
897     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
898     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
899     Builder.restoreIP(CodeGenIP);
900     Builder.CreateStore(V, ReplacementValue);
901     return CodeGenIP;
902   };
903 
904   auto FiniCB = [&](InsertPointTy CodeGenIP) {
905     ++NumFinalizationPoints;
906     return Error::success();
907   };
908 
909   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
910     ++NumOuterBodiesGenerated;
911     Builder.restoreIP(CodeGenIP);
912     BasicBlock *CGBB = CodeGenIP.getBlock();
913     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
914     CGBB->getTerminator()->eraseFromParent();
915 
916     ASSERT_EXPECTED_INIT(
917         OpenMPIRBuilder::InsertPointTy, AfterIP,
918         OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
919                                   InnerBodyGenCB, PrivCB, FiniCB, nullptr,
920                                   nullptr, OMP_PROC_BIND_default, false));
921 
922     Builder.restoreIP(AfterIP);
923     Builder.CreateBr(NewBB);
924   };
925 
926   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
927                                     F->getEntryBlock().getFirstInsertionPt());
928   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
929                        OMPBuilder.createParallel(
930                            Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
931                            PrivCB, FiniCB, nullptr, nullptr,
932                            OMP_PROC_BIND_default, false));
933 
934   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
935   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
936   EXPECT_EQ(NumFinalizationPoints, 2U);
937 
938   Builder.restoreIP(AfterIP);
939   Builder.CreateRetVoid();
940 
941   OMPBuilder.finalize();
942 
943   EXPECT_EQ(M->size(), 5U);
944   for (Function &OutlinedFn : *M) {
945     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
946       continue;
947     EXPECT_FALSE(verifyModule(*M, &errs()));
948     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
949     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
950     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
951 
952     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
953     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
954 
955     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
956     User *Usr = OutlinedFn.user_back();
957     ASSERT_TRUE(isa<CallInst>(Usr));
958     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
959     ASSERT_NE(ForkCI, nullptr);
960 
961     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
962     EXPECT_EQ(ForkCI->arg_size(), 3U);
963     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
964     EXPECT_EQ(ForkCI->getArgOperand(1),
965               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
966     EXPECT_EQ(ForkCI, Usr);
967   }
968 }
969 
970 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
971   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
972   OpenMPIRBuilder OMPBuilder(*M);
973   OMPBuilder.Config.IsTargetDevice = false;
974   OMPBuilder.initialize();
975   F->setName("func");
976   IRBuilder<> Builder(BB);
977 
978   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
979   Builder.CreateBr(EnterBB);
980   Builder.SetInsertPoint(EnterBB);
981   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
982 
983   unsigned NumInnerBodiesGenerated = 0;
984   unsigned NumOuterBodiesGenerated = 0;
985   unsigned NumFinalizationPoints = 0;
986 
987   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
988     ++NumInnerBodiesGenerated;
989     return Error::success();
990   };
991 
992   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
993                     Value &Orig, Value &Inner,
994                     Value *&ReplacementValue) -> InsertPointTy {
995     // Trivial copy (=firstprivate).
996     Builder.restoreIP(AllocaIP);
997     Type *VTy = ReplacementValue->getType();
998     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
999     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1000     Builder.restoreIP(CodeGenIP);
1001     Builder.CreateStore(V, ReplacementValue);
1002     return CodeGenIP;
1003   };
1004 
1005   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1006     ++NumFinalizationPoints;
1007     return Error::success();
1008   };
1009 
1010   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1011     ++NumOuterBodiesGenerated;
1012     Builder.restoreIP(CodeGenIP);
1013     BasicBlock *CGBB = CodeGenIP.getBlock();
1014     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
1015     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
1016     CGBB->getTerminator()->eraseFromParent();
1017     ;
1018     NewBB1->getTerminator()->eraseFromParent();
1019     ;
1020 
1021     ASSERT_EXPECTED_INIT(
1022         OpenMPIRBuilder::InsertPointTy, AfterIP1,
1023         OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
1024                                   InnerBodyGenCB, PrivCB, FiniCB, nullptr,
1025                                   nullptr, OMP_PROC_BIND_default, false));
1026 
1027     Builder.restoreIP(AfterIP1);
1028     Builder.CreateBr(NewBB1);
1029 
1030     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2,
1031                          OMPBuilder.createParallel(
1032                              InsertPointTy(NewBB1, NewBB1->end()), AllocaIP,
1033                              InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1034                              OMP_PROC_BIND_default, false));
1035 
1036     Builder.restoreIP(AfterIP2);
1037     Builder.CreateBr(NewBB2);
1038   };
1039 
1040   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1041                                     F->getEntryBlock().getFirstInsertionPt());
1042   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1043                        OMPBuilder.createParallel(
1044                            Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
1045                            PrivCB, FiniCB, nullptr, nullptr,
1046                            OMP_PROC_BIND_default, false));
1047 
1048   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
1049   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
1050   EXPECT_EQ(NumFinalizationPoints, 3U);
1051 
1052   Builder.restoreIP(AfterIP);
1053   Builder.CreateRetVoid();
1054 
1055   OMPBuilder.finalize();
1056 
1057   EXPECT_EQ(M->size(), 6U);
1058   for (Function &OutlinedFn : *M) {
1059     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
1060       continue;
1061     EXPECT_FALSE(verifyModule(*M, &errs()));
1062     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
1063     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
1064     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
1065 
1066     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
1067     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
1068 
1069     unsigned NumAllocas = 0;
1070     for (Instruction &I : instructions(OutlinedFn))
1071       NumAllocas += isa<AllocaInst>(I);
1072     EXPECT_EQ(NumAllocas, 1U);
1073 
1074     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
1075     User *Usr = OutlinedFn.user_back();
1076     ASSERT_TRUE(isa<CallInst>(Usr));
1077     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
1078     ASSERT_NE(ForkCI, nullptr);
1079 
1080     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1081     EXPECT_EQ(ForkCI->arg_size(), 3U);
1082     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1083     EXPECT_EQ(ForkCI->getArgOperand(1),
1084               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
1085     EXPECT_EQ(ForkCI, Usr);
1086   }
1087 }
1088 
1089 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
1090   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1091   OpenMPIRBuilder OMPBuilder(*M);
1092   OMPBuilder.Config.IsTargetDevice = false;
1093   OMPBuilder.initialize();
1094   F->setName("func");
1095   IRBuilder<> Builder(BB);
1096 
1097   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1098   Builder.CreateBr(EnterBB);
1099   Builder.SetInsertPoint(EnterBB);
1100   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1101 
1102   AllocaInst *PrivAI = nullptr;
1103 
1104   unsigned NumBodiesGenerated = 0;
1105   unsigned NumPrivatizedVars = 0;
1106   unsigned NumFinalizationPoints = 0;
1107 
1108   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1109     ++NumBodiesGenerated;
1110 
1111     Builder.restoreIP(AllocaIP);
1112     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
1113     Builder.CreateStore(F->arg_begin(), PrivAI);
1114 
1115     Builder.restoreIP(CodeGenIP);
1116     Value *PrivLoad =
1117         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
1118     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
1119     Instruction *ThenTerm, *ElseTerm;
1120     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
1121                                   &ElseTerm);
1122     return Error::success();
1123   };
1124 
1125   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1126                     Value &Orig, Value &Inner,
1127                     Value *&ReplacementValue) -> InsertPointTy {
1128     ++NumPrivatizedVars;
1129 
1130     if (!isa<AllocaInst>(Orig)) {
1131       EXPECT_EQ(&Orig, F->arg_begin());
1132       ReplacementValue = &Inner;
1133       return CodeGenIP;
1134     }
1135 
1136     // Since the original value is an allocation, it has a pointer type and
1137     // therefore no additional wrapping should happen.
1138     EXPECT_EQ(&Orig, &Inner);
1139 
1140     // Trivial copy (=firstprivate).
1141     Builder.restoreIP(AllocaIP);
1142     Type *VTy = ReplacementValue->getType();
1143     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
1144     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1145     Builder.restoreIP(CodeGenIP);
1146     Builder.CreateStore(V, ReplacementValue);
1147     return CodeGenIP;
1148   };
1149 
1150   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1151     ++NumFinalizationPoints;
1152     // No destructors.
1153     return Error::success();
1154   };
1155 
1156   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1157                                     F->getEntryBlock().getFirstInsertionPt());
1158   ASSERT_EXPECTED_INIT(
1159       OpenMPIRBuilder::InsertPointTy, AfterIP,
1160       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1161                                 Builder.CreateIsNotNull(F->arg_begin()),
1162                                 nullptr, OMP_PROC_BIND_default, false));
1163 
1164   EXPECT_EQ(NumBodiesGenerated, 1U);
1165   EXPECT_EQ(NumPrivatizedVars, 1U);
1166   EXPECT_EQ(NumFinalizationPoints, 1U);
1167 
1168   Builder.restoreIP(AfterIP);
1169   Builder.CreateRetVoid();
1170   OMPBuilder.finalize();
1171 
1172   EXPECT_NE(PrivAI, nullptr);
1173   Function *OutlinedFn = PrivAI->getFunction();
1174   EXPECT_NE(F, OutlinedFn);
1175   EXPECT_FALSE(verifyModule(*M, &errs()));
1176 
1177   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
1178   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
1179 
1180   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
1181   ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
1182 
1183   CallInst *ForkCI = nullptr;
1184   for (User *Usr : OutlinedFn->users()) {
1185     ASSERT_TRUE(isa<CallInst>(Usr));
1186     ForkCI = cast<CallInst>(Usr);
1187   }
1188 
1189   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if");
1190   EXPECT_EQ(ForkCI->arg_size(), 5U);
1191   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1192   EXPECT_EQ(ForkCI->getArgOperand(1),
1193             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1194   EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx));
1195 }
1196 
1197 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1198   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1199   OpenMPIRBuilder OMPBuilder(*M);
1200   OMPBuilder.Config.IsTargetDevice = false;
1201   OMPBuilder.initialize();
1202   F->setName("func");
1203   IRBuilder<> Builder(BB);
1204 
1205   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1206   Builder.CreateBr(EnterBB);
1207   Builder.SetInsertPoint(EnterBB);
1208   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1209 
1210   unsigned NumBodiesGenerated = 0;
1211   unsigned NumPrivatizedVars = 0;
1212   unsigned NumFinalizationPoints = 0;
1213 
1214   CallInst *CheckedBarrier = nullptr;
1215   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1216     ++NumBodiesGenerated;
1217 
1218     Builder.restoreIP(CodeGenIP);
1219 
1220     // Create three barriers, two cancel barriers but only one checked.
1221     Function *CBFn, *BFn;
1222 
1223     ASSERT_EXPECTED_INIT(
1224         OpenMPIRBuilder::InsertPointTy, BarrierIP1,
1225         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1226     Builder.restoreIP(BarrierIP1);
1227 
1228     CBFn = M->getFunction("__kmpc_cancel_barrier");
1229     BFn = M->getFunction("__kmpc_barrier");
1230     ASSERT_NE(CBFn, nullptr);
1231     ASSERT_EQ(BFn, nullptr);
1232     ASSERT_EQ(CBFn->getNumUses(), 1U);
1233     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1234     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1235     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1236 
1237     ASSERT_EXPECTED_INIT(
1238         OpenMPIRBuilder::InsertPointTy, BarrierIP2,
1239         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1240     Builder.restoreIP(BarrierIP2);
1241     CBFn = M->getFunction("__kmpc_cancel_barrier");
1242     BFn = M->getFunction("__kmpc_barrier");
1243     ASSERT_NE(CBFn, nullptr);
1244     ASSERT_NE(BFn, nullptr);
1245     ASSERT_EQ(CBFn->getNumUses(), 1U);
1246     ASSERT_EQ(BFn->getNumUses(), 1U);
1247     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1248     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1249 
1250     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, BarrierIP3,
1251                          OMPBuilder.createBarrier(Builder.saveIP(),
1252                                                   OMPD_parallel, false, false));
1253     Builder.restoreIP(BarrierIP3);
1254     ASSERT_EQ(CBFn->getNumUses(), 2U);
1255     ASSERT_EQ(BFn->getNumUses(), 1U);
1256     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1257     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1258     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1259   };
1260 
1261   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1262                     Value *&) -> InsertPointTy {
1263     ++NumPrivatizedVars;
1264     llvm_unreachable("No privatization callback call expected!");
1265   };
1266 
1267   FunctionType *FakeDestructorTy =
1268       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1269                         /*isVarArg=*/false);
1270   auto *FakeDestructor = Function::Create(
1271       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1272 
1273   auto FiniCB = [&](InsertPointTy IP) {
1274     ++NumFinalizationPoints;
1275     Builder.restoreIP(IP);
1276     Builder.CreateCall(FakeDestructor,
1277                        {Builder.getInt32(NumFinalizationPoints)});
1278     return Error::success();
1279   };
1280 
1281   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1282                                     F->getEntryBlock().getFirstInsertionPt());
1283   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1284                        OMPBuilder.createParallel(
1285                            Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB,
1286                            FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
1287                            nullptr, OMP_PROC_BIND_default, true));
1288 
1289   EXPECT_EQ(NumBodiesGenerated, 1U);
1290   EXPECT_EQ(NumPrivatizedVars, 0U);
1291   EXPECT_EQ(NumFinalizationPoints, 2U);
1292   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1293 
1294   Builder.restoreIP(AfterIP);
1295   Builder.CreateRetVoid();
1296   OMPBuilder.finalize();
1297 
1298   EXPECT_FALSE(verifyModule(*M, &errs()));
1299 
1300   BasicBlock *ExitBB = nullptr;
1301   for (const User *Usr : FakeDestructor->users()) {
1302     const CallInst *CI = dyn_cast<CallInst>(Usr);
1303     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1304     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1305     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1306     if (ExitBB)
1307       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1308     else
1309       ExitBB = CI->getNextNode()->getSuccessor(0);
1310     ASSERT_EQ(ExitBB->size(), 1U);
1311     if (!isa<ReturnInst>(ExitBB->front())) {
1312       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1313       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1314       ASSERT_TRUE(isa<ReturnInst>(
1315           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1316     }
1317   }
1318 }
1319 
1320 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1321   OpenMPIRBuilder OMPBuilder(*M);
1322   OMPBuilder.Config.IsTargetDevice = false;
1323   OMPBuilder.initialize();
1324   F->setName("func");
1325   IRBuilder<> Builder(BB);
1326   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1327   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1328 
1329   Type *I32Ty = Type::getInt32Ty(M->getContext());
1330   Type *PtrTy = PointerType::get(M->getContext(), 0);
1331   Type *StructTy = StructType::get(I32Ty, PtrTy);
1332   Type *VoidTy = Type::getVoidTy(M->getContext());
1333   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1334   FunctionCallee TakeI32Func =
1335       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1336   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy);
1337   FunctionCallee TakeI32PtrFunc =
1338       M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy);
1339   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1340   FunctionCallee TakeStructFunc =
1341       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1342   FunctionCallee RetStructPtrFunc =
1343       M->getOrInsertFunction("ret_structptr", PtrTy);
1344   FunctionCallee TakeStructPtrFunc =
1345       M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy);
1346   Value *I32Val = Builder.CreateCall(RetI32Func);
1347   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1348   Value *StructVal = Builder.CreateCall(RetStructFunc);
1349   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1350 
1351   Instruction *Internal;
1352   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1353     IRBuilder<>::InsertPointGuard Guard(Builder);
1354     Builder.restoreIP(CodeGenIP);
1355     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1356     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1357     Builder.CreateCall(TakeStructFunc, StructVal);
1358     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1359     return Error::success();
1360   };
1361   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1362                     Value &Inner, Value *&ReplacementValue) {
1363     ReplacementValue = &Inner;
1364     return CodeGenIP;
1365   };
1366   auto FiniCB = [](InsertPointTy) { return Error::success(); };
1367 
1368   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1369                                     F->getEntryBlock().getFirstInsertionPt());
1370   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1371                        OMPBuilder.createParallel(
1372                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
1373                            nullptr, OMP_PROC_BIND_default, false));
1374   Builder.restoreIP(AfterIP);
1375   Builder.CreateRetVoid();
1376 
1377   OMPBuilder.finalize();
1378 
1379   EXPECT_FALSE(verifyModule(*M, &errs()));
1380   Function *OutlinedFn = Internal->getFunction();
1381 
1382   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1383   EXPECT_TRUE(Arg2Type->isPointerTy());
1384 }
1385 
1386 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1387   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1388   OpenMPIRBuilder OMPBuilder(*M);
1389   OMPBuilder.initialize();
1390   IRBuilder<> Builder(BB);
1391   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1392   Value *TripCount = F->getArg(0);
1393 
1394   unsigned NumBodiesGenerated = 0;
1395   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1396     NumBodiesGenerated += 1;
1397 
1398     Builder.restoreIP(CodeGenIP);
1399 
1400     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1401     Instruction *ThenTerm, *ElseTerm;
1402     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1403                                   &ThenTerm, &ElseTerm);
1404     return Error::success();
1405   };
1406 
1407   ASSERT_EXPECTED_INIT(
1408       CanonicalLoopInfo *, Loop,
1409       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount));
1410 
1411   Builder.restoreIP(Loop->getAfterIP());
1412   ReturnInst *RetInst = Builder.CreateRetVoid();
1413   OMPBuilder.finalize();
1414 
1415   Loop->assertOK();
1416   EXPECT_FALSE(verifyModule(*M, &errs()));
1417 
1418   EXPECT_EQ(NumBodiesGenerated, 1U);
1419 
1420   // Verify control flow structure (in addition to Loop->assertOK()).
1421   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1422   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1423 
1424   Instruction *IndVar = Loop->getIndVar();
1425   EXPECT_TRUE(isa<PHINode>(IndVar));
1426   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1427   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1428 
1429   EXPECT_EQ(Loop->getTripCount(), TripCount);
1430 
1431   BasicBlock *Body = Loop->getBody();
1432   Instruction *CmpInst = &Body->front();
1433   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1434   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1435 
1436   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1437   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1438     return SuccBB->getSingleSuccessor() == LatchPred;
1439   }));
1440 
1441   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1442 }
1443 
1444 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1445   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1446   OpenMPIRBuilder OMPBuilder(*M);
1447   OMPBuilder.initialize();
1448   IRBuilder<> Builder(BB);
1449 
1450   // Check the trip count is computed correctly. We generate the canonical loop
1451   // but rely on the IRBuilder's constant folder to compute the final result
1452   // since all inputs are constant. To verify overflow situations, limit the
1453   // trip count / loop counter widths to 16 bits.
1454   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1455                            bool IsSigned, bool InclusiveStop) -> int64_t {
1456     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1457     Type *LCTy = Type::getInt16Ty(Ctx);
1458     Value *StartVal = ConstantInt::get(LCTy, Start);
1459     Value *StopVal = ConstantInt::get(LCTy, Stop);
1460     Value *StepVal = ConstantInt::get(LCTy, Step);
1461     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1462       return Error::success();
1463     };
1464     ASSERT_EXPECTED_INIT_RETURN(
1465         CanonicalLoopInfo *, Loop,
1466         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1467                                        StepVal, IsSigned, InclusiveStop),
1468         -1);
1469     Loop->assertOK();
1470     Builder.restoreIP(Loop->getAfterIP());
1471     Value *TripCount = Loop->getTripCount();
1472     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1473   };
1474 
1475   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1476   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1477   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1478   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1479   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1480   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1481   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1482   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1483   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1484   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1485   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1486   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1487   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1488 
1489   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1490   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1491   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1492   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1493   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1494   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1495 
1496   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1497   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1498   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1499   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1500   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1501   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1502   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1503 
1504   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1505   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1506   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1507   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1508   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1509   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1510   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1511   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1512   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1513   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1514   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1515 
1516   // Finalize the function and verify it.
1517   Builder.CreateRetVoid();
1518   OMPBuilder.finalize();
1519   EXPECT_FALSE(verifyModule(*M, &errs()));
1520 }
1521 
1522 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1523   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1524   OpenMPIRBuilder OMPBuilder(*M);
1525   OMPBuilder.initialize();
1526   F->setName("func");
1527 
1528   IRBuilder<> Builder(BB);
1529 
1530   Type *LCTy = F->getArg(0)->getType();
1531   Constant *One = ConstantInt::get(LCTy, 1);
1532   Constant *Two = ConstantInt::get(LCTy, 2);
1533   Value *OuterTripCount =
1534       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1535   Value *InnerTripCount =
1536       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1537 
1538   // Fix an insertion point for ComputeIP.
1539   BasicBlock *LoopNextEnter =
1540       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1541                          Builder.GetInsertBlock()->getNextNode());
1542   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1543   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1544 
1545   Builder.SetInsertPoint(LoopNextEnter);
1546   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1547 
1548   CanonicalLoopInfo *InnerLoop = nullptr;
1549   CallInst *InbetweenLead = nullptr;
1550   CallInst *InbetweenTrail = nullptr;
1551   CallInst *Call = nullptr;
1552   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1553     Builder.restoreIP(OuterCodeGenIP);
1554     InbetweenLead =
1555         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1556 
1557     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1558                                   Value *InnerLC) {
1559       Builder.restoreIP(InnerCodeGenIP);
1560       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1561       return Error::success();
1562     };
1563     ASSERT_EXPECTED_INIT(
1564         CanonicalLoopInfo *, InnerLoopResult,
1565         OMPBuilder.createCanonicalLoop(Builder.saveIP(), InnerLoopBodyGenCB,
1566                                        InnerTripCount, "inner"));
1567     InnerLoop = InnerLoopResult;
1568 
1569     Builder.restoreIP(InnerLoop->getAfterIP());
1570     InbetweenTrail =
1571         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1572   };
1573   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop,
1574                        OMPBuilder.createCanonicalLoop(
1575                            OuterLoc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB),
1576                            OuterTripCount, "outer"));
1577 
1578   // Finish the function.
1579   Builder.restoreIP(OuterLoop->getAfterIP());
1580   Builder.CreateRetVoid();
1581 
1582   CanonicalLoopInfo *Collapsed =
1583       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1584 
1585   OMPBuilder.finalize();
1586   EXPECT_FALSE(verifyModule(*M, &errs()));
1587 
1588   // Verify control flow and BB order.
1589   BasicBlock *RefOrder[] = {
1590       Collapsed->getPreheader(),   Collapsed->getHeader(),
1591       Collapsed->getCond(),        Collapsed->getBody(),
1592       InbetweenLead->getParent(),  Call->getParent(),
1593       InbetweenTrail->getParent(), Collapsed->getLatch(),
1594       Collapsed->getExit(),        Collapsed->getAfter(),
1595   };
1596   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1597   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1598 
1599   // Verify the total trip count.
1600   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1601   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1602   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1603 
1604   // Verify the changed indvar.
1605   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1606   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1607   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1608   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1609   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1610 
1611   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1612   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1613   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1614   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1615   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1616 
1617   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1618   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1619 }
1620 
1621 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1622   OpenMPIRBuilder OMPBuilder(*M);
1623   CallInst *Call;
1624   BasicBlock *BodyCode;
1625   CanonicalLoopInfo *Loop =
1626       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1627   ASSERT_NE(Loop, nullptr);
1628 
1629   Instruction *OrigIndVar = Loop->getIndVar();
1630   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1631 
1632   // Tile the loop.
1633   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1634   std::vector<CanonicalLoopInfo *> GenLoops =
1635       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1636 
1637   OMPBuilder.finalize();
1638   EXPECT_FALSE(verifyModule(*M, &errs()));
1639 
1640   EXPECT_EQ(GenLoops.size(), 2u);
1641   CanonicalLoopInfo *Floor = GenLoops[0];
1642   CanonicalLoopInfo *Tile = GenLoops[1];
1643 
1644   BasicBlock *RefOrder[] = {
1645       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1646       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1647       Tile->getCond(),       Tile->getBody(),      BodyCode,
1648       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1649       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1650   };
1651   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1652   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1653 
1654   // Check the induction variable.
1655   EXPECT_EQ(Call->getParent(), BodyCode);
1656   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1657   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1658   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1659   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1660   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1661   EXPECT_EQ(Scale->getOperand(0), TileSize);
1662   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1663 }
1664 
1665 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1666   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1667   OpenMPIRBuilder OMPBuilder(*M);
1668   OMPBuilder.initialize();
1669   F->setName("func");
1670 
1671   IRBuilder<> Builder(BB);
1672   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1673   Value *TripCount = F->getArg(0);
1674   Type *LCTy = TripCount->getType();
1675 
1676   BasicBlock *BodyCode = nullptr;
1677   CanonicalLoopInfo *InnerLoop = nullptr;
1678   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1679                                 llvm::Value *OuterLC) {
1680     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1681                                   llvm::Value *InnerLC) {
1682       Builder.restoreIP(InnerCodeGenIP);
1683       BodyCode = Builder.GetInsertBlock();
1684 
1685       // Add something that consumes the induction variables to the body.
1686       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1687       return Error::success();
1688     };
1689     ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, InnerLoopResult,
1690                          OMPBuilder.createCanonicalLoop(OuterCodeGenIP,
1691                                                         InnerLoopBodyGenCB,
1692                                                         TripCount, "inner"));
1693     InnerLoop = InnerLoopResult;
1694   };
1695   ASSERT_EXPECTED_INIT(
1696       CanonicalLoopInfo *, OuterLoop,
1697       OMPBuilder.createCanonicalLoop(
1698           Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), TripCount, "outer"));
1699 
1700   // Finalize the function.
1701   Builder.restoreIP(OuterLoop->getAfterIP());
1702   Builder.CreateRetVoid();
1703 
1704   // Tile to loop nest.
1705   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1706   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1707   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1708       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1709 
1710   OMPBuilder.finalize();
1711   EXPECT_FALSE(verifyModule(*M, &errs()));
1712 
1713   EXPECT_EQ(GenLoops.size(), 4u);
1714   CanonicalLoopInfo *Floor1 = GenLoops[0];
1715   CanonicalLoopInfo *Floor2 = GenLoops[1];
1716   CanonicalLoopInfo *Tile1 = GenLoops[2];
1717   CanonicalLoopInfo *Tile2 = GenLoops[3];
1718 
1719   BasicBlock *RefOrder[] = {
1720       Floor1->getPreheader(),
1721       Floor1->getHeader(),
1722       Floor1->getCond(),
1723       Floor1->getBody(),
1724       Floor2->getPreheader(),
1725       Floor2->getHeader(),
1726       Floor2->getCond(),
1727       Floor2->getBody(),
1728       Tile1->getPreheader(),
1729       Tile1->getHeader(),
1730       Tile1->getCond(),
1731       Tile1->getBody(),
1732       Tile2->getPreheader(),
1733       Tile2->getHeader(),
1734       Tile2->getCond(),
1735       Tile2->getBody(),
1736       BodyCode,
1737       Tile2->getLatch(),
1738       Tile2->getExit(),
1739       Tile2->getAfter(),
1740       Tile1->getLatch(),
1741       Tile1->getExit(),
1742       Tile1->getAfter(),
1743       Floor2->getLatch(),
1744       Floor2->getExit(),
1745       Floor2->getAfter(),
1746       Floor1->getLatch(),
1747       Floor1->getExit(),
1748       Floor1->getAfter(),
1749   };
1750   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1751   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1752 }
1753 
1754 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1755   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1756   OpenMPIRBuilder OMPBuilder(*M);
1757   OMPBuilder.initialize();
1758   F->setName("func");
1759 
1760   IRBuilder<> Builder(BB);
1761   Value *TripCount = F->getArg(0);
1762   Type *LCTy = TripCount->getType();
1763 
1764   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1765   Value *OuterStopVal = TripCount;
1766   Value *OuterStep = ConstantInt::get(LCTy, 5);
1767   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1768   Value *InnerStopVal = TripCount;
1769   Value *InnerStep = ConstantInt::get(LCTy, 3);
1770 
1771   // Fix an insertion point for ComputeIP.
1772   BasicBlock *LoopNextEnter =
1773       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1774                          Builder.GetInsertBlock()->getNextNode());
1775   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1776   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1777 
1778   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1779   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1780 
1781   BasicBlock *BodyCode = nullptr;
1782   CanonicalLoopInfo *InnerLoop = nullptr;
1783   CallInst *Call = nullptr;
1784   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1785                                 llvm::Value *OuterLC) {
1786     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1787                                   llvm::Value *InnerLC) {
1788       Builder.restoreIP(InnerCodeGenIP);
1789       BodyCode = Builder.GetInsertBlock();
1790 
1791       // Add something that consumes the induction variable to the body.
1792       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1793       return Error::success();
1794     };
1795     ASSERT_EXPECTED_INIT(
1796         CanonicalLoopInfo *, InnerLoopResult,
1797         OMPBuilder.createCanonicalLoop(OuterCodeGenIP, InnerLoopBodyGenCB,
1798                                        InnerStartVal, InnerStopVal, InnerStep,
1799                                        false, false, ComputeIP, "inner"));
1800     InnerLoop = InnerLoopResult;
1801   };
1802   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop,
1803                        OMPBuilder.createCanonicalLoop(
1804                            Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB),
1805                            OuterStartVal, OuterStopVal, OuterStep, false, false,
1806                            ComputeIP, "outer"));
1807 
1808   // Finalize the function
1809   Builder.restoreIP(OuterLoop->getAfterIP());
1810   Builder.CreateRetVoid();
1811 
1812   // Tile the loop nest.
1813   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1814   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1815   std::vector<CanonicalLoopInfo *> GenLoops =
1816       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1817 
1818   OMPBuilder.finalize();
1819   EXPECT_FALSE(verifyModule(*M, &errs()));
1820 
1821   EXPECT_EQ(GenLoops.size(), 4u);
1822   CanonicalLoopInfo *Floor0 = GenLoops[0];
1823   CanonicalLoopInfo *Floor1 = GenLoops[1];
1824   CanonicalLoopInfo *Tile0 = GenLoops[2];
1825   CanonicalLoopInfo *Tile1 = GenLoops[3];
1826 
1827   BasicBlock *RefOrder[] = {
1828       Floor0->getPreheader(),
1829       Floor0->getHeader(),
1830       Floor0->getCond(),
1831       Floor0->getBody(),
1832       Floor1->getPreheader(),
1833       Floor1->getHeader(),
1834       Floor1->getCond(),
1835       Floor1->getBody(),
1836       Tile0->getPreheader(),
1837       Tile0->getHeader(),
1838       Tile0->getCond(),
1839       Tile0->getBody(),
1840       Tile1->getPreheader(),
1841       Tile1->getHeader(),
1842       Tile1->getCond(),
1843       Tile1->getBody(),
1844       BodyCode,
1845       Tile1->getLatch(),
1846       Tile1->getExit(),
1847       Tile1->getAfter(),
1848       Tile0->getLatch(),
1849       Tile0->getExit(),
1850       Tile0->getAfter(),
1851       Floor1->getLatch(),
1852       Floor1->getExit(),
1853       Floor1->getAfter(),
1854       Floor0->getLatch(),
1855       Floor0->getExit(),
1856       Floor0->getAfter(),
1857   };
1858   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1859   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1860 
1861   EXPECT_EQ(Call->getParent(), BodyCode);
1862 
1863   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1864   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1865   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1866   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1867   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1868   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1869   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1870   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1871   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1872   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1873   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1874 
1875   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1876   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1877   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1878   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1879   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1880   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1881   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1882   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1883   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1884   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1885   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1886   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1887   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1888 }
1889 
1890 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1891   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1892   OpenMPIRBuilder OMPBuilder(*M);
1893   OMPBuilder.initialize();
1894   IRBuilder<> Builder(BB);
1895 
1896   // Create a loop, tile it, and extract its trip count. All input values are
1897   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1898   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1899   // do the same for the tile loop.
1900   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1901                            bool IsSigned, bool InclusiveStop,
1902                            int64_t TileSize) -> uint64_t {
1903     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1904     Type *LCTy = Type::getInt16Ty(Ctx);
1905     Value *StartVal = ConstantInt::get(LCTy, Start);
1906     Value *StopVal = ConstantInt::get(LCTy, Stop);
1907     Value *StepVal = ConstantInt::get(LCTy, Step);
1908 
1909     // Generate a loop.
1910     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1911       return Error::success();
1912     };
1913     ASSERT_EXPECTED_INIT_RETURN(
1914         CanonicalLoopInfo *, Loop,
1915         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1916                                        StepVal, IsSigned, InclusiveStop),
1917         (unsigned)-1);
1918     InsertPointTy AfterIP = Loop->getAfterIP();
1919 
1920     // Tile the loop.
1921     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1922     std::vector<CanonicalLoopInfo *> GenLoops =
1923         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1924 
1925     // Set the insertion pointer to after loop, where the next loop will be
1926     // emitted.
1927     Builder.restoreIP(AfterIP);
1928 
1929     // Extract the trip count.
1930     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1931     Value *FloorTripCount = FloorLoop->getTripCount();
1932     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1933   };
1934 
1935   // Empty iteration domain.
1936   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1937   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1938   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1939   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1940   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1941 
1942   // Only complete tiles.
1943   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1944   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1945   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1946   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1947   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1948   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1949 
1950   // Only a partial tile.
1951   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1952   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1953   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1954   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1955   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1956 
1957   // Complete and partial tiles.
1958   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1959   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1960   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1961   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1962   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1963 
1964   // Close to 16-bit integer range.
1965   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1966   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1967   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1968   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1969   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1970   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1971   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1972 
1973   // Finalize the function.
1974   Builder.CreateRetVoid();
1975   OMPBuilder.finalize();
1976 
1977   EXPECT_FALSE(verifyModule(*M, &errs()));
1978 }
1979 
1980 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1981   OpenMPIRBuilder OMPBuilder(*M);
1982   MapVector<Value *, Value *> AlignedVars;
1983   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1984   ASSERT_NE(CLI, nullptr);
1985 
1986   // Simd-ize the loop.
1987   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1988                        OrderKind::OMP_ORDER_unknown,
1989                        /* Simdlen */ nullptr,
1990                        /* Safelen */ nullptr);
1991 
1992   OMPBuilder.finalize();
1993   EXPECT_FALSE(verifyModule(*M, &errs()));
1994 
1995   PassBuilder PB;
1996   FunctionAnalysisManager FAM;
1997   PB.registerFunctionAnalyses(FAM);
1998   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1999 
2000   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2001   EXPECT_EQ(TopLvl.size(), 1u);
2002 
2003   Loop *L = TopLvl.front();
2004   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2005   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2006 
2007   // Check for llvm.access.group metadata attached to the printf
2008   // function in the loop body.
2009   BasicBlock *LoopBody = CLI->getBody();
2010   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2011     return I.getMetadata("llvm.access.group") != nullptr;
2012   }));
2013 }
2014 
2015 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
2016   OpenMPIRBuilder OMPBuilder(*M);
2017   IRBuilder<> Builder(BB);
2018   const int AlignmentValue = 32;
2019   llvm::BasicBlock *sourceBlock = Builder.GetInsertBlock();
2020   AllocaInst *Alloc1 =
2021       Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1));
2022   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2023   MapVector<Value *, Value *> AlignedVars;
2024   AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
2025 
2026   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2027   ASSERT_NE(CLI, nullptr);
2028 
2029   // Simd-ize the loop.
2030   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2031                        OrderKind::OMP_ORDER_unknown,
2032                        /* Simdlen */ nullptr,
2033                        /* Safelen */ nullptr);
2034 
2035   OMPBuilder.finalize();
2036   EXPECT_FALSE(verifyModule(*M, &errs()));
2037 
2038   PassBuilder PB;
2039   FunctionAnalysisManager FAM;
2040   PB.registerFunctionAnalyses(FAM);
2041   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2042 
2043   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2044   EXPECT_EQ(TopLvl.size(), 1u);
2045 
2046   Loop *L = TopLvl.front();
2047   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2048   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2049 
2050   // Check for llvm.access.group metadata attached to the printf
2051   // function in the loop body.
2052   BasicBlock *LoopBody = CLI->getBody();
2053   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2054     return I.getMetadata("llvm.access.group") != nullptr;
2055   }));
2056 
2057   // Check if number of assumption instructions is equal to number of aligned
2058   // variables
2059   size_t NumAssummptionCallsInPreheader =
2060       count_if(*sourceBlock, [](Instruction &I) { return isa<AssumeInst>(I); });
2061   EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
2062 
2063   // Check if variables are correctly aligned
2064   for (Instruction &Instr : *sourceBlock) {
2065     if (!isa<AssumeInst>(Instr))
2066       continue;
2067     AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
2068     if (AssumeInstruction->getNumTotalBundleOperands()) {
2069       auto Bundle = AssumeInstruction->getOperandBundleAt(0);
2070       if (Bundle.getTagName() == "align") {
2071         EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
2072         auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
2073         EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
2074       }
2075     }
2076   }
2077 }
2078 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
2079   OpenMPIRBuilder OMPBuilder(*M);
2080   MapVector<Value *, Value *> AlignedVars;
2081   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2082   ASSERT_NE(CLI, nullptr);
2083 
2084   // Simd-ize the loop.
2085   OMPBuilder.applySimd(CLI, AlignedVars,
2086                        /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2087                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2088                        /* Safelen */ nullptr);
2089 
2090   OMPBuilder.finalize();
2091   EXPECT_FALSE(verifyModule(*M, &errs()));
2092 
2093   PassBuilder PB;
2094   FunctionAnalysisManager FAM;
2095   PB.registerFunctionAnalyses(FAM);
2096   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2097 
2098   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2099   EXPECT_EQ(TopLvl.size(), 1u);
2100 
2101   Loop *L = TopLvl.front();
2102   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2103   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2104   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2105 
2106   // Check for llvm.access.group metadata attached to the printf
2107   // function in the loop body.
2108   BasicBlock *LoopBody = CLI->getBody();
2109   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2110     return I.getMetadata("llvm.access.group") != nullptr;
2111   }));
2112 }
2113 
2114 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
2115   OpenMPIRBuilder OMPBuilder(*M);
2116   MapVector<Value *, Value *> AlignedVars;
2117 
2118   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2119   ASSERT_NE(CLI, nullptr);
2120 
2121   // Simd-ize the loop.
2122   OMPBuilder.applySimd(
2123       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
2124       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2125 
2126   OMPBuilder.finalize();
2127   EXPECT_FALSE(verifyModule(*M, &errs()));
2128 
2129   PassBuilder PB;
2130   FunctionAnalysisManager FAM;
2131   PB.registerFunctionAnalyses(FAM);
2132   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2133 
2134   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2135   EXPECT_EQ(TopLvl.size(), 1u);
2136 
2137   Loop *L = TopLvl.front();
2138   // Parallel metadata shoudl be attached because of presence of
2139   // the order(concurrent) OpenMP clause
2140   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2141   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2142   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2143 
2144   // Check for llvm.access.group metadata attached to the printf
2145   // function in the loop body.
2146   BasicBlock *LoopBody = CLI->getBody();
2147   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2148     return I.getMetadata("llvm.access.group") != nullptr;
2149   }));
2150 }
2151 
2152 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
2153   OpenMPIRBuilder OMPBuilder(*M);
2154   MapVector<Value *, Value *> AlignedVars;
2155 
2156   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2157   ASSERT_NE(CLI, nullptr);
2158 
2159   OMPBuilder.applySimd(
2160       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2161       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2162 
2163   OMPBuilder.finalize();
2164   EXPECT_FALSE(verifyModule(*M, &errs()));
2165 
2166   PassBuilder PB;
2167   FunctionAnalysisManager FAM;
2168   PB.registerFunctionAnalyses(FAM);
2169   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2170 
2171   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2172   EXPECT_EQ(TopLvl.size(), 1u);
2173 
2174   Loop *L = TopLvl.front();
2175   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2176   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2177   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2178 
2179   // Check for llvm.access.group metadata attached to the printf
2180   // function in the loop body.
2181   BasicBlock *LoopBody = CLI->getBody();
2182   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2183     return I.getMetadata("llvm.access.group") != nullptr;
2184   }));
2185 }
2186 
2187 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
2188   OpenMPIRBuilder OMPBuilder(*M);
2189   MapVector<Value *, Value *> AlignedVars;
2190 
2191   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2192   ASSERT_NE(CLI, nullptr);
2193 
2194   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2195                        OrderKind::OMP_ORDER_unknown,
2196                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
2197                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2198 
2199   OMPBuilder.finalize();
2200   EXPECT_FALSE(verifyModule(*M, &errs()));
2201 
2202   PassBuilder PB;
2203   FunctionAnalysisManager FAM;
2204   PB.registerFunctionAnalyses(FAM);
2205   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2206 
2207   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2208   EXPECT_EQ(TopLvl.size(), 1u);
2209 
2210   Loop *L = TopLvl.front();
2211   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2212   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2213   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2);
2214 
2215   // Check for llvm.access.group metadata attached to the printf
2216   // function in the loop body.
2217   BasicBlock *LoopBody = CLI->getBody();
2218   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2219     return I.getMetadata("llvm.access.group") != nullptr;
2220   }));
2221 }
2222 
2223 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) {
2224   OpenMPIRBuilder OMPBuilder(*M);
2225   IRBuilder<> Builder(BB);
2226   MapVector<Value *, Value *> AlignedVars;
2227   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
2228   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
2229 
2230   // Generation of if condition
2231   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1);
2232   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2);
2233   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2234   LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2);
2235 
2236   Value *IfCmp = Builder.CreateICmpNE(Load1, Load2);
2237 
2238   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2239   ASSERT_NE(CLI, nullptr);
2240 
2241   // Simd-ize the loop with if condition
2242   OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
2243                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2244                        /* Safelen */ nullptr);
2245 
2246   OMPBuilder.finalize();
2247   EXPECT_FALSE(verifyModule(*M, &errs()));
2248 
2249   PassBuilder PB;
2250   FunctionAnalysisManager FAM;
2251   PB.registerFunctionAnalyses(FAM);
2252   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2253 
2254   // Check if there are two loops (one with enabled vectorization)
2255   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2256   EXPECT_EQ(TopLvl.size(), 2u);
2257 
2258   Loop *L = TopLvl[0];
2259   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2260   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2261   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2262 
2263   // The second loop should have disabled vectorization
2264   L = TopLvl[1];
2265   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2266   EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2267   // Check for llvm.access.group metadata attached to the printf
2268   // function in the loop body.
2269   BasicBlock *LoopBody = CLI->getBody();
2270   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2271     return I.getMetadata("llvm.access.group") != nullptr;
2272   }));
2273 }
2274 
2275 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
2276   OpenMPIRBuilder OMPBuilder(*M);
2277 
2278   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2279   ASSERT_NE(CLI, nullptr);
2280 
2281   // Unroll the loop.
2282   OMPBuilder.unrollLoopFull(DL, CLI);
2283 
2284   OMPBuilder.finalize();
2285   EXPECT_FALSE(verifyModule(*M, &errs()));
2286 
2287   PassBuilder PB;
2288   FunctionAnalysisManager FAM;
2289   PB.registerFunctionAnalyses(FAM);
2290   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2291 
2292   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2293   EXPECT_EQ(TopLvl.size(), 1u);
2294 
2295   Loop *L = TopLvl.front();
2296   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2297   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
2298 }
2299 
2300 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
2301   OpenMPIRBuilder OMPBuilder(*M);
2302   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2303   ASSERT_NE(CLI, nullptr);
2304 
2305   // Unroll the loop.
2306   CanonicalLoopInfo *UnrolledLoop = nullptr;
2307   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
2308   ASSERT_NE(UnrolledLoop, nullptr);
2309 
2310   OMPBuilder.finalize();
2311   EXPECT_FALSE(verifyModule(*M, &errs()));
2312   UnrolledLoop->assertOK();
2313 
2314   PassBuilder PB;
2315   FunctionAnalysisManager FAM;
2316   PB.registerFunctionAnalyses(FAM);
2317   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2318 
2319   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2320   EXPECT_EQ(TopLvl.size(), 1u);
2321   Loop *Outer = TopLvl.front();
2322   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
2323   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
2324   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
2325   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
2326 
2327   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
2328   Loop *Inner = Outer->getSubLoops().front();
2329 
2330   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
2331   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
2332 }
2333 
2334 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
2335   OpenMPIRBuilder OMPBuilder(*M);
2336 
2337   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2338   ASSERT_NE(CLI, nullptr);
2339 
2340   // Unroll the loop.
2341   OMPBuilder.unrollLoopHeuristic(DL, CLI);
2342 
2343   OMPBuilder.finalize();
2344   EXPECT_FALSE(verifyModule(*M, &errs()));
2345 
2346   PassBuilder PB;
2347   FunctionAnalysisManager FAM;
2348   PB.registerFunctionAnalyses(FAM);
2349   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2350 
2351   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2352   EXPECT_EQ(TopLvl.size(), 1u);
2353 
2354   Loop *L = TopLvl.front();
2355   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2356 }
2357 
2358 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
2359   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2360   std::string oldDLStr = M->getDataLayoutStr();
2361   M->setDataLayout(
2362       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
2363       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
2364       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
2365   OpenMPIRBuilder OMPBuilder(*M);
2366   OMPBuilder.Config.IsTargetDevice = true;
2367   OMPBuilder.initialize();
2368   IRBuilder<> Builder(BB);
2369   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2370   InsertPointTy AllocaIP = Builder.saveIP();
2371 
2372   Type *LCTy = Type::getInt32Ty(Ctx);
2373   Value *StartVal = ConstantInt::get(LCTy, 10);
2374   Value *StopVal = ConstantInt::get(LCTy, 52);
2375   Value *StepVal = ConstantInt::get(LCTy, 2);
2376   auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); };
2377 
2378   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2379                        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGen,
2380                                                       StartVal, StopVal,
2381                                                       StepVal, false, false));
2382   BasicBlock *Preheader = CLI->getPreheader();
2383   Value *TripCount = CLI->getTripCount();
2384 
2385   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2386 
2387   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2388                        OMPBuilder.applyWorkshareLoop(
2389                            DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static,
2390                            nullptr, false, false, false, false,
2391                            WorksharingLoopType::ForStaticLoop));
2392   Builder.restoreIP(AfterIP);
2393   Builder.CreateRetVoid();
2394 
2395   OMPBuilder.finalize();
2396   EXPECT_FALSE(verifyModule(*M, &errs()));
2397 
2398   CallInst *WorkshareLoopRuntimeCall = nullptr;
2399   int WorkshareLoopRuntimeCallCnt = 0;
2400   for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) {
2401     CallInst *Call = dyn_cast<CallInst>(Inst);
2402     if (!Call)
2403       continue;
2404     if (!Call->getCalledFunction())
2405       continue;
2406 
2407     if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") {
2408       WorkshareLoopRuntimeCall = Call;
2409       WorkshareLoopRuntimeCallCnt++;
2410     }
2411   }
2412   EXPECT_NE(WorkshareLoopRuntimeCall, nullptr);
2413   // Verify that there is only one call to workshare loop function
2414   EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1);
2415   // Check that pointer to loop body function is passed as second argument
2416   Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1);
2417   EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType());
2418   Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg);
2419   EXPECT_NE(ArgFunction, nullptr);
2420   EXPECT_EQ(ArgFunction->arg_size(), 1u);
2421   EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType());
2422   // Check that no variables except for loop counter are used in loop body
2423   EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()),
2424             WorkshareLoopRuntimeCall->getArgOperand(2));
2425   // Check loop trip count argument
2426   EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3));
2427 }
2428 
2429 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
2430   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2431   OpenMPIRBuilder OMPBuilder(*M);
2432   OMPBuilder.Config.IsTargetDevice = false;
2433   OMPBuilder.initialize();
2434   IRBuilder<> Builder(BB);
2435   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2436 
2437   Type *LCTy = Type::getInt32Ty(Ctx);
2438   Value *StartVal = ConstantInt::get(LCTy, 10);
2439   Value *StopVal = ConstantInt::get(LCTy, 52);
2440   Value *StepVal = ConstantInt::get(LCTy, 2);
2441   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2442     return Error::success();
2443   };
2444   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2445                        OMPBuilder.createCanonicalLoop(
2446                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2447                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2448   BasicBlock *Preheader = CLI->getPreheader();
2449   BasicBlock *Body = CLI->getBody();
2450   Value *IV = CLI->getIndVar();
2451   BasicBlock *ExitBlock = CLI->getExit();
2452 
2453   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2454   InsertPointTy AllocaIP = Builder.saveIP();
2455 
2456   ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP,
2457                                                      /*NeedsBarrier=*/true,
2458                                                      OMP_SCHEDULE_Static),
2459                        Succeeded());
2460 
2461   BasicBlock *Cond = Body->getSinglePredecessor();
2462   Instruction *Cmp = &*Cond->begin();
2463   Value *TripCount = Cmp->getOperand(1);
2464 
2465   auto AllocaIter = BB->begin();
2466   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2467   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2468   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2469   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2470   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2471   EXPECT_NE(PLastIter, nullptr);
2472   EXPECT_NE(PLowerBound, nullptr);
2473   EXPECT_NE(PUpperBound, nullptr);
2474   EXPECT_NE(PStride, nullptr);
2475 
2476   auto PreheaderIter = Preheader->begin();
2477   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
2478   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2479   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2480   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2481   ASSERT_NE(LowerBoundStore, nullptr);
2482   ASSERT_NE(UpperBoundStore, nullptr);
2483   ASSERT_NE(StrideStore, nullptr);
2484 
2485   auto *OrigLowerBound =
2486       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2487   auto *OrigUpperBound =
2488       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2489   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2490   ASSERT_NE(OrigLowerBound, nullptr);
2491   ASSERT_NE(OrigUpperBound, nullptr);
2492   ASSERT_NE(OrigStride, nullptr);
2493   EXPECT_EQ(OrigLowerBound->getValue(), 0);
2494   EXPECT_EQ(OrigUpperBound->getValue(), 20);
2495   EXPECT_EQ(OrigStride->getValue(), 1);
2496 
2497   // Check that the loop IV is updated to account for the lower bound returned
2498   // by the OpenMP runtime call.
2499   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
2500   EXPECT_EQ(Add->getOperand(0), IV);
2501   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
2502   ASSERT_NE(LoadedLowerBound, nullptr);
2503   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
2504 
2505   // Check that the trip count is updated to account for the lower and upper
2506   // bounds return by the OpenMP runtime call.
2507   auto *AddOne = dyn_cast<Instruction>(TripCount);
2508   ASSERT_NE(AddOne, nullptr);
2509   ASSERT_TRUE(AddOne->isBinaryOp());
2510   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
2511   ASSERT_NE(One, nullptr);
2512   EXPECT_EQ(One->getValue(), 1);
2513   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
2514   ASSERT_NE(Difference, nullptr);
2515   ASSERT_TRUE(Difference->isBinaryOp());
2516   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
2517   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
2518   ASSERT_NE(LoadedUpperBound, nullptr);
2519   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
2520 
2521   // The original loop iterator should only be used in the condition, in the
2522   // increment and in the statement that adds the lower bound to it.
2523   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2524 
2525   // The exit block should contain the "fini" call and the barrier call,
2526   // plus the call to obtain the thread ID.
2527   size_t NumCallsInExitBlock =
2528       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2529   EXPECT_EQ(NumCallsInExitBlock, 3u);
2530 }
2531 
2532 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
2533   unsigned IVBits = GetParam();
2534 
2535   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2536   OpenMPIRBuilder OMPBuilder(*M);
2537   OMPBuilder.Config.IsTargetDevice = false;
2538 
2539   BasicBlock *Body;
2540   CallInst *Call;
2541   CanonicalLoopInfo *CLI =
2542       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
2543   ASSERT_NE(CLI, nullptr);
2544 
2545   Instruction *OrigIndVar = CLI->getIndVar();
2546   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
2547 
2548   Type *LCTy = Type::getInt32Ty(Ctx);
2549   Value *ChunkSize = ConstantInt::get(LCTy, 5);
2550   InsertPointTy AllocaIP{&F->getEntryBlock(),
2551                          F->getEntryBlock().getFirstInsertionPt()};
2552   ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP,
2553                                                      /*NeedsBarrier=*/true,
2554                                                      OMP_SCHEDULE_Static,
2555                                                      ChunkSize),
2556                        Succeeded());
2557 
2558   OMPBuilder.finalize();
2559   EXPECT_FALSE(verifyModule(*M, &errs()));
2560 
2561   BasicBlock *Entry = &F->getEntryBlock();
2562   BasicBlock *Preheader = Entry->getSingleSuccessor();
2563 
2564   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2565   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2566   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2567   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2568   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2569   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2570   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2571 
2572   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2573   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2574   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2575   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2576   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2577   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2578   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2579 
2580   BasicBlock *DispatchInc = ChunkAfter;
2581 
2582   EXPECT_EQ(ChunkBody, Body);
2583   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2584   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2585 
2586   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2587 
2588   Value *NewIV = Call->getOperand(1);
2589   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2590 
2591   CallInst *InitCall = findSingleCall(
2592       F,
2593       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2594                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2595       OMPBuilder);
2596   EXPECT_EQ(InitCall->getParent(), Preheader);
2597   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2598   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2599   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2600 
2601   CallInst *FiniCall = findSingleCall(
2602       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2603   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2604 
2605   CallInst *BarrierCall = findSingleCall(
2606       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2607   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2608 }
2609 
2610 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
2611                          ::testing::Values(8, 16, 32, 64));
2612 
2613 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2614   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2615   OpenMPIRBuilder OMPBuilder(*M);
2616   OMPBuilder.Config.IsTargetDevice = false;
2617   OMPBuilder.initialize();
2618   IRBuilder<> Builder(BB);
2619   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2620 
2621   omp::OMPScheduleType SchedType = GetParam();
2622   uint32_t ChunkSize = 1;
2623   switch (SchedType & ~OMPScheduleType::ModifierMask) {
2624   case omp::OMPScheduleType::BaseDynamicChunked:
2625   case omp::OMPScheduleType::BaseGuidedChunked:
2626     ChunkSize = 7;
2627     break;
2628   case omp::OMPScheduleType::BaseAuto:
2629   case omp::OMPScheduleType::BaseRuntime:
2630     ChunkSize = 1;
2631     break;
2632   default:
2633     assert(0 && "unknown type for this test");
2634     break;
2635   }
2636 
2637   Type *LCTy = Type::getInt32Ty(Ctx);
2638   Value *StartVal = ConstantInt::get(LCTy, 10);
2639   Value *StopVal = ConstantInt::get(LCTy, 52);
2640   Value *StepVal = ConstantInt::get(LCTy, 2);
2641   Value *ChunkVal =
2642       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
2643   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2644     return Error::success();
2645   };
2646 
2647   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2648                        OMPBuilder.createCanonicalLoop(
2649                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2650                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2651 
2652   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2653   InsertPointTy AllocaIP = Builder.saveIP();
2654 
2655   // Collect all the info from CLI, as it isn't usable after the call to
2656   // createDynamicWorkshareLoop.
2657   InsertPointTy AfterIP = CLI->getAfterIP();
2658   BasicBlock *Preheader = CLI->getPreheader();
2659   BasicBlock *ExitBlock = CLI->getExit();
2660   BasicBlock *LatchBlock = CLI->getLatch();
2661   Value *IV = CLI->getIndVar();
2662 
2663   ASSERT_EXPECTED_INIT(
2664       OpenMPIRBuilder::InsertPointTy, EndIP,
2665       OMPBuilder.applyWorkshareLoop(
2666           DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
2667           ChunkVal, /*Simd=*/false,
2668           (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
2669               omp::OMPScheduleType::ModifierMonotonic,
2670           (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
2671               omp::OMPScheduleType::ModifierNonmonotonic,
2672           /*Ordered=*/false));
2673 
2674   // The returned value should be the "after" point.
2675   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2676   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2677 
2678   auto AllocaIter = BB->begin();
2679   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2680   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2681   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2682   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2683   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2684   EXPECT_NE(PLastIter, nullptr);
2685   EXPECT_NE(PLowerBound, nullptr);
2686   EXPECT_NE(PUpperBound, nullptr);
2687   EXPECT_NE(PStride, nullptr);
2688 
2689   auto PreheaderIter = Preheader->begin();
2690   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2691   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2692   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2693   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2694   ASSERT_NE(LowerBoundStore, nullptr);
2695   ASSERT_NE(UpperBoundStore, nullptr);
2696   ASSERT_NE(StrideStore, nullptr);
2697 
2698   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2699   ASSERT_NE(ThreadIdCall, nullptr);
2700   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2701             "__kmpc_global_thread_num");
2702 
2703   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2704 
2705   ASSERT_NE(InitCall, nullptr);
2706   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2707             "__kmpc_dispatch_init_4u");
2708   EXPECT_EQ(InitCall->arg_size(), 7U);
2709   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2710   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2711   if ((SchedType & OMPScheduleType::MonotonicityMask) ==
2712       OMPScheduleType::None) {
2713     // Implementation is allowed to add default nonmonotonicity flag
2714     EXPECT_EQ(
2715         static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
2716             OMPScheduleType::ModifierNonmonotonic,
2717         SchedType | OMPScheduleType::ModifierNonmonotonic);
2718   } else {
2719     EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
2720               SchedType);
2721   }
2722 
2723   ConstantInt *OrigLowerBound =
2724       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2725   ConstantInt *OrigUpperBound =
2726       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2727   ConstantInt *OrigStride =
2728       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2729   ASSERT_NE(OrigLowerBound, nullptr);
2730   ASSERT_NE(OrigUpperBound, nullptr);
2731   ASSERT_NE(OrigStride, nullptr);
2732   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2733   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2734   EXPECT_EQ(OrigStride->getValue(), 1);
2735 
2736   CallInst *FiniCall = dyn_cast<CallInst>(
2737       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2738   EXPECT_EQ(FiniCall, nullptr);
2739 
2740   // The original loop iterator should only be used in the condition, in the
2741   // increment and in the statement that adds the lower bound to it.
2742   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2743 
2744   // The exit block should contain the barrier call, plus the call to obtain
2745   // the thread ID.
2746   size_t NumCallsInExitBlock =
2747       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2748   EXPECT_EQ(NumCallsInExitBlock, 2u);
2749 
2750   // Add a termination to our block and check that it is internally consistent.
2751   Builder.restoreIP(EndIP);
2752   Builder.CreateRetVoid();
2753   OMPBuilder.finalize();
2754   EXPECT_FALSE(verifyModule(*M, &errs()));
2755 }
2756 
2757 INSTANTIATE_TEST_SUITE_P(
2758     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
2759     ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
2760                       omp::OMPScheduleType::UnorderedGuidedChunked,
2761                       omp::OMPScheduleType::UnorderedAuto,
2762                       omp::OMPScheduleType::UnorderedRuntime,
2763                       omp::OMPScheduleType::UnorderedDynamicChunked |
2764                           omp::OMPScheduleType::ModifierMonotonic,
2765                       omp::OMPScheduleType::UnorderedDynamicChunked |
2766                           omp::OMPScheduleType::ModifierNonmonotonic,
2767                       omp::OMPScheduleType::UnorderedGuidedChunked |
2768                           omp::OMPScheduleType::ModifierMonotonic,
2769                       omp::OMPScheduleType::UnorderedGuidedChunked |
2770                           omp::OMPScheduleType::ModifierNonmonotonic,
2771                       omp::OMPScheduleType::UnorderedAuto |
2772                           omp::OMPScheduleType::ModifierMonotonic,
2773                       omp::OMPScheduleType::UnorderedRuntime |
2774                           omp::OMPScheduleType::ModifierMonotonic));
2775 
2776 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2777   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2778   OpenMPIRBuilder OMPBuilder(*M);
2779   OMPBuilder.Config.IsTargetDevice = false;
2780   OMPBuilder.initialize();
2781   IRBuilder<> Builder(BB);
2782   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2783 
2784   uint32_t ChunkSize = 1;
2785   Type *LCTy = Type::getInt32Ty(Ctx);
2786   Value *StartVal = ConstantInt::get(LCTy, 10);
2787   Value *StopVal = ConstantInt::get(LCTy, 52);
2788   Value *StepVal = ConstantInt::get(LCTy, 2);
2789   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2790   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2791     return llvm::Error::success();
2792   };
2793 
2794   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2795                        OMPBuilder.createCanonicalLoop(
2796                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2797                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2798 
2799   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2800   InsertPointTy AllocaIP = Builder.saveIP();
2801 
2802   // Collect all the info from CLI, as it isn't usable after the call to
2803   // createDynamicWorkshareLoop.
2804   BasicBlock *Preheader = CLI->getPreheader();
2805   BasicBlock *ExitBlock = CLI->getExit();
2806   BasicBlock *LatchBlock = CLI->getLatch();
2807   Value *IV = CLI->getIndVar();
2808 
2809   ASSERT_EXPECTED_INIT(
2810       OpenMPIRBuilder::InsertPointTy, EndIP,
2811       OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2812                                     OMP_SCHEDULE_Static, ChunkVal,
2813                                     /*HasSimdModifier=*/false,
2814                                     /*HasMonotonicModifier=*/false,
2815                                     /*HasNonmonotonicModifier=*/false,
2816                                     /*HasOrderedClause=*/true));
2817 
2818   // Add a termination to our block and check that it is internally consistent.
2819   Builder.restoreIP(EndIP);
2820   Builder.CreateRetVoid();
2821   OMPBuilder.finalize();
2822   EXPECT_FALSE(verifyModule(*M, &errs()));
2823 
2824   CallInst *InitCall = nullptr;
2825   for (Instruction &EI : *Preheader) {
2826     Instruction *Cur = &EI;
2827     if (isa<CallInst>(Cur)) {
2828       InitCall = cast<CallInst>(Cur);
2829       if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2830         break;
2831       InitCall = nullptr;
2832     }
2833   }
2834   EXPECT_NE(InitCall, nullptr);
2835   EXPECT_EQ(InitCall->arg_size(), 7U);
2836   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2837   EXPECT_EQ(SchedVal->getValue(),
2838             static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));
2839 
2840   CallInst *FiniCall = dyn_cast<CallInst>(
2841       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2842   ASSERT_NE(FiniCall, nullptr);
2843   EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2844             "__kmpc_dispatch_fini_4u");
2845   EXPECT_EQ(FiniCall->arg_size(), 2U);
2846   EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2847   EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2848 
2849   // The original loop iterator should only be used in the condition, in the
2850   // increment and in the statement that adds the lower bound to it.
2851   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2852 
2853   // The exit block should contain the barrier call, plus the call to obtain
2854   // the thread ID.
2855   size_t NumCallsInExitBlock =
2856       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2857   EXPECT_EQ(NumCallsInExitBlock, 2u);
2858 }
2859 
2860 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2861   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2862   OpenMPIRBuilder OMPBuilder(*M);
2863   OMPBuilder.initialize();
2864   F->setName("func");
2865   IRBuilder<> Builder(BB);
2866 
2867   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2868 
2869   AllocaInst *PrivAI = nullptr;
2870 
2871   BasicBlock *EntryBB = nullptr;
2872   BasicBlock *ThenBB = nullptr;
2873 
2874   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2875     if (AllocaIP.isSet())
2876       Builder.restoreIP(AllocaIP);
2877     else
2878       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2879     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2880     Builder.CreateStore(F->arg_begin(), PrivAI);
2881 
2882     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2883     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2884     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2885 
2886     Builder.restoreIP(CodeGenIP);
2887 
2888     // collect some info for checks later
2889     ThenBB = Builder.GetInsertBlock();
2890     EntryBB = ThenBB->getUniquePredecessor();
2891 
2892     // simple instructions for body
2893     Value *PrivLoad =
2894         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2895     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2896   };
2897 
2898   auto FiniCB = [&](InsertPointTy IP) {
2899     BasicBlock *IPBB = IP.getBlock();
2900     EXPECT_NE(IPBB->end(), IP.getPoint());
2901   };
2902 
2903   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2904                        OMPBuilder.createMaster(Builder,
2905                                                BODYGENCB_WRAPPER(BodyGenCB),
2906                                                FINICB_WRAPPER(FiniCB)));
2907   Builder.restoreIP(AfterIP);
2908   Value *EntryBBTI = EntryBB->getTerminator();
2909   EXPECT_NE(EntryBBTI, nullptr);
2910   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2911   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2912   EXPECT_TRUE(EntryBr->isConditional());
2913   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2914   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2915   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2916 
2917   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2918   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2919 
2920   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2921   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2922   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2923   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2924 
2925   CallInst *MasterEndCI = nullptr;
2926   for (auto &FI : *ThenBB) {
2927     Instruction *cur = &FI;
2928     if (isa<CallInst>(cur)) {
2929       MasterEndCI = cast<CallInst>(cur);
2930       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2931         break;
2932       MasterEndCI = nullptr;
2933     }
2934   }
2935   EXPECT_NE(MasterEndCI, nullptr);
2936   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2937   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2938   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2939 }
2940 
2941 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2942   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2943   OpenMPIRBuilder OMPBuilder(*M);
2944   OMPBuilder.initialize();
2945   F->setName("func");
2946   IRBuilder<> Builder(BB);
2947 
2948   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2949 
2950   AllocaInst *PrivAI = nullptr;
2951 
2952   BasicBlock *EntryBB = nullptr;
2953   BasicBlock *ThenBB = nullptr;
2954 
2955   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2956     if (AllocaIP.isSet())
2957       Builder.restoreIP(AllocaIP);
2958     else
2959       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2960     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2961     Builder.CreateStore(F->arg_begin(), PrivAI);
2962 
2963     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2964     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2965     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2966 
2967     Builder.restoreIP(CodeGenIP);
2968 
2969     // collect some info for checks later
2970     ThenBB = Builder.GetInsertBlock();
2971     EntryBB = ThenBB->getUniquePredecessor();
2972 
2973     // simple instructions for body
2974     Value *PrivLoad =
2975         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2976     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2977   };
2978 
2979   auto FiniCB = [&](InsertPointTy IP) {
2980     BasicBlock *IPBB = IP.getBlock();
2981     EXPECT_NE(IPBB->end(), IP.getPoint());
2982   };
2983 
2984   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2985   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2986                        OMPBuilder.createMasked(Builder,
2987                                                BODYGENCB_WRAPPER(BodyGenCB),
2988                                                FINICB_WRAPPER(FiniCB), Filter));
2989   Builder.restoreIP(AfterIP);
2990   Value *EntryBBTI = EntryBB->getTerminator();
2991   EXPECT_NE(EntryBBTI, nullptr);
2992   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2993   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2994   EXPECT_TRUE(EntryBr->isConditional());
2995   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2996   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2997   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2998 
2999   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3000   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3001 
3002   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
3003   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
3004   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
3005   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
3006 
3007   CallInst *MaskedEndCI = nullptr;
3008   for (auto &FI : *ThenBB) {
3009     Instruction *cur = &FI;
3010     if (isa<CallInst>(cur)) {
3011       MaskedEndCI = cast<CallInst>(cur);
3012       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
3013         break;
3014       MaskedEndCI = nullptr;
3015     }
3016   }
3017   EXPECT_NE(MaskedEndCI, nullptr);
3018   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
3019   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
3020   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
3021 }
3022 
3023 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
3024   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3025   OpenMPIRBuilder OMPBuilder(*M);
3026   OMPBuilder.initialize();
3027   F->setName("func");
3028   IRBuilder<> Builder(BB);
3029 
3030   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3031 
3032   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3033 
3034   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3035     // actual start for bodyCB
3036     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3037     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3038     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3039 
3040     // body begin
3041     Builder.restoreIP(CodeGenIP);
3042     Builder.CreateStore(F->arg_begin(), PrivAI);
3043     Value *PrivLoad =
3044         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3045     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3046   };
3047 
3048   auto FiniCB = [&](InsertPointTy IP) {
3049     BasicBlock *IPBB = IP.getBlock();
3050     EXPECT_NE(IPBB->end(), IP.getPoint());
3051   };
3052   BasicBlock *EntryBB = Builder.GetInsertBlock();
3053 
3054   ASSERT_EXPECTED_INIT(
3055       OpenMPIRBuilder::InsertPointTy, AfterIP,
3056       OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3057                                 FINICB_WRAPPER(FiniCB), "testCRT", nullptr));
3058   Builder.restoreIP(AfterIP);
3059 
3060   CallInst *CriticalEntryCI = nullptr;
3061   for (auto &EI : *EntryBB) {
3062     Instruction *cur = &EI;
3063     if (isa<CallInst>(cur)) {
3064       CriticalEntryCI = cast<CallInst>(cur);
3065       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
3066         break;
3067       CriticalEntryCI = nullptr;
3068     }
3069   }
3070   EXPECT_NE(CriticalEntryCI, nullptr);
3071   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
3072   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
3073   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
3074 
3075   CallInst *CriticalEndCI = nullptr;
3076   for (auto &FI : *EntryBB) {
3077     Instruction *cur = &FI;
3078     if (isa<CallInst>(cur)) {
3079       CriticalEndCI = cast<CallInst>(cur);
3080       if (CriticalEndCI->getCalledFunction()->getName() ==
3081           "__kmpc_end_critical")
3082         break;
3083       CriticalEndCI = nullptr;
3084     }
3085   }
3086   EXPECT_NE(CriticalEndCI, nullptr);
3087   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
3088   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
3089   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
3090   PointerType *CriticalNamePtrTy = PointerType::getUnqual(Ctx);
3091   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
3092   GlobalVariable *GV =
3093       dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2));
3094   ASSERT_NE(GV, nullptr);
3095   EXPECT_EQ(GV->getType(), CriticalNamePtrTy);
3096   const DataLayout &DL = M->getDataLayout();
3097   const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy);
3098   const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace());
3099   if (const llvm::MaybeAlign Alignment = GV->getAlign())
3100     EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign));
3101 }
3102 
3103 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
3104   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3105   OpenMPIRBuilder OMPBuilder(*M);
3106   OMPBuilder.initialize();
3107   F->setName("func");
3108   IRBuilder<> Builder(BB);
3109   LLVMContext &Ctx = M->getContext();
3110 
3111   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3112 
3113   InsertPointTy AllocaIP(&F->getEntryBlock(),
3114                          F->getEntryBlock().getFirstInsertionPt());
3115 
3116   unsigned NumLoops = 2;
3117   SmallVector<Value *, 2> StoreValues;
3118   Type *LCTy = Type::getInt64Ty(Ctx);
3119   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3120   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3121 
3122   // Test for "#omp ordered depend(source)"
3123   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3124                                                    StoreValues, ".cnt.addr",
3125                                                    /*IsDependSource=*/true));
3126 
3127   Builder.CreateRetVoid();
3128   OMPBuilder.finalize();
3129   EXPECT_FALSE(verifyModule(*M, &errs()));
3130 
3131   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3132   ASSERT_NE(AllocInst, nullptr);
3133   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3134   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3135   EXPECT_TRUE(
3136       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3137 
3138   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3139   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3140     GetElementPtrInst *DependAddrGEPIter =
3141         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3142     ASSERT_NE(DependAddrGEPIter, nullptr);
3143     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3144     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3145     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3146     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3147     ASSERT_NE(FirstIdx, nullptr);
3148     ASSERT_NE(SecondIdx, nullptr);
3149     EXPECT_EQ(FirstIdx->getValue(), 0);
3150     EXPECT_EQ(SecondIdx->getValue(), Iter);
3151     StoreInst *StoreValue =
3152         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3153     ASSERT_NE(StoreValue, nullptr);
3154     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3155     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3156     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3157     IterInst = dyn_cast<Instruction>(StoreValue);
3158   }
3159 
3160   GetElementPtrInst *DependBaseAddrGEP =
3161       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3162   ASSERT_NE(DependBaseAddrGEP, nullptr);
3163   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3164   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3165   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3166   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3167   ASSERT_NE(FirstIdx, nullptr);
3168   ASSERT_NE(SecondIdx, nullptr);
3169   EXPECT_EQ(FirstIdx->getValue(), 0);
3170   EXPECT_EQ(SecondIdx->getValue(), 0);
3171 
3172   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3173   ASSERT_NE(GTID, nullptr);
3174   EXPECT_EQ(GTID->arg_size(), 1U);
3175   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3176   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3177   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3178 
3179   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3180   ASSERT_NE(Depend, nullptr);
3181   EXPECT_EQ(Depend->arg_size(), 3U);
3182   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
3183   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3184   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3185   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3186 }
3187 
3188 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
3189   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3190   OpenMPIRBuilder OMPBuilder(*M);
3191   OMPBuilder.initialize();
3192   F->setName("func");
3193   IRBuilder<> Builder(BB);
3194   LLVMContext &Ctx = M->getContext();
3195 
3196   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3197 
3198   InsertPointTy AllocaIP(&F->getEntryBlock(),
3199                          F->getEntryBlock().getFirstInsertionPt());
3200 
3201   unsigned NumLoops = 2;
3202   SmallVector<Value *, 2> StoreValues;
3203   Type *LCTy = Type::getInt64Ty(Ctx);
3204   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3205   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3206 
3207   // Test for "#omp ordered depend(sink: vec)"
3208   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3209                                                    StoreValues, ".cnt.addr",
3210                                                    /*IsDependSource=*/false));
3211 
3212   Builder.CreateRetVoid();
3213   OMPBuilder.finalize();
3214   EXPECT_FALSE(verifyModule(*M, &errs()));
3215 
3216   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3217   ASSERT_NE(AllocInst, nullptr);
3218   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3219   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3220   EXPECT_TRUE(
3221       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3222 
3223   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3224   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3225     GetElementPtrInst *DependAddrGEPIter =
3226         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3227     ASSERT_NE(DependAddrGEPIter, nullptr);
3228     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3229     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3230     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3231     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3232     ASSERT_NE(FirstIdx, nullptr);
3233     ASSERT_NE(SecondIdx, nullptr);
3234     EXPECT_EQ(FirstIdx->getValue(), 0);
3235     EXPECT_EQ(SecondIdx->getValue(), Iter);
3236     StoreInst *StoreValue =
3237         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3238     ASSERT_NE(StoreValue, nullptr);
3239     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3240     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3241     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3242     IterInst = dyn_cast<Instruction>(StoreValue);
3243   }
3244 
3245   GetElementPtrInst *DependBaseAddrGEP =
3246       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3247   ASSERT_NE(DependBaseAddrGEP, nullptr);
3248   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3249   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3250   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3251   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3252   ASSERT_NE(FirstIdx, nullptr);
3253   ASSERT_NE(SecondIdx, nullptr);
3254   EXPECT_EQ(FirstIdx->getValue(), 0);
3255   EXPECT_EQ(SecondIdx->getValue(), 0);
3256 
3257   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3258   ASSERT_NE(GTID, nullptr);
3259   EXPECT_EQ(GTID->arg_size(), 1U);
3260   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3261   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3262   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3263 
3264   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3265   ASSERT_NE(Depend, nullptr);
3266   EXPECT_EQ(Depend->arg_size(), 3U);
3267   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
3268   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3269   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3270   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3271 }
3272 
3273 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
3274   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3275   OpenMPIRBuilder OMPBuilder(*M);
3276   OMPBuilder.initialize();
3277   F->setName("func");
3278   IRBuilder<> Builder(BB);
3279 
3280   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3281 
3282   AllocaInst *PrivAI =
3283       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3284 
3285   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3286     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3287     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3288     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3289 
3290     Builder.restoreIP(CodeGenIP);
3291     Builder.CreateStore(F->arg_begin(), PrivAI);
3292     Value *PrivLoad =
3293         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3294     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3295   };
3296 
3297   auto FiniCB = [&](InsertPointTy IP) {
3298     BasicBlock *IPBB = IP.getBlock();
3299     EXPECT_NE(IPBB->end(), IP.getPoint());
3300   };
3301 
3302   // Test for "#omp ordered [threads]"
3303   BasicBlock *EntryBB = Builder.GetInsertBlock();
3304   ASSERT_EXPECTED_INIT(
3305       OpenMPIRBuilder::InsertPointTy, AfterIP,
3306       OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3307                                           FINICB_WRAPPER(FiniCB), true));
3308   Builder.restoreIP(AfterIP);
3309 
3310   Builder.CreateRetVoid();
3311   OMPBuilder.finalize();
3312   EXPECT_FALSE(verifyModule(*M, &errs()));
3313 
3314   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3315 
3316   CallInst *OrderedEntryCI = nullptr;
3317   for (auto &EI : *EntryBB) {
3318     Instruction *Cur = &EI;
3319     if (isa<CallInst>(Cur)) {
3320       OrderedEntryCI = cast<CallInst>(Cur);
3321       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3322         break;
3323       OrderedEntryCI = nullptr;
3324     }
3325   }
3326   EXPECT_NE(OrderedEntryCI, nullptr);
3327   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
3328   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
3329   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
3330 
3331   CallInst *OrderedEndCI = nullptr;
3332   for (auto &FI : *EntryBB) {
3333     Instruction *Cur = &FI;
3334     if (isa<CallInst>(Cur)) {
3335       OrderedEndCI = cast<CallInst>(Cur);
3336       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3337         break;
3338       OrderedEndCI = nullptr;
3339     }
3340   }
3341   EXPECT_NE(OrderedEndCI, nullptr);
3342   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
3343   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
3344   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
3345 }
3346 
3347 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
3348   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3349   OpenMPIRBuilder OMPBuilder(*M);
3350   OMPBuilder.initialize();
3351   F->setName("func");
3352   IRBuilder<> Builder(BB);
3353 
3354   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3355 
3356   AllocaInst *PrivAI =
3357       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3358 
3359   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3360     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3361     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3362     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3363 
3364     Builder.restoreIP(CodeGenIP);
3365     Builder.CreateStore(F->arg_begin(), PrivAI);
3366     Value *PrivLoad =
3367         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3368     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3369   };
3370 
3371   auto FiniCB = [&](InsertPointTy IP) {
3372     BasicBlock *IPBB = IP.getBlock();
3373     EXPECT_NE(IPBB->end(), IP.getPoint());
3374   };
3375 
3376   // Test for "#omp ordered simd"
3377   BasicBlock *EntryBB = Builder.GetInsertBlock();
3378   ASSERT_EXPECTED_INIT(
3379       OpenMPIRBuilder::InsertPointTy, AfterIP,
3380       OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3381                                           FINICB_WRAPPER(FiniCB), false));
3382   Builder.restoreIP(AfterIP);
3383 
3384   Builder.CreateRetVoid();
3385   OMPBuilder.finalize();
3386   EXPECT_FALSE(verifyModule(*M, &errs()));
3387 
3388   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3389 
3390   CallInst *OrderedEntryCI = nullptr;
3391   for (auto &EI : *EntryBB) {
3392     Instruction *Cur = &EI;
3393     if (isa<CallInst>(Cur)) {
3394       OrderedEntryCI = cast<CallInst>(Cur);
3395       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3396         break;
3397       OrderedEntryCI = nullptr;
3398     }
3399   }
3400   EXPECT_EQ(OrderedEntryCI, nullptr);
3401 
3402   CallInst *OrderedEndCI = nullptr;
3403   for (auto &FI : *EntryBB) {
3404     Instruction *Cur = &FI;
3405     if (isa<CallInst>(Cur)) {
3406       OrderedEndCI = cast<CallInst>(Cur);
3407       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3408         break;
3409       OrderedEndCI = nullptr;
3410     }
3411   }
3412   EXPECT_EQ(OrderedEndCI, nullptr);
3413 }
3414 
3415 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
3416   OpenMPIRBuilder OMPBuilder(*M);
3417   OMPBuilder.initialize();
3418   F->setName("func");
3419   IRBuilder<> Builder(BB);
3420 
3421   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3422 
3423   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3424   AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy());
3425   AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy());
3426 
3427   BasicBlock *EntryBB = BB;
3428 
3429   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
3430                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
3431 
3432   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
3433 
3434   EXPECT_NE(EntryBr, nullptr);
3435   EXPECT_TRUE(EntryBr->isConditional());
3436 
3437   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
3438   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
3439   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
3440 
3441   EXPECT_NE(CMP, nullptr);
3442   EXPECT_NE(NotMasterBB, nullptr);
3443   EXPECT_NE(CopyinEnd, nullptr);
3444 
3445   BranchInst *NotMasterBr =
3446       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
3447   EXPECT_NE(NotMasterBr, nullptr);
3448   EXPECT_FALSE(NotMasterBr->isConditional());
3449   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
3450 }
3451 
3452 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
3453   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3454   OpenMPIRBuilder OMPBuilder(*M);
3455   OMPBuilder.initialize();
3456   F->setName("func");
3457   IRBuilder<> Builder(BB);
3458 
3459   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3460 
3461   AllocaInst *PrivAI = nullptr;
3462 
3463   BasicBlock *EntryBB = nullptr;
3464   BasicBlock *ThenBB = nullptr;
3465 
3466   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3467     if (AllocaIP.isSet())
3468       Builder.restoreIP(AllocaIP);
3469     else
3470       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3471     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3472     Builder.CreateStore(F->arg_begin(), PrivAI);
3473 
3474     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3475     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3476     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3477 
3478     Builder.restoreIP(CodeGenIP);
3479 
3480     // collect some info for checks later
3481     ThenBB = Builder.GetInsertBlock();
3482     EntryBB = ThenBB->getUniquePredecessor();
3483 
3484     // simple instructions for body
3485     Value *PrivLoad =
3486         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3487     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3488   };
3489 
3490   auto FiniCB = [&](InsertPointTy IP) {
3491     BasicBlock *IPBB = IP.getBlock();
3492     EXPECT_NE(IPBB->end(), IP.getPoint());
3493   };
3494 
3495   ASSERT_EXPECTED_INIT(
3496       OpenMPIRBuilder::InsertPointTy, AfterIP,
3497       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3498                               FINICB_WRAPPER(FiniCB), /*IsNowait*/ false));
3499   Builder.restoreIP(AfterIP);
3500   Value *EntryBBTI = EntryBB->getTerminator();
3501   EXPECT_NE(EntryBBTI, nullptr);
3502   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3503   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3504   EXPECT_TRUE(EntryBr->isConditional());
3505   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3506   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3507   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3508 
3509   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3510   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3511 
3512   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3513   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3514   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3515   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3516 
3517   CallInst *SingleEndCI = nullptr;
3518   for (auto &FI : *ThenBB) {
3519     Instruction *cur = &FI;
3520     if (isa<CallInst>(cur)) {
3521       SingleEndCI = cast<CallInst>(cur);
3522       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3523         break;
3524       SingleEndCI = nullptr;
3525     }
3526   }
3527   EXPECT_NE(SingleEndCI, nullptr);
3528   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3529   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3530   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3531 
3532   bool FoundBarrier = false;
3533   for (auto &FI : *ExitBB) {
3534     Instruction *cur = &FI;
3535     if (auto CI = dyn_cast<CallInst>(cur)) {
3536       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3537         FoundBarrier = true;
3538         break;
3539       }
3540     }
3541   }
3542   EXPECT_TRUE(FoundBarrier);
3543 }
3544 
3545 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
3546   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3547   OpenMPIRBuilder OMPBuilder(*M);
3548   OMPBuilder.initialize();
3549   F->setName("func");
3550   IRBuilder<> Builder(BB);
3551 
3552   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3553 
3554   AllocaInst *PrivAI = nullptr;
3555 
3556   BasicBlock *EntryBB = nullptr;
3557   BasicBlock *ThenBB = nullptr;
3558 
3559   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3560     if (AllocaIP.isSet())
3561       Builder.restoreIP(AllocaIP);
3562     else
3563       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3564     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3565     Builder.CreateStore(F->arg_begin(), PrivAI);
3566 
3567     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3568     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3569     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3570 
3571     Builder.restoreIP(CodeGenIP);
3572 
3573     // collect some info for checks later
3574     ThenBB = Builder.GetInsertBlock();
3575     EntryBB = ThenBB->getUniquePredecessor();
3576 
3577     // simple instructions for body
3578     Value *PrivLoad =
3579         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3580     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3581   };
3582 
3583   auto FiniCB = [&](InsertPointTy IP) {
3584     BasicBlock *IPBB = IP.getBlock();
3585     EXPECT_NE(IPBB->end(), IP.getPoint());
3586   };
3587 
3588   ASSERT_EXPECTED_INIT(
3589       OpenMPIRBuilder::InsertPointTy, AfterIP,
3590       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3591                               FINICB_WRAPPER(FiniCB), /*IsNowait*/ true));
3592   Builder.restoreIP(AfterIP);
3593   Value *EntryBBTI = EntryBB->getTerminator();
3594   EXPECT_NE(EntryBBTI, nullptr);
3595   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3596   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3597   EXPECT_TRUE(EntryBr->isConditional());
3598   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3599   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3600   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3601 
3602   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3603   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3604 
3605   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3606   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3607   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3608   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3609 
3610   CallInst *SingleEndCI = nullptr;
3611   for (auto &FI : *ThenBB) {
3612     Instruction *cur = &FI;
3613     if (isa<CallInst>(cur)) {
3614       SingleEndCI = cast<CallInst>(cur);
3615       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3616         break;
3617       SingleEndCI = nullptr;
3618     }
3619   }
3620   EXPECT_NE(SingleEndCI, nullptr);
3621   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3622   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3623   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3624 
3625   CallInst *ExitBarrier = nullptr;
3626   for (auto &FI : *ExitBB) {
3627     Instruction *cur = &FI;
3628     if (auto CI = dyn_cast<CallInst>(cur)) {
3629       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3630         ExitBarrier = CI;
3631         break;
3632       }
3633     }
3634   }
3635   EXPECT_EQ(ExitBarrier, nullptr);
3636 }
3637 
3638 // Helper class to check each instruction of a BB.
3639 class BBInstIter {
3640   BasicBlock *BB;
3641   BasicBlock::iterator BBI;
3642 
3643 public:
3644   BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {}
3645 
3646   bool hasNext() const { return BBI != BB->end(); }
3647 
3648   template <typename InstTy> InstTy *next() {
3649     if (!hasNext())
3650       return nullptr;
3651     Instruction *Cur = &*BBI++;
3652     if (!isa<InstTy>(Cur))
3653       return nullptr;
3654     return cast<InstTy>(Cur);
3655   }
3656 };
3657 
3658 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
3659   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3660   OpenMPIRBuilder OMPBuilder(*M);
3661   OMPBuilder.initialize();
3662   F->setName("func");
3663   IRBuilder<> Builder(BB);
3664 
3665   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3666 
3667   AllocaInst *PrivAI = nullptr;
3668 
3669   BasicBlock *EntryBB = nullptr;
3670   BasicBlock *ThenBB = nullptr;
3671 
3672   Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType());
3673   Builder.CreateStore(F->arg_begin(), CPVar);
3674 
3675   FunctionType *CopyFuncTy = FunctionType::get(
3676       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false);
3677   Function *CopyFunc =
3678       Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
3679 
3680   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3681     if (AllocaIP.isSet())
3682       Builder.restoreIP(AllocaIP);
3683     else
3684       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3685     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3686     Builder.CreateStore(F->arg_begin(), PrivAI);
3687 
3688     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3689     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3690     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3691 
3692     Builder.restoreIP(CodeGenIP);
3693 
3694     // collect some info for checks later
3695     ThenBB = Builder.GetInsertBlock();
3696     EntryBB = ThenBB->getUniquePredecessor();
3697 
3698     // simple instructions for body
3699     Value *PrivLoad =
3700         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3701     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3702   };
3703 
3704   auto FiniCB = [&](InsertPointTy IP) {
3705     BasicBlock *IPBB = IP.getBlock();
3706     // IP must be before the unconditional branch to ExitBB
3707     EXPECT_NE(IPBB->end(), IP.getPoint());
3708   };
3709 
3710   ASSERT_EXPECTED_INIT(
3711       OpenMPIRBuilder::InsertPointTy, AfterIP,
3712       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3713                               FINICB_WRAPPER(FiniCB),
3714                               /*IsNowait*/ false, {CPVar}, {CopyFunc}));
3715   Builder.restoreIP(AfterIP);
3716   Value *EntryBBTI = EntryBB->getTerminator();
3717   EXPECT_NE(EntryBBTI, nullptr);
3718   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3719   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3720   EXPECT_TRUE(EntryBr->isConditional());
3721   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3722   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3723   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3724 
3725   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3726   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3727 
3728   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3729   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3730   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3731   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3732 
3733   // check ThenBB
3734   BBInstIter ThenBBI(ThenBB);
3735   // load PrivAI
3736   auto *PrivLI = ThenBBI.next<LoadInst>();
3737   EXPECT_NE(PrivLI, nullptr);
3738   EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI);
3739   // icmp
3740   EXPECT_TRUE(ThenBBI.next<ICmpInst>());
3741   // store 1, DidIt
3742   auto *DidItSI = ThenBBI.next<StoreInst>();
3743   EXPECT_NE(DidItSI, nullptr);
3744   EXPECT_EQ(DidItSI->getValueOperand(),
3745             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
3746   Value *DidIt = DidItSI->getPointerOperand();
3747   // call __kmpc_end_single
3748   auto *SingleEndCI = ThenBBI.next<CallInst>();
3749   EXPECT_NE(SingleEndCI, nullptr);
3750   EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single");
3751   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3752   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3753   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3754   // br ExitBB
3755   auto *ExitBBBI = ThenBBI.next<BranchInst>();
3756   EXPECT_NE(ExitBBBI, nullptr);
3757   EXPECT_TRUE(ExitBBBI->isUnconditional());
3758   EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB);
3759   EXPECT_FALSE(ThenBBI.hasNext());
3760 
3761   // check ExitBB
3762   BBInstIter ExitBBI(ExitBB);
3763   // call __kmpc_global_thread_num
3764   auto *ThreadNumCI = ExitBBI.next<CallInst>();
3765   EXPECT_NE(ThreadNumCI, nullptr);
3766   EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(),
3767             "__kmpc_global_thread_num");
3768   // load DidIt
3769   auto *DidItLI = ExitBBI.next<LoadInst>();
3770   EXPECT_NE(DidItLI, nullptr);
3771   EXPECT_EQ(DidItLI->getPointerOperand(), DidIt);
3772   // call __kmpc_copyprivate
3773   auto *CopyPrivateCI = ExitBBI.next<CallInst>();
3774   EXPECT_NE(CopyPrivateCI, nullptr);
3775   EXPECT_EQ(CopyPrivateCI->arg_size(), 6U);
3776   EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3)));
3777   EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar);
3778   EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4)));
3779   EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc);
3780   EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5)));
3781   DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5));
3782   EXPECT_EQ(DidItLI->getOperand(0), DidIt);
3783   EXPECT_FALSE(ExitBBI.hasNext());
3784 }
3785 
3786 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
3787   OpenMPIRBuilder OMPBuilder(*M);
3788   OMPBuilder.initialize();
3789   F->setName("func");
3790   IRBuilder<> Builder(BB);
3791 
3792   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3793 
3794   Type *Float32 = Type::getFloatTy(M->getContext());
3795   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3796   XVal->setName("AtomicVar");
3797   AllocaInst *VVal = Builder.CreateAlloca(Float32);
3798   VVal->setName("AtomicRead");
3799   AtomicOrdering AO = AtomicOrdering::Monotonic;
3800   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3801   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
3802 
3803   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3804 
3805   IntegerType *IntCastTy =
3806       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3807 
3808   LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
3809   EXPECT_TRUE(AtomicLoad->isAtomic());
3810   EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
3811 
3812   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
3813   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
3814   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
3815   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
3816 
3817   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
3818   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
3819   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
3820 
3821   Builder.CreateRetVoid();
3822   OMPBuilder.finalize();
3823   EXPECT_FALSE(verifyModule(*M, &errs()));
3824 }
3825 
3826 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
3827   OpenMPIRBuilder OMPBuilder(*M);
3828   OMPBuilder.initialize();
3829   F->setName("func");
3830   IRBuilder<> Builder(BB);
3831 
3832   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3833 
3834   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3835   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3836   XVal->setName("AtomicVar");
3837   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3838   VVal->setName("AtomicRead");
3839   AtomicOrdering AO = AtomicOrdering::Monotonic;
3840   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3841   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3842 
3843   BasicBlock *EntryBB = BB;
3844 
3845   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3846   LoadInst *AtomicLoad = nullptr;
3847   StoreInst *StoreofAtomic = nullptr;
3848 
3849   for (Instruction &Cur : *EntryBB) {
3850     if (isa<LoadInst>(Cur)) {
3851       AtomicLoad = cast<LoadInst>(&Cur);
3852       if (AtomicLoad->getPointerOperand() == XVal)
3853         continue;
3854       AtomicLoad = nullptr;
3855     } else if (isa<StoreInst>(Cur)) {
3856       StoreofAtomic = cast<StoreInst>(&Cur);
3857       if (StoreofAtomic->getPointerOperand() == VVal)
3858         continue;
3859       StoreofAtomic = nullptr;
3860     }
3861   }
3862 
3863   EXPECT_NE(AtomicLoad, nullptr);
3864   EXPECT_TRUE(AtomicLoad->isAtomic());
3865 
3866   EXPECT_NE(StoreofAtomic, nullptr);
3867   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
3868 
3869   Builder.CreateRetVoid();
3870   OMPBuilder.finalize();
3871 
3872   EXPECT_FALSE(verifyModule(*M, &errs()));
3873 }
3874 
3875 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
3876   OpenMPIRBuilder OMPBuilder(*M);
3877   OMPBuilder.initialize();
3878   F->setName("func");
3879   IRBuilder<> Builder(BB);
3880 
3881   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3882 
3883   LLVMContext &Ctx = M->getContext();
3884   Type *Float32 = Type::getFloatTy(Ctx);
3885   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3886   XVal->setName("AtomicVar");
3887   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3888   AtomicOrdering AO = AtomicOrdering::Monotonic;
3889   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
3890 
3891   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3892 
3893   IntegerType *IntCastTy =
3894       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3895 
3896   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
3897 
3898   StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode());
3899   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
3900   EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
3901   EXPECT_TRUE(StoreofAtomic->isAtomic());
3902 
3903   Builder.CreateRetVoid();
3904   OMPBuilder.finalize();
3905   EXPECT_FALSE(verifyModule(*M, &errs()));
3906 }
3907 
3908 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
3909   OpenMPIRBuilder OMPBuilder(*M);
3910   OMPBuilder.initialize();
3911   F->setName("func");
3912   IRBuilder<> Builder(BB);
3913 
3914   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3915 
3916   LLVMContext &Ctx = M->getContext();
3917   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3918   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3919   XVal->setName("AtomicVar");
3920   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3921   AtomicOrdering AO = AtomicOrdering::Monotonic;
3922   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3923 
3924   BasicBlock *EntryBB = BB;
3925 
3926   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3927 
3928   StoreInst *StoreofAtomic = nullptr;
3929 
3930   for (Instruction &Cur : *EntryBB) {
3931     if (isa<StoreInst>(Cur)) {
3932       StoreofAtomic = cast<StoreInst>(&Cur);
3933       if (StoreofAtomic->getPointerOperand() == XVal)
3934         continue;
3935       StoreofAtomic = nullptr;
3936     }
3937   }
3938 
3939   EXPECT_NE(StoreofAtomic, nullptr);
3940   EXPECT_TRUE(StoreofAtomic->isAtomic());
3941   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3942 
3943   Builder.CreateRetVoid();
3944   OMPBuilder.finalize();
3945   EXPECT_FALSE(verifyModule(*M, &errs()));
3946 }
3947 
3948 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3949   OpenMPIRBuilder OMPBuilder(*M);
3950   OMPBuilder.initialize();
3951   F->setName("func");
3952   IRBuilder<> Builder(BB);
3953 
3954   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3955 
3956   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3957   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3958   XVal->setName("AtomicVar");
3959   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3960   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3961   AtomicOrdering AO = AtomicOrdering::Monotonic;
3962   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3963   Value *Expr = nullptr;
3964   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3965   bool IsXLHSInRHSPart = false;
3966 
3967   BasicBlock *EntryBB = BB;
3968   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3969                                           EntryBB->getFirstInsertionPt());
3970   Value *Sub = nullptr;
3971 
3972   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3973     Sub = IRB.CreateSub(ConstVal, Atomic);
3974     return Sub;
3975   };
3976   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
3977                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
3978                                                      AO, RMWOp, UpdateOp,
3979                                                      IsXLHSInRHSPart));
3980   Builder.restoreIP(AfterIP);
3981   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3982   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3983   EXPECT_NE(ContTI, nullptr);
3984   BasicBlock *EndBB = ContTI->getSuccessor(0);
3985   EXPECT_TRUE(ContTI->isConditional());
3986   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3987   EXPECT_NE(EndBB, nullptr);
3988 
3989   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3990   EXPECT_NE(Phi, nullptr);
3991   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3992   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3993   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3994 
3995   EXPECT_EQ(Sub->getNumUses(), 1U);
3996   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3997   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3998 
3999   ExtractValueInst *ExVI1 =
4000       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4001   EXPECT_NE(ExVI1, nullptr);
4002   AtomicCmpXchgInst *CmpExchg =
4003       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4004   EXPECT_NE(CmpExchg, nullptr);
4005   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4006   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4007   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4008 
4009   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4010   EXPECT_NE(Ld, nullptr);
4011   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4012 
4013   Builder.CreateRetVoid();
4014   OMPBuilder.finalize();
4015   EXPECT_FALSE(verifyModule(*M, &errs()));
4016 }
4017 
4018 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
4019   OpenMPIRBuilder OMPBuilder(*M);
4020   OMPBuilder.initialize();
4021   F->setName("func");
4022   IRBuilder<> Builder(BB);
4023 
4024   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4025 
4026   Type *FloatTy = Type::getFloatTy(M->getContext());
4027   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
4028   XVal->setName("AtomicVar");
4029   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
4030   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
4031   AtomicOrdering AO = AtomicOrdering::Monotonic;
4032   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
4033   Value *Expr = nullptr;
4034   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
4035   bool IsXLHSInRHSPart = false;
4036 
4037   BasicBlock *EntryBB = BB;
4038   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4039                                           EntryBB->getFirstInsertionPt());
4040   Value *Sub = nullptr;
4041 
4042   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
4043     Sub = IRB.CreateFSub(ConstVal, Atomic);
4044     return Sub;
4045   };
4046   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4047                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
4048                                                      AO, RMWOp, UpdateOp,
4049                                                      IsXLHSInRHSPart));
4050   Builder.restoreIP(AfterIP);
4051   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
4052   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
4053   EXPECT_NE(ContTI, nullptr);
4054   BasicBlock *EndBB = ContTI->getSuccessor(0);
4055   EXPECT_TRUE(ContTI->isConditional());
4056   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
4057   EXPECT_NE(EndBB, nullptr);
4058 
4059   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
4060   EXPECT_NE(Phi, nullptr);
4061   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
4062   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
4063   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
4064 
4065   EXPECT_EQ(Sub->getNumUses(), 1U);
4066   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
4067   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
4068 
4069   ExtractValueInst *ExVI1 =
4070       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4071   EXPECT_NE(ExVI1, nullptr);
4072   AtomicCmpXchgInst *CmpExchg =
4073       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4074   EXPECT_NE(CmpExchg, nullptr);
4075   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4076   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4077   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4078 
4079   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4080   EXPECT_NE(Ld, nullptr);
4081   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4082   Builder.CreateRetVoid();
4083   OMPBuilder.finalize();
4084   EXPECT_FALSE(verifyModule(*M, &errs()));
4085 }
4086 
4087 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
4088   OpenMPIRBuilder OMPBuilder(*M);
4089   OMPBuilder.initialize();
4090   F->setName("func");
4091   IRBuilder<> Builder(BB);
4092 
4093   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4094 
4095   Type *IntTy = Type::getInt32Ty(M->getContext());
4096   AllocaInst *XVal = Builder.CreateAlloca(IntTy);
4097   XVal->setName("AtomicVar");
4098   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
4099   OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
4100   AtomicOrdering AO = AtomicOrdering::Monotonic;
4101   Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
4102   Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
4103   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
4104   bool IsXLHSInRHSPart = false;
4105 
4106   BasicBlock *EntryBB = BB;
4107   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4108                                           EntryBB->getFirstInsertionPt());
4109   Value *Sub = nullptr;
4110 
4111   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
4112     Sub = IRB.CreateSub(ConstVal, Atomic);
4113     return Sub;
4114   };
4115   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4116                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
4117                                                      AO, RMWOp, UpdateOp,
4118                                                      IsXLHSInRHSPart));
4119   Builder.restoreIP(AfterIP);
4120   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
4121   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
4122   EXPECT_NE(ContTI, nullptr);
4123   BasicBlock *EndBB = ContTI->getSuccessor(0);
4124   EXPECT_TRUE(ContTI->isConditional());
4125   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
4126   EXPECT_NE(EndBB, nullptr);
4127 
4128   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
4129   EXPECT_NE(Phi, nullptr);
4130   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
4131   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
4132   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
4133 
4134   EXPECT_EQ(Sub->getNumUses(), 1U);
4135   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
4136   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
4137 
4138   ExtractValueInst *ExVI1 =
4139       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4140   EXPECT_NE(ExVI1, nullptr);
4141   AtomicCmpXchgInst *CmpExchg =
4142       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4143   EXPECT_NE(CmpExchg, nullptr);
4144   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4145   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4146   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4147 
4148   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4149   EXPECT_NE(Ld, nullptr);
4150   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4151 
4152   Builder.CreateRetVoid();
4153   OMPBuilder.finalize();
4154   EXPECT_FALSE(verifyModule(*M, &errs()));
4155 }
4156 
4157 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
4158   OpenMPIRBuilder OMPBuilder(*M);
4159   OMPBuilder.initialize();
4160   F->setName("func");
4161   IRBuilder<> Builder(BB);
4162 
4163   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4164 
4165   LLVMContext &Ctx = M->getContext();
4166   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4167   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4168   XVal->setName("AtomicVar");
4169   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4170   VVal->setName("AtomicCapTar");
4171   StoreInst *Init =
4172       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4173 
4174   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
4175   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4176   AtomicOrdering AO = AtomicOrdering::Monotonic;
4177   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4178   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
4179   bool IsXLHSInRHSPart = true;
4180   bool IsPostfixUpdate = true;
4181   bool UpdateExpr = true;
4182 
4183   BasicBlock *EntryBB = BB;
4184   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4185                                           EntryBB->getFirstInsertionPt());
4186 
4187   // integer update - not used
4188   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
4189 
4190   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4191                        OMPBuilder.createAtomicCapture(
4192                            Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp,
4193                            UpdateExpr, IsPostfixUpdate, IsXLHSInRHSPart));
4194   Builder.restoreIP(AfterIP);
4195   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4196   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4197   EXPECT_NE(ARWM, nullptr);
4198   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
4199   EXPECT_EQ(ARWM->getOperation(), RMWOp);
4200   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
4201   EXPECT_NE(St, nullptr);
4202   EXPECT_EQ(St->getPointerOperand(), VVal);
4203 
4204   Builder.CreateRetVoid();
4205   OMPBuilder.finalize();
4206   EXPECT_FALSE(verifyModule(*M, &errs()));
4207 }
4208 
4209 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
4210   OpenMPIRBuilder OMPBuilder(*M);
4211   OMPBuilder.initialize();
4212   F->setName("func");
4213   IRBuilder<> Builder(BB);
4214 
4215   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4216 
4217   LLVMContext &Ctx = M->getContext();
4218   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4219   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4220   XVal->setName("x");
4221   StoreInst *Init =
4222       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4223 
4224   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
4225   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
4226   // V and R are not used in atomic compare
4227   OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
4228   OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
4229   AtomicOrdering AO = AtomicOrdering::Monotonic;
4230   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4231   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4232   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4233   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4234 
4235   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4236       Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
4237   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4238       Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
4239   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4240       Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));
4241 
4242   BasicBlock *EntryBB = BB;
4243   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4244   EXPECT_EQ(EntryBB->size(), 5U);
4245 
4246   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4247   EXPECT_NE(ARWM1, nullptr);
4248   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4249   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4250   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4251 
4252   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
4253   EXPECT_NE(ARWM2, nullptr);
4254   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4255   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4256   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
4257 
4258   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
4259   EXPECT_NE(AXCHG, nullptr);
4260   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
4261   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
4262   EXPECT_EQ(AXCHG->getNewValOperand(), D);
4263 
4264   Builder.CreateRetVoid();
4265   OMPBuilder.finalize();
4266   EXPECT_FALSE(verifyModule(*M, &errs()));
4267 }
4268 
4269 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
4270   OpenMPIRBuilder OMPBuilder(*M);
4271   OMPBuilder.initialize();
4272   F->setName("func");
4273   IRBuilder<> Builder(BB);
4274 
4275   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4276 
4277   LLVMContext &Ctx = M->getContext();
4278   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4279   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4280   XVal->setName("x");
4281   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4282   VVal->setName("v");
4283   AllocaInst *RVal = Builder.CreateAlloca(Int32);
4284   RVal->setName("r");
4285 
4286   StoreInst *Init =
4287       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4288 
4289   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
4290   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4291   OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
4292   OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
4293   OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};
4294 
4295   AtomicOrdering AO = AtomicOrdering::Monotonic;
4296   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4297   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4298   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4299   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4300 
4301   // { cond-update-stmt v = x; }
4302   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4303       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4304       /* IsPostfixUpdate */ false,
4305       /* IsFailOnly */ false));
4306   // { v = x; cond-update-stmt }
4307   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4308       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4309       /* IsPostfixUpdate */ true,
4310       /* IsFailOnly */ false));
4311   // if(x == e) { x = d; } else { v = x; }
4312   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4313       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4314       /* IsPostfixUpdate */ false,
4315       /* IsFailOnly */ true));
4316   // { r = x == e; if(r) { x = d; } }
4317   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4318       Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4319       /* IsPostfixUpdate */ false,
4320       /* IsFailOnly */ false));
4321   // { r = x == e; if(r) { x = d; } else { v = x; } }
4322   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4323       Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4324       /* IsPostfixUpdate */ false,
4325       /* IsFailOnly */ true));
4326 
4327   // { v = x; cond-update-stmt }
4328   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4329       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true,
4330       /* IsPostfixUpdate */ true,
4331       /* IsFailOnly */ false));
4332   // { cond-update-stmt v = x; }
4333   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4334       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false,
4335       /* IsPostfixUpdate */ false,
4336       /* IsFailOnly */ false));
4337 
4338   BasicBlock *EntryBB = BB;
4339   EXPECT_EQ(EntryBB->getParent()->size(), 5U);
4340   BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode());
4341   EXPECT_NE(Cont1, nullptr);
4342   BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode());
4343   EXPECT_NE(Exit1, nullptr);
4344   BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode());
4345   EXPECT_NE(Cont2, nullptr);
4346   BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode());
4347   EXPECT_NE(Exit2, nullptr);
4348 
4349   AtomicCmpXchgInst *CmpXchg1 =
4350       dyn_cast<AtomicCmpXchgInst>(Init->getNextNode());
4351   EXPECT_NE(CmpXchg1, nullptr);
4352   EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal);
4353   EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr);
4354   EXPECT_EQ(CmpXchg1->getNewValOperand(), D);
4355   ExtractValueInst *ExtVal1 =
4356       dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode());
4357   EXPECT_NE(ExtVal1, nullptr);
4358   EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1);
4359   EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U));
4360   ExtractValueInst *ExtVal2 =
4361       dyn_cast<ExtractValueInst>(ExtVal1->getNextNode());
4362   EXPECT_NE(ExtVal2, nullptr);
4363   EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1);
4364   EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U));
4365   SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode());
4366   EXPECT_NE(Sel1, nullptr);
4367   EXPECT_EQ(Sel1->getCondition(), ExtVal2);
4368   EXPECT_EQ(Sel1->getTrueValue(), Expr);
4369   EXPECT_EQ(Sel1->getFalseValue(), ExtVal1);
4370   StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode());
4371   EXPECT_NE(Store1, nullptr);
4372   EXPECT_EQ(Store1->getPointerOperand(), VVal);
4373   EXPECT_EQ(Store1->getValueOperand(), Sel1);
4374 
4375   AtomicCmpXchgInst *CmpXchg2 =
4376       dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode());
4377   EXPECT_NE(CmpXchg2, nullptr);
4378   EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal);
4379   EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr);
4380   EXPECT_EQ(CmpXchg2->getNewValOperand(), D);
4381   ExtractValueInst *ExtVal3 =
4382       dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode());
4383   EXPECT_NE(ExtVal3, nullptr);
4384   EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2);
4385   EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U));
4386   StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode());
4387   EXPECT_NE(Store2, nullptr);
4388   EXPECT_EQ(Store2->getPointerOperand(), VVal);
4389   EXPECT_EQ(Store2->getValueOperand(), ExtVal3);
4390 
4391   AtomicCmpXchgInst *CmpXchg3 =
4392       dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode());
4393   EXPECT_NE(CmpXchg3, nullptr);
4394   EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal);
4395   EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr);
4396   EXPECT_EQ(CmpXchg3->getNewValOperand(), D);
4397   ExtractValueInst *ExtVal4 =
4398       dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode());
4399   EXPECT_NE(ExtVal4, nullptr);
4400   EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3);
4401   EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U));
4402   ExtractValueInst *ExtVal5 =
4403       dyn_cast<ExtractValueInst>(ExtVal4->getNextNode());
4404   EXPECT_NE(ExtVal5, nullptr);
4405   EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3);
4406   EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U));
4407   BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode());
4408   EXPECT_NE(Br1, nullptr);
4409   EXPECT_EQ(Br1->isConditional(), true);
4410   EXPECT_EQ(Br1->getCondition(), ExtVal5);
4411   EXPECT_EQ(Br1->getSuccessor(0), Exit1);
4412   EXPECT_EQ(Br1->getSuccessor(1), Cont1);
4413 
4414   StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front());
4415   EXPECT_NE(Store3, nullptr);
4416   EXPECT_EQ(Store3->getPointerOperand(), VVal);
4417   EXPECT_EQ(Store3->getValueOperand(), ExtVal4);
4418   BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode());
4419   EXPECT_NE(Br2, nullptr);
4420   EXPECT_EQ(Br2->isUnconditional(), true);
4421   EXPECT_EQ(Br2->getSuccessor(0), Exit1);
4422 
4423   AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front());
4424   EXPECT_NE(CmpXchg4, nullptr);
4425   EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal);
4426   EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr);
4427   EXPECT_EQ(CmpXchg4->getNewValOperand(), D);
4428   ExtractValueInst *ExtVal6 =
4429       dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode());
4430   EXPECT_NE(ExtVal6, nullptr);
4431   EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4);
4432   EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U));
4433   ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode());
4434   EXPECT_NE(ZExt1, nullptr);
4435   EXPECT_EQ(ZExt1->getDestTy(), Int32);
4436   StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode());
4437   EXPECT_NE(Store4, nullptr);
4438   EXPECT_EQ(Store4->getPointerOperand(), RVal);
4439   EXPECT_EQ(Store4->getValueOperand(), ZExt1);
4440 
4441   AtomicCmpXchgInst *CmpXchg5 =
4442       dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode());
4443   EXPECT_NE(CmpXchg5, nullptr);
4444   EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal);
4445   EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr);
4446   EXPECT_EQ(CmpXchg5->getNewValOperand(), D);
4447   ExtractValueInst *ExtVal7 =
4448       dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode());
4449   EXPECT_NE(ExtVal7, nullptr);
4450   EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5);
4451   EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U));
4452   ExtractValueInst *ExtVal8 =
4453       dyn_cast<ExtractValueInst>(ExtVal7->getNextNode());
4454   EXPECT_NE(ExtVal8, nullptr);
4455   EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5);
4456   EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U));
4457   BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode());
4458   EXPECT_NE(Br3, nullptr);
4459   EXPECT_EQ(Br3->isConditional(), true);
4460   EXPECT_EQ(Br3->getCondition(), ExtVal8);
4461   EXPECT_EQ(Br3->getSuccessor(0), Exit2);
4462   EXPECT_EQ(Br3->getSuccessor(1), Cont2);
4463 
4464   StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front());
4465   EXPECT_NE(Store5, nullptr);
4466   EXPECT_EQ(Store5->getPointerOperand(), VVal);
4467   EXPECT_EQ(Store5->getValueOperand(), ExtVal7);
4468   BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode());
4469   EXPECT_NE(Br4, nullptr);
4470   EXPECT_EQ(Br4->isUnconditional(), true);
4471   EXPECT_EQ(Br4->getSuccessor(0), Exit2);
4472 
4473   ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front());
4474   EXPECT_NE(ExtVal9, nullptr);
4475   EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5);
4476   EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U));
4477   ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode());
4478   EXPECT_NE(ZExt2, nullptr);
4479   EXPECT_EQ(ZExt2->getDestTy(), Int32);
4480   StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode());
4481   EXPECT_NE(Store6, nullptr);
4482   EXPECT_EQ(Store6->getPointerOperand(), RVal);
4483   EXPECT_EQ(Store6->getValueOperand(), ZExt2);
4484 
4485   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode());
4486   EXPECT_NE(ARWM1, nullptr);
4487   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4488   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4489   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4490   StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode());
4491   EXPECT_NE(Store7, nullptr);
4492   EXPECT_EQ(Store7->getPointerOperand(), VVal);
4493   EXPECT_EQ(Store7->getValueOperand(), ARWM1);
4494 
4495   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode());
4496   EXPECT_NE(ARWM2, nullptr);
4497   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4498   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4499   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max);
4500   CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode());
4501   EXPECT_NE(Cmp1, nullptr);
4502   EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT);
4503   EXPECT_EQ(Cmp1->getOperand(0), ARWM2);
4504   EXPECT_EQ(Cmp1->getOperand(1), Expr);
4505   SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode());
4506   EXPECT_NE(Sel2, nullptr);
4507   EXPECT_EQ(Sel2->getCondition(), Cmp1);
4508   EXPECT_EQ(Sel2->getTrueValue(), Expr);
4509   EXPECT_EQ(Sel2->getFalseValue(), ARWM2);
4510   StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode());
4511   EXPECT_NE(Store8, nullptr);
4512   EXPECT_EQ(Store8->getPointerOperand(), VVal);
4513   EXPECT_EQ(Store8->getValueOperand(), Sel2);
4514 
4515   Builder.CreateRetVoid();
4516   OMPBuilder.finalize();
4517   EXPECT_FALSE(verifyModule(*M, &errs()));
4518 }
4519 
4520 TEST_F(OpenMPIRBuilderTest, CreateTeams) {
4521   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4522   OpenMPIRBuilder OMPBuilder(*M);
4523   OMPBuilder.Config.IsTargetDevice = false;
4524   OMPBuilder.initialize();
4525   F->setName("func");
4526   IRBuilder<> Builder(BB);
4527 
4528   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
4529   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
4530   Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
4531 
4532   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4533     Builder.restoreIP(AllocaIP);
4534     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
4535                                                 "bodygen.alloca128");
4536 
4537     Builder.restoreIP(CodeGenIP);
4538     // Loading and storing captured pointer and values
4539     Builder.CreateStore(Val128, Local128);
4540     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
4541                                       "bodygen.load32");
4542 
4543     LoadInst *PrivLoad128 = Builder.CreateLoad(
4544         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
4545     Value *Cmp = Builder.CreateICmpNE(
4546         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
4547     Instruction *ThenTerm, *ElseTerm;
4548     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
4549                                   &ThenTerm, &ElseTerm);
4550     return Error::success();
4551   };
4552 
4553   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4554   ASSERT_EXPECTED_INIT(
4555       OpenMPIRBuilder::InsertPointTy, AfterIP,
4556       OMPBuilder.createTeams(Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
4557                              /*NumTeamsUpper=*/nullptr,
4558                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4559   Builder.restoreIP(AfterIP);
4560 
4561   OMPBuilder.finalize();
4562   Builder.CreateRetVoid();
4563 
4564   EXPECT_FALSE(verifyModule(*M, &errs()));
4565 
4566   CallInst *TeamsForkCall = dyn_cast<CallInst>(
4567       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)
4568           ->user_back());
4569 
4570   // Verify the Ident argument
4571   GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0));
4572   ASSERT_NE(Ident, nullptr);
4573   EXPECT_TRUE(Ident->hasInitializer());
4574   Constant *Initializer = Ident->getInitializer();
4575   GlobalVariable *SrcStrGlob =
4576       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
4577   ASSERT_NE(SrcStrGlob, nullptr);
4578   ConstantDataArray *SrcSrc =
4579       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
4580   ASSERT_NE(SrcSrc, nullptr);
4581 
4582   // Verify the outlined function signature.
4583   Function *OutlinedFn =
4584       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
4585   ASSERT_NE(OutlinedFn, nullptr);
4586   EXPECT_FALSE(OutlinedFn->isDeclaration());
4587   EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
4588   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
4589   EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
4590   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
4591             Builder.getPtrTy()); // captured args
4592 
4593   // Check for TruncInst and ICmpInst in the outlined function.
4594   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4595                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
4596   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4597                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
4598 }
4599 
4600 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
4601   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4602   OpenMPIRBuilder OMPBuilder(*M);
4603   OMPBuilder.Config.IsTargetDevice = false;
4604   OMPBuilder.initialize();
4605   F->setName("func");
4606   IRBuilder<> &Builder = OMPBuilder.Builder;
4607   Builder.SetInsertPoint(BB);
4608 
4609   Function *FakeFunction =
4610       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4611                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4612 
4613   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4614     Builder.restoreIP(CodeGenIP);
4615     Builder.CreateCall(FakeFunction, {});
4616     return Error::success();
4617   };
4618 
4619   // `F` has an argument - an integer, so we use that as the thread limit.
4620   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4621                        OMPBuilder.createTeams(
4622                            /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
4623                            /*NumTeamsUpper=*/nullptr,
4624                            /*ThreadLimit=*/F->arg_begin(),
4625                            /*IfExpr=*/nullptr));
4626   Builder.restoreIP(AfterIP);
4627 
4628   Builder.CreateRetVoid();
4629   OMPBuilder.finalize();
4630 
4631   ASSERT_FALSE(verifyModule(*M));
4632 
4633   CallInst *PushNumTeamsCallInst =
4634       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4635   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4636 
4637   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0));
4638   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0));
4639   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin());
4640 
4641   // Verifying that the next instruction to execute is kmpc_fork_teams
4642   BranchInst *BrInst =
4643       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4644   ASSERT_NE(BrInst, nullptr);
4645   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4646   Instruction *NextInstruction =
4647       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4648   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4649   ASSERT_NE(ForkTeamsCI, nullptr);
4650   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4651             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4652 }
4653 
4654 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
4655   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4656   OpenMPIRBuilder OMPBuilder(*M);
4657   OMPBuilder.Config.IsTargetDevice = false;
4658   OMPBuilder.initialize();
4659   F->setName("func");
4660   IRBuilder<> &Builder = OMPBuilder.Builder;
4661   Builder.SetInsertPoint(BB);
4662 
4663   Function *FakeFunction =
4664       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4665                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4666 
4667   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4668     Builder.restoreIP(CodeGenIP);
4669     Builder.CreateCall(FakeFunction, {});
4670     return Error::success();
4671   };
4672 
4673   // `F` already has an integer argument, so we use that as upper bound to
4674   // `num_teams`
4675   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4676                        OMPBuilder.createTeams(Builder, BodyGenCB,
4677                                               /*NumTeamsLower=*/nullptr,
4678                                               /*NumTeamsUpper=*/F->arg_begin(),
4679                                               /*ThreadLimit=*/nullptr,
4680                                               /*IfExpr=*/nullptr));
4681   Builder.restoreIP(AfterIP);
4682 
4683   Builder.CreateRetVoid();
4684   OMPBuilder.finalize();
4685 
4686   ASSERT_FALSE(verifyModule(*M));
4687 
4688   CallInst *PushNumTeamsCallInst =
4689       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4690   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4691 
4692   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin());
4693   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin());
4694   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4695 
4696   // Verifying that the next instruction to execute is kmpc_fork_teams
4697   BranchInst *BrInst =
4698       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4699   ASSERT_NE(BrInst, nullptr);
4700   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4701   Instruction *NextInstruction =
4702       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4703   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4704   ASSERT_NE(ForkTeamsCI, nullptr);
4705   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4706             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4707 }
4708 
4709 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
4710   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4711   OpenMPIRBuilder OMPBuilder(*M);
4712   OMPBuilder.Config.IsTargetDevice = false;
4713   OMPBuilder.initialize();
4714   F->setName("func");
4715   IRBuilder<> &Builder = OMPBuilder.Builder;
4716   Builder.SetInsertPoint(BB);
4717 
4718   Function *FakeFunction =
4719       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4720                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4721 
4722   Value *NumTeamsLower =
4723       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4724   Value *NumTeamsUpper =
4725       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4726 
4727   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4728     Builder.restoreIP(CodeGenIP);
4729     Builder.CreateCall(FakeFunction, {});
4730     return Error::success();
4731   };
4732 
4733   // `F` already has an integer argument, so we use that as upper bound to
4734   // `num_teams`
4735   ASSERT_EXPECTED_INIT(
4736       OpenMPIRBuilder::InsertPointTy, AfterIP,
4737       OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
4738                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4739   Builder.restoreIP(AfterIP);
4740 
4741   Builder.CreateRetVoid();
4742   OMPBuilder.finalize();
4743 
4744   ASSERT_FALSE(verifyModule(*M));
4745 
4746   CallInst *PushNumTeamsCallInst =
4747       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4748   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4749 
4750   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4751   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4752   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4753 
4754   // Verifying that the next instruction to execute is kmpc_fork_teams
4755   BranchInst *BrInst =
4756       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4757   ASSERT_NE(BrInst, nullptr);
4758   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4759   Instruction *NextInstruction =
4760       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4761   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4762   ASSERT_NE(ForkTeamsCI, nullptr);
4763   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4764             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4765 }
4766 
4767 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
4768   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4769   OpenMPIRBuilder OMPBuilder(*M);
4770   OMPBuilder.Config.IsTargetDevice = false;
4771   OMPBuilder.initialize();
4772   F->setName("func");
4773   IRBuilder<> &Builder = OMPBuilder.Builder;
4774   Builder.SetInsertPoint(BB);
4775 
4776   BasicBlock *CodegenBB = splitBB(Builder, true);
4777   Builder.SetInsertPoint(CodegenBB);
4778 
4779   // Generate values for `num_teams` and `thread_limit` using the first argument
4780   // of the testing function.
4781   Value *NumTeamsLower =
4782       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4783   Value *NumTeamsUpper =
4784       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4785   Value *ThreadLimit =
4786       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit");
4787 
4788   Function *FakeFunction =
4789       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4790                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4791 
4792   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4793     Builder.restoreIP(CodeGenIP);
4794     Builder.CreateCall(FakeFunction, {});
4795     return Error::success();
4796   };
4797 
4798   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4799   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4800                        OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4801                                               NumTeamsUpper, ThreadLimit,
4802                                               nullptr));
4803   Builder.restoreIP(AfterIP);
4804 
4805   Builder.CreateRetVoid();
4806   OMPBuilder.finalize();
4807 
4808   ASSERT_FALSE(verifyModule(*M));
4809 
4810   CallInst *PushNumTeamsCallInst =
4811       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4812   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4813 
4814   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4815   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4816   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit);
4817 
4818   // Verifying that the next instruction to execute is kmpc_fork_teams
4819   BranchInst *BrInst =
4820       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4821   ASSERT_NE(BrInst, nullptr);
4822   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4823   Instruction *NextInstruction =
4824       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4825   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4826   ASSERT_NE(ForkTeamsCI, nullptr);
4827   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4828             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4829 }
4830 
4831 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
4832   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4833   OpenMPIRBuilder OMPBuilder(*M);
4834   OMPBuilder.Config.IsTargetDevice = false;
4835   OMPBuilder.initialize();
4836   F->setName("func");
4837   IRBuilder<> &Builder = OMPBuilder.Builder;
4838   Builder.SetInsertPoint(BB);
4839 
4840   Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(),
4841                                      Builder.CreateAlloca(Builder.getInt1Ty()));
4842 
4843   Function *FakeFunction =
4844       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4845                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4846 
4847   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4848     Builder.restoreIP(CodeGenIP);
4849     Builder.CreateCall(FakeFunction, {});
4850     return Error::success();
4851   };
4852 
4853   // `F` already has an integer argument, so we use that as upper bound to
4854   // `num_teams`
4855   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4856                        OMPBuilder.createTeams(Builder, BodyGenCB,
4857                                               /*NumTeamsLower=*/nullptr,
4858                                               /*NumTeamsUpper=*/nullptr,
4859                                               /*ThreadLimit=*/nullptr, IfExpr));
4860   Builder.restoreIP(AfterIP);
4861 
4862   Builder.CreateRetVoid();
4863   OMPBuilder.finalize();
4864 
4865   ASSERT_FALSE(verifyModule(*M));
4866 
4867   CallInst *PushNumTeamsCallInst =
4868       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4869   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4870   Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2);
4871   Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3);
4872   Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4);
4873 
4874   // Check the lower_bound
4875   ASSERT_NE(NumTeamsLower, nullptr);
4876   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower);
4877   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4878   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr);
4879   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0));
4880   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4881 
4882   // Check the upper_bound
4883   ASSERT_NE(NumTeamsUpper, nullptr);
4884   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper);
4885   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4886   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr);
4887   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0));
4888   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4889 
4890   // Check thread_limit
4891   EXPECT_EQ(ThreadLimit, Builder.getInt32(0));
4892 }
4893 
4894 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
4895   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4896   OpenMPIRBuilder OMPBuilder(*M);
4897   OMPBuilder.Config.IsTargetDevice = false;
4898   OMPBuilder.initialize();
4899   F->setName("func");
4900   IRBuilder<> &Builder = OMPBuilder.Builder;
4901   Builder.SetInsertPoint(BB);
4902 
4903   Value *IfExpr = Builder.CreateLoad(
4904       Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty()));
4905   Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5));
4906   Value *NumTeamsUpper =
4907       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10));
4908   Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20));
4909 
4910   Function *FakeFunction =
4911       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4912                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4913 
4914   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4915     Builder.restoreIP(CodeGenIP);
4916     Builder.CreateCall(FakeFunction, {});
4917     return Error::success();
4918   };
4919 
4920   // `F` already has an integer argument, so we use that as upper bound to
4921   // `num_teams`
4922   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4923                        OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4924                                               NumTeamsUpper, ThreadLimit,
4925                                               IfExpr));
4926   Builder.restoreIP(AfterIP);
4927 
4928   Builder.CreateRetVoid();
4929   OMPBuilder.finalize();
4930 
4931   ASSERT_FALSE(verifyModule(*M));
4932 
4933   CallInst *PushNumTeamsCallInst =
4934       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4935   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4936   Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2);
4937   Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3);
4938   Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4);
4939 
4940   // Get the boolean conversion of if expression
4941   ASSERT_EQ(IfExpr->getNumUses(), 1U);
4942   User *IfExprInst = IfExpr->user_back();
4943   ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst);
4944   ASSERT_NE(IfExprCmpInst, nullptr);
4945   EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE);
4946   EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr);
4947   EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0));
4948 
4949   // Check the lower_bound
4950   ASSERT_NE(NumTeamsLowerArg, nullptr);
4951   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg);
4952   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4953   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst);
4954   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower);
4955   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4956 
4957   // Check the upper_bound
4958   ASSERT_NE(NumTeamsUpperArg, nullptr);
4959   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg);
4960   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4961   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst);
4962   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper);
4963   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4964 
4965   // Check thread_limit
4966   EXPECT_EQ(ThreadLimitArg, ThreadLimit);
4967 }
4968 
4969 /// Returns the single instruction of InstTy type in BB that uses the value V.
4970 /// If there is more than one such instruction, returns null.
4971 template <typename InstTy>
4972 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
4973   InstTy *Result = nullptr;
4974   for (User *U : V->users()) {
4975     auto *Inst = dyn_cast<InstTy>(U);
4976     if (!Inst || Inst->getParent() != BB)
4977       continue;
4978     if (Result) {
4979       if (auto *SI = dyn_cast<StoreInst>(Inst)) {
4980         if (V == SI->getValueOperand())
4981           continue;
4982       } else {
4983         return nullptr;
4984       }
4985     }
4986     Result = Inst;
4987   }
4988   return Result;
4989 }
4990 
4991 /// Returns true if BB contains a simple binary reduction that loads a value
4992 /// from Accum, performs some binary operation with it, and stores it back to
4993 /// Accum.
4994 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
4995                                     Instruction::BinaryOps *OpCode = nullptr) {
4996   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
4997   if (!Store)
4998     return false;
4999   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
5000   if (!Stored)
5001     return false;
5002   if (OpCode && *OpCode != Stored->getOpcode())
5003     return false;
5004   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
5005   return Load && Load->getOperand(0) == Accum;
5006 }
5007 
5008 /// Returns true if BB contains a binary reduction that reduces V using a binary
5009 /// operator into an accumulator that is a function argument.
5010 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
5011   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
5012   if (!ReductionOp)
5013     return false;
5014 
5015   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
5016   if (!GlobalLoad)
5017     return false;
5018 
5019   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
5020   if (!Store)
5021     return false;
5022 
5023   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
5024          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
5025 }
5026 
5027 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
5028 /// [0, 1], respectively, and assigns results of these instructions to Zero and
5029 /// One. Returns true on success, false on failure or if such instructions are
5030 /// not unique among the users of Ptr.
5031 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
5032   Zero = nullptr;
5033   One = nullptr;
5034   for (User *U : Ptr->users()) {
5035     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
5036       if (GEP->getNumIndices() != 2)
5037         continue;
5038       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5039       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
5040       EXPECT_NE(FirstIdx, nullptr);
5041       EXPECT_NE(SecondIdx, nullptr);
5042 
5043       EXPECT_TRUE(FirstIdx->isZero());
5044       if (SecondIdx->isZero()) {
5045         if (Zero)
5046           return false;
5047         Zero = GEP;
5048       } else if (SecondIdx->isOne()) {
5049         if (One)
5050           return false;
5051         One = GEP;
5052       } else {
5053         return false;
5054       }
5055     }
5056   }
5057   return Zero != nullptr && One != nullptr;
5058 }
5059 
5060 static OpenMPIRBuilder::InsertPointTy
5061 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
5062              Value *&Result) {
5063   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5064   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
5065   return Builder.saveIP();
5066 }
5067 
5068 static OpenMPIRBuilder::InsertPointTy
5069 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
5070                    Value *RHS) {
5071   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5072   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
5073   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt,
5074                           AtomicOrdering::Monotonic);
5075   return Builder.saveIP();
5076 }
5077 
5078 static OpenMPIRBuilder::InsertPointTy
5079 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
5080              Value *&Result) {
5081   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5082   Result = Builder.CreateXor(LHS, RHS, "red.xor");
5083   return Builder.saveIP();
5084 }
5085 
5086 static OpenMPIRBuilder::InsertPointTy
5087 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
5088                    Value *RHS) {
5089   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5090   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
5091   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt,
5092                           AtomicOrdering::Monotonic);
5093   return Builder.saveIP();
5094 }
5095 
5096 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
5097   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5098   OpenMPIRBuilder OMPBuilder(*M);
5099   OMPBuilder.Config.IsTargetDevice = false;
5100   OMPBuilder.initialize();
5101   F->setName("func");
5102   IRBuilder<> Builder(BB);
5103 
5104   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5105   Builder.CreateBr(EnterBB);
5106   Builder.SetInsertPoint(EnterBB);
5107   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5108 
5109   // Create variables to be reduced.
5110   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5111                               F->getEntryBlock().getFirstInsertionPt());
5112   Type *SumType = Builder.getFloatTy();
5113   Type *XorType = Builder.getInt32Ty();
5114   Value *SumReduced;
5115   Value *XorReduced;
5116   {
5117     IRBuilderBase::InsertPointGuard Guard(Builder);
5118     Builder.restoreIP(OuterAllocaIP);
5119     SumReduced = Builder.CreateAlloca(SumType);
5120     XorReduced = Builder.CreateAlloca(XorType);
5121   }
5122 
5123   // Store initial values of reductions into global variables.
5124   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5125   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5126 
5127   // The loop body computes two reductions:
5128   //   sum of (float) thread-id;
5129   //   xor of thread-id;
5130   // and store the result in global variables.
5131   InsertPointTy BodyIP, BodyAllocaIP;
5132   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
5133     IRBuilderBase::InsertPointGuard Guard(Builder);
5134     Builder.restoreIP(CodeGenIP);
5135 
5136     uint32_t StrSize;
5137     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5138     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5139     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5140     Value *SumLocal =
5141         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5142     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5143     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5144     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5145     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5146     Builder.CreateStore(Sum, SumReduced);
5147     Builder.CreateStore(Xor, XorReduced);
5148 
5149     BodyIP = Builder.saveIP();
5150     BodyAllocaIP = InnerAllocaIP;
5151     return Error::success();
5152   };
5153 
5154   // Privatization for reduction creates local copies of reduction variables and
5155   // initializes them to reduction-neutral values.
5156   Value *SumPrivatized;
5157   Value *XorPrivatized;
5158   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5159                     Value &Original, Value &Inner, Value *&ReplVal) {
5160     IRBuilderBase::InsertPointGuard Guard(Builder);
5161     Builder.restoreIP(InnerAllocaIP);
5162     if (&Original == SumReduced) {
5163       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5164       ReplVal = SumPrivatized;
5165     } else if (&Original == XorReduced) {
5166       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5167       ReplVal = XorPrivatized;
5168     } else {
5169       ReplVal = &Inner;
5170       return CodeGenIP;
5171     }
5172 
5173     Builder.restoreIP(CodeGenIP);
5174     if (&Original == SumReduced)
5175       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5176                           SumPrivatized);
5177     else if (&Original == XorReduced)
5178       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5179 
5180     return Builder.saveIP();
5181   };
5182 
5183   // Do nothing in finalization.
5184   auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
5185 
5186   ASSERT_EXPECTED_INIT(
5187       OpenMPIRBuilder::InsertPointTy, AfterIP,
5188       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
5189                                 /* IfCondition */ nullptr,
5190                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5191                                 /* IsCancellable */ false));
5192   Builder.restoreIP(AfterIP);
5193 
5194   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
5195       {SumType, SumReduced, SumPrivatized,
5196        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5197        /*ReductionGenClang=*/nullptr, sumAtomicReduction},
5198       {XorType, XorReduced, XorPrivatized,
5199        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5200        /*ReductionGenClang=*/nullptr, xorAtomicReduction}};
5201   OMPBuilder.Config.setIsGPU(false);
5202 
5203   bool ReduceVariableByRef[] = {false, false};
5204   ASSERT_THAT_EXPECTED(OMPBuilder.createReductions(BodyIP, BodyAllocaIP,
5205                                                    ReductionInfos,
5206                                                    ReduceVariableByRef),
5207                        Succeeded());
5208 
5209   Builder.restoreIP(AfterIP);
5210   Builder.CreateRetVoid();
5211 
5212   OMPBuilder.finalize(F);
5213 
5214   // The IR must be valid.
5215   EXPECT_FALSE(verifyModule(*M));
5216 
5217   // Outlining must have happened.
5218   SmallVector<CallInst *> ForkCalls;
5219   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5220             ForkCalls);
5221   ASSERT_EQ(ForkCalls.size(), 1u);
5222   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5223   Function *Outlined = dyn_cast<Function>(CalleeVal);
5224   EXPECT_NE(Outlined, nullptr);
5225 
5226   // Check that the lock variable was created with the expected name.
5227   GlobalVariable *LockVar =
5228       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
5229   EXPECT_NE(LockVar, nullptr);
5230 
5231   // Find the allocation of a local array that will be used to call the runtime
5232   // reduciton function.
5233   BasicBlock &AllocBlock = Outlined->getEntryBlock();
5234   Value *LocalArray = nullptr;
5235   for (Instruction &I : AllocBlock) {
5236     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
5237       if (!Alloc->getAllocatedType()->isArrayTy() ||
5238           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
5239         continue;
5240       LocalArray = Alloc;
5241       break;
5242     }
5243   }
5244   ASSERT_NE(LocalArray, nullptr);
5245 
5246   // Find the call to the runtime reduction function.
5247   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
5248   Value *LocalArrayPtr = nullptr;
5249   Value *ReductionFnVal = nullptr;
5250   Value *SwitchArg = nullptr;
5251   for (Instruction &I : *BB) {
5252     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
5253       if (Call->getCalledFunction() !=
5254           OMPBuilder.getOrCreateRuntimeFunctionPtr(
5255               RuntimeFunction::OMPRTL___kmpc_reduce))
5256         continue;
5257       LocalArrayPtr = Call->getOperand(4);
5258       ReductionFnVal = Call->getOperand(5);
5259       SwitchArg = Call;
5260       break;
5261     }
5262   }
5263 
5264   // Check that the local array is passed to the function.
5265   ASSERT_NE(LocalArrayPtr, nullptr);
5266   EXPECT_EQ(LocalArrayPtr, LocalArray);
5267 
5268   // Find the GEP instructions preceding stores to the local array.
5269   Value *FirstArrayElemPtr = nullptr;
5270   Value *SecondArrayElemPtr = nullptr;
5271   EXPECT_EQ(LocalArray->getNumUses(), 3u);
5272   ASSERT_TRUE(
5273       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
5274 
5275   // Check that the values stored into the local array are privatized reduction
5276   // variables.
5277   auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>(
5278       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
5279   auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>(
5280       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
5281   ASSERT_NE(FirstPrivatized, nullptr);
5282   ASSERT_NE(SecondPrivatized, nullptr);
5283   ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr));
5284   EXPECT_TRUE(isSimpleBinaryReduction(
5285       FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5286   EXPECT_TRUE(isSimpleBinaryReduction(
5287       SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5288 
5289   // Check that the result of the runtime reduction call is used for further
5290   // dispatch.
5291   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
5292   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
5293   ASSERT_NE(Switch, nullptr);
5294   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
5295   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
5296   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
5297 
5298   // Non-atomic block contains reductions to the global reduction variable,
5299   // which is passed into the outlined function as an argument.
5300   Value *FirstLoad =
5301       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
5302   Value *SecondLoad =
5303       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
5304   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
5305   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
5306 
5307   // Atomic block also constains reductions to the global reduction variable.
5308   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
5309   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
5310   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
5311   auto *SecondAtomic =
5312       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
5313   ASSERT_NE(FirstAtomic, nullptr);
5314   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
5315   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5316   ASSERT_NE(SecondAtomic, nullptr);
5317   AtomicStorePointer = SecondAtomic->getPointerOperand();
5318   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5319 
5320   // Check that the separate reduction function also performs (non-atomic)
5321   // reductions after extracting reduction variables from its arguments.
5322   Function *ReductionFn = cast<Function>(ReductionFnVal);
5323   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
5324   Value *FirstLHSPtr;
5325   Value *SecondLHSPtr;
5326   ASSERT_TRUE(
5327       findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr));
5328   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5329   ASSERT_NE(Opaque, nullptr);
5330   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5331   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5332   ASSERT_NE(Opaque, nullptr);
5333   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5334 
5335   Value *FirstRHS;
5336   Value *SecondRHS;
5337   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
5338 }
5339 
5340 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
5341   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5342   OpenMPIRBuilder OMPBuilder(*M);
5343   OMPBuilder.Config.IsTargetDevice = false;
5344   OMPBuilder.initialize();
5345   F->setName("func");
5346   IRBuilder<> Builder(BB);
5347 
5348   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5349   Builder.CreateBr(EnterBB);
5350   Builder.SetInsertPoint(EnterBB);
5351   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5352 
5353   // Create variables to be reduced.
5354   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5355                               F->getEntryBlock().getFirstInsertionPt());
5356   Type *SumType = Builder.getFloatTy();
5357   Type *XorType = Builder.getInt32Ty();
5358   Value *SumReduced;
5359   Value *XorReduced;
5360   {
5361     IRBuilderBase::InsertPointGuard Guard(Builder);
5362     Builder.restoreIP(OuterAllocaIP);
5363     SumReduced = Builder.CreateAlloca(SumType);
5364     XorReduced = Builder.CreateAlloca(XorType);
5365   }
5366 
5367   // Store initial values of reductions into global variables.
5368   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5369   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5370 
5371   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
5372   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5373                             InsertPointTy CodeGenIP) {
5374     IRBuilderBase::InsertPointGuard Guard(Builder);
5375     Builder.restoreIP(CodeGenIP);
5376 
5377     uint32_t StrSize;
5378     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5379     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5380     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5381     Value *SumLocal =
5382         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5383     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5384     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5385     Builder.CreateStore(Sum, SumReduced);
5386 
5387     FirstBodyIP = Builder.saveIP();
5388     FirstBodyAllocaIP = InnerAllocaIP;
5389     return Error::success();
5390   };
5391 
5392   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
5393   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5394                              InsertPointTy CodeGenIP) {
5395     IRBuilderBase::InsertPointGuard Guard(Builder);
5396     Builder.restoreIP(CodeGenIP);
5397 
5398     uint32_t StrSize;
5399     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5400     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5401     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5402     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5403     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5404     Builder.CreateStore(Xor, XorReduced);
5405 
5406     SecondBodyIP = Builder.saveIP();
5407     SecondBodyAllocaIP = InnerAllocaIP;
5408     return Error::success();
5409   };
5410 
5411   // Privatization for reduction creates local copies of reduction variables and
5412   // initializes them to reduction-neutral values. The same privatization
5413   // callback is used for both loops, with dispatch based on the value being
5414   // privatized.
5415   Value *SumPrivatized;
5416   Value *XorPrivatized;
5417   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5418                     Value &Original, Value &Inner, Value *&ReplVal) {
5419     IRBuilderBase::InsertPointGuard Guard(Builder);
5420     Builder.restoreIP(InnerAllocaIP);
5421     if (&Original == SumReduced) {
5422       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5423       ReplVal = SumPrivatized;
5424     } else if (&Original == XorReduced) {
5425       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5426       ReplVal = XorPrivatized;
5427     } else {
5428       ReplVal = &Inner;
5429       return CodeGenIP;
5430     }
5431 
5432     Builder.restoreIP(CodeGenIP);
5433     if (&Original == SumReduced)
5434       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5435                           SumPrivatized);
5436     else if (&Original == XorReduced)
5437       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5438 
5439     return Builder.saveIP();
5440   };
5441 
5442   // Do nothing in finalization.
5443   auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
5444 
5445   ASSERT_EXPECTED_INIT(
5446       OpenMPIRBuilder::InsertPointTy, AfterIP1,
5447       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
5448                                 FiniCB, /* IfCondition */ nullptr,
5449                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5450                                 /* IsCancellable */ false));
5451   Builder.restoreIP(AfterIP1);
5452   ASSERT_EXPECTED_INIT(
5453       OpenMPIRBuilder::InsertPointTy, AfterIP2,
5454       OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP,
5455                                 SecondBodyGenCB, PrivCB, FiniCB,
5456                                 /* IfCondition */ nullptr,
5457                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5458                                 /* IsCancellable */ false));
5459   Builder.restoreIP(AfterIP2);
5460 
5461   OMPBuilder.Config.setIsGPU(false);
5462   bool ReduceVariableByRef[] = {false};
5463 
5464   ASSERT_THAT_EXPECTED(
5465       OMPBuilder.createReductions(
5466           FirstBodyIP, FirstBodyAllocaIP,
5467           {{SumType, SumReduced, SumPrivatized,
5468             /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5469             /*ReductionGenClang=*/nullptr, sumAtomicReduction}},
5470           ReduceVariableByRef),
5471       Succeeded());
5472   ASSERT_THAT_EXPECTED(
5473       OMPBuilder.createReductions(
5474           SecondBodyIP, SecondBodyAllocaIP,
5475           {{XorType, XorReduced, XorPrivatized,
5476             /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5477             /*ReductionGenClang=*/nullptr, xorAtomicReduction}},
5478           ReduceVariableByRef),
5479       Succeeded());
5480 
5481   Builder.restoreIP(AfterIP2);
5482   Builder.CreateRetVoid();
5483 
5484   OMPBuilder.finalize(F);
5485 
5486   // The IR must be valid.
5487   EXPECT_FALSE(verifyModule(*M));
5488 
5489   // Two different outlined functions must have been created.
5490   SmallVector<CallInst *> ForkCalls;
5491   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5492             ForkCalls);
5493   ASSERT_EQ(ForkCalls.size(), 2u);
5494   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5495   Function *FirstCallee = cast<Function>(CalleeVal);
5496   CalleeVal = ForkCalls[1]->getOperand(2);
5497   Function *SecondCallee = cast<Function>(CalleeVal);
5498   EXPECT_NE(FirstCallee, SecondCallee);
5499 
5500   // Two different reduction functions must have been created.
5501   SmallVector<CallInst *> ReduceCalls;
5502   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
5503             ReduceCalls);
5504   ASSERT_EQ(ReduceCalls.size(), 1u);
5505   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5506   ReduceCalls.clear();
5507   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
5508             OMPBuilder, ReduceCalls);
5509   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5510   EXPECT_NE(AddReduction, XorReduction);
5511 
5512   // Each reduction function does its own kind of reduction.
5513   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
5514   Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5515       AddReduction->getArg(0), FnReductionBB);
5516   ASSERT_NE(FirstLHSPtr, nullptr);
5517   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5518   ASSERT_NE(Opaque, nullptr);
5519   Instruction::BinaryOps Opcode = Instruction::FAdd;
5520   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5521 
5522   FnReductionBB = &XorReduction->getEntryBlock();
5523   Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5524       XorReduction->getArg(0), FnReductionBB);
5525   ASSERT_NE(FirstLHSPtr, nullptr);
5526   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5527   ASSERT_NE(Opaque, nullptr);
5528   Opcode = Instruction::Xor;
5529   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5530 }
5531 
5532 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
5533   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5534   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5535   OpenMPIRBuilder OMPBuilder(*M);
5536   OMPBuilder.initialize();
5537   F->setName("func");
5538   IRBuilder<> Builder(BB);
5539 
5540   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5541   Builder.CreateBr(EnterBB);
5542   Builder.SetInsertPoint(EnterBB);
5543   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5544 
5545   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5546   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5547 
5548   auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
5549   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5550     return Error::success();
5551   };
5552   SectionCBVector.push_back(SectionCB);
5553 
5554   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5555                    llvm::Value &, llvm::Value &Val,
5556                    llvm::Value *&ReplVal) { return CodeGenIP; };
5557   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5558                                     F->getEntryBlock().getFirstInsertionPt());
5559   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5560                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5561                                                  PrivCB, FiniCB, false, false));
5562   Builder.restoreIP(AfterIP);
5563   Builder.CreateRetVoid(); // Required at the end of the function
5564   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
5565   EXPECT_FALSE(verifyModule(*M, &errs()));
5566 }
5567 
5568 TEST_F(OpenMPIRBuilderTest, CreateSections) {
5569   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5570   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5571   OpenMPIRBuilder OMPBuilder(*M);
5572   OMPBuilder.initialize();
5573   F->setName("func");
5574   IRBuilder<> Builder(BB);
5575 
5576   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5577   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5578   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5579 
5580   BasicBlock *SwitchBB = nullptr;
5581   AllocaInst *PrivAI = nullptr;
5582   SwitchInst *Switch = nullptr;
5583 
5584   unsigned NumBodiesGenerated = 0;
5585   unsigned NumFiniCBCalls = 0;
5586   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
5587 
5588   auto FiniCB = [&](InsertPointTy IP) {
5589     ++NumFiniCBCalls;
5590     BasicBlock *IPBB = IP.getBlock();
5591     EXPECT_NE(IPBB->end(), IP.getPoint());
5592   };
5593 
5594   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5595     ++NumBodiesGenerated;
5596     CaseBBs.push_back(CodeGenIP.getBlock());
5597     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
5598     Builder.restoreIP(CodeGenIP);
5599     Builder.CreateStore(F->arg_begin(), PrivAI);
5600     Value *PrivLoad =
5601         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
5602     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
5603     return Error::success();
5604   };
5605   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5606                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
5607     // TODO: Privatization not implemented yet
5608     return CodeGenIP;
5609   };
5610 
5611   SectionCBVector.push_back(SectionCB);
5612   SectionCBVector.push_back(SectionCB);
5613 
5614   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5615                                     F->getEntryBlock().getFirstInsertionPt());
5616   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5617                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5618                                                  PrivCB, FINICB_WRAPPER(FiniCB),
5619                                                  false, false));
5620   Builder.restoreIP(AfterIP);
5621   Builder.CreateRetVoid(); // Required at the end of the function
5622 
5623   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
5624   // loop's exit BB
5625   BasicBlock *ForExitBB =
5626       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
5627   EXPECT_NE(ForExitBB, nullptr);
5628 
5629   EXPECT_NE(PrivAI, nullptr);
5630   Function *OutlinedFn = PrivAI->getFunction();
5631   EXPECT_EQ(F, OutlinedFn);
5632   EXPECT_FALSE(verifyModule(*M, &errs()));
5633   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
5634 
5635   BasicBlock *LoopPreheaderBB =
5636       OutlinedFn->getEntryBlock().getSingleSuccessor();
5637   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
5638   // iterator/counter
5639   bool FoundForInit = false;
5640   for (Instruction &Inst : *LoopPreheaderBB) {
5641     if (isa<CallInst>(Inst)) {
5642       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5643           "__kmpc_for_static_init_4u") {
5644         FoundForInit = true;
5645       }
5646     }
5647   }
5648   EXPECT_EQ(FoundForInit, true);
5649 
5650   bool FoundForExit = false;
5651   bool FoundBarrier = false;
5652   for (Instruction &Inst : *ForExitBB) {
5653     if (isa<CallInst>(Inst)) {
5654       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5655           "__kmpc_for_static_fini") {
5656         FoundForExit = true;
5657       }
5658       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5659           "__kmpc_barrier") {
5660         FoundBarrier = true;
5661       }
5662       if (FoundForExit && FoundBarrier)
5663         break;
5664     }
5665   }
5666   EXPECT_EQ(FoundForExit, true);
5667   EXPECT_EQ(FoundBarrier, true);
5668 
5669   EXPECT_NE(SwitchBB, nullptr);
5670   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
5671   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
5672   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
5673   EXPECT_EQ(Switch->getNumCases(), 2U);
5674 
5675   EXPECT_EQ(CaseBBs.size(), 2U);
5676   for (auto *&CaseBB : CaseBBs) {
5677     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
5678   }
5679 
5680   ASSERT_EQ(NumBodiesGenerated, 2U);
5681   ASSERT_EQ(NumFiniCBCalls, 1U);
5682   EXPECT_FALSE(verifyModule(*M, &errs()));
5683 }
5684 
5685 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
5686   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5687   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5688   OpenMPIRBuilder OMPBuilder(*M);
5689   OMPBuilder.initialize();
5690   F->setName("func");
5691   IRBuilder<> Builder(BB);
5692 
5693   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5694   Builder.CreateBr(EnterBB);
5695   Builder.SetInsertPoint(EnterBB);
5696   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5697 
5698   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5699                                     F->getEntryBlock().getFirstInsertionPt());
5700   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5701   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5702                    llvm::Value &, llvm::Value &Val,
5703                    llvm::Value *&ReplVal) { return CodeGenIP; };
5704   auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
5705 
5706   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5707                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5708                                                  PrivCB, FiniCB, false, true));
5709   Builder.restoreIP(AfterIP);
5710   Builder.CreateRetVoid(); // Required at the end of the function
5711   for (auto &Inst : instructions(*F)) {
5712     EXPECT_FALSE(isa<CallInst>(Inst) &&
5713                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5714                      "__kmpc_barrier" &&
5715                  "call to function __kmpc_barrier found with nowait");
5716   }
5717 }
5718 
5719 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
5720   OpenMPIRBuilder OMPBuilder(*M);
5721   OMPBuilder.initialize();
5722 
5723   IRBuilder<> Builder(BB);
5724 
5725   SmallVector<uint64_t> Mappings = {0, 1};
5726   GlobalVariable *OffloadMaptypesGlobal =
5727       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
5728   EXPECT_FALSE(M->global_empty());
5729   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
5730   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5731   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5732   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5733   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5734   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5735   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
5736   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
5737   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
5738   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
5739   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
5740   EXPECT_EQ(MappingInit, CA);
5741 }
5742 
5743 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
5744   OpenMPIRBuilder OMPBuilder(*M);
5745   OMPBuilder.initialize();
5746 
5747   IRBuilder<> Builder(BB);
5748 
5749   uint32_t StrSize;
5750   Constant *Cst1 =
5751       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5752   Constant *Cst2 =
5753       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5754   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5755 
5756   GlobalVariable *OffloadMaptypesGlobal =
5757       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
5758   EXPECT_FALSE(M->global_empty());
5759   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
5760   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5761   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5762   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5763   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5764   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5765   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
5766   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
5767 
5768   GlobalVariable *Name1Gbl =
5769       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
5770   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
5771   ConstantDataArray *Name1GblCA =
5772       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
5773   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
5774 
5775   GlobalVariable *Name2Gbl =
5776       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
5777   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
5778   ConstantDataArray *Name2GblCA =
5779       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
5780   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
5781 
5782   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
5783   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
5784 }
5785 
5786 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
5787   OpenMPIRBuilder OMPBuilder(*M);
5788   OMPBuilder.initialize();
5789   F->setName("func");
5790   IRBuilder<> Builder(BB);
5791 
5792   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5793 
5794   unsigned TotalNbOperand = 2;
5795 
5796   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5797   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5798                                     F->getEntryBlock().getFirstInsertionPt());
5799   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5800   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
5801   EXPECT_NE(MapperAllocas.Args, nullptr);
5802   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
5803   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
5804   ArrayType *ArrType =
5805       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
5806   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5807   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
5808                   ->getArrayElementType()
5809                   ->isPointerTy());
5810 
5811   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
5812   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
5813   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5814   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
5815                   ->getArrayElementType()
5816                   ->isPointerTy());
5817 
5818   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
5819   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
5820   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5821   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
5822                   ->getArrayElementType()
5823                   ->isIntegerTy(64));
5824 }
5825 
5826 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
5827   OpenMPIRBuilder OMPBuilder(*M);
5828   OMPBuilder.initialize();
5829   F->setName("func");
5830   IRBuilder<> Builder(BB);
5831   LLVMContext &Ctx = M->getContext();
5832 
5833   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5834 
5835   unsigned TotalNbOperand = 2;
5836 
5837   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5838   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5839                                     F->getEntryBlock().getFirstInsertionPt());
5840   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5841 
5842   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
5843       omp::OMPRTL___tgt_target_data_begin_mapper);
5844 
5845   SmallVector<uint64_t> Flags = {0, 2};
5846 
5847   uint32_t StrSize;
5848   Constant *SrcLocCst =
5849       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
5850   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
5851 
5852   Constant *Cst1 =
5853       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5854   Constant *Cst2 =
5855       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5856   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5857 
5858   GlobalVariable *Maptypes =
5859       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
5860   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
5861       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
5862       /*Idx0=*/0, /*Idx1=*/0);
5863 
5864   GlobalVariable *Mapnames =
5865       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
5866   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
5867       ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames,
5868       /*Idx0=*/0, /*Idx1=*/0);
5869 
5870   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
5871                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
5872                             TotalNbOperand);
5873 
5874   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
5875   EXPECT_NE(MapperCall, nullptr);
5876   EXPECT_EQ(MapperCall->arg_size(), 9U);
5877   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
5878             "__tgt_target_data_begin_mapper");
5879   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
5880   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
5881   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
5882 
5883   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
5884   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
5885   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
5886 }
5887 
5888 TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
5889   OpenMPIRBuilder OMPBuilder(*M);
5890   OMPBuilder.initialize();
5891   F->setName("func");
5892   IRBuilder<> Builder(BB);
5893   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5894 
5895   int64_t DeviceID = 2;
5896 
5897   AllocaInst *Val1 =
5898       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5899   ASSERT_NE(Val1, nullptr);
5900 
5901   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5902                                     F->getEntryBlock().getFirstInsertionPt());
5903 
5904   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5905   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5906   auto GenMapInfoCB =
5907       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5908     // Get map clause information.
5909     Builder.restoreIP(codeGenIP);
5910 
5911     CombinedInfo.BasePointers.emplace_back(Val1);
5912     CombinedInfo.Pointers.emplace_back(Val1);
5913     CombinedInfo.DevicePointers.emplace_back(
5914         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5915     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5916     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1));
5917     uint32_t temp;
5918     CombinedInfo.Names.emplace_back(
5919         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5920     return CombinedInfo;
5921   };
5922 
5923   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5924       /*RequiresDevicePointerInfo=*/false,
5925       /*SeparateBeginEndCalls=*/true);
5926 
5927   OMPBuilder.Config.setIsGPU(true);
5928 
5929   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper;
5930   ASSERT_EXPECTED_INIT(
5931       OpenMPIRBuilder::InsertPointTy, AfterIP,
5932       OMPBuilder.createTargetData(
5933           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5934           /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5935   Builder.restoreIP(AfterIP);
5936 
5937   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5938   EXPECT_NE(TargetDataCall, nullptr);
5939   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5940   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5941             "__tgt_target_data_begin_mapper");
5942   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5943   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5944   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5945 
5946   Builder.CreateRetVoid();
5947   EXPECT_FALSE(verifyModule(*M, &errs()));
5948 }
5949 
5950 TEST_F(OpenMPIRBuilderTest, TargetExitData) {
5951   OpenMPIRBuilder OMPBuilder(*M);
5952   OMPBuilder.initialize();
5953   F->setName("func");
5954   IRBuilder<> Builder(BB);
5955   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5956 
5957   int64_t DeviceID = 2;
5958 
5959   AllocaInst *Val1 =
5960       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5961   ASSERT_NE(Val1, nullptr);
5962 
5963   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5964                                     F->getEntryBlock().getFirstInsertionPt());
5965 
5966   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5967   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5968   auto GenMapInfoCB =
5969       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5970     // Get map clause information.
5971     Builder.restoreIP(codeGenIP);
5972 
5973     CombinedInfo.BasePointers.emplace_back(Val1);
5974     CombinedInfo.Pointers.emplace_back(Val1);
5975     CombinedInfo.DevicePointers.emplace_back(
5976         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5977     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5978     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2));
5979     uint32_t temp;
5980     CombinedInfo.Names.emplace_back(
5981         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5982     return CombinedInfo;
5983   };
5984 
5985   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5986       /*RequiresDevicePointerInfo=*/false,
5987       /*SeparateBeginEndCalls=*/true);
5988 
5989   OMPBuilder.Config.setIsGPU(true);
5990 
5991   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper;
5992   ASSERT_EXPECTED_INIT(
5993       OpenMPIRBuilder::InsertPointTy, AfterIP,
5994       OMPBuilder.createTargetData(
5995           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5996           /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5997   Builder.restoreIP(AfterIP);
5998 
5999   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
6000   EXPECT_NE(TargetDataCall, nullptr);
6001   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6002   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6003             "__tgt_target_data_end_mapper");
6004   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6005   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6006   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6007 
6008   Builder.CreateRetVoid();
6009   EXPECT_FALSE(verifyModule(*M, &errs()));
6010 }
6011 
6012 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
6013   OpenMPIRBuilder OMPBuilder(*M);
6014   OMPBuilder.initialize();
6015   F->setName("func");
6016   IRBuilder<> Builder(BB);
6017   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6018 
6019   int64_t DeviceID = 2;
6020 
6021   AllocaInst *Val1 =
6022       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
6023   ASSERT_NE(Val1, nullptr);
6024 
6025   AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy());
6026   ASSERT_NE(Val2, nullptr);
6027 
6028   AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy());
6029   ASSERT_NE(Val3, nullptr);
6030 
6031   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
6032                                     F->getEntryBlock().getFirstInsertionPt());
6033 
6034   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6035   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
6036   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6037   auto GenMapInfoCB =
6038       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
6039     // Get map clause information.
6040     Builder.restoreIP(codeGenIP);
6041     uint32_t temp;
6042 
6043     CombinedInfo.BasePointers.emplace_back(Val1);
6044     CombinedInfo.Pointers.emplace_back(Val1);
6045     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None);
6046     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
6047     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3));
6048     CombinedInfo.Names.emplace_back(
6049         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6050 
6051     CombinedInfo.BasePointers.emplace_back(Val2);
6052     CombinedInfo.Pointers.emplace_back(Val2);
6053     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
6054     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
6055     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
6056     CombinedInfo.Names.emplace_back(
6057         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6058 
6059     CombinedInfo.BasePointers.emplace_back(Val3);
6060     CombinedInfo.Pointers.emplace_back(Val3);
6061     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address);
6062     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
6063     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
6064     CombinedInfo.Names.emplace_back(
6065         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6066     return CombinedInfo;
6067   };
6068 
6069   llvm::OpenMPIRBuilder::TargetDataInfo Info(
6070       /*RequiresDevicePointerInfo=*/true,
6071       /*SeparateBeginEndCalls=*/true);
6072 
6073   OMPBuilder.Config.setIsGPU(true);
6074 
6075   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
6076   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
6077     if (BodyGenType == BodyGenTy::Priv) {
6078       EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u);
6079       Builder.restoreIP(CodeGenIP);
6080       CallInst *TargetDataCall =
6081           dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
6082       EXPECT_NE(TargetDataCall, nullptr);
6083       EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6084       EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6085                 "__tgt_target_data_begin_mapper");
6086       EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6087       EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6088       EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6089 
6090       LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode());
6091       EXPECT_NE(LI, nullptr);
6092       StoreInst *SI = dyn_cast<StoreInst>(&BB->back());
6093       EXPECT_NE(SI, nullptr);
6094       EXPECT_EQ(SI->getValueOperand(), LI);
6095       EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second);
6096       EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second));
6097       EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second));
6098       Builder.CreateStore(Builder.getInt32(99), Val1);
6099     }
6100     return Builder.saveIP();
6101   };
6102 
6103   ASSERT_EXPECTED_INIT(
6104       OpenMPIRBuilder::InsertPointTy, TargetDataIP1,
6105       OMPBuilder.createTargetData(
6106           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6107           /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB));
6108   Builder.restoreIP(TargetDataIP1);
6109 
6110   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
6111   EXPECT_NE(TargetDataCall, nullptr);
6112   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6113   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6114             "__tgt_target_data_end_mapper");
6115   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6116   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6117   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6118 
6119   // Check that BodyGenCB is still made when IsTargetDevice is set to true.
6120   OMPBuilder.Config.setIsTargetDevice(true);
6121   bool CheckDevicePassBodyGen = false;
6122   auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
6123     CheckDevicePassBodyGen = true;
6124     Builder.restoreIP(CodeGenIP);
6125     CallInst *TargetDataCall =
6126         dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
6127     // Make sure no begin_mapper call is present for device pass.
6128     EXPECT_EQ(TargetDataCall, nullptr);
6129     return Builder.saveIP();
6130   };
6131   ASSERT_EXPECTED_INIT(
6132       OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
6133       OMPBuilder.createTargetData(
6134           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6135           /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB));
6136   Builder.restoreIP(TargetDataIP2);
6137   EXPECT_TRUE(CheckDevicePassBodyGen);
6138 
6139   Builder.CreateRetVoid();
6140   EXPECT_FALSE(verifyModule(*M, &errs()));
6141 }
6142 
6143 namespace {
6144 // Some basic handling of argument mapping for the moment
6145 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder,
6146                            llvm::SmallVectorImpl<llvm::Value *> &Args,
6147                            llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) {
6148   for (auto Arg : Args) {
6149     CombinedInfo.BasePointers.emplace_back(Arg);
6150     CombinedInfo.Pointers.emplace_back(Arg);
6151     uint32_t SrcLocStrSize;
6152     CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr(
6153         "Unknown loc - stub implementation", SrcLocStrSize));
6154     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(
6155         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
6156         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
6157         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM));
6158     CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64(
6159         OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType())));
6160   }
6161 }
6162 } // namespace
6163 
6164 TEST_F(OpenMPIRBuilderTest, TargetRegion) {
6165   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6166   OpenMPIRBuilder OMPBuilder(*M);
6167   OMPBuilder.initialize();
6168   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
6169   OMPBuilder.setConfig(Config);
6170   F->setName("func");
6171   F->addFnAttr("target-cpu", "x86-64");
6172   F->addFnAttr("target-features", "+mmx,+sse");
6173   IRBuilder<> Builder(BB);
6174   auto *Int32Ty = Builder.getInt32Ty();
6175 
6176   AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr");
6177   AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr");
6178   AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr");
6179 
6180   Builder.CreateStore(Builder.getInt32(10), APtr);
6181   Builder.CreateStore(Builder.getInt32(20), BPtr);
6182   auto BodyGenCB = [&](InsertPointTy AllocaIP,
6183                        InsertPointTy CodeGenIP) -> InsertPointTy {
6184     Builder.restoreIP(CodeGenIP);
6185     LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr);
6186     LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr);
6187     Value *Sum = Builder.CreateAdd(AVal, BVal);
6188     Builder.CreateStore(Sum, CPtr);
6189     return Builder.saveIP();
6190   };
6191 
6192   llvm::SmallVector<llvm::Value *> Inputs;
6193   Inputs.push_back(APtr);
6194   Inputs.push_back(BPtr);
6195   Inputs.push_back(CPtr);
6196 
6197   auto SimpleArgAccessorCB =
6198       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6199           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6200           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6201         if (!OMPBuilder.Config.isTargetDevice()) {
6202           RetVal = cast<llvm::Value>(&Arg);
6203           return CodeGenIP;
6204         }
6205 
6206         Builder.restoreIP(AllocaIP);
6207 
6208         llvm::Value *Addr = Builder.CreateAlloca(
6209             Arg.getType()->isPointerTy()
6210                 ? Arg.getType()
6211                 : Type::getInt64Ty(Builder.getContext()),
6212             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6213         llvm::Value *AddrAscast =
6214             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6215         Builder.CreateStore(&Arg, AddrAscast);
6216 
6217         Builder.restoreIP(CodeGenIP);
6218 
6219         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6220 
6221         return Builder.saveIP();
6222       };
6223 
6224   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6225   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6226       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6227     CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos);
6228     return CombinedInfos;
6229   };
6230 
6231   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
6232   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
6233   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6234   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6235       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC,
6236       /*MaxTeams=*/{10}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6237   RuntimeAttrs.TargetThreadLimit[0] = Builder.getInt32(20);
6238   RuntimeAttrs.TeamsThreadLimit[0] = Builder.getInt32(30);
6239   RuntimeAttrs.MaxThreads = Builder.getInt32(40);
6240 
6241   ASSERT_EXPECTED_INIT(
6242       OpenMPIRBuilder::InsertPointTy, AfterIP,
6243       OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
6244                               Builder.saveIP(), EntryInfo, DefaultAttrs,
6245                               RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
6246                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6247   Builder.restoreIP(AfterIP);
6248 
6249   OMPBuilder.finalize();
6250   Builder.CreateRetVoid();
6251 
6252   // Check the kernel launch sequence
6253   auto Iter = F->getEntryBlock().rbegin();
6254   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
6255   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
6256   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
6257   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
6258   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
6259 
6260   // Check that the kernel launch function is called
6261   Function *KernelLaunchFunc = Call->getCalledFunction();
6262   EXPECT_NE(KernelLaunchFunc, nullptr);
6263   StringRef FunctionName = KernelLaunchFunc->getName();
6264   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
6265 
6266   // Check num_teams and num_threads in call arguments
6267   EXPECT_TRUE(Call->arg_size() >= 4);
6268   Value *NumTeamsArg = Call->getArgOperand(2);
6269   EXPECT_TRUE(isa<ConstantInt>(NumTeamsArg));
6270   EXPECT_EQ(10U, cast<ConstantInt>(NumTeamsArg)->getZExtValue());
6271   Value *NumThreadsArg = Call->getArgOperand(3);
6272   EXPECT_TRUE(isa<ConstantInt>(NumThreadsArg));
6273   EXPECT_EQ(20U, cast<ConstantInt>(NumThreadsArg)->getZExtValue());
6274 
6275   // Check num_teams and num_threads kernel arguments (use number 5 starting
6276   // from the end and counting the call to __tgt_target_kernel as the first use)
6277   Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1);
6278   EXPECT_TRUE(KernelArgs->getNumUses() >= 4);
6279   Value *NumTeamsGetElemPtr = *std::next(KernelArgs->user_begin(), 3);
6280   EXPECT_TRUE(isa<GetElementPtrInst>(NumTeamsGetElemPtr));
6281   Value *NumTeamsStore = NumTeamsGetElemPtr->getUniqueUndroppableUser();
6282   EXPECT_TRUE(isa<StoreInst>(NumTeamsStore));
6283   Value *NumTeamsStoreArg = cast<StoreInst>(NumTeamsStore)->getValueOperand();
6284   EXPECT_TRUE(isa<ConstantDataSequential>(NumTeamsStoreArg));
6285   auto *NumTeamsStoreValue = cast<ConstantDataSequential>(NumTeamsStoreArg);
6286   EXPECT_EQ(3U, NumTeamsStoreValue->getNumElements());
6287   EXPECT_EQ(10U, NumTeamsStoreValue->getElementAsInteger(0));
6288   EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(1));
6289   EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(2));
6290   Value *NumThreadsGetElemPtr = *std::next(KernelArgs->user_begin(), 2);
6291   EXPECT_TRUE(isa<GetElementPtrInst>(NumThreadsGetElemPtr));
6292   Value *NumThreadsStore = NumThreadsGetElemPtr->getUniqueUndroppableUser();
6293   EXPECT_TRUE(isa<StoreInst>(NumThreadsStore));
6294   Value *NumThreadsStoreArg =
6295       cast<StoreInst>(NumThreadsStore)->getValueOperand();
6296   EXPECT_TRUE(isa<ConstantDataSequential>(NumThreadsStoreArg));
6297   auto *NumThreadsStoreValue = cast<ConstantDataSequential>(NumThreadsStoreArg);
6298   EXPECT_EQ(3U, NumThreadsStoreValue->getNumElements());
6299   EXPECT_EQ(20U, NumThreadsStoreValue->getElementAsInteger(0));
6300   EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(1));
6301   EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(2));
6302 
6303   // Check the fallback call
6304   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
6305   Iter = FallbackBlock->rbegin();
6306   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
6307   // 'F' has a dummy DISubprogram which causes OutlinedFunc to also
6308   // have a DISubprogram. In this case, the call to OutlinedFunc needs
6309   // to have a debug loc, otherwise verifier will complain.
6310   FCall->setDebugLoc(DL);
6311   EXPECT_NE(FCall, nullptr);
6312 
6313   // Check that the correct aguments are passed in
6314   for (auto ArgInput : zip(FCall->args(), Inputs)) {
6315     EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput));
6316   }
6317 
6318   // Check that the outlined function exists with the expected prefix
6319   Function *OutlinedFunc = FCall->getCalledFunction();
6320   EXPECT_NE(OutlinedFunc, nullptr);
6321   StringRef FunctionName2 = OutlinedFunc->getName();
6322   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
6323 
6324   // Check that target-cpu and target-features were propagated to the outlined
6325   // function
6326   EXPECT_EQ(OutlinedFunc->getFnAttribute("target-cpu"),
6327             F->getFnAttribute("target-cpu"));
6328   EXPECT_EQ(OutlinedFunc->getFnAttribute("target-features"),
6329             F->getFnAttribute("target-features"));
6330 
6331   EXPECT_FALSE(verifyModule(*M, &errs()));
6332 }
6333 
6334 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
6335   OpenMPIRBuilder OMPBuilder(*M);
6336   OMPBuilder.setConfig(
6337       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6338   OMPBuilder.initialize();
6339 
6340   F->setName("func");
6341   F->addFnAttr("target-cpu", "gfx90a");
6342   F->addFnAttr("target-features", "+gfx9-insts,+wavefrontsize64");
6343   IRBuilder<> Builder(BB);
6344   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6345 
6346   LoadInst *Value = nullptr;
6347   StoreInst *TargetStore = nullptr;
6348   llvm::SmallVector<llvm::Value *, 2> CapturedArgs = {
6349       Constant::getNullValue(PointerType::get(Ctx, 0)),
6350       Constant::getNullValue(PointerType::get(Ctx, 0))};
6351 
6352   auto SimpleArgAccessorCB =
6353       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6354           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6355           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6356         if (!OMPBuilder.Config.isTargetDevice()) {
6357           RetVal = cast<llvm::Value>(&Arg);
6358           return CodeGenIP;
6359         }
6360 
6361         Builder.restoreIP(AllocaIP);
6362 
6363         llvm::Value *Addr = Builder.CreateAlloca(
6364             Arg.getType()->isPointerTy()
6365                 ? Arg.getType()
6366                 : Type::getInt64Ty(Builder.getContext()),
6367             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6368         llvm::Value *AddrAscast =
6369             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6370         Builder.CreateStore(&Arg, AddrAscast);
6371 
6372         Builder.restoreIP(CodeGenIP);
6373 
6374         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6375 
6376         return Builder.saveIP();
6377       };
6378 
6379   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6380   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6381       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6382     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6383     return CombinedInfos;
6384   };
6385 
6386   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6387                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6388       -> OpenMPIRBuilder::InsertPointTy {
6389     Builder.restoreIP(CodeGenIP);
6390     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6391     TargetStore = Builder.CreateStore(Value, CapturedArgs[1]);
6392     return Builder.saveIP();
6393   };
6394 
6395   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6396                                    F->getEntryBlock().getFirstInsertionPt());
6397   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6398                                   /*Line=*/3, /*Count=*/0);
6399   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6400   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6401       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC,
6402       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6403 
6404   ASSERT_EXPECTED_INIT(
6405       OpenMPIRBuilder::InsertPointTy, AfterIP,
6406       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6407                               EntryInfo, DefaultAttrs, RuntimeAttrs,
6408                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
6409                               BodyGenCB, SimpleArgAccessorCB));
6410   Builder.restoreIP(AfterIP);
6411 
6412   Builder.CreateRetVoid();
6413   OMPBuilder.finalize();
6414 
6415   // Check outlined function
6416   EXPECT_FALSE(verifyModule(*M, &errs()));
6417   EXPECT_NE(TargetStore, nullptr);
6418   Function *OutlinedFn = TargetStore->getFunction();
6419   EXPECT_NE(F, OutlinedFn);
6420 
6421   // Check that target-cpu and target-features were propagated to the outlined
6422   // function
6423   EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"),
6424             F->getFnAttribute("target-cpu"));
6425   EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"),
6426             F->getFnAttribute("target-features"));
6427 
6428   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6429   // Account for the "implicit" first argument.
6430   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6431   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
6432   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6433   EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy());
6434 
6435   // Check entry block
6436   auto &EntryBlock = OutlinedFn->getEntryBlock();
6437   Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt();
6438   EXPECT_NE(Alloca1, nullptr);
6439 
6440   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6441   auto *Store1 = Alloca1->getNextNode();
6442   EXPECT_TRUE(isa<StoreInst>(Store1));
6443   auto *Alloca2 = Store1->getNextNode();
6444   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6445   auto *Store2 = Alloca2->getNextNode();
6446   EXPECT_TRUE(isa<StoreInst>(Store2));
6447 
6448   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6449   EXPECT_NE(InitCall, nullptr);
6450   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6451   EXPECT_EQ(InitCall->arg_size(), 2U);
6452   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6453   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6454   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6455   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6456   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6457   auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6458   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6459             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6460   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6461             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6462   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6463             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6464 
6465   auto *EntryBlockBranch = EntryBlock.getTerminator();
6466   EXPECT_NE(EntryBlockBranch, nullptr);
6467   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6468 
6469   // Check user code block
6470   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6471   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6472   Instruction *Load1 = &*UserCodeBlock->getFirstNonPHIIt();
6473   EXPECT_TRUE(isa<LoadInst>(Load1));
6474   auto *Load2 = Load1->getNextNode();
6475   EXPECT_TRUE(isa<LoadInst>(Load2));
6476 
6477   auto *OutlinedBlockBr = Load2->getNextNode();
6478   EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));
6479 
6480   auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
6481   EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");
6482 
6483   Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt();
6484   EXPECT_EQ(Value1, Value);
6485   EXPECT_EQ(Value1->getNextNode(), TargetStore);
6486   auto *Deinit = TargetStore->getNextNode();
6487   EXPECT_NE(Deinit, nullptr);
6488 
6489   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6490   EXPECT_NE(DeinitCall, nullptr);
6491   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6492   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6493 
6494   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6495 
6496   // Check exit block
6497   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6498   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6499   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt()));
6500 
6501   // Check global exec_mode.
6502   GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used");
6503   EXPECT_NE(Used, nullptr);
6504   Constant *UsedInit = Used->getInitializer();
6505   EXPECT_NE(UsedInit, nullptr);
6506   EXPECT_TRUE(isa<ConstantArray>(UsedInit));
6507   auto *UsedInitData = cast<ConstantArray>(UsedInit);
6508   EXPECT_EQ(1U, UsedInitData->getNumOperands());
6509   Constant *ExecMode = UsedInitData->getOperand(0);
6510   EXPECT_TRUE(isa<GlobalVariable>(ExecMode));
6511   Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer();
6512   EXPECT_NE(ExecModeValue, nullptr);
6513   EXPECT_TRUE(isa<ConstantInt>(ExecModeValue));
6514   EXPECT_EQ(OMP_TGT_EXEC_MODE_GENERIC,
6515             cast<ConstantInt>(ExecModeValue)->getZExtValue());
6516 }
6517 
6518 TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
6519   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6520   OpenMPIRBuilder OMPBuilder(*M);
6521   OMPBuilder.initialize();
6522   OpenMPIRBuilderConfig Config(/*IsTargetDevice=*/false, /*IsGPU=*/false,
6523                                /*OpenMPOffloadMandatory=*/false,
6524                                /*HasRequiresReverseOffload=*/false,
6525                                /*HasRequiresUnifiedAddress=*/false,
6526                                /*HasRequiresUnifiedSharedMemory=*/false,
6527                                /*HasRequiresDynamicAllocators=*/false);
6528   OMPBuilder.setConfig(Config);
6529   F->setName("func");
6530   IRBuilder<> Builder(BB);
6531 
6532   auto BodyGenCB = [&](InsertPointTy,
6533                        InsertPointTy CodeGenIP) -> InsertPointTy {
6534     Builder.restoreIP(CodeGenIP);
6535     return Builder.saveIP();
6536   };
6537 
6538   auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&,
6539                                  OpenMPIRBuilder::InsertPointTy,
6540                                  OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6541     Builder.restoreIP(CodeGenIP);
6542     return Builder.saveIP();
6543   };
6544 
6545   SmallVector<Value *> Inputs;
6546   OpenMPIRBuilder::MapInfosTy CombinedInfos;
6547   auto GenMapInfoCB =
6548       [&](OpenMPIRBuilder::InsertPointTy) -> OpenMPIRBuilder::MapInfosTy & {
6549     return CombinedInfos;
6550   };
6551 
6552   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
6553   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
6554   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6555   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6556       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD,
6557       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6558   RuntimeAttrs.LoopTripCount = Builder.getInt64(1000);
6559 
6560   ASSERT_EXPECTED_INIT(
6561       OpenMPIRBuilder::InsertPointTy, AfterIP,
6562       OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
6563                               Builder.saveIP(), EntryInfo, DefaultAttrs,
6564                               RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
6565                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6566   Builder.restoreIP(AfterIP);
6567 
6568   OMPBuilder.finalize();
6569   Builder.CreateRetVoid();
6570 
6571   // Check the kernel launch sequence
6572   auto Iter = F->getEntryBlock().rbegin();
6573   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
6574   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
6575   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
6576   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
6577   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
6578 
6579   // Check that the kernel launch function is called
6580   Function *KernelLaunchFunc = Call->getCalledFunction();
6581   EXPECT_NE(KernelLaunchFunc, nullptr);
6582   StringRef FunctionName = KernelLaunchFunc->getName();
6583   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
6584 
6585   // Check the trip count kernel argument (use number 5 starting from the end
6586   // and counting the call to __tgt_target_kernel as the first use)
6587   Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1);
6588   EXPECT_TRUE(KernelArgs->getNumUses() >= 6);
6589   Value *TripCountGetElemPtr = *std::next(KernelArgs->user_begin(), 5);
6590   EXPECT_TRUE(isa<GetElementPtrInst>(TripCountGetElemPtr));
6591   Value *TripCountStore = TripCountGetElemPtr->getUniqueUndroppableUser();
6592   EXPECT_TRUE(isa<StoreInst>(TripCountStore));
6593   Value *TripCountStoreArg = cast<StoreInst>(TripCountStore)->getValueOperand();
6594   EXPECT_TRUE(isa<ConstantInt>(TripCountStoreArg));
6595   EXPECT_EQ(1000U, cast<ConstantInt>(TripCountStoreArg)->getZExtValue());
6596 
6597   // Check the fallback call
6598   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
6599   Iter = FallbackBlock->rbegin();
6600   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
6601   // 'F' has a dummy DISubprogram which causes OutlinedFunc to also
6602   // have a DISubprogram. In this case, the call to OutlinedFunc needs
6603   // to have a debug loc, otherwise verifier will complain.
6604   FCall->setDebugLoc(DL);
6605   EXPECT_NE(FCall, nullptr);
6606 
6607   // Check that the outlined function exists with the expected prefix
6608   Function *OutlinedFunc = FCall->getCalledFunction();
6609   EXPECT_NE(OutlinedFunc, nullptr);
6610   StringRef FunctionName2 = OutlinedFunc->getName();
6611   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
6612 
6613   EXPECT_FALSE(verifyModule(*M, &errs()));
6614 }
6615 
6616 TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
6617   OpenMPIRBuilder OMPBuilder(*M);
6618   OMPBuilder.setConfig(
6619       OpenMPIRBuilderConfig(/*IsTargetDevice=*/true, /*IsGPU=*/false,
6620                             /*OpenMPOffloadMandatory=*/false,
6621                             /*HasRequiresReverseOffload=*/false,
6622                             /*HasRequiresUnifiedAddress=*/false,
6623                             /*HasRequiresUnifiedSharedMemory=*/false,
6624                             /*HasRequiresDynamicAllocators=*/false));
6625   OMPBuilder.initialize();
6626   F->setName("func");
6627   IRBuilder<> Builder(BB);
6628   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6629 
6630   Function *OutlinedFn = nullptr;
6631   SmallVector<Value *> CapturedArgs;
6632 
6633   auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&,
6634                                  OpenMPIRBuilder::InsertPointTy,
6635                                  OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6636     Builder.restoreIP(CodeGenIP);
6637     return Builder.saveIP();
6638   };
6639 
6640   OpenMPIRBuilder::MapInfosTy CombinedInfos;
6641   auto GenMapInfoCB =
6642       [&](OpenMPIRBuilder::InsertPointTy) -> OpenMPIRBuilder::MapInfosTy & {
6643     return CombinedInfos;
6644   };
6645 
6646   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy,
6647                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6648       -> OpenMPIRBuilder::InsertPointTy {
6649     Builder.restoreIP(CodeGenIP);
6650     OutlinedFn = CodeGenIP.getBlock()->getParent();
6651     return Builder.saveIP();
6652   };
6653 
6654   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6655                                    F->getEntryBlock().getFirstInsertionPt());
6656   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6657                                   /*Line=*/3, /*Count=*/0);
6658   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6659   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6660       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD,
6661       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6662 
6663   ASSERT_EXPECTED_INIT(
6664       OpenMPIRBuilder::InsertPointTy, AfterIP,
6665       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6666                               EntryInfo, DefaultAttrs, RuntimeAttrs,
6667                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
6668                               BodyGenCB, SimpleArgAccessorCB));
6669   Builder.restoreIP(AfterIP);
6670 
6671   Builder.CreateRetVoid();
6672   OMPBuilder.finalize();
6673 
6674   // Check outlined function
6675   EXPECT_FALSE(verifyModule(*M, &errs()));
6676   EXPECT_NE(OutlinedFn, nullptr);
6677   EXPECT_NE(F, OutlinedFn);
6678 
6679   // Check that target-cpu and target-features were propagated to the outlined
6680   // function
6681   EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"),
6682             F->getFnAttribute("target-cpu"));
6683   EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"),
6684             F->getFnAttribute("target-features"));
6685 
6686   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6687   // Account for the "implicit" first argument.
6688   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6689   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
6690 
6691   // Check global exec_mode.
6692   GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used");
6693   EXPECT_NE(Used, nullptr);
6694   Constant *UsedInit = Used->getInitializer();
6695   EXPECT_NE(UsedInit, nullptr);
6696   EXPECT_TRUE(isa<ConstantArray>(UsedInit));
6697   auto *UsedInitData = cast<ConstantArray>(UsedInit);
6698   EXPECT_EQ(1U, UsedInitData->getNumOperands());
6699   Constant *ExecMode = UsedInitData->getOperand(0);
6700   EXPECT_TRUE(isa<GlobalVariable>(ExecMode));
6701   Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer();
6702   EXPECT_NE(ExecModeValue, nullptr);
6703   EXPECT_TRUE(isa<ConstantInt>(ExecModeValue));
6704   EXPECT_EQ(OMP_TGT_EXEC_MODE_SPMD,
6705             cast<ConstantInt>(ExecModeValue)->getZExtValue());
6706 }
6707 
6708 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
6709   OpenMPIRBuilder OMPBuilder(*M);
6710   OMPBuilder.setConfig(
6711       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6712   OMPBuilder.initialize();
6713 
6714   F->setName("func");
6715   IRBuilder<> Builder(BB);
6716   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6717 
6718   LoadInst *Value = nullptr;
6719   StoreInst *TargetStore = nullptr;
6720   llvm::SmallVector<llvm::Value *, 1> CapturedArgs = {
6721       Constant::getNullValue(PointerType::get(Ctx, 0))};
6722 
6723   auto SimpleArgAccessorCB =
6724       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6725           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6726           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6727         if (!OMPBuilder.Config.isTargetDevice()) {
6728           RetVal = cast<llvm::Value>(&Arg);
6729           return CodeGenIP;
6730         }
6731 
6732         Builder.restoreIP(AllocaIP);
6733 
6734         llvm::Value *Addr = Builder.CreateAlloca(
6735             Arg.getType()->isPointerTy()
6736                 ? Arg.getType()
6737                 : Type::getInt64Ty(Builder.getContext()),
6738             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6739         llvm::Value *AddrAscast =
6740             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6741         Builder.CreateStore(&Arg, AddrAscast);
6742 
6743         Builder.restoreIP(CodeGenIP);
6744 
6745         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6746 
6747         return Builder.saveIP();
6748       };
6749 
6750   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6751   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6752       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6753     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6754     return CombinedInfos;
6755   };
6756 
6757   llvm::Value *RaiseAlloca = nullptr;
6758 
6759   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6760                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6761       -> OpenMPIRBuilder::InsertPointTy {
6762     Builder.restoreIP(CodeGenIP);
6763     RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty());
6764     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6765     TargetStore = Builder.CreateStore(Value, RaiseAlloca);
6766     return Builder.saveIP();
6767   };
6768 
6769   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6770                                    F->getEntryBlock().getFirstInsertionPt());
6771   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6772                                   /*Line=*/3, /*Count=*/0);
6773   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6774   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6775       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC,
6776       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6777 
6778   ASSERT_EXPECTED_INIT(
6779       OpenMPIRBuilder::InsertPointTy, AfterIP,
6780       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6781                               EntryInfo, DefaultAttrs, RuntimeAttrs,
6782                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
6783                               BodyGenCB, SimpleArgAccessorCB));
6784   Builder.restoreIP(AfterIP);
6785 
6786   Builder.CreateRetVoid();
6787   OMPBuilder.finalize();
6788 
6789   // Check outlined function
6790   EXPECT_FALSE(verifyModule(*M, &errs()));
6791   EXPECT_NE(TargetStore, nullptr);
6792   Function *OutlinedFn = TargetStore->getFunction();
6793   EXPECT_NE(F, OutlinedFn);
6794 
6795   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6796   // Account for the "implicit" first argument.
6797   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6798   EXPECT_EQ(OutlinedFn->arg_size(), 2U);
6799   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6800 
6801   // Check entry block, to see if we have raised our alloca
6802   // from the body to the entry block.
6803   auto &EntryBlock = OutlinedFn->getEntryBlock();
6804 
6805   // Check that we have moved our alloca created in the
6806   // BodyGenCB function, to the top of the function.
6807   Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt();
6808   EXPECT_NE(Alloca1, nullptr);
6809   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6810   EXPECT_EQ(Alloca1, RaiseAlloca);
6811 
6812   // Verify we have not altered the rest of the function
6813   // inappropriately with our alloca movement.
6814   auto *Alloca2 = Alloca1->getNextNode();
6815   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6816   auto *Store2 = Alloca2->getNextNode();
6817   EXPECT_TRUE(isa<StoreInst>(Store2));
6818 
6819   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6820   EXPECT_NE(InitCall, nullptr);
6821   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6822   EXPECT_EQ(InitCall->arg_size(), 2U);
6823   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6824   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6825   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6826   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6827   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6828   auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6829   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6830             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6831   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6832             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6833   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6834             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6835 
6836   auto *EntryBlockBranch = EntryBlock.getTerminator();
6837   EXPECT_NE(EntryBlockBranch, nullptr);
6838   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6839 
6840   // Check user code block
6841   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6842   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6843   BasicBlock::iterator Load1 = UserCodeBlock->getFirstNonPHIIt();
6844   EXPECT_TRUE(isa<LoadInst>(Load1));
6845 
6846   auto *OutlinedBlockBr = Load1->getNextNode();
6847   EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));
6848 
6849   auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
6850   EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");
6851 
6852   Instruction *Load2 = &*OutlinedBlock->getFirstNonPHIIt();
6853   EXPECT_TRUE(isa<LoadInst>(Load2));
6854   EXPECT_EQ(Load2, Value);
6855   EXPECT_EQ(Load2->getNextNode(), TargetStore);
6856   auto *Deinit = TargetStore->getNextNode();
6857   EXPECT_NE(Deinit, nullptr);
6858 
6859   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6860   EXPECT_NE(DeinitCall, nullptr);
6861   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6862   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6863 
6864   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6865 
6866   // Check exit block
6867   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6868   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6869   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt()));
6870 }
6871 
6872 TEST_F(OpenMPIRBuilderTest, CreateTask) {
6873   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6874   OpenMPIRBuilder OMPBuilder(*M);
6875   OMPBuilder.Config.IsTargetDevice = false;
6876   OMPBuilder.initialize();
6877   F->setName("func");
6878   IRBuilder<> Builder(BB);
6879 
6880   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6881   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6882   Value *Val128 =
6883       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6884 
6885   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6886     Builder.restoreIP(AllocaIP);
6887     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6888                                                 "bodygen.alloca128");
6889 
6890     Builder.restoreIP(CodeGenIP);
6891     // Loading and storing captured pointer and values
6892     Builder.CreateStore(Val128, Local128);
6893     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6894                                       "bodygen.load32");
6895 
6896     LoadInst *PrivLoad128 = Builder.CreateLoad(
6897         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
6898     Value *Cmp = Builder.CreateICmpNE(
6899         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
6900     Instruction *ThenTerm, *ElseTerm;
6901     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
6902                                   &ThenTerm, &ElseTerm);
6903     return Error::success();
6904   };
6905 
6906   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6907   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6908   OpenMPIRBuilder::LocationDescription Loc(
6909       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6910   ASSERT_EXPECTED_INIT(
6911       OpenMPIRBuilder::InsertPointTy, AfterIP,
6912       OMPBuilder.createTask(
6913           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6914           BodyGenCB));
6915   Builder.restoreIP(AfterIP);
6916   OMPBuilder.finalize();
6917   Builder.CreateRetVoid();
6918 
6919   EXPECT_FALSE(verifyModule(*M, &errs()));
6920 
6921   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6922       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6923           ->user_back());
6924 
6925   // Verify the Ident argument
6926   GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
6927   ASSERT_NE(Ident, nullptr);
6928   EXPECT_TRUE(Ident->hasInitializer());
6929   Constant *Initializer = Ident->getInitializer();
6930   GlobalVariable *SrcStrGlob =
6931       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6932   ASSERT_NE(SrcStrGlob, nullptr);
6933   ConstantDataArray *SrcSrc =
6934       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6935   ASSERT_NE(SrcSrc, nullptr);
6936 
6937   // Verify the num_threads argument.
6938   CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
6939   ASSERT_NE(GTID, nullptr);
6940   EXPECT_EQ(GTID->arg_size(), 1U);
6941   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
6942 
6943   // Verify the flags
6944   // TODO: Check for others flags. Currently testing only for tiedness.
6945   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6946   ASSERT_NE(Flags, nullptr);
6947   EXPECT_EQ(Flags->getSExtValue(), 1);
6948 
6949   // Verify the data size
6950   ConstantInt *DataSize =
6951       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6952   ASSERT_NE(DataSize, nullptr);
6953   EXPECT_EQ(DataSize->getSExtValue(), 40);
6954 
6955   ConstantInt *SharedsSize =
6956       dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4));
6957   EXPECT_EQ(SharedsSize->getSExtValue(),
6958             24); // 64-bit pointer + 128-bit integer
6959 
6960   // Verify Wrapper function
6961   Function *OutlinedFn =
6962       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6963   ASSERT_NE(OutlinedFn, nullptr);
6964 
6965   LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin());
6966   ASSERT_NE(SharedsLoad, nullptr);
6967   EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1));
6968 
6969   EXPECT_FALSE(OutlinedFn->isDeclaration());
6970   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty());
6971 
6972   // Verify that the data argument is used only once, and that too in the load
6973   // instruction that is then used for accessing shared data.
6974   Value *DataPtr = OutlinedFn->getArg(1);
6975   EXPECT_EQ(DataPtr->getNumUses(), 1U);
6976   EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser()));
6977   Value *Data = DataPtr->uses().begin()->getUser();
6978   EXPECT_TRUE(all_of(Data->uses(), [](Use &U) {
6979     return isa<GetElementPtrInst>(U.getUser());
6980   }));
6981 
6982   // Verify the presence of `trunc` and `icmp` instructions in Outlined function
6983   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6984                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
6985   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6986                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
6987 
6988   // Verify the execution of the task
6989   CallInst *TaskCall = dyn_cast<CallInst>(
6990       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6991           ->user_back());
6992   ASSERT_NE(TaskCall, nullptr);
6993   EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
6994   EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
6995   EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);
6996 
6997   // Verify that the argument data has been copied
6998   for (User *in : TaskAllocCall->users()) {
6999     if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
7000       EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
7001     }
7002   }
7003 }
7004 
7005 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
7006   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7007   OpenMPIRBuilder OMPBuilder(*M);
7008   OMPBuilder.Config.IsTargetDevice = false;
7009   OMPBuilder.initialize();
7010   F->setName("func");
7011   IRBuilder<> Builder(BB);
7012 
7013   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7014     return Error::success();
7015   };
7016 
7017   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7018   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7019   OpenMPIRBuilder::LocationDescription Loc(
7020       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7021   ASSERT_EXPECTED_INIT(
7022       OpenMPIRBuilder::InsertPointTy, AfterIP,
7023       OMPBuilder.createTask(
7024           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7025           BodyGenCB));
7026   Builder.restoreIP(AfterIP);
7027   OMPBuilder.finalize();
7028   Builder.CreateRetVoid();
7029 
7030   EXPECT_FALSE(verifyModule(*M, &errs()));
7031 
7032   // Check that the outlined function has only one argument.
7033   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7034       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7035           ->user_back());
7036   Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5));
7037   ASSERT_NE(OutlinedFn, nullptr);
7038   ASSERT_EQ(OutlinedFn->arg_size(), 1U);
7039 }
7040 
7041 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
7042   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7043   OpenMPIRBuilder OMPBuilder(*M);
7044   OMPBuilder.Config.IsTargetDevice = false;
7045   OMPBuilder.initialize();
7046   F->setName("func");
7047   IRBuilder<> Builder(BB);
7048   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7049     return Error::success();
7050   };
7051   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7052   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7053   OpenMPIRBuilder::LocationDescription Loc(
7054       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7055   ASSERT_EXPECTED_INIT(
7056       OpenMPIRBuilder::InsertPointTy, AfterIP,
7057       OMPBuilder.createTask(
7058           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7059           BodyGenCB,
7060           /*Tied=*/false));
7061   Builder.restoreIP(AfterIP);
7062   OMPBuilder.finalize();
7063   Builder.CreateRetVoid();
7064 
7065   // Check for the `Tied` argument
7066   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7067       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7068           ->user_back());
7069   ASSERT_NE(TaskAllocCall, nullptr);
7070   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
7071   ASSERT_NE(Flags, nullptr);
7072   EXPECT_EQ(Flags->getZExtValue() & 1U, 0U);
7073 
7074   EXPECT_FALSE(verifyModule(*M, &errs()));
7075 }
7076 
7077 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
7078   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7079   OpenMPIRBuilder OMPBuilder(*M);
7080   OMPBuilder.Config.IsTargetDevice = false;
7081   OMPBuilder.initialize();
7082   F->setName("func");
7083   IRBuilder<> Builder(BB);
7084   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7085     return Error::success();
7086   };
7087   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7088   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7089   OpenMPIRBuilder::LocationDescription Loc(
7090       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7091   AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext()));
7092   SmallVector<OpenMPIRBuilder::DependData> DDS;
7093   {
7094     OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn,
7095                                      Type::getInt32Ty(M->getContext()), InDep);
7096     DDS.push_back(DDIn);
7097   }
7098   ASSERT_EXPECTED_INIT(
7099       OpenMPIRBuilder::InsertPointTy, AfterIP,
7100       OMPBuilder.createTask(
7101           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7102           BodyGenCB,
7103           /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
7104   Builder.restoreIP(AfterIP);
7105   OMPBuilder.finalize();
7106   Builder.CreateRetVoid();
7107 
7108   // Check for the `NumDeps` argument
7109   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7110       OMPBuilder
7111           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps)
7112           ->user_back());
7113   ASSERT_NE(TaskAllocCall, nullptr);
7114   ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
7115   ASSERT_NE(NumDeps, nullptr);
7116   EXPECT_EQ(NumDeps->getZExtValue(), 1U);
7117 
7118   // Check for the `DepInfo` array argument
7119   AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4));
7120   ASSERT_NE(DepArray, nullptr);
7121   Value::user_iterator DepArrayI = DepArray->user_begin();
7122   ++DepArrayI;
7123   Value::user_iterator DepInfoI = DepArrayI->user_begin();
7124   // Check for the `DependKind` flag in the `DepInfo` array
7125   Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI);
7126   ASSERT_NE(Flag, nullptr);
7127   ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag);
7128   ASSERT_NE(FlagInt, nullptr);
7129   EXPECT_EQ(FlagInt->getZExtValue(),
7130             static_cast<unsigned int>(RTLDependenceKindTy::DepIn));
7131   ++DepInfoI;
7132   // Check for the size in the `DepInfo` array
7133   Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI);
7134   ASSERT_NE(Size, nullptr);
7135   ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size);
7136   ASSERT_NE(SizeInt, nullptr);
7137   EXPECT_EQ(SizeInt->getZExtValue(), 4U);
7138   ++DepInfoI;
7139   // Check for the variable address in the `DepInfo` array
7140   Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI);
7141   ASSERT_NE(AddrStored, nullptr);
7142   PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored);
7143   ASSERT_NE(AddrInt, nullptr);
7144   Value *Addr = AddrInt->getPointerOperand();
7145   EXPECT_EQ(Addr, InDep);
7146 
7147   ConstantInt *NumDepsNoAlias =
7148       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5));
7149   ASSERT_NE(NumDepsNoAlias, nullptr);
7150   EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U);
7151   EXPECT_EQ(TaskAllocCall->getOperand(6),
7152             ConstantPointerNull::get(PointerType::getUnqual(M->getContext())));
7153 
7154   EXPECT_FALSE(verifyModule(*M, &errs()));
7155 }
7156 
7157 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
7158   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7159   OpenMPIRBuilder OMPBuilder(*M);
7160   OMPBuilder.Config.IsTargetDevice = false;
7161   OMPBuilder.initialize();
7162   F->setName("func");
7163   IRBuilder<> Builder(BB);
7164   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7165     return Error::success();
7166   };
7167   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7168   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
7169   Builder.SetInsertPoint(BodyBB);
7170   Value *Final = Builder.CreateICmp(
7171       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
7172       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
7173   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7174   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
7175                        OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
7176                                              /*Tied=*/false, Final));
7177   Builder.restoreIP(AfterIP);
7178   OMPBuilder.finalize();
7179   Builder.CreateRetVoid();
7180 
7181   // Check for the `Tied` argument
7182   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7183       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7184           ->user_back());
7185   ASSERT_NE(TaskAllocCall, nullptr);
7186   BinaryOperator *OrInst =
7187       dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
7188   ASSERT_NE(OrInst, nullptr);
7189   EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);
7190 
7191   // One of the arguments to `or` instruction is the tied flag, which is equal
7192   // to zero.
7193   EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
7194     if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
7195       return TiedValue->getSExtValue() == 0;
7196     return false;
7197   }));
7198 
7199   // One of the arguments to `or` instruction is the final condition.
7200   EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
7201     if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
7202       ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
7203       ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
7204       if (!TrueValue || !FalseValue)
7205         return false;
7206       return Select->getCondition() == Final &&
7207              TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
7208     }
7209     return false;
7210   }));
7211 
7212   EXPECT_FALSE(verifyModule(*M, &errs()));
7213 }
7214 
7215 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
7216   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7217   OpenMPIRBuilder OMPBuilder(*M);
7218   OMPBuilder.Config.IsTargetDevice = false;
7219   OMPBuilder.initialize();
7220   F->setName("func");
7221   IRBuilder<> Builder(BB);
7222   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7223     return Error::success();
7224   };
7225   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7226   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
7227   Builder.SetInsertPoint(BodyBB);
7228   Value *IfCondition = Builder.CreateICmp(
7229       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
7230       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
7231   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7232   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
7233                        OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
7234                                              /*Tied=*/false, /*Final=*/nullptr,
7235                                              IfCondition));
7236   Builder.restoreIP(AfterIP);
7237   OMPBuilder.finalize();
7238   Builder.CreateRetVoid();
7239 
7240   EXPECT_FALSE(verifyModule(*M, &errs()));
7241 
7242   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7243       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7244           ->user_back());
7245   ASSERT_NE(TaskAllocCall, nullptr);
7246 
7247   // Check the branching is based on the if condition argument.
7248   BranchInst *IfConditionBranchInst =
7249       dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator());
7250   ASSERT_NE(IfConditionBranchInst, nullptr);
7251   ASSERT_TRUE(IfConditionBranchInst->isConditional());
7252   EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition);
7253 
7254   // Check that the `__kmpc_omp_task` executes only in the then branch.
7255   CallInst *TaskCall = dyn_cast<CallInst>(
7256       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
7257           ->user_back());
7258   ASSERT_NE(TaskCall, nullptr);
7259   EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0));
7260 
7261   // Check that the OpenMP Runtime Functions specific to `if` clause execute
7262   // only in the else branch. Also check that the function call is between the
7263   // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls.
7264   CallInst *TaskBeginIfCall = dyn_cast<CallInst>(
7265       OMPBuilder
7266           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0)
7267           ->user_back());
7268   CallInst *TaskCompleteCall = dyn_cast<CallInst>(
7269       OMPBuilder
7270           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0)
7271           ->user_back());
7272   ASSERT_NE(TaskBeginIfCall, nullptr);
7273   ASSERT_NE(TaskCompleteCall, nullptr);
7274   Function *OulinedFn =
7275       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
7276   ASSERT_NE(OulinedFn, nullptr);
7277   CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back());
7278   ASSERT_NE(OulinedFnCall, nullptr);
7279   EXPECT_EQ(TaskBeginIfCall->getParent(),
7280             IfConditionBranchInst->getSuccessor(1));
7281 
7282   EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall);
7283   EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall);
7284 }
7285 
7286 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
7287   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7288   OpenMPIRBuilder OMPBuilder(*M);
7289   OMPBuilder.initialize();
7290   F->setName("func");
7291   IRBuilder<> Builder(BB);
7292 
7293   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
7294   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
7295   Value *Val128 =
7296       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
7297   Instruction *ThenTerm, *ElseTerm;
7298 
7299   Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
7300 
7301   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7302     Builder.restoreIP(AllocaIP);
7303     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
7304                                                 "bodygen.alloca128");
7305 
7306     Builder.restoreIP(CodeGenIP);
7307     // Loading and storing captured pointer and values
7308     InternalStoreInst = Builder.CreateStore(Val128, Local128);
7309     InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
7310                                         "bodygen.load32");
7311 
7312     InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128,
7313                                          "bodygen.local.load128");
7314     InternalIfCmp = Builder.CreateICmpNE(
7315         InternalLoad32,
7316         Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType()));
7317     SplitBlockAndInsertIfThenElse(InternalIfCmp,
7318                                   CodeGenIP.getBlock()->getTerminator(),
7319                                   &ThenTerm, &ElseTerm);
7320     return Error::success();
7321   };
7322 
7323   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7324   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7325   OpenMPIRBuilder::LocationDescription Loc(
7326       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7327   ASSERT_EXPECTED_INIT(
7328       OpenMPIRBuilder::InsertPointTy, AfterIP,
7329       OMPBuilder.createTaskgroup(
7330           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7331           BodyGenCB));
7332   Builder.restoreIP(AfterIP);
7333   OMPBuilder.finalize();
7334   Builder.CreateRetVoid();
7335 
7336   EXPECT_FALSE(verifyModule(*M, &errs()));
7337 
7338   CallInst *TaskgroupCall = dyn_cast<CallInst>(
7339       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
7340           ->user_back());
7341   ASSERT_NE(TaskgroupCall, nullptr);
7342   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
7343       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
7344           ->user_back());
7345   ASSERT_NE(EndTaskgroupCall, nullptr);
7346 
7347   // Verify the Ident argument
7348   GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0));
7349   ASSERT_NE(Ident, nullptr);
7350   EXPECT_TRUE(Ident->hasInitializer());
7351   Constant *Initializer = Ident->getInitializer();
7352   GlobalVariable *SrcStrGlob =
7353       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
7354   ASSERT_NE(SrcStrGlob, nullptr);
7355   ConstantDataArray *SrcSrc =
7356       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
7357   ASSERT_NE(SrcSrc, nullptr);
7358 
7359   // Verify the num_threads argument.
7360   CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1));
7361   ASSERT_NE(GTID, nullptr);
7362   EXPECT_EQ(GTID->arg_size(), 1U);
7363   EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr(
7364                                            OMPRTL___kmpc_global_thread_num));
7365 
7366   // Checking the general structure of the IR generated is same as expected.
7367   Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction();
7368   EXPECT_EQ(GeneratedStoreInst, InternalStoreInst);
7369   Instruction *GeneratedLoad32 =
7370       GeneratedStoreInst->getNextNonDebugInstruction();
7371   EXPECT_EQ(GeneratedLoad32, InternalLoad32);
7372   Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction();
7373   EXPECT_EQ(GeneratedLoad128, InternalLoad128);
7374 
7375   // Checking the ordering because of the if statements and that
7376   // `__kmp_end_taskgroup` call is after the if branching.
7377   BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(),
7378                             ThenTerm->getSuccessor(0),
7379                             EndTaskgroupCall->getParent(),
7380                             ElseTerm->getParent()};
7381   verifyDFSOrder(F, RefOrder);
7382 }
7383 
7384 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
7385   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7386   OpenMPIRBuilder OMPBuilder(*M);
7387   OMPBuilder.Config.IsTargetDevice = false;
7388   OMPBuilder.initialize();
7389   F->setName("func");
7390   IRBuilder<> Builder(BB);
7391 
7392   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7393     Builder.restoreIP(AllocaIP);
7394     AllocaInst *Alloca32 =
7395         Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
7396     AllocaInst *Alloca64 =
7397         Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
7398     Builder.restoreIP(CodeGenIP);
7399     auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7400       Builder.restoreIP(CodeGenIP);
7401       LoadInst *LoadValue =
7402           Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
7403       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64));
7404       Builder.CreateStore(AddInst, Alloca64);
7405       return Error::success();
7406     };
7407     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7408     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1,
7409                          OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
7410     Builder.restoreIP(TaskIP1);
7411 
7412     auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7413       Builder.restoreIP(CodeGenIP);
7414       LoadInst *LoadValue =
7415           Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
7416       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32));
7417       Builder.CreateStore(AddInst, Alloca32);
7418       return Error::success();
7419     };
7420     OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
7421     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2,
7422                          OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
7423     Builder.restoreIP(TaskIP2);
7424   };
7425 
7426   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7427   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7428   OpenMPIRBuilder::LocationDescription Loc(
7429       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7430   ASSERT_EXPECTED_INIT(
7431       OpenMPIRBuilder::InsertPointTy, AfterIP,
7432       OMPBuilder.createTaskgroup(
7433           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7434           BODYGENCB_WRAPPER(BodyGenCB)));
7435   Builder.restoreIP(AfterIP);
7436   OMPBuilder.finalize();
7437   Builder.CreateRetVoid();
7438 
7439   EXPECT_FALSE(verifyModule(*M, &errs()));
7440 
7441   CallInst *TaskgroupCall = dyn_cast<CallInst>(
7442       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
7443           ->user_back());
7444   ASSERT_NE(TaskgroupCall, nullptr);
7445   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
7446       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
7447           ->user_back());
7448   ASSERT_NE(EndTaskgroupCall, nullptr);
7449 
7450   Function *TaskAllocFn =
7451       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
7452   ASSERT_EQ(TaskAllocFn->getNumUses(), 2u);
7453 
7454   CallInst *FirstTaskAllocCall =
7455       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin());
7456   CallInst *SecondTaskAllocCall =
7457       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++);
7458   ASSERT_NE(FirstTaskAllocCall, nullptr);
7459   ASSERT_NE(SecondTaskAllocCall, nullptr);
7460 
7461   // Verify that the tasks have been generated in order and inside taskgroup
7462   // construct.
7463   BasicBlock *RefOrder[] = {
7464       TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(),
7465       SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()};
7466   verifyDFSOrder(F, RefOrder);
7467 }
7468 
7469 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
7470   OpenMPIRBuilder OMPBuilder(*M);
7471   OMPBuilder.initialize();
7472 
7473   IRBuilder<> Builder(BB);
7474 
7475   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
7476   OpenMPIRBuilder::TargetDataInfo Info(true, false);
7477 
7478   auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext());
7479   auto Int64PtrTy = PointerType::getUnqual(Builder.getContext());
7480 
7481   Info.RTArgs.BasePointersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7482   Info.RTArgs.PointersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7483   Info.RTArgs.SizesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7484   Info.RTArgs.MapTypesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7485   Info.RTArgs.MapNamesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7486   Info.RTArgs.MappersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7487   Info.NumberOfPtrs = 4;
7488   Info.EmitDebug = false;
7489   OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
7490 
7491   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
7492   EXPECT_NE(RTArgs.PointersArray, nullptr);
7493   EXPECT_NE(RTArgs.SizesArray, nullptr);
7494   EXPECT_NE(RTArgs.MapTypesArray, nullptr);
7495   EXPECT_NE(RTArgs.MappersArray, nullptr);
7496   EXPECT_NE(RTArgs.MapNamesArray, nullptr);
7497   EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr);
7498 
7499   EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy);
7500   EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy);
7501   EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy);
7502   EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy);
7503   EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy);
7504   EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy);
7505 }
7506 
7507 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
7508   OpenMPIRBuilder OMPBuilder(*M);
7509   OMPBuilder.setConfig(
7510       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
7511   OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager;
7512   TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0);
7513   InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0);
7514   EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo));
7515   InfoManager.initializeDeviceGlobalVarEntryInfo(
7516       "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0);
7517   InfoManager.registerTargetRegionEntryInfo(
7518       EntryInfo, nullptr, nullptr,
7519       OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7520   InfoManager.registerDeviceGlobalVarEntryInfo(
7521       "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
7522       GlobalValue::WeakAnyLinkage);
7523   EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar"));
7524 }
7525 
7526 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they
7527 // call each other (recursively in some cases). The test case test these
7528 // functions by utilising them for host code generation for declare target
7529 // global variables
7530 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) {
7531   OpenMPIRBuilder OMPBuilder(*M);
7532   OMPBuilder.initialize();
7533   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
7534   OMPBuilder.setConfig(Config);
7535 
7536   std::vector<llvm::Triple> TargetTriple;
7537   TargetTriple.emplace_back("amdgcn-amd-amdhsa");
7538 
7539   TargetRegionEntryInfo EntryInfo("", 42, 4711, 17);
7540   std::vector<GlobalVariable *> RefsGathered;
7541 
7542   std::vector<Constant *> Globals;
7543   auto *IntTy = Type::getInt32Ty(Ctx);
7544   for (int I = 0; I < 2; ++I) {
7545     Globals.push_back(M->getOrInsertGlobal(
7546         "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * {
7547           return new GlobalVariable(
7548               *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage,
7549               ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I));
7550         }));
7551   }
7552 
7553   OMPBuilder.registerTargetGlobalVariable(
7554       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
7555       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
7556       EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple,
7557       nullptr, nullptr, Globals[0]->getType(), Globals[0]);
7558 
7559   OMPBuilder.registerTargetGlobalVariable(
7560       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink,
7561       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
7562       EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple,
7563       nullptr, nullptr, Globals[1]->getType(), Globals[1]);
7564 
7565   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn =
7566       [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
7567          const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
7568     // If this is invoked, then we want to emit an error, even if it is not
7569     // neccesarily the most readable, as something has went wrong. The
7570     // test-suite unfortunately eats up all error output
7571     ASSERT_EQ(Kind, Kind);
7572   };
7573 
7574   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn);
7575 
7576   // Clauses for data_int_0 with To + Any clauses for the host
7577   std::vector<GlobalVariable *> OffloadEntries;
7578   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name"));
7579   OffloadEntries.push_back(
7580       M->getNamedGlobal(".offloading.entry.test_data_int_0"));
7581 
7582   // Clauses for data_int_1 with Link + Any clauses for the host
7583   OffloadEntries.push_back(
7584       M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr"));
7585   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1"));
7586   OffloadEntries.push_back(
7587       M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr"));
7588 
7589   for (unsigned I = 0; I < OffloadEntries.size(); ++I)
7590     EXPECT_NE(OffloadEntries[I], nullptr);
7591 
7592   // Metadata generated for the host offload module
7593   NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info");
7594   ASSERT_THAT(OffloadMetadata, testing::NotNull());
7595   StringRef Nodes[2] = {
7596       cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1))
7597           ->getString(),
7598       cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1))
7599           ->getString()};
7600   EXPECT_THAT(
7601       Nodes, testing::UnorderedElementsAre("test_data_int_0",
7602                                            "test_data_int_1_decl_tgt_ref_ptr"));
7603 }
7604 
7605 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) {
7606   OpenMPIRBuilder OMPBuilder(*M);
7607   OMPBuilder.initialize();
7608   OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true,
7609                                /* IsGPU = */ true,
7610                                /* OpenMPOffloadMandatory = */ false,
7611                                /* HasRequiresReverseOffload = */ false,
7612                                /* HasRequiresUnifiedAddress = */ false,
7613                                /* HasRequiresUnifiedSharedMemory = */ false,
7614                                /* HasRequiresDynamicAllocators = */ false);
7615   OMPBuilder.setConfig(Config);
7616 
7617   FunctionCallee FnTypeAndCallee =
7618       M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx));
7619 
7620   auto *Fn = cast<Function>(FnTypeAndCallee.getCallee());
7621   OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn,
7622                                 /* Size = */ 0,
7623                                 /* Flags = */ 0, GlobalValue::WeakAnyLinkage);
7624 
7625   // Check kernel attributes
7626   EXPECT_TRUE(Fn->hasFnAttribute("kernel"));
7627   EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress));
7628 }
7629 
7630 } // namespace
7631