xref: /llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (revision 81d18ad86419fc612c7071e888d11aa923eaeb8a)
1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
12 #include "llvm/IR/BasicBlock.h"
13 #include "llvm/IR/DIBuilder.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/InstIterator.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/Verifier.h"
20 #include "llvm/Passes/PassBuilder.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Testing/Support/Error.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 #include "gmock/gmock.h"
25 #include "gtest/gtest.h"
26 #include <optional>
27 
28 using namespace llvm;
29 using namespace omp;
30 
// Declares `VarType VarName` and initializes it from `Init`, an expression
// that evaluates to an `Expected<VarType>`. The intermediate result is kept in
// a local named `__Expected<VarName>`. If it holds an error, the failure is
// reported through ASSERT_THAT_EXPECTED, which also leaves the current
// function. The terminating semicolon is supplied at the point of use.
#define ASSERT_EXPECTED_INIT(VarType, VarName, Init)                           \
  auto __Expected##VarName = Init;                                             \
  ASSERT_THAT_EXPECTED(__Expected##VarName, Succeeded());                      \
  VarType VarName = __Expected##VarName.get()
38 
// Variant of ASSERT_EXPECTED_INIT for functions that must return a value: the
// error (if any) is reported non-fatally through EXPECT_THAT_EXPECTED, and the
// enclosing function then returns the expression `Return`. On success,
// `VarType VarName` is initialized from the value held by `Init`.
#define ASSERT_EXPECTED_INIT_RETURN(VarType, VarName, Init, Return)            \
  auto __Expected##VarName = Init;                                             \
  EXPECT_THAT_EXPECTED(__Expected##VarName, Succeeded());                      \
  if (!__Expected##VarName)                                                    \
    return Return;                                                             \
  VarType VarName = __Expected##VarName.get()
47 
// Adapts a finalization callback `cb` that does not return an Error (for
// example because it uses EXPECT*/ASSERT*() macros) into a lambda with an
// Error-returning signature. `cb` must name a variable; it is captured by
// reference, so the wrapper must not outlive it. An `InsertPointTy` alias has
// to be visible where the macro is expanded.
#define FINICB_WRAPPER(cb)                                                     \
  [&cb](InsertPointTy FiniIP) -> Error {                                       \
    cb(FiniIP);                                                                \
    return Error::success();                                                   \
  }
55 
// Adapts a body-generation callback `cb` taking (alloca insertion point,
// code-generation insertion point) and not returning an Error into a lambda
// that always reports success. `cb` is captured by reference.
#define BODYGENCB_WRAPPER(cb)                                                  \
  [&cb](InsertPointTy AllocIP, InsertPointTy BodyIP) -> Error {                \
    cb(AllocIP, BodyIP);                                                       \
    return Error::success();                                                   \
  }
61 
// Adapts a loop-body callback `cb` taking (code-generation insertion point,
// second argument of type `Value *`) and not returning an Error into a lambda
// that always reports success. `cb` is captured by reference.
#define LOOP_BODYGENCB_WRAPPER(cb)                                             \
  [&cb](InsertPointTy BodyIP, Value *IndVar) -> Error {                        \
    cb(BodyIP, IndVar);                                                        \
    return Error::success();                                                   \
  }
67 
68 namespace {
69 
70 /// Create an instruction that uses the values in \p Values. We use "printf"
71 /// just because it is often used for this purpose in test code, but it is never
72 /// executed here.
73 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
74                                   ArrayRef<Value *> Values) {
75   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
76 
77   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
78   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
79   Constant *Indices[] = {Zero, Zero};
80   Constant *FormatStrConst =
81       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
82 
83   Function *PrintfDecl = M->getFunction("printf");
84   if (!PrintfDecl) {
85     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
86     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
87     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
88   }
89 
90   SmallVector<Value *, 4> Args;
91   Args.push_back(FormatStrConst);
92   Args.append(Values.begin(), Values.end());
93   return Builder.CreateCall(PrintfDecl, Args);
94 }
95 
96 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
97 /// order the control flow of \p F.
98 ///
99 /// This is an easy way to verify the branching structure of the CFG without
100 /// checking every branch instruction individually. For the CFG of a
101 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
102 /// the body, i.e. the DFS order corresponds to the execution order with one
103 /// loop iteration.
104 static testing::AssertionResult
105 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
106   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
107   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
108 
109   df_iterator_default_set<BasicBlock *, 16> Visited;
110   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
111 
112   BasicBlock *Prev = nullptr;
113   for (BasicBlock *BB : DFS) {
114     if (It != E && BB == *It) {
115       Prev = *It;
116       ++It;
117     }
118   }
119 
120   if (It == E)
121     return testing::AssertionSuccess();
122   if (!Prev)
123     return testing::AssertionFailure()
124            << "Did not find " << (*It)->getName() << " in control flow";
125   return testing::AssertionFailure()
126          << "Expected " << Prev->getName() << " before " << (*It)->getName()
127          << " in control flow";
128 }
129 
130 /// Verify that blocks in \p RefOrder are in the same relative order in the
131 /// linked lists of blocks in \p F. The linked list may contain additional
132 /// blocks in-between.
133 ///
134 /// While the order in the linked list is not relevant for semantics, keeping
135 /// the order roughly in execution order makes its printout easier to read.
136 static testing::AssertionResult
137 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
138   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
139   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
140 
141   BasicBlock *Prev = nullptr;
142   for (BasicBlock &BB : *F) {
143     if (It != E && &BB == *It) {
144       Prev = *It;
145       ++It;
146     }
147   }
148 
149   if (It == E)
150     return testing::AssertionSuccess();
151   if (!Prev)
152     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
153                                        << " in function " << F->getName();
154   return testing::AssertionFailure()
155          << "Expected " << Prev->getName() << " before " << (*It)->getName()
156          << " in function " << F->getName();
157 }
158 
159 /// Populate Calls with call instructions calling the function with the given
160 /// FnID from the given function F.
161 static void findCalls(Function *F, omp::RuntimeFunction FnID,
162                       OpenMPIRBuilder &OMPBuilder,
163                       SmallVectorImpl<CallInst *> &Calls) {
164   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
165   for (BasicBlock &BB : *F) {
166     for (Instruction &I : BB) {
167       auto *Call = dyn_cast<CallInst>(&I);
168       if (Call && Call->getCalledFunction() == Fn)
169         Calls.push_back(Call);
170     }
171   }
172 }
173 
174 /// Assuming \p F contains only one call to the function with the given \p FnID,
175 /// return that call.
176 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
177                                 OpenMPIRBuilder &OMPBuilder) {
178   SmallVector<CallInst *, 1> Calls;
179   findCalls(F, FnID, OMPBuilder, Calls);
180   EXPECT_EQ(1u, Calls.size());
181   if (Calls.size() != 1)
182     return nullptr;
183   return Calls.front();
184 }
185 
186 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
187   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
188   case omp::OMPScheduleType::BaseDynamicChunked:
189     return omp::OMP_SCHEDULE_Dynamic;
190   case omp::OMPScheduleType::BaseGuidedChunked:
191     return omp::OMP_SCHEDULE_Guided;
192   case omp::OMPScheduleType::BaseAuto:
193     return omp::OMP_SCHEDULE_Auto;
194   case omp::OMPScheduleType::BaseRuntime:
195     return omp::OMP_SCHEDULE_Runtime;
196   default:
197     llvm_unreachable("unknown type for this test");
198   }
199 }
200 
201 class OpenMPIRBuilderTest : public testing::Test {
202 protected:
203   void SetUp() override {
204     M.reset(new Module("MyModule", Ctx));
205     FunctionType *FTy =
206         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
207                           /*isVarArg=*/false);
208     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
209     BB = BasicBlock::Create(Ctx, "", F);
210 
211     DIBuilder DIB(*M);
212     auto File = DIB.createFile("test.dbg", "/src", std::nullopt,
213                                std::optional<StringRef>("/src/test.dbg"));
214     auto CU =
215         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
216     auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray({}));
217     auto SP = DIB.createFunction(
218         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
219         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
220     F->setSubprogram(SP);
221     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
222     DIB.finalize();
223     DL = DILocation::get(Ctx, 3, 7, Scope);
224   }
225 
226   void TearDown() override {
227     BB = nullptr;
228     M.reset();
229   }
230 
231   /// Create a function with a simple loop that calls printf using the logical
232   /// loop counter for use with tests that need a CanonicalLoopInfo object.
233   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
234                                              OpenMPIRBuilder &OMPBuilder,
235                                              int UseIVBits,
236                                              CallInst **Call = nullptr,
237                                              BasicBlock **BodyCode = nullptr) {
238     OMPBuilder.initialize();
239     F->setName("func");
240 
241     IRBuilder<> Builder(BB);
242     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
243     Value *TripCount = F->getArg(0);
244 
245     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
246     Value *CastedTripCount =
247         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
248 
249     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
250                              llvm::Value *LC) {
251       Builder.restoreIP(CodeGenIP);
252       if (BodyCode)
253         *BodyCode = Builder.GetInsertBlock();
254 
255       // Add something that consumes the induction variable to the body.
256       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
257       if (Call)
258         *Call = CallInst;
259 
260       return Error::success();
261     };
262 
263     ASSERT_EXPECTED_INIT_RETURN(
264         CanonicalLoopInfo *, Loop,
265         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount),
266         nullptr);
267 
268     // Finalize the function.
269     Builder.restoreIP(Loop->getAfterIP());
270     Builder.CreateRetVoid();
271 
272     return Loop;
273   }
274 
275   LLVMContext Ctx;
276   std::unique_ptr<Module> M;
277   Function *F;
278   BasicBlock *BB;
279   DebugLoc DL;
280 };
281 
282 class OpenMPIRBuilderTestWithParams
283     : public OpenMPIRBuilderTest,
284       public ::testing::WithParamInterface<omp::OMPScheduleType> {};
285 
286 class OpenMPIRBuilderTestWithIVBits
287     : public OpenMPIRBuilderTest,
288       public ::testing::WithParamInterface<int> {};
289 
290 // Returns the value stored in the given allocation. Returns null if the given
291 // value is not a result of an InstTy instruction, if no value is stored or if
292 // there is more than one store.
293 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
294   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
295   if (!Inst)
296     return nullptr;
297   StoreInst *Store = nullptr;
298   for (Use &U : Inst->uses()) {
299     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
300       EXPECT_EQ(Store, nullptr);
301       Store = CandidateStore;
302     }
303   }
304   if (!Store)
305     return nullptr;
306   return Store->getValueOperand();
307 }
308 
309 // Returns the value stored in the aggregate argument of an outlined function,
310 // or nullptr if it is not found.
311 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
312                                            unsigned Idx) {
313   GetElementPtrInst *GEPAtIdx = nullptr;
314   // Find GEP instruction at that index.
315   for (User *Usr : Aggregate->users()) {
316     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
317     if (!GEP)
318       continue;
319 
320     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
321       continue;
322 
323     EXPECT_EQ(GEPAtIdx, nullptr);
324     GEPAtIdx = GEP;
325   }
326 
327   EXPECT_NE(GEPAtIdx, nullptr);
328   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
329 
330   // Find the value stored to the aggregate.
331   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
332   Value *StoredAggValue = StoreToAgg->getValueOperand();
333 
334   Value *StoredValue = nullptr;
335 
336   // Find the value stored to the value stored in the aggregate.
337   for (User *Usr : StoredAggValue->users()) {
338     StoreInst *Store = dyn_cast<StoreInst>(Usr);
339     if (!Store)
340       continue;
341 
342     if (Store->getPointerOperand() != StoredAggValue)
343       continue;
344 
345     EXPECT_EQ(StoredValue, nullptr);
346     StoredValue = Store->getValueOperand();
347   }
348 
349   return StoredValue;
350 }
351 
352 // Returns the aggregate that the value is originating from.
353 static Value *findAggregateFromValue(Value *V) {
354   // Expects a load instruction that loads from the aggregate.
355   LoadInst *Load = dyn_cast<LoadInst>(V);
356   EXPECT_NE(Load, nullptr);
357   // Find the GEP instruction used in the load instruction.
358   GetElementPtrInst *GEP =
359       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
360   EXPECT_NE(GEP, nullptr);
361   // Find the aggregate used in the GEP instruction.
362   Value *Aggregate = GEP->getPointerOperand();
363 
364   return Aggregate;
365 }
366 
367 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
368   OpenMPIRBuilder OMPBuilder(*M);
369   OMPBuilder.initialize();
370 
371   IRBuilder<> Builder(BB);
372 
373   ASSERT_THAT_EXPECTED(
374       OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for),
375       Succeeded());
376   EXPECT_TRUE(M->global_empty());
377   EXPECT_EQ(M->size(), 1U);
378   EXPECT_EQ(F->size(), 1U);
379   EXPECT_EQ(BB->size(), 0U);
380 
381   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
382   ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded());
383   EXPECT_FALSE(M->global_empty());
384   EXPECT_EQ(M->size(), 3U);
385   EXPECT_EQ(F->size(), 1U);
386   EXPECT_EQ(BB->size(), 2U);
387 
388   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
389   EXPECT_NE(GTID, nullptr);
390   EXPECT_EQ(GTID->arg_size(), 1U);
391   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
392   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
393   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
394 
395   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
396   EXPECT_NE(Barrier, nullptr);
397   EXPECT_EQ(Barrier->arg_size(), 2U);
398   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
399   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
400   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
401 
402   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
403 
404   Builder.CreateUnreachable();
405   EXPECT_FALSE(verifyModule(*M, &errs()));
406 }
407 
408 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
409   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
410   OpenMPIRBuilder OMPBuilder(*M);
411   OMPBuilder.initialize();
412 
413   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
414   new UnreachableInst(Ctx, CBB);
415   auto FiniCB = [&](InsertPointTy IP) {
416     ASSERT_NE(IP.getBlock(), nullptr);
417     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
418     BranchInst::Create(CBB, IP.getBlock());
419   };
420   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
421 
422   IRBuilder<> Builder(BB);
423 
424   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
425   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP,
426                        OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel));
427   Builder.restoreIP(NewIP);
428   EXPECT_FALSE(M->global_empty());
429   EXPECT_EQ(M->size(), 4U);
430   EXPECT_EQ(F->size(), 4U);
431   EXPECT_EQ(BB->size(), 4U);
432 
433   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
434   EXPECT_NE(GTID, nullptr);
435   EXPECT_EQ(GTID->arg_size(), 1U);
436   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
437   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
438   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
439 
440   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
441   EXPECT_NE(Cancel, nullptr);
442   EXPECT_EQ(Cancel->arg_size(), 3U);
443   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
444   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
445   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
446   EXPECT_EQ(Cancel->getNumUses(), 1U);
447   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
448   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
449   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
450   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
451   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
452   EXPECT_NE(GTID1, nullptr);
453   EXPECT_EQ(GTID1->arg_size(), 1U);
454   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
455   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
456   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
457   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
458   EXPECT_NE(Barrier, nullptr);
459   EXPECT_EQ(Barrier->arg_size(), 2U);
460   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
461   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
462   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
463   EXPECT_EQ(Barrier->getNumUses(), 0U);
464   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
465             1U);
466   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
467 
468   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
469 
470   OMPBuilder.popFinalizationCB();
471 
472   Builder.CreateUnreachable();
473   EXPECT_FALSE(verifyModule(*M, &errs()));
474 }
475 
476 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
477   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
478   OpenMPIRBuilder OMPBuilder(*M);
479   OMPBuilder.initialize();
480 
481   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
482   new UnreachableInst(Ctx, CBB);
483   auto FiniCB = [&](InsertPointTy IP) {
484     ASSERT_NE(IP.getBlock(), nullptr);
485     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
486     BranchInst::Create(CBB, IP.getBlock());
487   };
488   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
489 
490   IRBuilder<> Builder(BB);
491 
492   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
493   ASSERT_EXPECTED_INIT(
494       OpenMPIRBuilder::InsertPointTy, NewIP,
495       OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel));
496   Builder.restoreIP(NewIP);
497   EXPECT_FALSE(M->global_empty());
498   EXPECT_EQ(M->size(), 4U);
499   EXPECT_EQ(F->size(), 7U);
500   EXPECT_EQ(BB->size(), 1U);
501   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
502   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
503   BB = BB->getTerminator()->getSuccessor(0);
504   EXPECT_EQ(BB->size(), 4U);
505 
506   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
507   EXPECT_NE(GTID, nullptr);
508   EXPECT_EQ(GTID->arg_size(), 1U);
509   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
510   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
511   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
512 
513   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
514   EXPECT_NE(Cancel, nullptr);
515   EXPECT_EQ(Cancel->arg_size(), 3U);
516   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
517   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
518   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
519   EXPECT_EQ(Cancel->getNumUses(), 1U);
520   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
521   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
522   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
523   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
524             NewIP.getBlock());
525   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
526   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
527   EXPECT_NE(GTID1, nullptr);
528   EXPECT_EQ(GTID1->arg_size(), 1U);
529   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
530   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
531   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
532   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
533   EXPECT_NE(Barrier, nullptr);
534   EXPECT_EQ(Barrier->arg_size(), 2U);
535   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
536   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
537   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
538   EXPECT_EQ(Barrier->getNumUses(), 0U);
539   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
540             1U);
541   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
542 
543   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
544 
545   OMPBuilder.popFinalizationCB();
546 
547   Builder.CreateUnreachable();
548   EXPECT_FALSE(verifyModule(*M, &errs()));
549 }
550 
551 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
552   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
553   OpenMPIRBuilder OMPBuilder(*M);
554   OMPBuilder.initialize();
555 
556   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
557   new UnreachableInst(Ctx, CBB);
558   auto FiniCB = [&](InsertPointTy IP) {
559     ASSERT_NE(IP.getBlock(), nullptr);
560     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
561     BranchInst::Create(CBB, IP.getBlock());
562   };
563   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
564 
565   IRBuilder<> Builder(BB);
566 
567   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
568   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP,
569                        OMPBuilder.createBarrier(Loc, OMPD_for));
570   Builder.restoreIP(NewIP);
571   EXPECT_FALSE(M->global_empty());
572   EXPECT_EQ(M->size(), 3U);
573   EXPECT_EQ(F->size(), 4U);
574   EXPECT_EQ(BB->size(), 4U);
575 
576   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
577   EXPECT_NE(GTID, nullptr);
578   EXPECT_EQ(GTID->arg_size(), 1U);
579   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
580   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
581   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
582 
583   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
584   EXPECT_NE(Barrier, nullptr);
585   EXPECT_EQ(Barrier->arg_size(), 2U);
586   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
587   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
588   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
589   EXPECT_EQ(Barrier->getNumUses(), 1U);
590   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
591   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
592   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
593   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
594   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
595             1U);
596   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
597             CBB);
598 
599   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
600 
601   OMPBuilder.popFinalizationCB();
602 
603   Builder.CreateUnreachable();
604   EXPECT_FALSE(verifyModule(*M, &errs()));
605 }
606 
607 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
608   OpenMPIRBuilder OMPBuilder(*M);
609   OMPBuilder.initialize();
610   F->setName("func");
611 
612   IRBuilder<> Builder(BB);
613 
614   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
615   ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded());
616   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
617   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
618   EXPECT_EQ(GTID->getDebugLoc(), DL);
619   EXPECT_EQ(Barrier->getDebugLoc(), DL);
620   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
621   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
622     return;
623   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
624   EXPECT_TRUE(Ident->hasInitializer());
625   if (!Ident->hasInitializer())
626     return;
627   Constant *Initializer = Ident->getInitializer();
628   EXPECT_TRUE(
629       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
630   GlobalVariable *SrcStrGlob =
631       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
632   if (!SrcStrGlob)
633     return;
634   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
635   ConstantDataArray *SrcSrc =
636       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
637   if (!SrcSrc)
638     return;
639   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
640 }
641 
642 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
643   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
644   std::string oldDLStr = M->getDataLayoutStr();
645   M->setDataLayout(
646       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
647       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
648       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
649   OpenMPIRBuilder OMPBuilder(*M);
650   OMPBuilder.Config.IsTargetDevice = true;
651   OMPBuilder.initialize();
652   F->setName("func");
653   IRBuilder<> Builder(BB);
654   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
655   Builder.CreateBr(EnterBB);
656   Builder.SetInsertPoint(EnterBB);
657   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
658 
659   AllocaInst *PrivAI = nullptr;
660 
661   unsigned NumBodiesGenerated = 0;
662   unsigned NumPrivatizedVars = 0;
663   unsigned NumFinalizationPoints = 0;
664 
665   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
666     ++NumBodiesGenerated;
667 
668     Builder.restoreIP(AllocaIP);
669     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
670     Builder.CreateStore(F->arg_begin(), PrivAI);
671 
672     Builder.restoreIP(CodeGenIP);
673     Value *PrivLoad =
674         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
675     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
676     Instruction *ThenTerm, *ElseTerm;
677     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
678                                   &ThenTerm, &ElseTerm);
679     return Error::success();
680   };
681 
682   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
683                     Value &Orig, Value &Inner,
684                     Value *&ReplacementValue) -> InsertPointTy {
685     ++NumPrivatizedVars;
686 
687     if (!isa<AllocaInst>(Orig)) {
688       EXPECT_EQ(&Orig, F->arg_begin());
689       ReplacementValue = &Inner;
690       return CodeGenIP;
691     }
692 
693     // Since the original value is an allocation, it has a pointer type and
694     // therefore no additional wrapping should happen.
695     EXPECT_EQ(&Orig, &Inner);
696 
697     // Trivial copy (=firstprivate).
698     Builder.restoreIP(AllocaIP);
699     Type *VTy = ReplacementValue->getType();
700     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
701     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
702     Builder.restoreIP(CodeGenIP);
703     Builder.CreateStore(V, ReplacementValue);
704     return CodeGenIP;
705   };
706 
707   auto FiniCB = [&](InsertPointTy CodeGenIP) {
708     ++NumFinalizationPoints;
709     return Error::success();
710   };
711 
712   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
713                                     F->getEntryBlock().getFirstInsertionPt());
714   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
715                        OMPBuilder.createParallel(
716                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
717                            nullptr, OMP_PROC_BIND_default, false));
718 
719   EXPECT_EQ(NumBodiesGenerated, 1U);
720   EXPECT_EQ(NumPrivatizedVars, 1U);
721   EXPECT_EQ(NumFinalizationPoints, 1U);
722 
723   Builder.restoreIP(AfterIP);
724   Builder.CreateRetVoid();
725 
726   OMPBuilder.finalize();
727   Function *OutlinedFn = PrivAI->getFunction();
728   EXPECT_FALSE(verifyModule(*M, &errs()));
729   EXPECT_NE(OutlinedFn, F);
730   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
731   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
732   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
733 
734   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
735   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
736   // Make sure that arguments are pointers in 0 address address space
737   EXPECT_EQ(OutlinedFn->getArg(0)->getType(),
738             PointerType::get(M->getContext(), 0));
739   EXPECT_EQ(OutlinedFn->getArg(1)->getType(),
740             PointerType::get(M->getContext(), 0));
741   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
742             PointerType::get(M->getContext(), 0));
743   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
744   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
745   User *Usr = OutlinedFn->user_back();
746   ASSERT_TRUE(isa<CallInst>(Usr));
747   CallInst *Parallel51CI = dyn_cast<CallInst>(Usr);
748   ASSERT_NE(Parallel51CI, nullptr);
749 
750   EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51");
751   EXPECT_EQ(Parallel51CI->arg_size(), 9U);
752   EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn);
753   EXPECT_TRUE(
754       isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts()));
755   EXPECT_EQ(Parallel51CI, Usr);
756   M->setDataLayout(oldDLStr);
757 }
758 
759 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
760   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
761   OpenMPIRBuilder OMPBuilder(*M);
762   OMPBuilder.Config.IsTargetDevice = false;
763   OMPBuilder.initialize();
764   F->setName("func");
765   IRBuilder<> Builder(BB);
766 
767   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
768   Builder.CreateBr(EnterBB);
769   Builder.SetInsertPoint(EnterBB);
770   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
771 
772   AllocaInst *PrivAI = nullptr;
773 
774   unsigned NumBodiesGenerated = 0;
775   unsigned NumPrivatizedVars = 0;
776   unsigned NumFinalizationPoints = 0;
777 
778   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
779     ++NumBodiesGenerated;
780 
781     Builder.restoreIP(AllocaIP);
782     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
783     Builder.CreateStore(F->arg_begin(), PrivAI);
784 
785     Builder.restoreIP(CodeGenIP);
786     Value *PrivLoad =
787         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
788     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
789     Instruction *ThenTerm, *ElseTerm;
790     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
791                                   &ThenTerm, &ElseTerm);
792     return Error::success();
793   };
794 
795   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
796                     Value &Orig, Value &Inner,
797                     Value *&ReplacementValue) -> InsertPointTy {
798     ++NumPrivatizedVars;
799 
800     if (!isa<AllocaInst>(Orig)) {
801       EXPECT_EQ(&Orig, F->arg_begin());
802       ReplacementValue = &Inner;
803       return CodeGenIP;
804     }
805 
806     // Since the original value is an allocation, it has a pointer type and
807     // therefore no additional wrapping should happen.
808     EXPECT_EQ(&Orig, &Inner);
809 
810     // Trivial copy (=firstprivate).
811     Builder.restoreIP(AllocaIP);
812     Type *VTy = ReplacementValue->getType();
813     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
814     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
815     Builder.restoreIP(CodeGenIP);
816     Builder.CreateStore(V, ReplacementValue);
817     return CodeGenIP;
818   };
819 
820   auto FiniCB = [&](InsertPointTy CodeGenIP) {
821     ++NumFinalizationPoints;
822     return Error::success();
823   };
824 
825   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
826                                     F->getEntryBlock().getFirstInsertionPt());
827   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
828                        OMPBuilder.createParallel(
829                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
830                            nullptr, OMP_PROC_BIND_default, false));
831   EXPECT_EQ(NumBodiesGenerated, 1U);
832   EXPECT_EQ(NumPrivatizedVars, 1U);
833   EXPECT_EQ(NumFinalizationPoints, 1U);
834 
835   Builder.restoreIP(AfterIP);
836   Builder.CreateRetVoid();
837 
838   OMPBuilder.finalize();
839 
840   EXPECT_NE(PrivAI, nullptr);
841   Function *OutlinedFn = PrivAI->getFunction();
842   EXPECT_NE(F, OutlinedFn);
843   EXPECT_FALSE(verifyModule(*M, &errs()));
844   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
845   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
846   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
847 
848   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
849   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
850 
851   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
852   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
853   User *Usr = OutlinedFn->user_back();
854   ASSERT_TRUE(isa<CallInst>(Usr));
855   CallInst *ForkCI = dyn_cast<CallInst>(Usr);
856   ASSERT_NE(ForkCI, nullptr);
857 
858   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
859   EXPECT_EQ(ForkCI->arg_size(), 4U);
860   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
861   EXPECT_EQ(ForkCI->getArgOperand(1),
862             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
863   EXPECT_EQ(ForkCI, Usr);
864   Value *StoredValue =
865       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
866   EXPECT_EQ(StoredValue, F->arg_begin());
867 }
868 
869 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
870   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
871   OpenMPIRBuilder OMPBuilder(*M);
872   OMPBuilder.Config.IsTargetDevice = false;
873   OMPBuilder.initialize();
874   F->setName("func");
875   IRBuilder<> Builder(BB);
876 
877   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
878   Builder.CreateBr(EnterBB);
879   Builder.SetInsertPoint(EnterBB);
880   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
881 
882   unsigned NumInnerBodiesGenerated = 0;
883   unsigned NumOuterBodiesGenerated = 0;
884   unsigned NumFinalizationPoints = 0;
885 
886   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
887     ++NumInnerBodiesGenerated;
888     return Error::success();
889   };
890 
891   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
892                     Value &Orig, Value &Inner,
893                     Value *&ReplacementValue) -> InsertPointTy {
894     // Trivial copy (=firstprivate).
895     Builder.restoreIP(AllocaIP);
896     Type *VTy = ReplacementValue->getType();
897     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
898     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
899     Builder.restoreIP(CodeGenIP);
900     Builder.CreateStore(V, ReplacementValue);
901     return CodeGenIP;
902   };
903 
904   auto FiniCB = [&](InsertPointTy CodeGenIP) {
905     ++NumFinalizationPoints;
906     return Error::success();
907   };
908 
909   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
910     ++NumOuterBodiesGenerated;
911     Builder.restoreIP(CodeGenIP);
912     BasicBlock *CGBB = CodeGenIP.getBlock();
913     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
914     CGBB->getTerminator()->eraseFromParent();
915 
916     ASSERT_EXPECTED_INIT(
917         OpenMPIRBuilder::InsertPointTy, AfterIP,
918         OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
919                                   InnerBodyGenCB, PrivCB, FiniCB, nullptr,
920                                   nullptr, OMP_PROC_BIND_default, false));
921 
922     Builder.restoreIP(AfterIP);
923     Builder.CreateBr(NewBB);
924   };
925 
926   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
927                                     F->getEntryBlock().getFirstInsertionPt());
928   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
929                        OMPBuilder.createParallel(
930                            Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
931                            PrivCB, FiniCB, nullptr, nullptr,
932                            OMP_PROC_BIND_default, false));
933 
934   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
935   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
936   EXPECT_EQ(NumFinalizationPoints, 2U);
937 
938   Builder.restoreIP(AfterIP);
939   Builder.CreateRetVoid();
940 
941   OMPBuilder.finalize();
942 
943   EXPECT_EQ(M->size(), 5U);
944   for (Function &OutlinedFn : *M) {
945     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
946       continue;
947     EXPECT_FALSE(verifyModule(*M, &errs()));
948     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
949     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
950     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
951 
952     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
953     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
954 
955     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
956     User *Usr = OutlinedFn.user_back();
957     ASSERT_TRUE(isa<CallInst>(Usr));
958     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
959     ASSERT_NE(ForkCI, nullptr);
960 
961     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
962     EXPECT_EQ(ForkCI->arg_size(), 3U);
963     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
964     EXPECT_EQ(ForkCI->getArgOperand(1),
965               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
966     EXPECT_EQ(ForkCI, Usr);
967   }
968 }
969 
970 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
971   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
972   OpenMPIRBuilder OMPBuilder(*M);
973   OMPBuilder.Config.IsTargetDevice = false;
974   OMPBuilder.initialize();
975   F->setName("func");
976   IRBuilder<> Builder(BB);
977 
978   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
979   Builder.CreateBr(EnterBB);
980   Builder.SetInsertPoint(EnterBB);
981   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
982 
983   unsigned NumInnerBodiesGenerated = 0;
984   unsigned NumOuterBodiesGenerated = 0;
985   unsigned NumFinalizationPoints = 0;
986 
987   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
988     ++NumInnerBodiesGenerated;
989     return Error::success();
990   };
991 
992   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
993                     Value &Orig, Value &Inner,
994                     Value *&ReplacementValue) -> InsertPointTy {
995     // Trivial copy (=firstprivate).
996     Builder.restoreIP(AllocaIP);
997     Type *VTy = ReplacementValue->getType();
998     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
999     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1000     Builder.restoreIP(CodeGenIP);
1001     Builder.CreateStore(V, ReplacementValue);
1002     return CodeGenIP;
1003   };
1004 
1005   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1006     ++NumFinalizationPoints;
1007     return Error::success();
1008   };
1009 
1010   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1011     ++NumOuterBodiesGenerated;
1012     Builder.restoreIP(CodeGenIP);
1013     BasicBlock *CGBB = CodeGenIP.getBlock();
1014     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
1015     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
1016     CGBB->getTerminator()->eraseFromParent();
1017     ;
1018     NewBB1->getTerminator()->eraseFromParent();
1019     ;
1020 
1021     ASSERT_EXPECTED_INIT(
1022         OpenMPIRBuilder::InsertPointTy, AfterIP1,
1023         OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
1024                                   InnerBodyGenCB, PrivCB, FiniCB, nullptr,
1025                                   nullptr, OMP_PROC_BIND_default, false));
1026 
1027     Builder.restoreIP(AfterIP1);
1028     Builder.CreateBr(NewBB1);
1029 
1030     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2,
1031                          OMPBuilder.createParallel(
1032                              InsertPointTy(NewBB1, NewBB1->end()), AllocaIP,
1033                              InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1034                              OMP_PROC_BIND_default, false));
1035 
1036     Builder.restoreIP(AfterIP2);
1037     Builder.CreateBr(NewBB2);
1038   };
1039 
1040   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1041                                     F->getEntryBlock().getFirstInsertionPt());
1042   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1043                        OMPBuilder.createParallel(
1044                            Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
1045                            PrivCB, FiniCB, nullptr, nullptr,
1046                            OMP_PROC_BIND_default, false));
1047 
1048   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
1049   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
1050   EXPECT_EQ(NumFinalizationPoints, 3U);
1051 
1052   Builder.restoreIP(AfterIP);
1053   Builder.CreateRetVoid();
1054 
1055   OMPBuilder.finalize();
1056 
1057   EXPECT_EQ(M->size(), 6U);
1058   for (Function &OutlinedFn : *M) {
1059     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
1060       continue;
1061     EXPECT_FALSE(verifyModule(*M, &errs()));
1062     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
1063     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
1064     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
1065 
1066     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
1067     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
1068 
1069     unsigned NumAllocas = 0;
1070     for (Instruction &I : instructions(OutlinedFn))
1071       NumAllocas += isa<AllocaInst>(I);
1072     EXPECT_EQ(NumAllocas, 1U);
1073 
1074     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
1075     User *Usr = OutlinedFn.user_back();
1076     ASSERT_TRUE(isa<CallInst>(Usr));
1077     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
1078     ASSERT_NE(ForkCI, nullptr);
1079 
1080     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1081     EXPECT_EQ(ForkCI->arg_size(), 3U);
1082     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1083     EXPECT_EQ(ForkCI->getArgOperand(1),
1084               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
1085     EXPECT_EQ(ForkCI, Usr);
1086   }
1087 }
1088 
1089 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
1090   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1091   OpenMPIRBuilder OMPBuilder(*M);
1092   OMPBuilder.Config.IsTargetDevice = false;
1093   OMPBuilder.initialize();
1094   F->setName("func");
1095   IRBuilder<> Builder(BB);
1096 
1097   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1098   Builder.CreateBr(EnterBB);
1099   Builder.SetInsertPoint(EnterBB);
1100   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1101 
1102   AllocaInst *PrivAI = nullptr;
1103 
1104   unsigned NumBodiesGenerated = 0;
1105   unsigned NumPrivatizedVars = 0;
1106   unsigned NumFinalizationPoints = 0;
1107 
1108   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1109     ++NumBodiesGenerated;
1110 
1111     Builder.restoreIP(AllocaIP);
1112     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
1113     Builder.CreateStore(F->arg_begin(), PrivAI);
1114 
1115     Builder.restoreIP(CodeGenIP);
1116     Value *PrivLoad =
1117         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
1118     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
1119     Instruction *ThenTerm, *ElseTerm;
1120     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
1121                                   &ElseTerm);
1122     return Error::success();
1123   };
1124 
1125   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1126                     Value &Orig, Value &Inner,
1127                     Value *&ReplacementValue) -> InsertPointTy {
1128     ++NumPrivatizedVars;
1129 
1130     if (!isa<AllocaInst>(Orig)) {
1131       EXPECT_EQ(&Orig, F->arg_begin());
1132       ReplacementValue = &Inner;
1133       return CodeGenIP;
1134     }
1135 
1136     // Since the original value is an allocation, it has a pointer type and
1137     // therefore no additional wrapping should happen.
1138     EXPECT_EQ(&Orig, &Inner);
1139 
1140     // Trivial copy (=firstprivate).
1141     Builder.restoreIP(AllocaIP);
1142     Type *VTy = ReplacementValue->getType();
1143     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
1144     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1145     Builder.restoreIP(CodeGenIP);
1146     Builder.CreateStore(V, ReplacementValue);
1147     return CodeGenIP;
1148   };
1149 
1150   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1151     ++NumFinalizationPoints;
1152     // No destructors.
1153     return Error::success();
1154   };
1155 
1156   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1157                                     F->getEntryBlock().getFirstInsertionPt());
1158   ASSERT_EXPECTED_INIT(
1159       OpenMPIRBuilder::InsertPointTy, AfterIP,
1160       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1161                                 Builder.CreateIsNotNull(F->arg_begin()),
1162                                 nullptr, OMP_PROC_BIND_default, false));
1163 
1164   EXPECT_EQ(NumBodiesGenerated, 1U);
1165   EXPECT_EQ(NumPrivatizedVars, 1U);
1166   EXPECT_EQ(NumFinalizationPoints, 1U);
1167 
1168   Builder.restoreIP(AfterIP);
1169   Builder.CreateRetVoid();
1170   OMPBuilder.finalize();
1171 
1172   EXPECT_NE(PrivAI, nullptr);
1173   Function *OutlinedFn = PrivAI->getFunction();
1174   EXPECT_NE(F, OutlinedFn);
1175   EXPECT_FALSE(verifyModule(*M, &errs()));
1176 
1177   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
1178   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
1179 
1180   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
1181   ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
1182 
1183   CallInst *ForkCI = nullptr;
1184   for (User *Usr : OutlinedFn->users()) {
1185     ASSERT_TRUE(isa<CallInst>(Usr));
1186     ForkCI = cast<CallInst>(Usr);
1187   }
1188 
1189   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if");
1190   EXPECT_EQ(ForkCI->arg_size(), 5U);
1191   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1192   EXPECT_EQ(ForkCI->getArgOperand(1),
1193             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1194   EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx));
1195 }
1196 
1197 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1198   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1199   OpenMPIRBuilder OMPBuilder(*M);
1200   OMPBuilder.Config.IsTargetDevice = false;
1201   OMPBuilder.initialize();
1202   F->setName("func");
1203   IRBuilder<> Builder(BB);
1204 
1205   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1206   Builder.CreateBr(EnterBB);
1207   Builder.SetInsertPoint(EnterBB);
1208   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1209 
1210   unsigned NumBodiesGenerated = 0;
1211   unsigned NumPrivatizedVars = 0;
1212   unsigned NumFinalizationPoints = 0;
1213 
1214   CallInst *CheckedBarrier = nullptr;
1215   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1216     ++NumBodiesGenerated;
1217 
1218     Builder.restoreIP(CodeGenIP);
1219 
1220     // Create three barriers, two cancel barriers but only one checked.
1221     Function *CBFn, *BFn;
1222 
1223     ASSERT_EXPECTED_INIT(
1224         OpenMPIRBuilder::InsertPointTy, BarrierIP1,
1225         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1226     Builder.restoreIP(BarrierIP1);
1227 
1228     CBFn = M->getFunction("__kmpc_cancel_barrier");
1229     BFn = M->getFunction("__kmpc_barrier");
1230     ASSERT_NE(CBFn, nullptr);
1231     ASSERT_EQ(BFn, nullptr);
1232     ASSERT_EQ(CBFn->getNumUses(), 1U);
1233     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1234     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1235     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1236 
1237     ASSERT_EXPECTED_INIT(
1238         OpenMPIRBuilder::InsertPointTy, BarrierIP2,
1239         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1240     Builder.restoreIP(BarrierIP2);
1241     CBFn = M->getFunction("__kmpc_cancel_barrier");
1242     BFn = M->getFunction("__kmpc_barrier");
1243     ASSERT_NE(CBFn, nullptr);
1244     ASSERT_NE(BFn, nullptr);
1245     ASSERT_EQ(CBFn->getNumUses(), 1U);
1246     ASSERT_EQ(BFn->getNumUses(), 1U);
1247     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1248     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1249 
1250     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, BarrierIP3,
1251                          OMPBuilder.createBarrier(Builder.saveIP(),
1252                                                   OMPD_parallel, false, false));
1253     Builder.restoreIP(BarrierIP3);
1254     ASSERT_EQ(CBFn->getNumUses(), 2U);
1255     ASSERT_EQ(BFn->getNumUses(), 1U);
1256     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1257     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1258     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1259   };
1260 
1261   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1262                     Value *&) -> InsertPointTy {
1263     ++NumPrivatizedVars;
1264     llvm_unreachable("No privatization callback call expected!");
1265   };
1266 
1267   FunctionType *FakeDestructorTy =
1268       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1269                         /*isVarArg=*/false);
1270   auto *FakeDestructor = Function::Create(
1271       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1272 
1273   auto FiniCB = [&](InsertPointTy IP) {
1274     ++NumFinalizationPoints;
1275     Builder.restoreIP(IP);
1276     Builder.CreateCall(FakeDestructor,
1277                        {Builder.getInt32(NumFinalizationPoints)});
1278     return Error::success();
1279   };
1280 
1281   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1282                                     F->getEntryBlock().getFirstInsertionPt());
1283   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1284                        OMPBuilder.createParallel(
1285                            Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB,
1286                            FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
1287                            nullptr, OMP_PROC_BIND_default, true));
1288 
1289   EXPECT_EQ(NumBodiesGenerated, 1U);
1290   EXPECT_EQ(NumPrivatizedVars, 0U);
1291   EXPECT_EQ(NumFinalizationPoints, 2U);
1292   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1293 
1294   Builder.restoreIP(AfterIP);
1295   Builder.CreateRetVoid();
1296   OMPBuilder.finalize();
1297 
1298   EXPECT_FALSE(verifyModule(*M, &errs()));
1299 
1300   BasicBlock *ExitBB = nullptr;
1301   for (const User *Usr : FakeDestructor->users()) {
1302     const CallInst *CI = dyn_cast<CallInst>(Usr);
1303     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1304     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1305     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1306     if (ExitBB)
1307       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1308     else
1309       ExitBB = CI->getNextNode()->getSuccessor(0);
1310     ASSERT_EQ(ExitBB->size(), 1U);
1311     if (!isa<ReturnInst>(ExitBB->front())) {
1312       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1313       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1314       ASSERT_TRUE(isa<ReturnInst>(
1315           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1316     }
1317   }
1318 }
1319 
1320 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1321   OpenMPIRBuilder OMPBuilder(*M);
1322   OMPBuilder.Config.IsTargetDevice = false;
1323   OMPBuilder.initialize();
1324   F->setName("func");
1325   IRBuilder<> Builder(BB);
1326   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1327   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1328 
1329   Type *I32Ty = Type::getInt32Ty(M->getContext());
1330   Type *PtrTy = PointerType::get(M->getContext(), 0);
1331   Type *StructTy = StructType::get(I32Ty, PtrTy);
1332   Type *VoidTy = Type::getVoidTy(M->getContext());
1333   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1334   FunctionCallee TakeI32Func =
1335       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1336   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy);
1337   FunctionCallee TakeI32PtrFunc =
1338       M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy);
1339   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1340   FunctionCallee TakeStructFunc =
1341       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1342   FunctionCallee RetStructPtrFunc =
1343       M->getOrInsertFunction("ret_structptr", PtrTy);
1344   FunctionCallee TakeStructPtrFunc =
1345       M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy);
1346   Value *I32Val = Builder.CreateCall(RetI32Func);
1347   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1348   Value *StructVal = Builder.CreateCall(RetStructFunc);
1349   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1350 
1351   Instruction *Internal;
1352   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1353     IRBuilder<>::InsertPointGuard Guard(Builder);
1354     Builder.restoreIP(CodeGenIP);
1355     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1356     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1357     Builder.CreateCall(TakeStructFunc, StructVal);
1358     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1359     return Error::success();
1360   };
1361   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1362                     Value &Inner, Value *&ReplacementValue) {
1363     ReplacementValue = &Inner;
1364     return CodeGenIP;
1365   };
1366   auto FiniCB = [](InsertPointTy) { return Error::success(); };
1367 
1368   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1369                                     F->getEntryBlock().getFirstInsertionPt());
1370   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1371                        OMPBuilder.createParallel(
1372                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
1373                            nullptr, OMP_PROC_BIND_default, false));
1374   Builder.restoreIP(AfterIP);
1375   Builder.CreateRetVoid();
1376 
1377   OMPBuilder.finalize();
1378 
1379   EXPECT_FALSE(verifyModule(*M, &errs()));
1380   Function *OutlinedFn = Internal->getFunction();
1381 
1382   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1383   EXPECT_TRUE(Arg2Type->isPointerTy());
1384 }
1385 
1386 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1387   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1388   OpenMPIRBuilder OMPBuilder(*M);
1389   OMPBuilder.initialize();
1390   IRBuilder<> Builder(BB);
1391   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1392   Value *TripCount = F->getArg(0);
1393 
1394   unsigned NumBodiesGenerated = 0;
1395   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1396     NumBodiesGenerated += 1;
1397 
1398     Builder.restoreIP(CodeGenIP);
1399 
1400     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1401     Instruction *ThenTerm, *ElseTerm;
1402     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1403                                   &ThenTerm, &ElseTerm);
1404     return Error::success();
1405   };
1406 
1407   ASSERT_EXPECTED_INIT(
1408       CanonicalLoopInfo *, Loop,
1409       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount));
1410 
1411   Builder.restoreIP(Loop->getAfterIP());
1412   ReturnInst *RetInst = Builder.CreateRetVoid();
1413   OMPBuilder.finalize();
1414 
1415   Loop->assertOK();
1416   EXPECT_FALSE(verifyModule(*M, &errs()));
1417 
1418   EXPECT_EQ(NumBodiesGenerated, 1U);
1419 
1420   // Verify control flow structure (in addition to Loop->assertOK()).
1421   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1422   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1423 
1424   Instruction *IndVar = Loop->getIndVar();
1425   EXPECT_TRUE(isa<PHINode>(IndVar));
1426   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1427   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1428 
1429   EXPECT_EQ(Loop->getTripCount(), TripCount);
1430 
1431   BasicBlock *Body = Loop->getBody();
1432   Instruction *CmpInst = &Body->front();
1433   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1434   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1435 
1436   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1437   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1438     return SuccBB->getSingleSuccessor() == LatchPred;
1439   }));
1440 
1441   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1442 }
1443 
1444 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1445   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1446   OpenMPIRBuilder OMPBuilder(*M);
1447   OMPBuilder.initialize();
1448   IRBuilder<> Builder(BB);
1449 
1450   // Check the trip count is computed correctly. We generate the canonical loop
1451   // but rely on the IRBuilder's constant folder to compute the final result
1452   // since all inputs are constant. To verify overflow situations, limit the
1453   // trip count / loop counter widths to 16 bits.
1454   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1455                            bool IsSigned, bool InclusiveStop) -> int64_t {
1456     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1457     Type *LCTy = Type::getInt16Ty(Ctx);
1458     Value *StartVal = ConstantInt::get(LCTy, Start);
1459     Value *StopVal = ConstantInt::get(LCTy, Stop);
1460     Value *StepVal = ConstantInt::get(LCTy, Step);
1461     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1462       return Error::success();
1463     };
1464     ASSERT_EXPECTED_INIT_RETURN(
1465         CanonicalLoopInfo *, Loop,
1466         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1467                                        StepVal, IsSigned, InclusiveStop),
1468         -1);
1469     Loop->assertOK();
1470     Builder.restoreIP(Loop->getAfterIP());
1471     Value *TripCount = Loop->getTripCount();
1472     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1473   };
1474 
1475   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1476   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1477   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1478   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1479   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1480   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1481   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1482   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1483   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1484   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1485   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1486   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1487   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1488 
1489   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1490   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1491   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1492   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1493   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1494   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1495 
1496   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1497   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1498   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1499   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1500   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1501   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1502   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1503 
1504   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1505   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1506   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1507   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1508   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1509   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1510   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1511   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1512   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1513   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1514   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1515 
1516   // Finalize the function and verify it.
1517   Builder.CreateRetVoid();
1518   OMPBuilder.finalize();
1519   EXPECT_FALSE(verifyModule(*M, &errs()));
1520 }
1521 
1522 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1523   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1524   OpenMPIRBuilder OMPBuilder(*M);
1525   OMPBuilder.initialize();
1526   F->setName("func");
1527 
1528   IRBuilder<> Builder(BB);
1529 
1530   Type *LCTy = F->getArg(0)->getType();
1531   Constant *One = ConstantInt::get(LCTy, 1);
1532   Constant *Two = ConstantInt::get(LCTy, 2);
1533   Value *OuterTripCount =
1534       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1535   Value *InnerTripCount =
1536       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1537 
1538   // Fix an insertion point for ComputeIP.
1539   BasicBlock *LoopNextEnter =
1540       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1541                          Builder.GetInsertBlock()->getNextNode());
1542   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1543   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1544 
1545   Builder.SetInsertPoint(LoopNextEnter);
1546   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1547 
1548   CanonicalLoopInfo *InnerLoop = nullptr;
1549   CallInst *InbetweenLead = nullptr;
1550   CallInst *InbetweenTrail = nullptr;
1551   CallInst *Call = nullptr;
1552   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1553     Builder.restoreIP(OuterCodeGenIP);
1554     InbetweenLead =
1555         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1556 
1557     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1558                                   Value *InnerLC) {
1559       Builder.restoreIP(InnerCodeGenIP);
1560       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1561       return Error::success();
1562     };
1563     ASSERT_EXPECTED_INIT(
1564         CanonicalLoopInfo *, InnerLoopResult,
1565         OMPBuilder.createCanonicalLoop(Builder.saveIP(), InnerLoopBodyGenCB,
1566                                        InnerTripCount, "inner"));
1567     InnerLoop = InnerLoopResult;
1568 
1569     Builder.restoreIP(InnerLoop->getAfterIP());
1570     InbetweenTrail =
1571         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1572   };
1573   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop,
1574                        OMPBuilder.createCanonicalLoop(
1575                            OuterLoc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB),
1576                            OuterTripCount, "outer"));
1577 
1578   // Finish the function.
1579   Builder.restoreIP(OuterLoop->getAfterIP());
1580   Builder.CreateRetVoid();
1581 
1582   CanonicalLoopInfo *Collapsed =
1583       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1584 
1585   OMPBuilder.finalize();
1586   EXPECT_FALSE(verifyModule(*M, &errs()));
1587 
1588   // Verify control flow and BB order.
1589   BasicBlock *RefOrder[] = {
1590       Collapsed->getPreheader(),   Collapsed->getHeader(),
1591       Collapsed->getCond(),        Collapsed->getBody(),
1592       InbetweenLead->getParent(),  Call->getParent(),
1593       InbetweenTrail->getParent(), Collapsed->getLatch(),
1594       Collapsed->getExit(),        Collapsed->getAfter(),
1595   };
1596   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1597   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1598 
1599   // Verify the total trip count.
1600   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1601   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1602   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1603 
1604   // Verify the changed indvar.
1605   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1606   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1607   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1608   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1609   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1610 
1611   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1612   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1613   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1614   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1615   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1616 
1617   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1618   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1619 }
1620 
1621 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1622   OpenMPIRBuilder OMPBuilder(*M);
1623   CallInst *Call;
1624   BasicBlock *BodyCode;
1625   CanonicalLoopInfo *Loop =
1626       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1627   ASSERT_NE(Loop, nullptr);
1628 
1629   Instruction *OrigIndVar = Loop->getIndVar();
1630   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1631 
1632   // Tile the loop.
1633   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1634   std::vector<CanonicalLoopInfo *> GenLoops =
1635       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1636 
1637   OMPBuilder.finalize();
1638   EXPECT_FALSE(verifyModule(*M, &errs()));
1639 
1640   EXPECT_EQ(GenLoops.size(), 2u);
1641   CanonicalLoopInfo *Floor = GenLoops[0];
1642   CanonicalLoopInfo *Tile = GenLoops[1];
1643 
1644   BasicBlock *RefOrder[] = {
1645       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1646       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1647       Tile->getCond(),       Tile->getBody(),      BodyCode,
1648       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1649       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1650   };
1651   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1652   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1653 
1654   // Check the induction variable.
1655   EXPECT_EQ(Call->getParent(), BodyCode);
1656   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1657   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1658   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1659   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1660   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1661   EXPECT_EQ(Scale->getOperand(0), TileSize);
1662   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1663 }
1664 
1665 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1666   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1667   OpenMPIRBuilder OMPBuilder(*M);
1668   OMPBuilder.initialize();
1669   F->setName("func");
1670 
1671   IRBuilder<> Builder(BB);
1672   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1673   Value *TripCount = F->getArg(0);
1674   Type *LCTy = TripCount->getType();
1675 
1676   BasicBlock *BodyCode = nullptr;
1677   CanonicalLoopInfo *InnerLoop = nullptr;
1678   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1679                                 llvm::Value *OuterLC) {
1680     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1681                                   llvm::Value *InnerLC) {
1682       Builder.restoreIP(InnerCodeGenIP);
1683       BodyCode = Builder.GetInsertBlock();
1684 
1685       // Add something that consumes the induction variables to the body.
1686       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1687       return Error::success();
1688     };
1689     ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, InnerLoopResult,
1690                          OMPBuilder.createCanonicalLoop(OuterCodeGenIP,
1691                                                         InnerLoopBodyGenCB,
1692                                                         TripCount, "inner"));
1693     InnerLoop = InnerLoopResult;
1694   };
1695   ASSERT_EXPECTED_INIT(
1696       CanonicalLoopInfo *, OuterLoop,
1697       OMPBuilder.createCanonicalLoop(
1698           Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), TripCount, "outer"));
1699 
1700   // Finalize the function.
1701   Builder.restoreIP(OuterLoop->getAfterIP());
1702   Builder.CreateRetVoid();
1703 
1704   // Tile to loop nest.
1705   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1706   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1707   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1708       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1709 
1710   OMPBuilder.finalize();
1711   EXPECT_FALSE(verifyModule(*M, &errs()));
1712 
1713   EXPECT_EQ(GenLoops.size(), 4u);
1714   CanonicalLoopInfo *Floor1 = GenLoops[0];
1715   CanonicalLoopInfo *Floor2 = GenLoops[1];
1716   CanonicalLoopInfo *Tile1 = GenLoops[2];
1717   CanonicalLoopInfo *Tile2 = GenLoops[3];
1718 
1719   BasicBlock *RefOrder[] = {
1720       Floor1->getPreheader(),
1721       Floor1->getHeader(),
1722       Floor1->getCond(),
1723       Floor1->getBody(),
1724       Floor2->getPreheader(),
1725       Floor2->getHeader(),
1726       Floor2->getCond(),
1727       Floor2->getBody(),
1728       Tile1->getPreheader(),
1729       Tile1->getHeader(),
1730       Tile1->getCond(),
1731       Tile1->getBody(),
1732       Tile2->getPreheader(),
1733       Tile2->getHeader(),
1734       Tile2->getCond(),
1735       Tile2->getBody(),
1736       BodyCode,
1737       Tile2->getLatch(),
1738       Tile2->getExit(),
1739       Tile2->getAfter(),
1740       Tile1->getLatch(),
1741       Tile1->getExit(),
1742       Tile1->getAfter(),
1743       Floor2->getLatch(),
1744       Floor2->getExit(),
1745       Floor2->getAfter(),
1746       Floor1->getLatch(),
1747       Floor1->getExit(),
1748       Floor1->getAfter(),
1749   };
1750   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1751   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1752 }
1753 
1754 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1755   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1756   OpenMPIRBuilder OMPBuilder(*M);
1757   OMPBuilder.initialize();
1758   F->setName("func");
1759 
1760   IRBuilder<> Builder(BB);
1761   Value *TripCount = F->getArg(0);
1762   Type *LCTy = TripCount->getType();
1763 
1764   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1765   Value *OuterStopVal = TripCount;
1766   Value *OuterStep = ConstantInt::get(LCTy, 5);
1767   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1768   Value *InnerStopVal = TripCount;
1769   Value *InnerStep = ConstantInt::get(LCTy, 3);
1770 
1771   // Fix an insertion point for ComputeIP.
1772   BasicBlock *LoopNextEnter =
1773       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1774                          Builder.GetInsertBlock()->getNextNode());
1775   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1776   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1777 
1778   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1779   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1780 
1781   BasicBlock *BodyCode = nullptr;
1782   CanonicalLoopInfo *InnerLoop = nullptr;
1783   CallInst *Call = nullptr;
1784   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1785                                 llvm::Value *OuterLC) {
1786     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1787                                   llvm::Value *InnerLC) {
1788       Builder.restoreIP(InnerCodeGenIP);
1789       BodyCode = Builder.GetInsertBlock();
1790 
1791       // Add something that consumes the induction variable to the body.
1792       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1793       return Error::success();
1794     };
1795     ASSERT_EXPECTED_INIT(
1796         CanonicalLoopInfo *, InnerLoopResult,
1797         OMPBuilder.createCanonicalLoop(OuterCodeGenIP, InnerLoopBodyGenCB,
1798                                        InnerStartVal, InnerStopVal, InnerStep,
1799                                        false, false, ComputeIP, "inner"));
1800     InnerLoop = InnerLoopResult;
1801   };
1802   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop,
1803                        OMPBuilder.createCanonicalLoop(
1804                            Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB),
1805                            OuterStartVal, OuterStopVal, OuterStep, false, false,
1806                            ComputeIP, "outer"));
1807 
1808   // Finalize the function
1809   Builder.restoreIP(OuterLoop->getAfterIP());
1810   Builder.CreateRetVoid();
1811 
1812   // Tile the loop nest.
1813   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1814   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1815   std::vector<CanonicalLoopInfo *> GenLoops =
1816       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1817 
1818   OMPBuilder.finalize();
1819   EXPECT_FALSE(verifyModule(*M, &errs()));
1820 
1821   EXPECT_EQ(GenLoops.size(), 4u);
1822   CanonicalLoopInfo *Floor0 = GenLoops[0];
1823   CanonicalLoopInfo *Floor1 = GenLoops[1];
1824   CanonicalLoopInfo *Tile0 = GenLoops[2];
1825   CanonicalLoopInfo *Tile1 = GenLoops[3];
1826 
1827   BasicBlock *RefOrder[] = {
1828       Floor0->getPreheader(),
1829       Floor0->getHeader(),
1830       Floor0->getCond(),
1831       Floor0->getBody(),
1832       Floor1->getPreheader(),
1833       Floor1->getHeader(),
1834       Floor1->getCond(),
1835       Floor1->getBody(),
1836       Tile0->getPreheader(),
1837       Tile0->getHeader(),
1838       Tile0->getCond(),
1839       Tile0->getBody(),
1840       Tile1->getPreheader(),
1841       Tile1->getHeader(),
1842       Tile1->getCond(),
1843       Tile1->getBody(),
1844       BodyCode,
1845       Tile1->getLatch(),
1846       Tile1->getExit(),
1847       Tile1->getAfter(),
1848       Tile0->getLatch(),
1849       Tile0->getExit(),
1850       Tile0->getAfter(),
1851       Floor1->getLatch(),
1852       Floor1->getExit(),
1853       Floor1->getAfter(),
1854       Floor0->getLatch(),
1855       Floor0->getExit(),
1856       Floor0->getAfter(),
1857   };
1858   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1859   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1860 
1861   EXPECT_EQ(Call->getParent(), BodyCode);
1862 
1863   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1864   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1865   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1866   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1867   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1868   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1869   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1870   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1871   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1872   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1873   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1874 
1875   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1876   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1877   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1878   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1879   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1880   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1881   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1882   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1883   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1884   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1885   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1886   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1887   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1888 }
1889 
1890 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1891   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1892   OpenMPIRBuilder OMPBuilder(*M);
1893   OMPBuilder.initialize();
1894   IRBuilder<> Builder(BB);
1895 
1896   // Create a loop, tile it, and extract its trip count. All input values are
1897   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1898   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1899   // do the same for the tile loop.
1900   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1901                            bool IsSigned, bool InclusiveStop,
1902                            int64_t TileSize) -> uint64_t {
1903     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1904     Type *LCTy = Type::getInt16Ty(Ctx);
1905     Value *StartVal = ConstantInt::get(LCTy, Start);
1906     Value *StopVal = ConstantInt::get(LCTy, Stop);
1907     Value *StepVal = ConstantInt::get(LCTy, Step);
1908 
1909     // Generate a loop.
1910     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1911       return Error::success();
1912     };
1913     ASSERT_EXPECTED_INIT_RETURN(
1914         CanonicalLoopInfo *, Loop,
1915         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1916                                        StepVal, IsSigned, InclusiveStop),
1917         (unsigned)-1);
1918     InsertPointTy AfterIP = Loop->getAfterIP();
1919 
1920     // Tile the loop.
1921     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1922     std::vector<CanonicalLoopInfo *> GenLoops =
1923         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1924 
1925     // Set the insertion pointer to after loop, where the next loop will be
1926     // emitted.
1927     Builder.restoreIP(AfterIP);
1928 
1929     // Extract the trip count.
1930     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1931     Value *FloorTripCount = FloorLoop->getTripCount();
1932     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1933   };
1934 
1935   // Empty iteration domain.
1936   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1937   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1938   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1939   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1940   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1941 
1942   // Only complete tiles.
1943   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1944   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1945   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1946   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1947   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1948   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1949 
1950   // Only a partial tile.
1951   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1952   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1953   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1954   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1955   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1956 
1957   // Complete and partial tiles.
1958   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1959   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1960   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1961   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1962   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1963 
1964   // Close to 16-bit integer range.
1965   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1966   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1967   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1968   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1969   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1970   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1971   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1972 
1973   // Finalize the function.
1974   Builder.CreateRetVoid();
1975   OMPBuilder.finalize();
1976 
1977   EXPECT_FALSE(verifyModule(*M, &errs()));
1978 }
1979 
1980 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1981   OpenMPIRBuilder OMPBuilder(*M);
1982   MapVector<Value *, Value *> AlignedVars;
1983   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1984   ASSERT_NE(CLI, nullptr);
1985 
1986   // Simd-ize the loop.
1987   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1988                        OrderKind::OMP_ORDER_unknown,
1989                        /* Simdlen */ nullptr,
1990                        /* Safelen */ nullptr);
1991 
1992   OMPBuilder.finalize();
1993   EXPECT_FALSE(verifyModule(*M, &errs()));
1994 
1995   PassBuilder PB;
1996   FunctionAnalysisManager FAM;
1997   PB.registerFunctionAnalyses(FAM);
1998   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1999 
2000   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2001   EXPECT_EQ(TopLvl.size(), 1u);
2002 
2003   Loop *L = TopLvl.front();
2004   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2005   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2006 
2007   // Check for llvm.access.group metadata attached to the printf
2008   // function in the loop body.
2009   BasicBlock *LoopBody = CLI->getBody();
2010   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2011     return I.getMetadata("llvm.access.group") != nullptr;
2012   }));
2013 }
2014 
2015 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
2016   OpenMPIRBuilder OMPBuilder(*M);
2017   IRBuilder<> Builder(BB);
2018   const int AlignmentValue = 32;
2019   llvm::BasicBlock *sourceBlock = Builder.GetInsertBlock();
2020   AllocaInst *Alloc1 =
2021       Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1));
2022   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2023   MapVector<Value *, Value *> AlignedVars;
2024   AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
2025 
2026   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2027   ASSERT_NE(CLI, nullptr);
2028 
2029   // Simd-ize the loop.
2030   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2031                        OrderKind::OMP_ORDER_unknown,
2032                        /* Simdlen */ nullptr,
2033                        /* Safelen */ nullptr);
2034 
2035   OMPBuilder.finalize();
2036   EXPECT_FALSE(verifyModule(*M, &errs()));
2037 
2038   PassBuilder PB;
2039   FunctionAnalysisManager FAM;
2040   PB.registerFunctionAnalyses(FAM);
2041   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2042 
2043   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2044   EXPECT_EQ(TopLvl.size(), 1u);
2045 
2046   Loop *L = TopLvl.front();
2047   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2048   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2049 
2050   // Check for llvm.access.group metadata attached to the printf
2051   // function in the loop body.
2052   BasicBlock *LoopBody = CLI->getBody();
2053   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2054     return I.getMetadata("llvm.access.group") != nullptr;
2055   }));
2056 
2057   // Check if number of assumption instructions is equal to number of aligned
2058   // variables
2059   size_t NumAssummptionCallsInPreheader =
2060       count_if(*sourceBlock, [](Instruction &I) { return isa<AssumeInst>(I); });
2061   EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
2062 
2063   // Check if variables are correctly aligned
2064   for (Instruction &Instr : *sourceBlock) {
2065     if (!isa<AssumeInst>(Instr))
2066       continue;
2067     AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
2068     if (AssumeInstruction->getNumTotalBundleOperands()) {
2069       auto Bundle = AssumeInstruction->getOperandBundleAt(0);
2070       if (Bundle.getTagName() == "align") {
2071         EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
2072         auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
2073         EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
2074       }
2075     }
2076   }
2077 }
2078 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
2079   OpenMPIRBuilder OMPBuilder(*M);
2080   MapVector<Value *, Value *> AlignedVars;
2081   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2082   ASSERT_NE(CLI, nullptr);
2083 
2084   // Simd-ize the loop.
2085   OMPBuilder.applySimd(CLI, AlignedVars,
2086                        /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2087                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2088                        /* Safelen */ nullptr);
2089 
2090   OMPBuilder.finalize();
2091   EXPECT_FALSE(verifyModule(*M, &errs()));
2092 
2093   PassBuilder PB;
2094   FunctionAnalysisManager FAM;
2095   PB.registerFunctionAnalyses(FAM);
2096   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2097 
2098   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2099   EXPECT_EQ(TopLvl.size(), 1u);
2100 
2101   Loop *L = TopLvl.front();
2102   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2103   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2104   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2105 
2106   // Check for llvm.access.group metadata attached to the printf
2107   // function in the loop body.
2108   BasicBlock *LoopBody = CLI->getBody();
2109   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2110     return I.getMetadata("llvm.access.group") != nullptr;
2111   }));
2112 }
2113 
2114 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
2115   OpenMPIRBuilder OMPBuilder(*M);
2116   MapVector<Value *, Value *> AlignedVars;
2117 
2118   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2119   ASSERT_NE(CLI, nullptr);
2120 
2121   // Simd-ize the loop.
2122   OMPBuilder.applySimd(
2123       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
2124       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2125 
2126   OMPBuilder.finalize();
2127   EXPECT_FALSE(verifyModule(*M, &errs()));
2128 
2129   PassBuilder PB;
2130   FunctionAnalysisManager FAM;
2131   PB.registerFunctionAnalyses(FAM);
2132   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2133 
2134   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2135   EXPECT_EQ(TopLvl.size(), 1u);
2136 
2137   Loop *L = TopLvl.front();
2138   // Parallel metadata shoudl be attached because of presence of
2139   // the order(concurrent) OpenMP clause
2140   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2141   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2142   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2143 
2144   // Check for llvm.access.group metadata attached to the printf
2145   // function in the loop body.
2146   BasicBlock *LoopBody = CLI->getBody();
2147   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2148     return I.getMetadata("llvm.access.group") != nullptr;
2149   }));
2150 }
2151 
2152 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
2153   OpenMPIRBuilder OMPBuilder(*M);
2154   MapVector<Value *, Value *> AlignedVars;
2155 
2156   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2157   ASSERT_NE(CLI, nullptr);
2158 
2159   OMPBuilder.applySimd(
2160       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2161       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2162 
2163   OMPBuilder.finalize();
2164   EXPECT_FALSE(verifyModule(*M, &errs()));
2165 
2166   PassBuilder PB;
2167   FunctionAnalysisManager FAM;
2168   PB.registerFunctionAnalyses(FAM);
2169   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2170 
2171   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2172   EXPECT_EQ(TopLvl.size(), 1u);
2173 
2174   Loop *L = TopLvl.front();
2175   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2176   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2177   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2178 
2179   // Check for llvm.access.group metadata attached to the printf
2180   // function in the loop body.
2181   BasicBlock *LoopBody = CLI->getBody();
2182   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2183     return I.getMetadata("llvm.access.group") != nullptr;
2184   }));
2185 }
2186 
2187 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
2188   OpenMPIRBuilder OMPBuilder(*M);
2189   MapVector<Value *, Value *> AlignedVars;
2190 
2191   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2192   ASSERT_NE(CLI, nullptr);
2193 
2194   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2195                        OrderKind::OMP_ORDER_unknown,
2196                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
2197                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2198 
2199   OMPBuilder.finalize();
2200   EXPECT_FALSE(verifyModule(*M, &errs()));
2201 
2202   PassBuilder PB;
2203   FunctionAnalysisManager FAM;
2204   PB.registerFunctionAnalyses(FAM);
2205   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2206 
2207   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2208   EXPECT_EQ(TopLvl.size(), 1u);
2209 
2210   Loop *L = TopLvl.front();
2211   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2212   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2213   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2);
2214 
2215   // Check for llvm.access.group metadata attached to the printf
2216   // function in the loop body.
2217   BasicBlock *LoopBody = CLI->getBody();
2218   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2219     return I.getMetadata("llvm.access.group") != nullptr;
2220   }));
2221 }
2222 
2223 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) {
2224   OpenMPIRBuilder OMPBuilder(*M);
2225   IRBuilder<> Builder(BB);
2226   MapVector<Value *, Value *> AlignedVars;
2227   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
2228   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
2229 
2230   // Generation of if condition
2231   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1);
2232   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2);
2233   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2234   LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2);
2235 
2236   Value *IfCmp = Builder.CreateICmpNE(Load1, Load2);
2237 
2238   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2239   ASSERT_NE(CLI, nullptr);
2240 
2241   // Simd-ize the loop with if condition
2242   OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
2243                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2244                        /* Safelen */ nullptr);
2245 
2246   OMPBuilder.finalize();
2247   EXPECT_FALSE(verifyModule(*M, &errs()));
2248 
2249   PassBuilder PB;
2250   FunctionAnalysisManager FAM;
2251   PB.registerFunctionAnalyses(FAM);
2252   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2253 
2254   // Check if there are two loops (one with enabled vectorization)
2255   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2256   EXPECT_EQ(TopLvl.size(), 2u);
2257 
2258   Loop *L = TopLvl[0];
2259   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2260   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2261   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2262 
2263   // The second loop should have disabled vectorization
2264   L = TopLvl[1];
2265   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2266   EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2267   // Check for llvm.access.group metadata attached to the printf
2268   // function in the loop body.
2269   BasicBlock *LoopBody = CLI->getBody();
2270   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2271     return I.getMetadata("llvm.access.group") != nullptr;
2272   }));
2273 }
2274 
2275 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
2276   OpenMPIRBuilder OMPBuilder(*M);
2277 
2278   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2279   ASSERT_NE(CLI, nullptr);
2280 
2281   // Unroll the loop.
2282   OMPBuilder.unrollLoopFull(DL, CLI);
2283 
2284   OMPBuilder.finalize();
2285   EXPECT_FALSE(verifyModule(*M, &errs()));
2286 
2287   PassBuilder PB;
2288   FunctionAnalysisManager FAM;
2289   PB.registerFunctionAnalyses(FAM);
2290   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2291 
2292   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2293   EXPECT_EQ(TopLvl.size(), 1u);
2294 
2295   Loop *L = TopLvl.front();
2296   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2297   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
2298 }
2299 
2300 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
2301   OpenMPIRBuilder OMPBuilder(*M);
2302   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2303   ASSERT_NE(CLI, nullptr);
2304 
2305   // Unroll the loop.
2306   CanonicalLoopInfo *UnrolledLoop = nullptr;
2307   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
2308   ASSERT_NE(UnrolledLoop, nullptr);
2309 
2310   OMPBuilder.finalize();
2311   EXPECT_FALSE(verifyModule(*M, &errs()));
2312   UnrolledLoop->assertOK();
2313 
2314   PassBuilder PB;
2315   FunctionAnalysisManager FAM;
2316   PB.registerFunctionAnalyses(FAM);
2317   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2318 
2319   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2320   EXPECT_EQ(TopLvl.size(), 1u);
2321   Loop *Outer = TopLvl.front();
2322   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
2323   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
2324   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
2325   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
2326 
2327   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
2328   Loop *Inner = Outer->getSubLoops().front();
2329 
2330   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
2331   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
2332 }
2333 
2334 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
2335   OpenMPIRBuilder OMPBuilder(*M);
2336 
2337   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2338   ASSERT_NE(CLI, nullptr);
2339 
2340   // Unroll the loop.
2341   OMPBuilder.unrollLoopHeuristic(DL, CLI);
2342 
2343   OMPBuilder.finalize();
2344   EXPECT_FALSE(verifyModule(*M, &errs()));
2345 
2346   PassBuilder PB;
2347   FunctionAnalysisManager FAM;
2348   PB.registerFunctionAnalyses(FAM);
2349   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2350 
2351   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2352   EXPECT_EQ(TopLvl.size(), 1u);
2353 
2354   Loop *L = TopLvl.front();
2355   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2356 }
2357 
2358 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
2359   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2360   std::string oldDLStr = M->getDataLayoutStr();
2361   M->setDataLayout(
2362       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
2363       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
2364       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
2365   OpenMPIRBuilder OMPBuilder(*M);
2366   OMPBuilder.Config.IsTargetDevice = true;
2367   OMPBuilder.initialize();
2368   IRBuilder<> Builder(BB);
2369   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2370   InsertPointTy AllocaIP = Builder.saveIP();
2371 
2372   Type *LCTy = Type::getInt32Ty(Ctx);
2373   Value *StartVal = ConstantInt::get(LCTy, 10);
2374   Value *StopVal = ConstantInt::get(LCTy, 52);
2375   Value *StepVal = ConstantInt::get(LCTy, 2);
2376   auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); };
2377 
2378   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2379                        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGen,
2380                                                       StartVal, StopVal,
2381                                                       StepVal, false, false));
2382   BasicBlock *Preheader = CLI->getPreheader();
2383   Value *TripCount = CLI->getTripCount();
2384 
2385   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2386 
2387   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2388                        OMPBuilder.applyWorkshareLoop(
2389                            DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static,
2390                            nullptr, false, false, false, false,
2391                            WorksharingLoopType::ForStaticLoop));
2392   Builder.restoreIP(AfterIP);
2393   Builder.CreateRetVoid();
2394 
2395   OMPBuilder.finalize();
2396   EXPECT_FALSE(verifyModule(*M, &errs()));
2397 
2398   CallInst *WorkshareLoopRuntimeCall = nullptr;
2399   int WorkshareLoopRuntimeCallCnt = 0;
2400   for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) {
2401     CallInst *Call = dyn_cast<CallInst>(Inst);
2402     if (!Call)
2403       continue;
2404     if (!Call->getCalledFunction())
2405       continue;
2406 
2407     if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") {
2408       WorkshareLoopRuntimeCall = Call;
2409       WorkshareLoopRuntimeCallCnt++;
2410     }
2411   }
2412   EXPECT_NE(WorkshareLoopRuntimeCall, nullptr);
2413   // Verify that there is only one call to workshare loop function
2414   EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1);
2415   // Check that pointer to loop body function is passed as second argument
2416   Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1);
2417   EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType());
2418   Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg);
2419   EXPECT_NE(ArgFunction, nullptr);
2420   EXPECT_EQ(ArgFunction->arg_size(), 1u);
2421   EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType());
2422   // Check that no variables except for loop counter are used in loop body
2423   EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()),
2424             WorkshareLoopRuntimeCall->getArgOperand(2));
2425   // Check loop trip count argument
2426   EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3));
2427 }
2428 
2429 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
2430   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2431   OpenMPIRBuilder OMPBuilder(*M);
2432   OMPBuilder.Config.IsTargetDevice = false;
2433   OMPBuilder.initialize();
2434   IRBuilder<> Builder(BB);
2435   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2436 
2437   Type *LCTy = Type::getInt32Ty(Ctx);
2438   Value *StartVal = ConstantInt::get(LCTy, 10);
2439   Value *StopVal = ConstantInt::get(LCTy, 52);
2440   Value *StepVal = ConstantInt::get(LCTy, 2);
2441   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2442     return Error::success();
2443   };
2444   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2445                        OMPBuilder.createCanonicalLoop(
2446                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2447                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2448   BasicBlock *Preheader = CLI->getPreheader();
2449   BasicBlock *Body = CLI->getBody();
2450   Value *IV = CLI->getIndVar();
2451   BasicBlock *ExitBlock = CLI->getExit();
2452 
2453   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2454   InsertPointTy AllocaIP = Builder.saveIP();
2455 
2456   ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP,
2457                                                      /*NeedsBarrier=*/true,
2458                                                      OMP_SCHEDULE_Static),
2459                        Succeeded());
2460 
2461   BasicBlock *Cond = Body->getSinglePredecessor();
2462   Instruction *Cmp = &*Cond->begin();
2463   Value *TripCount = Cmp->getOperand(1);
2464 
2465   auto AllocaIter = BB->begin();
2466   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2467   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2468   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2469   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2470   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2471   EXPECT_NE(PLastIter, nullptr);
2472   EXPECT_NE(PLowerBound, nullptr);
2473   EXPECT_NE(PUpperBound, nullptr);
2474   EXPECT_NE(PStride, nullptr);
2475 
2476   auto PreheaderIter = Preheader->begin();
2477   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
2478   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2479   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2480   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2481   ASSERT_NE(LowerBoundStore, nullptr);
2482   ASSERT_NE(UpperBoundStore, nullptr);
2483   ASSERT_NE(StrideStore, nullptr);
2484 
2485   auto *OrigLowerBound =
2486       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2487   auto *OrigUpperBound =
2488       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2489   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2490   ASSERT_NE(OrigLowerBound, nullptr);
2491   ASSERT_NE(OrigUpperBound, nullptr);
2492   ASSERT_NE(OrigStride, nullptr);
2493   EXPECT_EQ(OrigLowerBound->getValue(), 0);
2494   EXPECT_EQ(OrigUpperBound->getValue(), 20);
2495   EXPECT_EQ(OrigStride->getValue(), 1);
2496 
2497   // Check that the loop IV is updated to account for the lower bound returned
2498   // by the OpenMP runtime call.
2499   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
2500   EXPECT_EQ(Add->getOperand(0), IV);
2501   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
2502   ASSERT_NE(LoadedLowerBound, nullptr);
2503   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
2504 
2505   // Check that the trip count is updated to account for the lower and upper
2506   // bounds return by the OpenMP runtime call.
2507   auto *AddOne = dyn_cast<Instruction>(TripCount);
2508   ASSERT_NE(AddOne, nullptr);
2509   ASSERT_TRUE(AddOne->isBinaryOp());
2510   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
2511   ASSERT_NE(One, nullptr);
2512   EXPECT_EQ(One->getValue(), 1);
2513   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
2514   ASSERT_NE(Difference, nullptr);
2515   ASSERT_TRUE(Difference->isBinaryOp());
2516   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
2517   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
2518   ASSERT_NE(LoadedUpperBound, nullptr);
2519   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
2520 
2521   // The original loop iterator should only be used in the condition, in the
2522   // increment and in the statement that adds the lower bound to it.
2523   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2524 
2525   // The exit block should contain the "fini" call and the barrier call,
2526   // plus the call to obtain the thread ID.
2527   size_t NumCallsInExitBlock =
2528       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2529   EXPECT_EQ(NumCallsInExitBlock, 3u);
2530 }
2531 
2532 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
2533   unsigned IVBits = GetParam();
2534 
2535   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2536   OpenMPIRBuilder OMPBuilder(*M);
2537   OMPBuilder.Config.IsTargetDevice = false;
2538 
2539   BasicBlock *Body;
2540   CallInst *Call;
2541   CanonicalLoopInfo *CLI =
2542       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
2543   ASSERT_NE(CLI, nullptr);
2544 
2545   Instruction *OrigIndVar = CLI->getIndVar();
2546   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
2547 
2548   Type *LCTy = Type::getInt32Ty(Ctx);
2549   Value *ChunkSize = ConstantInt::get(LCTy, 5);
2550   InsertPointTy AllocaIP{&F->getEntryBlock(),
2551                          F->getEntryBlock().getFirstInsertionPt()};
2552   ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP,
2553                                                      /*NeedsBarrier=*/true,
2554                                                      OMP_SCHEDULE_Static,
2555                                                      ChunkSize),
2556                        Succeeded());
2557 
2558   OMPBuilder.finalize();
2559   EXPECT_FALSE(verifyModule(*M, &errs()));
2560 
2561   BasicBlock *Entry = &F->getEntryBlock();
2562   BasicBlock *Preheader = Entry->getSingleSuccessor();
2563 
2564   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2565   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2566   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2567   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2568   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2569   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2570   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2571 
2572   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2573   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2574   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2575   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2576   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2577   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2578   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2579 
2580   BasicBlock *DispatchInc = ChunkAfter;
2581 
2582   EXPECT_EQ(ChunkBody, Body);
2583   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2584   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2585 
2586   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2587 
2588   Value *NewIV = Call->getOperand(1);
2589   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2590 
2591   CallInst *InitCall = findSingleCall(
2592       F,
2593       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2594                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2595       OMPBuilder);
2596   EXPECT_EQ(InitCall->getParent(), Preheader);
2597   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2598   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2599   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2600 
2601   CallInst *FiniCall = findSingleCall(
2602       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2603   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2604 
2605   CallInst *BarrierCall = findSingleCall(
2606       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2607   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2608 }
2609 
2610 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
2611                          ::testing::Values(8, 16, 32, 64));
2612 
2613 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2614   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2615   OpenMPIRBuilder OMPBuilder(*M);
2616   OMPBuilder.Config.IsTargetDevice = false;
2617   OMPBuilder.initialize();
2618   IRBuilder<> Builder(BB);
2619   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2620 
2621   omp::OMPScheduleType SchedType = GetParam();
2622   uint32_t ChunkSize = 1;
2623   switch (SchedType & ~OMPScheduleType::ModifierMask) {
2624   case omp::OMPScheduleType::BaseDynamicChunked:
2625   case omp::OMPScheduleType::BaseGuidedChunked:
2626     ChunkSize = 7;
2627     break;
2628   case omp::OMPScheduleType::BaseAuto:
2629   case omp::OMPScheduleType::BaseRuntime:
2630     ChunkSize = 1;
2631     break;
2632   default:
2633     assert(0 && "unknown type for this test");
2634     break;
2635   }
2636 
2637   Type *LCTy = Type::getInt32Ty(Ctx);
2638   Value *StartVal = ConstantInt::get(LCTy, 10);
2639   Value *StopVal = ConstantInt::get(LCTy, 52);
2640   Value *StepVal = ConstantInt::get(LCTy, 2);
2641   Value *ChunkVal =
2642       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
2643   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2644     return Error::success();
2645   };
2646 
2647   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2648                        OMPBuilder.createCanonicalLoop(
2649                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2650                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2651 
2652   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2653   InsertPointTy AllocaIP = Builder.saveIP();
2654 
2655   // Collect all the info from CLI, as it isn't usable after the call to
2656   // createDynamicWorkshareLoop.
2657   InsertPointTy AfterIP = CLI->getAfterIP();
2658   BasicBlock *Preheader = CLI->getPreheader();
2659   BasicBlock *ExitBlock = CLI->getExit();
2660   BasicBlock *LatchBlock = CLI->getLatch();
2661   Value *IV = CLI->getIndVar();
2662 
2663   ASSERT_EXPECTED_INIT(
2664       OpenMPIRBuilder::InsertPointTy, EndIP,
2665       OMPBuilder.applyWorkshareLoop(
2666           DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
2667           ChunkVal, /*Simd=*/false,
2668           (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
2669               omp::OMPScheduleType::ModifierMonotonic,
2670           (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
2671               omp::OMPScheduleType::ModifierNonmonotonic,
2672           /*Ordered=*/false));
2673 
2674   // The returned value should be the "after" point.
2675   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2676   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2677 
2678   auto AllocaIter = BB->begin();
2679   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2680   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2681   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2682   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2683   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2684   EXPECT_NE(PLastIter, nullptr);
2685   EXPECT_NE(PLowerBound, nullptr);
2686   EXPECT_NE(PUpperBound, nullptr);
2687   EXPECT_NE(PStride, nullptr);
2688 
2689   auto PreheaderIter = Preheader->begin();
2690   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2691   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2692   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2693   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2694   ASSERT_NE(LowerBoundStore, nullptr);
2695   ASSERT_NE(UpperBoundStore, nullptr);
2696   ASSERT_NE(StrideStore, nullptr);
2697 
2698   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2699   ASSERT_NE(ThreadIdCall, nullptr);
2700   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2701             "__kmpc_global_thread_num");
2702 
2703   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2704 
2705   ASSERT_NE(InitCall, nullptr);
2706   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2707             "__kmpc_dispatch_init_4u");
2708   EXPECT_EQ(InitCall->arg_size(), 7U);
2709   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2710   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2711   if ((SchedType & OMPScheduleType::MonotonicityMask) ==
2712       OMPScheduleType::None) {
2713     // Implementation is allowed to add default nonmonotonicity flag
2714     EXPECT_EQ(
2715         static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
2716             OMPScheduleType::ModifierNonmonotonic,
2717         SchedType | OMPScheduleType::ModifierNonmonotonic);
2718   } else {
2719     EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
2720               SchedType);
2721   }
2722 
2723   ConstantInt *OrigLowerBound =
2724       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2725   ConstantInt *OrigUpperBound =
2726       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2727   ConstantInt *OrigStride =
2728       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2729   ASSERT_NE(OrigLowerBound, nullptr);
2730   ASSERT_NE(OrigUpperBound, nullptr);
2731   ASSERT_NE(OrigStride, nullptr);
2732   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2733   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2734   EXPECT_EQ(OrigStride->getValue(), 1);
2735 
2736   CallInst *FiniCall = dyn_cast<CallInst>(
2737       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2738   EXPECT_EQ(FiniCall, nullptr);
2739 
2740   // The original loop iterator should only be used in the condition, in the
2741   // increment and in the statement that adds the lower bound to it.
2742   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2743 
2744   // The exit block should contain the barrier call, plus the call to obtain
2745   // the thread ID.
2746   size_t NumCallsInExitBlock =
2747       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2748   EXPECT_EQ(NumCallsInExitBlock, 2u);
2749 
2750   // Add a termination to our block and check that it is internally consistent.
2751   Builder.restoreIP(EndIP);
2752   Builder.CreateRetVoid();
2753   OMPBuilder.finalize();
2754   EXPECT_FALSE(verifyModule(*M, &errs()));
2755 }
2756 
2757 INSTANTIATE_TEST_SUITE_P(
2758     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
2759     ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
2760                       omp::OMPScheduleType::UnorderedGuidedChunked,
2761                       omp::OMPScheduleType::UnorderedAuto,
2762                       omp::OMPScheduleType::UnorderedRuntime,
2763                       omp::OMPScheduleType::UnorderedDynamicChunked |
2764                           omp::OMPScheduleType::ModifierMonotonic,
2765                       omp::OMPScheduleType::UnorderedDynamicChunked |
2766                           omp::OMPScheduleType::ModifierNonmonotonic,
2767                       omp::OMPScheduleType::UnorderedGuidedChunked |
2768                           omp::OMPScheduleType::ModifierMonotonic,
2769                       omp::OMPScheduleType::UnorderedGuidedChunked |
2770                           omp::OMPScheduleType::ModifierNonmonotonic,
2771                       omp::OMPScheduleType::UnorderedAuto |
2772                           omp::OMPScheduleType::ModifierMonotonic,
2773                       omp::OMPScheduleType::UnorderedRuntime |
2774                           omp::OMPScheduleType::ModifierMonotonic));
2775 
2776 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2777   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2778   OpenMPIRBuilder OMPBuilder(*M);
2779   OMPBuilder.Config.IsTargetDevice = false;
2780   OMPBuilder.initialize();
2781   IRBuilder<> Builder(BB);
2782   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2783 
2784   uint32_t ChunkSize = 1;
2785   Type *LCTy = Type::getInt32Ty(Ctx);
2786   Value *StartVal = ConstantInt::get(LCTy, 10);
2787   Value *StopVal = ConstantInt::get(LCTy, 52);
2788   Value *StepVal = ConstantInt::get(LCTy, 2);
2789   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2790   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2791     return llvm::Error::success();
2792   };
2793 
2794   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2795                        OMPBuilder.createCanonicalLoop(
2796                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2797                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2798 
2799   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2800   InsertPointTy AllocaIP = Builder.saveIP();
2801 
2802   // Collect all the info from CLI, as it isn't usable after the call to
2803   // createDynamicWorkshareLoop.
2804   BasicBlock *Preheader = CLI->getPreheader();
2805   BasicBlock *ExitBlock = CLI->getExit();
2806   BasicBlock *LatchBlock = CLI->getLatch();
2807   Value *IV = CLI->getIndVar();
2808 
2809   ASSERT_EXPECTED_INIT(
2810       OpenMPIRBuilder::InsertPointTy, EndIP,
2811       OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2812                                     OMP_SCHEDULE_Static, ChunkVal,
2813                                     /*HasSimdModifier=*/false,
2814                                     /*HasMonotonicModifier=*/false,
2815                                     /*HasNonmonotonicModifier=*/false,
2816                                     /*HasOrderedClause=*/true));
2817 
2818   // Add a termination to our block and check that it is internally consistent.
2819   Builder.restoreIP(EndIP);
2820   Builder.CreateRetVoid();
2821   OMPBuilder.finalize();
2822   EXPECT_FALSE(verifyModule(*M, &errs()));
2823 
2824   CallInst *InitCall = nullptr;
2825   for (Instruction &EI : *Preheader) {
2826     Instruction *Cur = &EI;
2827     if (isa<CallInst>(Cur)) {
2828       InitCall = cast<CallInst>(Cur);
2829       if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2830         break;
2831       InitCall = nullptr;
2832     }
2833   }
2834   EXPECT_NE(InitCall, nullptr);
2835   EXPECT_EQ(InitCall->arg_size(), 7U);
2836   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2837   EXPECT_EQ(SchedVal->getValue(),
2838             static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));
2839 
2840   CallInst *FiniCall = dyn_cast<CallInst>(
2841       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2842   ASSERT_NE(FiniCall, nullptr);
2843   EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2844             "__kmpc_dispatch_fini_4u");
2845   EXPECT_EQ(FiniCall->arg_size(), 2U);
2846   EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2847   EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2848 
2849   // The original loop iterator should only be used in the condition, in the
2850   // increment and in the statement that adds the lower bound to it.
2851   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2852 
2853   // The exit block should contain the barrier call, plus the call to obtain
2854   // the thread ID.
2855   size_t NumCallsInExitBlock =
2856       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2857   EXPECT_EQ(NumCallsInExitBlock, 2u);
2858 }
2859 
2860 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2861   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2862   OpenMPIRBuilder OMPBuilder(*M);
2863   OMPBuilder.initialize();
2864   F->setName("func");
2865   IRBuilder<> Builder(BB);
2866 
2867   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2868 
2869   AllocaInst *PrivAI = nullptr;
2870 
2871   BasicBlock *EntryBB = nullptr;
2872   BasicBlock *ThenBB = nullptr;
2873 
2874   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2875     if (AllocaIP.isSet())
2876       Builder.restoreIP(AllocaIP);
2877     else
2878       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2879     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2880     Builder.CreateStore(F->arg_begin(), PrivAI);
2881 
2882     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2883     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2884     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2885 
2886     Builder.restoreIP(CodeGenIP);
2887 
2888     // collect some info for checks later
2889     ThenBB = Builder.GetInsertBlock();
2890     EntryBB = ThenBB->getUniquePredecessor();
2891 
2892     // simple instructions for body
2893     Value *PrivLoad =
2894         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2895     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2896   };
2897 
2898   auto FiniCB = [&](InsertPointTy IP) {
2899     BasicBlock *IPBB = IP.getBlock();
2900     EXPECT_NE(IPBB->end(), IP.getPoint());
2901   };
2902 
2903   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2904                        OMPBuilder.createMaster(Builder,
2905                                                BODYGENCB_WRAPPER(BodyGenCB),
2906                                                FINICB_WRAPPER(FiniCB)));
2907   Builder.restoreIP(AfterIP);
2908   Value *EntryBBTI = EntryBB->getTerminator();
2909   EXPECT_NE(EntryBBTI, nullptr);
2910   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2911   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2912   EXPECT_TRUE(EntryBr->isConditional());
2913   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2914   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2915   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2916 
2917   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2918   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2919 
2920   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2921   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2922   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2923   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2924 
2925   CallInst *MasterEndCI = nullptr;
2926   for (auto &FI : *ThenBB) {
2927     Instruction *cur = &FI;
2928     if (isa<CallInst>(cur)) {
2929       MasterEndCI = cast<CallInst>(cur);
2930       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2931         break;
2932       MasterEndCI = nullptr;
2933     }
2934   }
2935   EXPECT_NE(MasterEndCI, nullptr);
2936   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2937   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2938   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2939 }
2940 
2941 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2942   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2943   OpenMPIRBuilder OMPBuilder(*M);
2944   OMPBuilder.initialize();
2945   F->setName("func");
2946   IRBuilder<> Builder(BB);
2947 
2948   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2949 
2950   AllocaInst *PrivAI = nullptr;
2951 
2952   BasicBlock *EntryBB = nullptr;
2953   BasicBlock *ThenBB = nullptr;
2954 
2955   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2956     if (AllocaIP.isSet())
2957       Builder.restoreIP(AllocaIP);
2958     else
2959       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2960     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2961     Builder.CreateStore(F->arg_begin(), PrivAI);
2962 
2963     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2964     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2965     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2966 
2967     Builder.restoreIP(CodeGenIP);
2968 
2969     // collect some info for checks later
2970     ThenBB = Builder.GetInsertBlock();
2971     EntryBB = ThenBB->getUniquePredecessor();
2972 
2973     // simple instructions for body
2974     Value *PrivLoad =
2975         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2976     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2977   };
2978 
2979   auto FiniCB = [&](InsertPointTy IP) {
2980     BasicBlock *IPBB = IP.getBlock();
2981     EXPECT_NE(IPBB->end(), IP.getPoint());
2982   };
2983 
2984   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2985   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2986                        OMPBuilder.createMasked(Builder,
2987                                                BODYGENCB_WRAPPER(BodyGenCB),
2988                                                FINICB_WRAPPER(FiniCB), Filter));
2989   Builder.restoreIP(AfterIP);
2990   Value *EntryBBTI = EntryBB->getTerminator();
2991   EXPECT_NE(EntryBBTI, nullptr);
2992   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2993   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2994   EXPECT_TRUE(EntryBr->isConditional());
2995   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2996   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2997   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2998 
2999   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3000   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3001 
3002   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
3003   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
3004   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
3005   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
3006 
3007   CallInst *MaskedEndCI = nullptr;
3008   for (auto &FI : *ThenBB) {
3009     Instruction *cur = &FI;
3010     if (isa<CallInst>(cur)) {
3011       MaskedEndCI = cast<CallInst>(cur);
3012       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
3013         break;
3014       MaskedEndCI = nullptr;
3015     }
3016   }
3017   EXPECT_NE(MaskedEndCI, nullptr);
3018   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
3019   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
3020   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
3021 }
3022 
3023 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
3024   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3025   OpenMPIRBuilder OMPBuilder(*M);
3026   OMPBuilder.initialize();
3027   F->setName("func");
3028   IRBuilder<> Builder(BB);
3029 
3030   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3031 
3032   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3033 
3034   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3035     // actual start for bodyCB
3036     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3037     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3038     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3039 
3040     // body begin
3041     Builder.restoreIP(CodeGenIP);
3042     Builder.CreateStore(F->arg_begin(), PrivAI);
3043     Value *PrivLoad =
3044         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3045     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3046   };
3047 
3048   auto FiniCB = [&](InsertPointTy IP) {
3049     BasicBlock *IPBB = IP.getBlock();
3050     EXPECT_NE(IPBB->end(), IP.getPoint());
3051   };
3052   BasicBlock *EntryBB = Builder.GetInsertBlock();
3053 
3054   ASSERT_EXPECTED_INIT(
3055       OpenMPIRBuilder::InsertPointTy, AfterIP,
3056       OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3057                                 FINICB_WRAPPER(FiniCB), "testCRT", nullptr));
3058   Builder.restoreIP(AfterIP);
3059 
3060   CallInst *CriticalEntryCI = nullptr;
3061   for (auto &EI : *EntryBB) {
3062     Instruction *cur = &EI;
3063     if (isa<CallInst>(cur)) {
3064       CriticalEntryCI = cast<CallInst>(cur);
3065       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
3066         break;
3067       CriticalEntryCI = nullptr;
3068     }
3069   }
3070   EXPECT_NE(CriticalEntryCI, nullptr);
3071   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
3072   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
3073   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
3074 
3075   CallInst *CriticalEndCI = nullptr;
3076   for (auto &FI : *EntryBB) {
3077     Instruction *cur = &FI;
3078     if (isa<CallInst>(cur)) {
3079       CriticalEndCI = cast<CallInst>(cur);
3080       if (CriticalEndCI->getCalledFunction()->getName() ==
3081           "__kmpc_end_critical")
3082         break;
3083       CriticalEndCI = nullptr;
3084     }
3085   }
3086   EXPECT_NE(CriticalEndCI, nullptr);
3087   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
3088   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
3089   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
3090   PointerType *CriticalNamePtrTy = PointerType::getUnqual(Ctx);
3091   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
3092   GlobalVariable *GV =
3093       dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2));
3094   ASSERT_NE(GV, nullptr);
3095   EXPECT_EQ(GV->getType(), CriticalNamePtrTy);
3096   const DataLayout &DL = M->getDataLayout();
3097   const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy);
3098   const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace());
3099   if (const llvm::MaybeAlign Alignment = GV->getAlign())
3100     EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign));
3101 }
3102 
3103 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
3104   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3105   OpenMPIRBuilder OMPBuilder(*M);
3106   OMPBuilder.initialize();
3107   F->setName("func");
3108   IRBuilder<> Builder(BB);
3109   LLVMContext &Ctx = M->getContext();
3110 
3111   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3112 
3113   InsertPointTy AllocaIP(&F->getEntryBlock(),
3114                          F->getEntryBlock().getFirstInsertionPt());
3115 
3116   unsigned NumLoops = 2;
3117   SmallVector<Value *, 2> StoreValues;
3118   Type *LCTy = Type::getInt64Ty(Ctx);
3119   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3120   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3121 
3122   // Test for "#omp ordered depend(source)"
3123   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3124                                                    StoreValues, ".cnt.addr",
3125                                                    /*IsDependSource=*/true));
3126 
3127   Builder.CreateRetVoid();
3128   OMPBuilder.finalize();
3129   EXPECT_FALSE(verifyModule(*M, &errs()));
3130 
3131   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3132   ASSERT_NE(AllocInst, nullptr);
3133   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3134   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3135   EXPECT_TRUE(
3136       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3137 
3138   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3139   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3140     GetElementPtrInst *DependAddrGEPIter =
3141         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3142     ASSERT_NE(DependAddrGEPIter, nullptr);
3143     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3144     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3145     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3146     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3147     ASSERT_NE(FirstIdx, nullptr);
3148     ASSERT_NE(SecondIdx, nullptr);
3149     EXPECT_EQ(FirstIdx->getValue(), 0);
3150     EXPECT_EQ(SecondIdx->getValue(), Iter);
3151     StoreInst *StoreValue =
3152         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3153     ASSERT_NE(StoreValue, nullptr);
3154     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3155     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3156     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3157     IterInst = dyn_cast<Instruction>(StoreValue);
3158   }
3159 
3160   GetElementPtrInst *DependBaseAddrGEP =
3161       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3162   ASSERT_NE(DependBaseAddrGEP, nullptr);
3163   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3164   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3165   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3166   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3167   ASSERT_NE(FirstIdx, nullptr);
3168   ASSERT_NE(SecondIdx, nullptr);
3169   EXPECT_EQ(FirstIdx->getValue(), 0);
3170   EXPECT_EQ(SecondIdx->getValue(), 0);
3171 
3172   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3173   ASSERT_NE(GTID, nullptr);
3174   EXPECT_EQ(GTID->arg_size(), 1U);
3175   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3176   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3177   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3178 
3179   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3180   ASSERT_NE(Depend, nullptr);
3181   EXPECT_EQ(Depend->arg_size(), 3U);
3182   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
3183   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3184   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3185   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3186 }
3187 
3188 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
3189   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3190   OpenMPIRBuilder OMPBuilder(*M);
3191   OMPBuilder.initialize();
3192   F->setName("func");
3193   IRBuilder<> Builder(BB);
3194   LLVMContext &Ctx = M->getContext();
3195 
3196   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3197 
3198   InsertPointTy AllocaIP(&F->getEntryBlock(),
3199                          F->getEntryBlock().getFirstInsertionPt());
3200 
3201   unsigned NumLoops = 2;
3202   SmallVector<Value *, 2> StoreValues;
3203   Type *LCTy = Type::getInt64Ty(Ctx);
3204   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3205   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3206 
3207   // Test for "#omp ordered depend(sink: vec)"
3208   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3209                                                    StoreValues, ".cnt.addr",
3210                                                    /*IsDependSource=*/false));
3211 
3212   Builder.CreateRetVoid();
3213   OMPBuilder.finalize();
3214   EXPECT_FALSE(verifyModule(*M, &errs()));
3215 
3216   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3217   ASSERT_NE(AllocInst, nullptr);
3218   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3219   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3220   EXPECT_TRUE(
3221       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3222 
3223   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3224   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3225     GetElementPtrInst *DependAddrGEPIter =
3226         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3227     ASSERT_NE(DependAddrGEPIter, nullptr);
3228     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3229     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3230     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3231     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3232     ASSERT_NE(FirstIdx, nullptr);
3233     ASSERT_NE(SecondIdx, nullptr);
3234     EXPECT_EQ(FirstIdx->getValue(), 0);
3235     EXPECT_EQ(SecondIdx->getValue(), Iter);
3236     StoreInst *StoreValue =
3237         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3238     ASSERT_NE(StoreValue, nullptr);
3239     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3240     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3241     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3242     IterInst = dyn_cast<Instruction>(StoreValue);
3243   }
3244 
3245   GetElementPtrInst *DependBaseAddrGEP =
3246       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3247   ASSERT_NE(DependBaseAddrGEP, nullptr);
3248   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3249   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3250   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3251   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3252   ASSERT_NE(FirstIdx, nullptr);
3253   ASSERT_NE(SecondIdx, nullptr);
3254   EXPECT_EQ(FirstIdx->getValue(), 0);
3255   EXPECT_EQ(SecondIdx->getValue(), 0);
3256 
3257   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3258   ASSERT_NE(GTID, nullptr);
3259   EXPECT_EQ(GTID->arg_size(), 1U);
3260   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3261   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3262   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3263 
3264   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3265   ASSERT_NE(Depend, nullptr);
3266   EXPECT_EQ(Depend->arg_size(), 3U);
3267   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
3268   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3269   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3270   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3271 }
3272 
3273 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
3274   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3275   OpenMPIRBuilder OMPBuilder(*M);
3276   OMPBuilder.initialize();
3277   F->setName("func");
3278   IRBuilder<> Builder(BB);
3279 
3280   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3281 
3282   AllocaInst *PrivAI =
3283       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3284 
3285   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3286     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3287     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3288     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3289 
3290     Builder.restoreIP(CodeGenIP);
3291     Builder.CreateStore(F->arg_begin(), PrivAI);
3292     Value *PrivLoad =
3293         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3294     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3295   };
3296 
3297   auto FiniCB = [&](InsertPointTy IP) {
3298     BasicBlock *IPBB = IP.getBlock();
3299     EXPECT_NE(IPBB->end(), IP.getPoint());
3300   };
3301 
3302   // Test for "#omp ordered [threads]"
3303   BasicBlock *EntryBB = Builder.GetInsertBlock();
3304   ASSERT_EXPECTED_INIT(
3305       OpenMPIRBuilder::InsertPointTy, AfterIP,
3306       OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3307                                           FINICB_WRAPPER(FiniCB), true));
3308   Builder.restoreIP(AfterIP);
3309 
3310   Builder.CreateRetVoid();
3311   OMPBuilder.finalize();
3312   EXPECT_FALSE(verifyModule(*M, &errs()));
3313 
3314   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3315 
3316   CallInst *OrderedEntryCI = nullptr;
3317   for (auto &EI : *EntryBB) {
3318     Instruction *Cur = &EI;
3319     if (isa<CallInst>(Cur)) {
3320       OrderedEntryCI = cast<CallInst>(Cur);
3321       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3322         break;
3323       OrderedEntryCI = nullptr;
3324     }
3325   }
3326   EXPECT_NE(OrderedEntryCI, nullptr);
3327   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
3328   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
3329   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
3330 
3331   CallInst *OrderedEndCI = nullptr;
3332   for (auto &FI : *EntryBB) {
3333     Instruction *Cur = &FI;
3334     if (isa<CallInst>(Cur)) {
3335       OrderedEndCI = cast<CallInst>(Cur);
3336       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3337         break;
3338       OrderedEndCI = nullptr;
3339     }
3340   }
3341   EXPECT_NE(OrderedEndCI, nullptr);
3342   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
3343   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
3344   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
3345 }
3346 
3347 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
3348   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3349   OpenMPIRBuilder OMPBuilder(*M);
3350   OMPBuilder.initialize();
3351   F->setName("func");
3352   IRBuilder<> Builder(BB);
3353 
3354   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3355 
3356   AllocaInst *PrivAI =
3357       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3358 
3359   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3360     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3361     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3362     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3363 
3364     Builder.restoreIP(CodeGenIP);
3365     Builder.CreateStore(F->arg_begin(), PrivAI);
3366     Value *PrivLoad =
3367         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3368     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3369   };
3370 
3371   auto FiniCB = [&](InsertPointTy IP) {
3372     BasicBlock *IPBB = IP.getBlock();
3373     EXPECT_NE(IPBB->end(), IP.getPoint());
3374   };
3375 
3376   // Test for "#omp ordered simd"
3377   BasicBlock *EntryBB = Builder.GetInsertBlock();
3378   ASSERT_EXPECTED_INIT(
3379       OpenMPIRBuilder::InsertPointTy, AfterIP,
3380       OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3381                                           FINICB_WRAPPER(FiniCB), false));
3382   Builder.restoreIP(AfterIP);
3383 
3384   Builder.CreateRetVoid();
3385   OMPBuilder.finalize();
3386   EXPECT_FALSE(verifyModule(*M, &errs()));
3387 
3388   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3389 
3390   CallInst *OrderedEntryCI = nullptr;
3391   for (auto &EI : *EntryBB) {
3392     Instruction *Cur = &EI;
3393     if (isa<CallInst>(Cur)) {
3394       OrderedEntryCI = cast<CallInst>(Cur);
3395       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3396         break;
3397       OrderedEntryCI = nullptr;
3398     }
3399   }
3400   EXPECT_EQ(OrderedEntryCI, nullptr);
3401 
3402   CallInst *OrderedEndCI = nullptr;
3403   for (auto &FI : *EntryBB) {
3404     Instruction *Cur = &FI;
3405     if (isa<CallInst>(Cur)) {
3406       OrderedEndCI = cast<CallInst>(Cur);
3407       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3408         break;
3409       OrderedEndCI = nullptr;
3410     }
3411   }
3412   EXPECT_EQ(OrderedEndCI, nullptr);
3413 }
3414 
3415 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
3416   OpenMPIRBuilder OMPBuilder(*M);
3417   OMPBuilder.initialize();
3418   F->setName("func");
3419   IRBuilder<> Builder(BB);
3420 
3421   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3422 
3423   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3424   AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy());
3425   AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy());
3426 
3427   BasicBlock *EntryBB = BB;
3428 
3429   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
3430                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
3431 
3432   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
3433 
3434   EXPECT_NE(EntryBr, nullptr);
3435   EXPECT_TRUE(EntryBr->isConditional());
3436 
3437   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
3438   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
3439   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
3440 
3441   EXPECT_NE(CMP, nullptr);
3442   EXPECT_NE(NotMasterBB, nullptr);
3443   EXPECT_NE(CopyinEnd, nullptr);
3444 
3445   BranchInst *NotMasterBr =
3446       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
3447   EXPECT_NE(NotMasterBr, nullptr);
3448   EXPECT_FALSE(NotMasterBr->isConditional());
3449   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
3450 }
3451 
3452 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
3453   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3454   OpenMPIRBuilder OMPBuilder(*M);
3455   OMPBuilder.initialize();
3456   F->setName("func");
3457   IRBuilder<> Builder(BB);
3458 
3459   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3460 
3461   AllocaInst *PrivAI = nullptr;
3462 
3463   BasicBlock *EntryBB = nullptr;
3464   BasicBlock *ThenBB = nullptr;
3465 
3466   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3467     if (AllocaIP.isSet())
3468       Builder.restoreIP(AllocaIP);
3469     else
3470       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3471     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3472     Builder.CreateStore(F->arg_begin(), PrivAI);
3473 
3474     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3475     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3476     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3477 
3478     Builder.restoreIP(CodeGenIP);
3479 
3480     // collect some info for checks later
3481     ThenBB = Builder.GetInsertBlock();
3482     EntryBB = ThenBB->getUniquePredecessor();
3483 
3484     // simple instructions for body
3485     Value *PrivLoad =
3486         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3487     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3488   };
3489 
3490   auto FiniCB = [&](InsertPointTy IP) {
3491     BasicBlock *IPBB = IP.getBlock();
3492     EXPECT_NE(IPBB->end(), IP.getPoint());
3493   };
3494 
3495   ASSERT_EXPECTED_INIT(
3496       OpenMPIRBuilder::InsertPointTy, AfterIP,
3497       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3498                               FINICB_WRAPPER(FiniCB), /*IsNowait*/ false));
3499   Builder.restoreIP(AfterIP);
3500   Value *EntryBBTI = EntryBB->getTerminator();
3501   EXPECT_NE(EntryBBTI, nullptr);
3502   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3503   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3504   EXPECT_TRUE(EntryBr->isConditional());
3505   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3506   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3507   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3508 
3509   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3510   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3511 
3512   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3513   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3514   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3515   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3516 
3517   CallInst *SingleEndCI = nullptr;
3518   for (auto &FI : *ThenBB) {
3519     Instruction *cur = &FI;
3520     if (isa<CallInst>(cur)) {
3521       SingleEndCI = cast<CallInst>(cur);
3522       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3523         break;
3524       SingleEndCI = nullptr;
3525     }
3526   }
3527   EXPECT_NE(SingleEndCI, nullptr);
3528   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3529   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3530   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3531 
3532   bool FoundBarrier = false;
3533   for (auto &FI : *ExitBB) {
3534     Instruction *cur = &FI;
3535     if (auto CI = dyn_cast<CallInst>(cur)) {
3536       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3537         FoundBarrier = true;
3538         break;
3539       }
3540     }
3541   }
3542   EXPECT_TRUE(FoundBarrier);
3543 }
3544 
3545 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
3546   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3547   OpenMPIRBuilder OMPBuilder(*M);
3548   OMPBuilder.initialize();
3549   F->setName("func");
3550   IRBuilder<> Builder(BB);
3551 
3552   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3553 
3554   AllocaInst *PrivAI = nullptr;
3555 
3556   BasicBlock *EntryBB = nullptr;
3557   BasicBlock *ThenBB = nullptr;
3558 
3559   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3560     if (AllocaIP.isSet())
3561       Builder.restoreIP(AllocaIP);
3562     else
3563       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3564     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3565     Builder.CreateStore(F->arg_begin(), PrivAI);
3566 
3567     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3568     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3569     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3570 
3571     Builder.restoreIP(CodeGenIP);
3572 
3573     // collect some info for checks later
3574     ThenBB = Builder.GetInsertBlock();
3575     EntryBB = ThenBB->getUniquePredecessor();
3576 
3577     // simple instructions for body
3578     Value *PrivLoad =
3579         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3580     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3581   };
3582 
3583   auto FiniCB = [&](InsertPointTy IP) {
3584     BasicBlock *IPBB = IP.getBlock();
3585     EXPECT_NE(IPBB->end(), IP.getPoint());
3586   };
3587 
3588   ASSERT_EXPECTED_INIT(
3589       OpenMPIRBuilder::InsertPointTy, AfterIP,
3590       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3591                               FINICB_WRAPPER(FiniCB), /*IsNowait*/ true));
3592   Builder.restoreIP(AfterIP);
3593   Value *EntryBBTI = EntryBB->getTerminator();
3594   EXPECT_NE(EntryBBTI, nullptr);
3595   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3596   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3597   EXPECT_TRUE(EntryBr->isConditional());
3598   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3599   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3600   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3601 
3602   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3603   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3604 
3605   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3606   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3607   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3608   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3609 
3610   CallInst *SingleEndCI = nullptr;
3611   for (auto &FI : *ThenBB) {
3612     Instruction *cur = &FI;
3613     if (isa<CallInst>(cur)) {
3614       SingleEndCI = cast<CallInst>(cur);
3615       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3616         break;
3617       SingleEndCI = nullptr;
3618     }
3619   }
3620   EXPECT_NE(SingleEndCI, nullptr);
3621   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3622   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3623   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3624 
3625   CallInst *ExitBarrier = nullptr;
3626   for (auto &FI : *ExitBB) {
3627     Instruction *cur = &FI;
3628     if (auto CI = dyn_cast<CallInst>(cur)) {
3629       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3630         ExitBarrier = CI;
3631         break;
3632       }
3633     }
3634   }
3635   EXPECT_EQ(ExitBarrier, nullptr);
3636 }
3637 
3638 // Helper class to check each instruction of a BB.
3639 class BBInstIter {
3640   BasicBlock *BB;
3641   BasicBlock::iterator BBI;
3642 
3643 public:
3644   BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {}
3645 
3646   bool hasNext() const { return BBI != BB->end(); }
3647 
3648   template <typename InstTy> InstTy *next() {
3649     if (!hasNext())
3650       return nullptr;
3651     Instruction *Cur = &*BBI++;
3652     if (!isa<InstTy>(Cur))
3653       return nullptr;
3654     return cast<InstTy>(Cur);
3655   }
3656 };
3657 
3658 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
3659   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3660   OpenMPIRBuilder OMPBuilder(*M);
3661   OMPBuilder.initialize();
3662   F->setName("func");
3663   IRBuilder<> Builder(BB);
3664 
3665   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3666 
3667   AllocaInst *PrivAI = nullptr;
3668 
3669   BasicBlock *EntryBB = nullptr;
3670   BasicBlock *ThenBB = nullptr;
3671 
3672   Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType());
3673   Builder.CreateStore(F->arg_begin(), CPVar);
3674 
3675   FunctionType *CopyFuncTy = FunctionType::get(
3676       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false);
3677   Function *CopyFunc =
3678       Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
3679 
3680   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3681     if (AllocaIP.isSet())
3682       Builder.restoreIP(AllocaIP);
3683     else
3684       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3685     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3686     Builder.CreateStore(F->arg_begin(), PrivAI);
3687 
3688     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3689     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3690     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3691 
3692     Builder.restoreIP(CodeGenIP);
3693 
3694     // collect some info for checks later
3695     ThenBB = Builder.GetInsertBlock();
3696     EntryBB = ThenBB->getUniquePredecessor();
3697 
3698     // simple instructions for body
3699     Value *PrivLoad =
3700         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3701     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3702   };
3703 
3704   auto FiniCB = [&](InsertPointTy IP) {
3705     BasicBlock *IPBB = IP.getBlock();
3706     // IP must be before the unconditional branch to ExitBB
3707     EXPECT_NE(IPBB->end(), IP.getPoint());
3708   };
3709 
3710   ASSERT_EXPECTED_INIT(
3711       OpenMPIRBuilder::InsertPointTy, AfterIP,
3712       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3713                               FINICB_WRAPPER(FiniCB),
3714                               /*IsNowait*/ false, {CPVar}, {CopyFunc}));
3715   Builder.restoreIP(AfterIP);
3716   Value *EntryBBTI = EntryBB->getTerminator();
3717   EXPECT_NE(EntryBBTI, nullptr);
3718   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3719   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3720   EXPECT_TRUE(EntryBr->isConditional());
3721   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3722   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3723   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3724 
3725   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3726   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3727 
3728   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3729   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3730   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3731   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3732 
3733   // check ThenBB
3734   BBInstIter ThenBBI(ThenBB);
3735   // load PrivAI
3736   auto *PrivLI = ThenBBI.next<LoadInst>();
3737   EXPECT_NE(PrivLI, nullptr);
3738   EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI);
3739   // icmp
3740   EXPECT_TRUE(ThenBBI.next<ICmpInst>());
3741   // store 1, DidIt
3742   auto *DidItSI = ThenBBI.next<StoreInst>();
3743   EXPECT_NE(DidItSI, nullptr);
3744   EXPECT_EQ(DidItSI->getValueOperand(),
3745             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
3746   Value *DidIt = DidItSI->getPointerOperand();
3747   // call __kmpc_end_single
3748   auto *SingleEndCI = ThenBBI.next<CallInst>();
3749   EXPECT_NE(SingleEndCI, nullptr);
3750   EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single");
3751   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3752   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3753   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3754   // br ExitBB
3755   auto *ExitBBBI = ThenBBI.next<BranchInst>();
3756   EXPECT_NE(ExitBBBI, nullptr);
3757   EXPECT_TRUE(ExitBBBI->isUnconditional());
3758   EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB);
3759   EXPECT_FALSE(ThenBBI.hasNext());
3760 
3761   // check ExitBB
3762   BBInstIter ExitBBI(ExitBB);
3763   // call __kmpc_global_thread_num
3764   auto *ThreadNumCI = ExitBBI.next<CallInst>();
3765   EXPECT_NE(ThreadNumCI, nullptr);
3766   EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(),
3767             "__kmpc_global_thread_num");
3768   // load DidIt
3769   auto *DidItLI = ExitBBI.next<LoadInst>();
3770   EXPECT_NE(DidItLI, nullptr);
3771   EXPECT_EQ(DidItLI->getPointerOperand(), DidIt);
3772   // call __kmpc_copyprivate
3773   auto *CopyPrivateCI = ExitBBI.next<CallInst>();
3774   EXPECT_NE(CopyPrivateCI, nullptr);
3775   EXPECT_EQ(CopyPrivateCI->arg_size(), 6U);
3776   EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3)));
3777   EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar);
3778   EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4)));
3779   EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc);
3780   EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5)));
3781   DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5));
3782   EXPECT_EQ(DidItLI->getOperand(0), DidIt);
3783   EXPECT_FALSE(ExitBBI.hasNext());
3784 }
3785 
3786 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
3787   OpenMPIRBuilder OMPBuilder(*M);
3788   OMPBuilder.initialize();
3789   F->setName("func");
3790   IRBuilder<> Builder(BB);
3791 
3792   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3793 
3794   Type *Float32 = Type::getFloatTy(M->getContext());
3795   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3796   XVal->setName("AtomicVar");
3797   AllocaInst *VVal = Builder.CreateAlloca(Float32);
3798   VVal->setName("AtomicRead");
3799   AtomicOrdering AO = AtomicOrdering::Monotonic;
3800   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3801   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
3802 
3803   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3804 
3805   IntegerType *IntCastTy =
3806       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3807 
3808   LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
3809   EXPECT_TRUE(AtomicLoad->isAtomic());
3810   EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
3811 
3812   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
3813   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
3814   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
3815   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
3816 
3817   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
3818   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
3819   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
3820 
3821   Builder.CreateRetVoid();
3822   OMPBuilder.finalize();
3823   EXPECT_FALSE(verifyModule(*M, &errs()));
3824 }
3825 
3826 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
3827   OpenMPIRBuilder OMPBuilder(*M);
3828   OMPBuilder.initialize();
3829   F->setName("func");
3830   IRBuilder<> Builder(BB);
3831 
3832   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3833 
3834   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3835   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3836   XVal->setName("AtomicVar");
3837   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3838   VVal->setName("AtomicRead");
3839   AtomicOrdering AO = AtomicOrdering::Monotonic;
3840   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3841   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3842 
3843   BasicBlock *EntryBB = BB;
3844 
3845   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3846   LoadInst *AtomicLoad = nullptr;
3847   StoreInst *StoreofAtomic = nullptr;
3848 
3849   for (Instruction &Cur : *EntryBB) {
3850     if (isa<LoadInst>(Cur)) {
3851       AtomicLoad = cast<LoadInst>(&Cur);
3852       if (AtomicLoad->getPointerOperand() == XVal)
3853         continue;
3854       AtomicLoad = nullptr;
3855     } else if (isa<StoreInst>(Cur)) {
3856       StoreofAtomic = cast<StoreInst>(&Cur);
3857       if (StoreofAtomic->getPointerOperand() == VVal)
3858         continue;
3859       StoreofAtomic = nullptr;
3860     }
3861   }
3862 
3863   EXPECT_NE(AtomicLoad, nullptr);
3864   EXPECT_TRUE(AtomicLoad->isAtomic());
3865 
3866   EXPECT_NE(StoreofAtomic, nullptr);
3867   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
3868 
3869   Builder.CreateRetVoid();
3870   OMPBuilder.finalize();
3871 
3872   EXPECT_FALSE(verifyModule(*M, &errs()));
3873 }
3874 
3875 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
3876   OpenMPIRBuilder OMPBuilder(*M);
3877   OMPBuilder.initialize();
3878   F->setName("func");
3879   IRBuilder<> Builder(BB);
3880 
3881   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3882 
3883   LLVMContext &Ctx = M->getContext();
3884   Type *Float32 = Type::getFloatTy(Ctx);
3885   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3886   XVal->setName("AtomicVar");
3887   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3888   AtomicOrdering AO = AtomicOrdering::Monotonic;
3889   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
3890 
3891   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3892 
3893   IntegerType *IntCastTy =
3894       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3895 
3896   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
3897 
3898   StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode());
3899   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
3900   EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
3901   EXPECT_TRUE(StoreofAtomic->isAtomic());
3902 
3903   Builder.CreateRetVoid();
3904   OMPBuilder.finalize();
3905   EXPECT_FALSE(verifyModule(*M, &errs()));
3906 }
3907 
3908 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
3909   OpenMPIRBuilder OMPBuilder(*M);
3910   OMPBuilder.initialize();
3911   F->setName("func");
3912   IRBuilder<> Builder(BB);
3913 
3914   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3915 
3916   LLVMContext &Ctx = M->getContext();
3917   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3918   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3919   XVal->setName("AtomicVar");
3920   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3921   AtomicOrdering AO = AtomicOrdering::Monotonic;
3922   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3923 
3924   BasicBlock *EntryBB = BB;
3925 
3926   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3927 
3928   StoreInst *StoreofAtomic = nullptr;
3929 
3930   for (Instruction &Cur : *EntryBB) {
3931     if (isa<StoreInst>(Cur)) {
3932       StoreofAtomic = cast<StoreInst>(&Cur);
3933       if (StoreofAtomic->getPointerOperand() == XVal)
3934         continue;
3935       StoreofAtomic = nullptr;
3936     }
3937   }
3938 
3939   EXPECT_NE(StoreofAtomic, nullptr);
3940   EXPECT_TRUE(StoreofAtomic->isAtomic());
3941   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3942 
3943   Builder.CreateRetVoid();
3944   OMPBuilder.finalize();
3945   EXPECT_FALSE(verifyModule(*M, &errs()));
3946 }
3947 
3948 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3949   OpenMPIRBuilder OMPBuilder(*M);
3950   OMPBuilder.initialize();
3951   F->setName("func");
3952   IRBuilder<> Builder(BB);
3953 
3954   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3955 
3956   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3957   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3958   XVal->setName("AtomicVar");
3959   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3960   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3961   AtomicOrdering AO = AtomicOrdering::Monotonic;
3962   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3963   Value *Expr = nullptr;
3964   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3965   bool IsXLHSInRHSPart = false;
3966 
3967   BasicBlock *EntryBB = BB;
3968   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3969                                           EntryBB->getFirstInsertionPt());
3970   Value *Sub = nullptr;
3971 
3972   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3973     Sub = IRB.CreateSub(ConstVal, Atomic);
3974     return Sub;
3975   };
3976   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
3977                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
3978                                                      AO, RMWOp, UpdateOp,
3979                                                      IsXLHSInRHSPart));
3980   Builder.restoreIP(AfterIP);
3981   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3982   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3983   EXPECT_NE(ContTI, nullptr);
3984   BasicBlock *EndBB = ContTI->getSuccessor(0);
3985   EXPECT_TRUE(ContTI->isConditional());
3986   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3987   EXPECT_NE(EndBB, nullptr);
3988 
3989   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3990   EXPECT_NE(Phi, nullptr);
3991   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3992   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3993   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3994 
3995   EXPECT_EQ(Sub->getNumUses(), 1U);
3996   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3997   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3998 
3999   ExtractValueInst *ExVI1 =
4000       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4001   EXPECT_NE(ExVI1, nullptr);
4002   AtomicCmpXchgInst *CmpExchg =
4003       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4004   EXPECT_NE(CmpExchg, nullptr);
4005   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4006   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4007   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4008 
4009   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4010   EXPECT_NE(Ld, nullptr);
4011   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4012 
4013   Builder.CreateRetVoid();
4014   OMPBuilder.finalize();
4015   EXPECT_FALSE(verifyModule(*M, &errs()));
4016 }
4017 
4018 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
4019   OpenMPIRBuilder OMPBuilder(*M);
4020   OMPBuilder.initialize();
4021   F->setName("func");
4022   IRBuilder<> Builder(BB);
4023 
4024   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4025 
4026   Type *FloatTy = Type::getFloatTy(M->getContext());
4027   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
4028   XVal->setName("AtomicVar");
4029   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
4030   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
4031   AtomicOrdering AO = AtomicOrdering::Monotonic;
4032   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
4033   Value *Expr = nullptr;
4034   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
4035   bool IsXLHSInRHSPart = false;
4036 
4037   BasicBlock *EntryBB = BB;
4038   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4039                                           EntryBB->getFirstInsertionPt());
4040   Value *Sub = nullptr;
4041 
4042   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
4043     Sub = IRB.CreateFSub(ConstVal, Atomic);
4044     return Sub;
4045   };
4046   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4047                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
4048                                                      AO, RMWOp, UpdateOp,
4049                                                      IsXLHSInRHSPart));
4050   Builder.restoreIP(AfterIP);
4051   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
4052   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
4053   EXPECT_NE(ContTI, nullptr);
4054   BasicBlock *EndBB = ContTI->getSuccessor(0);
4055   EXPECT_TRUE(ContTI->isConditional());
4056   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
4057   EXPECT_NE(EndBB, nullptr);
4058 
4059   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
4060   EXPECT_NE(Phi, nullptr);
4061   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
4062   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
4063   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
4064 
4065   EXPECT_EQ(Sub->getNumUses(), 1U);
4066   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
4067   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
4068 
4069   ExtractValueInst *ExVI1 =
4070       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4071   EXPECT_NE(ExVI1, nullptr);
4072   AtomicCmpXchgInst *CmpExchg =
4073       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4074   EXPECT_NE(CmpExchg, nullptr);
4075   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4076   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4077   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4078 
4079   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4080   EXPECT_NE(Ld, nullptr);
4081   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4082   Builder.CreateRetVoid();
4083   OMPBuilder.finalize();
4084   EXPECT_FALSE(verifyModule(*M, &errs()));
4085 }
4086 
4087 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
4088   OpenMPIRBuilder OMPBuilder(*M);
4089   OMPBuilder.initialize();
4090   F->setName("func");
4091   IRBuilder<> Builder(BB);
4092 
4093   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4094 
4095   Type *IntTy = Type::getInt32Ty(M->getContext());
4096   AllocaInst *XVal = Builder.CreateAlloca(IntTy);
4097   XVal->setName("AtomicVar");
4098   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
4099   OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
4100   AtomicOrdering AO = AtomicOrdering::Monotonic;
4101   Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
4102   Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
4103   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
4104   bool IsXLHSInRHSPart = false;
4105 
4106   BasicBlock *EntryBB = BB;
4107   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4108                                           EntryBB->getFirstInsertionPt());
4109   Value *Sub = nullptr;
4110 
4111   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
4112     Sub = IRB.CreateSub(ConstVal, Atomic);
4113     return Sub;
4114   };
4115   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4116                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
4117                                                      AO, RMWOp, UpdateOp,
4118                                                      IsXLHSInRHSPart));
4119   Builder.restoreIP(AfterIP);
4120   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
4121   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
4122   EXPECT_NE(ContTI, nullptr);
4123   BasicBlock *EndBB = ContTI->getSuccessor(0);
4124   EXPECT_TRUE(ContTI->isConditional());
4125   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
4126   EXPECT_NE(EndBB, nullptr);
4127 
4128   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
4129   EXPECT_NE(Phi, nullptr);
4130   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
4131   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
4132   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
4133 
4134   EXPECT_EQ(Sub->getNumUses(), 1U);
4135   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
4136   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
4137 
4138   ExtractValueInst *ExVI1 =
4139       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4140   EXPECT_NE(ExVI1, nullptr);
4141   AtomicCmpXchgInst *CmpExchg =
4142       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4143   EXPECT_NE(CmpExchg, nullptr);
4144   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4145   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4146   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4147 
4148   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4149   EXPECT_NE(Ld, nullptr);
4150   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4151 
4152   Builder.CreateRetVoid();
4153   OMPBuilder.finalize();
4154   EXPECT_FALSE(verifyModule(*M, &errs()));
4155 }
4156 
4157 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
4158   OpenMPIRBuilder OMPBuilder(*M);
4159   OMPBuilder.initialize();
4160   F->setName("func");
4161   IRBuilder<> Builder(BB);
4162 
4163   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4164 
4165   LLVMContext &Ctx = M->getContext();
4166   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4167   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4168   XVal->setName("AtomicVar");
4169   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4170   VVal->setName("AtomicCapTar");
4171   StoreInst *Init =
4172       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4173 
4174   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
4175   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4176   AtomicOrdering AO = AtomicOrdering::Monotonic;
4177   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4178   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
4179   bool IsXLHSInRHSPart = true;
4180   bool IsPostfixUpdate = true;
4181   bool UpdateExpr = true;
4182 
4183   BasicBlock *EntryBB = BB;
4184   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4185                                           EntryBB->getFirstInsertionPt());
4186 
4187   // integer update - not used
4188   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
4189 
4190   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4191                        OMPBuilder.createAtomicCapture(
4192                            Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp,
4193                            UpdateExpr, IsPostfixUpdate, IsXLHSInRHSPart));
4194   Builder.restoreIP(AfterIP);
4195   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4196   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4197   EXPECT_NE(ARWM, nullptr);
4198   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
4199   EXPECT_EQ(ARWM->getOperation(), RMWOp);
4200   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
4201   EXPECT_NE(St, nullptr);
4202   EXPECT_EQ(St->getPointerOperand(), VVal);
4203 
4204   Builder.CreateRetVoid();
4205   OMPBuilder.finalize();
4206   EXPECT_FALSE(verifyModule(*M, &errs()));
4207 }
4208 
4209 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
4210   OpenMPIRBuilder OMPBuilder(*M);
4211   OMPBuilder.initialize();
4212   F->setName("func");
4213   IRBuilder<> Builder(BB);
4214 
4215   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4216 
4217   LLVMContext &Ctx = M->getContext();
4218   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4219   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4220   XVal->setName("x");
4221   StoreInst *Init =
4222       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4223 
4224   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
4225   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
4226   // V and R are not used in atomic compare
4227   OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
4228   OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
4229   AtomicOrdering AO = AtomicOrdering::Monotonic;
4230   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4231   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4232   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4233   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4234 
4235   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4236       Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
4237   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4238       Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
4239   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4240       Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));
4241 
4242   BasicBlock *EntryBB = BB;
4243   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4244   EXPECT_EQ(EntryBB->size(), 5U);
4245 
4246   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4247   EXPECT_NE(ARWM1, nullptr);
4248   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4249   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4250   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4251 
4252   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
4253   EXPECT_NE(ARWM2, nullptr);
4254   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4255   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4256   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
4257 
4258   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
4259   EXPECT_NE(AXCHG, nullptr);
4260   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
4261   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
4262   EXPECT_EQ(AXCHG->getNewValOperand(), D);
4263 
4264   Builder.CreateRetVoid();
4265   OMPBuilder.finalize();
4266   EXPECT_FALSE(verifyModule(*M, &errs()));
4267 }
4268 
4269 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
4270   OpenMPIRBuilder OMPBuilder(*M);
4271   OMPBuilder.initialize();
4272   F->setName("func");
4273   IRBuilder<> Builder(BB);
4274 
4275   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4276 
4277   LLVMContext &Ctx = M->getContext();
4278   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4279   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4280   XVal->setName("x");
4281   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4282   VVal->setName("v");
4283   AllocaInst *RVal = Builder.CreateAlloca(Int32);
4284   RVal->setName("r");
4285 
4286   StoreInst *Init =
4287       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4288 
4289   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
4290   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4291   OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
4292   OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
4293   OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};
4294 
4295   AtomicOrdering AO = AtomicOrdering::Monotonic;
4296   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4297   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4298   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4299   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4300 
4301   // { cond-update-stmt v = x; }
4302   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4303       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4304       /* IsPostfixUpdate */ false,
4305       /* IsFailOnly */ false));
4306   // { v = x; cond-update-stmt }
4307   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4308       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4309       /* IsPostfixUpdate */ true,
4310       /* IsFailOnly */ false));
4311   // if(x == e) { x = d; } else { v = x; }
4312   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4313       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4314       /* IsPostfixUpdate */ false,
4315       /* IsFailOnly */ true));
4316   // { r = x == e; if(r) { x = d; } }
4317   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4318       Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4319       /* IsPostfixUpdate */ false,
4320       /* IsFailOnly */ false));
4321   // { r = x == e; if(r) { x = d; } else { v = x; } }
4322   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4323       Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4324       /* IsPostfixUpdate */ false,
4325       /* IsFailOnly */ true));
4326 
4327   // { v = x; cond-update-stmt }
4328   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4329       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true,
4330       /* IsPostfixUpdate */ true,
4331       /* IsFailOnly */ false));
4332   // { cond-update-stmt v = x; }
4333   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4334       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false,
4335       /* IsPostfixUpdate */ false,
4336       /* IsFailOnly */ false));
4337 
4338   BasicBlock *EntryBB = BB;
4339   EXPECT_EQ(EntryBB->getParent()->size(), 5U);
4340   BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode());
4341   EXPECT_NE(Cont1, nullptr);
4342   BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode());
4343   EXPECT_NE(Exit1, nullptr);
4344   BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode());
4345   EXPECT_NE(Cont2, nullptr);
4346   BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode());
4347   EXPECT_NE(Exit2, nullptr);
4348 
4349   AtomicCmpXchgInst *CmpXchg1 =
4350       dyn_cast<AtomicCmpXchgInst>(Init->getNextNode());
4351   EXPECT_NE(CmpXchg1, nullptr);
4352   EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal);
4353   EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr);
4354   EXPECT_EQ(CmpXchg1->getNewValOperand(), D);
4355   ExtractValueInst *ExtVal1 =
4356       dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode());
4357   EXPECT_NE(ExtVal1, nullptr);
4358   EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1);
4359   EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U));
4360   ExtractValueInst *ExtVal2 =
4361       dyn_cast<ExtractValueInst>(ExtVal1->getNextNode());
4362   EXPECT_NE(ExtVal2, nullptr);
4363   EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1);
4364   EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U));
4365   SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode());
4366   EXPECT_NE(Sel1, nullptr);
4367   EXPECT_EQ(Sel1->getCondition(), ExtVal2);
4368   EXPECT_EQ(Sel1->getTrueValue(), Expr);
4369   EXPECT_EQ(Sel1->getFalseValue(), ExtVal1);
4370   StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode());
4371   EXPECT_NE(Store1, nullptr);
4372   EXPECT_EQ(Store1->getPointerOperand(), VVal);
4373   EXPECT_EQ(Store1->getValueOperand(), Sel1);
4374 
4375   AtomicCmpXchgInst *CmpXchg2 =
4376       dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode());
4377   EXPECT_NE(CmpXchg2, nullptr);
4378   EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal);
4379   EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr);
4380   EXPECT_EQ(CmpXchg2->getNewValOperand(), D);
4381   ExtractValueInst *ExtVal3 =
4382       dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode());
4383   EXPECT_NE(ExtVal3, nullptr);
4384   EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2);
4385   EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U));
4386   StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode());
4387   EXPECT_NE(Store2, nullptr);
4388   EXPECT_EQ(Store2->getPointerOperand(), VVal);
4389   EXPECT_EQ(Store2->getValueOperand(), ExtVal3);
4390 
4391   AtomicCmpXchgInst *CmpXchg3 =
4392       dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode());
4393   EXPECT_NE(CmpXchg3, nullptr);
4394   EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal);
4395   EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr);
4396   EXPECT_EQ(CmpXchg3->getNewValOperand(), D);
4397   ExtractValueInst *ExtVal4 =
4398       dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode());
4399   EXPECT_NE(ExtVal4, nullptr);
4400   EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3);
4401   EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U));
4402   ExtractValueInst *ExtVal5 =
4403       dyn_cast<ExtractValueInst>(ExtVal4->getNextNode());
4404   EXPECT_NE(ExtVal5, nullptr);
4405   EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3);
4406   EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U));
4407   BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode());
4408   EXPECT_NE(Br1, nullptr);
4409   EXPECT_EQ(Br1->isConditional(), true);
4410   EXPECT_EQ(Br1->getCondition(), ExtVal5);
4411   EXPECT_EQ(Br1->getSuccessor(0), Exit1);
4412   EXPECT_EQ(Br1->getSuccessor(1), Cont1);
4413 
4414   StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front());
4415   EXPECT_NE(Store3, nullptr);
4416   EXPECT_EQ(Store3->getPointerOperand(), VVal);
4417   EXPECT_EQ(Store3->getValueOperand(), ExtVal4);
4418   BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode());
4419   EXPECT_NE(Br2, nullptr);
4420   EXPECT_EQ(Br2->isUnconditional(), true);
4421   EXPECT_EQ(Br2->getSuccessor(0), Exit1);
4422 
4423   AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front());
4424   EXPECT_NE(CmpXchg4, nullptr);
4425   EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal);
4426   EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr);
4427   EXPECT_EQ(CmpXchg4->getNewValOperand(), D);
4428   ExtractValueInst *ExtVal6 =
4429       dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode());
4430   EXPECT_NE(ExtVal6, nullptr);
4431   EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4);
4432   EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U));
4433   ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode());
4434   EXPECT_NE(ZExt1, nullptr);
4435   EXPECT_EQ(ZExt1->getDestTy(), Int32);
4436   StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode());
4437   EXPECT_NE(Store4, nullptr);
4438   EXPECT_EQ(Store4->getPointerOperand(), RVal);
4439   EXPECT_EQ(Store4->getValueOperand(), ZExt1);
4440 
4441   AtomicCmpXchgInst *CmpXchg5 =
4442       dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode());
4443   EXPECT_NE(CmpXchg5, nullptr);
4444   EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal);
4445   EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr);
4446   EXPECT_EQ(CmpXchg5->getNewValOperand(), D);
4447   ExtractValueInst *ExtVal7 =
4448       dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode());
4449   EXPECT_NE(ExtVal7, nullptr);
4450   EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5);
4451   EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U));
4452   ExtractValueInst *ExtVal8 =
4453       dyn_cast<ExtractValueInst>(ExtVal7->getNextNode());
4454   EXPECT_NE(ExtVal8, nullptr);
4455   EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5);
4456   EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U));
4457   BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode());
4458   EXPECT_NE(Br3, nullptr);
4459   EXPECT_EQ(Br3->isConditional(), true);
4460   EXPECT_EQ(Br3->getCondition(), ExtVal8);
4461   EXPECT_EQ(Br3->getSuccessor(0), Exit2);
4462   EXPECT_EQ(Br3->getSuccessor(1), Cont2);
4463 
4464   StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front());
4465   EXPECT_NE(Store5, nullptr);
4466   EXPECT_EQ(Store5->getPointerOperand(), VVal);
4467   EXPECT_EQ(Store5->getValueOperand(), ExtVal7);
4468   BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode());
4469   EXPECT_NE(Br4, nullptr);
4470   EXPECT_EQ(Br4->isUnconditional(), true);
4471   EXPECT_EQ(Br4->getSuccessor(0), Exit2);
4472 
4473   ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front());
4474   EXPECT_NE(ExtVal9, nullptr);
4475   EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5);
4476   EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U));
4477   ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode());
4478   EXPECT_NE(ZExt2, nullptr);
4479   EXPECT_EQ(ZExt2->getDestTy(), Int32);
4480   StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode());
4481   EXPECT_NE(Store6, nullptr);
4482   EXPECT_EQ(Store6->getPointerOperand(), RVal);
4483   EXPECT_EQ(Store6->getValueOperand(), ZExt2);
4484 
4485   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode());
4486   EXPECT_NE(ARWM1, nullptr);
4487   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4488   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4489   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4490   StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode());
4491   EXPECT_NE(Store7, nullptr);
4492   EXPECT_EQ(Store7->getPointerOperand(), VVal);
4493   EXPECT_EQ(Store7->getValueOperand(), ARWM1);
4494 
4495   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode());
4496   EXPECT_NE(ARWM2, nullptr);
4497   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4498   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4499   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max);
4500   CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode());
4501   EXPECT_NE(Cmp1, nullptr);
4502   EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT);
4503   EXPECT_EQ(Cmp1->getOperand(0), ARWM2);
4504   EXPECT_EQ(Cmp1->getOperand(1), Expr);
4505   SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode());
4506   EXPECT_NE(Sel2, nullptr);
4507   EXPECT_EQ(Sel2->getCondition(), Cmp1);
4508   EXPECT_EQ(Sel2->getTrueValue(), Expr);
4509   EXPECT_EQ(Sel2->getFalseValue(), ARWM2);
4510   StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode());
4511   EXPECT_NE(Store8, nullptr);
4512   EXPECT_EQ(Store8->getPointerOperand(), VVal);
4513   EXPECT_EQ(Store8->getValueOperand(), Sel2);
4514 
4515   Builder.CreateRetVoid();
4516   OMPBuilder.finalize();
4517   EXPECT_FALSE(verifyModule(*M, &errs()));
4518 }
4519 
4520 TEST_F(OpenMPIRBuilderTest, CreateTeams) {
4521   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4522   OpenMPIRBuilder OMPBuilder(*M);
4523   OMPBuilder.Config.IsTargetDevice = false;
4524   OMPBuilder.initialize();
4525   F->setName("func");
4526   IRBuilder<> Builder(BB);
4527 
4528   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
4529   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
4530   Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
4531 
4532   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4533     Builder.restoreIP(AllocaIP);
4534     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
4535                                                 "bodygen.alloca128");
4536 
4537     Builder.restoreIP(CodeGenIP);
4538     // Loading and storing captured pointer and values
4539     Builder.CreateStore(Val128, Local128);
4540     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
4541                                       "bodygen.load32");
4542 
4543     LoadInst *PrivLoad128 = Builder.CreateLoad(
4544         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
4545     Value *Cmp = Builder.CreateICmpNE(
4546         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
4547     Instruction *ThenTerm, *ElseTerm;
4548     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
4549                                   &ThenTerm, &ElseTerm);
4550     return Error::success();
4551   };
4552 
4553   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4554   ASSERT_EXPECTED_INIT(
4555       OpenMPIRBuilder::InsertPointTy, AfterIP,
4556       OMPBuilder.createTeams(Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
4557                              /*NumTeamsUpper=*/nullptr,
4558                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4559   Builder.restoreIP(AfterIP);
4560 
4561   OMPBuilder.finalize();
4562   Builder.CreateRetVoid();
4563 
4564   EXPECT_FALSE(verifyModule(*M, &errs()));
4565 
4566   CallInst *TeamsForkCall = dyn_cast<CallInst>(
4567       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)
4568           ->user_back());
4569 
4570   // Verify the Ident argument
4571   GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0));
4572   ASSERT_NE(Ident, nullptr);
4573   EXPECT_TRUE(Ident->hasInitializer());
4574   Constant *Initializer = Ident->getInitializer();
4575   GlobalVariable *SrcStrGlob =
4576       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
4577   ASSERT_NE(SrcStrGlob, nullptr);
4578   ConstantDataArray *SrcSrc =
4579       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
4580   ASSERT_NE(SrcSrc, nullptr);
4581 
4582   // Verify the outlined function signature.
4583   Function *OutlinedFn =
4584       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
4585   ASSERT_NE(OutlinedFn, nullptr);
4586   EXPECT_FALSE(OutlinedFn->isDeclaration());
4587   EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
4588   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
4589   EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
4590   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
4591             Builder.getPtrTy()); // captured args
4592 
4593   // Check for TruncInst and ICmpInst in the outlined function.
4594   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4595                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
4596   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4597                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
4598 }
4599 
4600 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
4601   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4602   OpenMPIRBuilder OMPBuilder(*M);
4603   OMPBuilder.Config.IsTargetDevice = false;
4604   OMPBuilder.initialize();
4605   F->setName("func");
4606   IRBuilder<> &Builder = OMPBuilder.Builder;
4607   Builder.SetInsertPoint(BB);
4608 
4609   Function *FakeFunction =
4610       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4611                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4612 
4613   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4614     Builder.restoreIP(CodeGenIP);
4615     Builder.CreateCall(FakeFunction, {});
4616     return Error::success();
4617   };
4618 
4619   // `F` has an argument - an integer, so we use that as the thread limit.
4620   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4621                        OMPBuilder.createTeams(
4622                            /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
4623                            /*NumTeamsUpper=*/nullptr,
4624                            /*ThreadLimit=*/F->arg_begin(),
4625                            /*IfExpr=*/nullptr));
4626   Builder.restoreIP(AfterIP);
4627 
4628   Builder.CreateRetVoid();
4629   OMPBuilder.finalize();
4630 
4631   ASSERT_FALSE(verifyModule(*M));
4632 
4633   CallInst *PushNumTeamsCallInst =
4634       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4635   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4636 
4637   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0));
4638   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0));
4639   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin());
4640 
4641   // Verifying that the next instruction to execute is kmpc_fork_teams
4642   BranchInst *BrInst =
4643       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4644   ASSERT_NE(BrInst, nullptr);
4645   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4646   BasicBlock::iterator NextInstruction =
4647       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4648   CallInst *ForkTeamsCI = nullptr;
4649   if (NextInstruction != BrInst->getSuccessor(0)->end())
4650     ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4651   ASSERT_NE(ForkTeamsCI, nullptr);
4652   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4653             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4654 }
4655 
4656 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
4657   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4658   OpenMPIRBuilder OMPBuilder(*M);
4659   OMPBuilder.Config.IsTargetDevice = false;
4660   OMPBuilder.initialize();
4661   F->setName("func");
4662   IRBuilder<> &Builder = OMPBuilder.Builder;
4663   Builder.SetInsertPoint(BB);
4664 
4665   Function *FakeFunction =
4666       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4667                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4668 
4669   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4670     Builder.restoreIP(CodeGenIP);
4671     Builder.CreateCall(FakeFunction, {});
4672     return Error::success();
4673   };
4674 
4675   // `F` already has an integer argument, so we use that as upper bound to
4676   // `num_teams`
4677   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4678                        OMPBuilder.createTeams(Builder, BodyGenCB,
4679                                               /*NumTeamsLower=*/nullptr,
4680                                               /*NumTeamsUpper=*/F->arg_begin(),
4681                                               /*ThreadLimit=*/nullptr,
4682                                               /*IfExpr=*/nullptr));
4683   Builder.restoreIP(AfterIP);
4684 
4685   Builder.CreateRetVoid();
4686   OMPBuilder.finalize();
4687 
4688   ASSERT_FALSE(verifyModule(*M));
4689 
4690   CallInst *PushNumTeamsCallInst =
4691       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4692   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4693 
4694   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin());
4695   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin());
4696   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4697 
4698   // Verifying that the next instruction to execute is kmpc_fork_teams
4699   BranchInst *BrInst =
4700       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4701   ASSERT_NE(BrInst, nullptr);
4702   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4703   BasicBlock::iterator NextInstruction =
4704       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4705   CallInst *ForkTeamsCI = nullptr;
4706   if (NextInstruction != BrInst->getSuccessor(0)->end())
4707     ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4708   ASSERT_NE(ForkTeamsCI, nullptr);
4709   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4710             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4711 }
4712 
4713 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
4714   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4715   OpenMPIRBuilder OMPBuilder(*M);
4716   OMPBuilder.Config.IsTargetDevice = false;
4717   OMPBuilder.initialize();
4718   F->setName("func");
4719   IRBuilder<> &Builder = OMPBuilder.Builder;
4720   Builder.SetInsertPoint(BB);
4721 
4722   Function *FakeFunction =
4723       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4724                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4725 
4726   Value *NumTeamsLower =
4727       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4728   Value *NumTeamsUpper =
4729       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4730 
4731   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4732     Builder.restoreIP(CodeGenIP);
4733     Builder.CreateCall(FakeFunction, {});
4734     return Error::success();
4735   };
4736 
4737   // `F` already has an integer argument, so we use that as upper bound to
4738   // `num_teams`
4739   ASSERT_EXPECTED_INIT(
4740       OpenMPIRBuilder::InsertPointTy, AfterIP,
4741       OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
4742                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4743   Builder.restoreIP(AfterIP);
4744 
4745   Builder.CreateRetVoid();
4746   OMPBuilder.finalize();
4747 
4748   ASSERT_FALSE(verifyModule(*M));
4749 
4750   CallInst *PushNumTeamsCallInst =
4751       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4752   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4753 
4754   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4755   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4756   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4757 
4758   // Verifying that the next instruction to execute is kmpc_fork_teams
4759   BranchInst *BrInst =
4760       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4761   ASSERT_NE(BrInst, nullptr);
4762   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4763   BasicBlock::iterator NextInstruction =
4764       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4765   CallInst *ForkTeamsCI = nullptr;
4766   if (NextInstruction != BrInst->getSuccessor(0)->end())
4767     ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4768   ASSERT_NE(ForkTeamsCI, nullptr);
4769   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4770             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4771 }
4772 
4773 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
4774   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4775   OpenMPIRBuilder OMPBuilder(*M);
4776   OMPBuilder.Config.IsTargetDevice = false;
4777   OMPBuilder.initialize();
4778   F->setName("func");
4779   IRBuilder<> &Builder = OMPBuilder.Builder;
4780   Builder.SetInsertPoint(BB);
4781 
4782   BasicBlock *CodegenBB = splitBB(Builder, true);
4783   Builder.SetInsertPoint(CodegenBB);
4784 
4785   // Generate values for `num_teams` and `thread_limit` using the first argument
4786   // of the testing function.
4787   Value *NumTeamsLower =
4788       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4789   Value *NumTeamsUpper =
4790       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4791   Value *ThreadLimit =
4792       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit");
4793 
4794   Function *FakeFunction =
4795       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4796                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4797 
4798   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4799     Builder.restoreIP(CodeGenIP);
4800     Builder.CreateCall(FakeFunction, {});
4801     return Error::success();
4802   };
4803 
4804   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4805   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4806                        OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4807                                               NumTeamsUpper, ThreadLimit,
4808                                               nullptr));
4809   Builder.restoreIP(AfterIP);
4810 
4811   Builder.CreateRetVoid();
4812   OMPBuilder.finalize();
4813 
4814   ASSERT_FALSE(verifyModule(*M));
4815 
4816   CallInst *PushNumTeamsCallInst =
4817       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4818   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4819 
4820   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4821   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4822   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit);
4823 
4824   // Verifying that the next instruction to execute is kmpc_fork_teams
4825   BranchInst *BrInst =
4826       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4827   ASSERT_NE(BrInst, nullptr);
4828   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4829   BasicBlock::iterator NextInstruction =
4830       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4831   CallInst *ForkTeamsCI = nullptr;
4832   if (NextInstruction != BrInst->getSuccessor(0)->end())
4833     ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4834   ASSERT_NE(ForkTeamsCI, nullptr);
4835   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4836             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4837 }
4838 
4839 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
4840   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4841   OpenMPIRBuilder OMPBuilder(*M);
4842   OMPBuilder.Config.IsTargetDevice = false;
4843   OMPBuilder.initialize();
4844   F->setName("func");
4845   IRBuilder<> &Builder = OMPBuilder.Builder;
4846   Builder.SetInsertPoint(BB);
4847 
4848   Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(),
4849                                      Builder.CreateAlloca(Builder.getInt1Ty()));
4850 
4851   Function *FakeFunction =
4852       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4853                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4854 
4855   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4856     Builder.restoreIP(CodeGenIP);
4857     Builder.CreateCall(FakeFunction, {});
4858     return Error::success();
4859   };
4860 
4861   // `F` already has an integer argument, so we use that as upper bound to
4862   // `num_teams`
4863   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4864                        OMPBuilder.createTeams(Builder, BodyGenCB,
4865                                               /*NumTeamsLower=*/nullptr,
4866                                               /*NumTeamsUpper=*/nullptr,
4867                                               /*ThreadLimit=*/nullptr, IfExpr));
4868   Builder.restoreIP(AfterIP);
4869 
4870   Builder.CreateRetVoid();
4871   OMPBuilder.finalize();
4872 
4873   ASSERT_FALSE(verifyModule(*M));
4874 
4875   CallInst *PushNumTeamsCallInst =
4876       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4877   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4878   Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2);
4879   Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3);
4880   Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4);
4881 
4882   // Check the lower_bound
4883   ASSERT_NE(NumTeamsLower, nullptr);
4884   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower);
4885   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4886   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr);
4887   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0));
4888   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4889 
4890   // Check the upper_bound
4891   ASSERT_NE(NumTeamsUpper, nullptr);
4892   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper);
4893   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4894   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr);
4895   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0));
4896   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4897 
4898   // Check thread_limit
4899   EXPECT_EQ(ThreadLimit, Builder.getInt32(0));
4900 }
4901 
4902 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
4903   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4904   OpenMPIRBuilder OMPBuilder(*M);
4905   OMPBuilder.Config.IsTargetDevice = false;
4906   OMPBuilder.initialize();
4907   F->setName("func");
4908   IRBuilder<> &Builder = OMPBuilder.Builder;
4909   Builder.SetInsertPoint(BB);
4910 
4911   Value *IfExpr = Builder.CreateLoad(
4912       Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty()));
4913   Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5));
4914   Value *NumTeamsUpper =
4915       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10));
4916   Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20));
4917 
4918   Function *FakeFunction =
4919       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4920                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4921 
4922   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4923     Builder.restoreIP(CodeGenIP);
4924     Builder.CreateCall(FakeFunction, {});
4925     return Error::success();
4926   };
4927 
4928   // `F` already has an integer argument, so we use that as upper bound to
4929   // `num_teams`
4930   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4931                        OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4932                                               NumTeamsUpper, ThreadLimit,
4933                                               IfExpr));
4934   Builder.restoreIP(AfterIP);
4935 
4936   Builder.CreateRetVoid();
4937   OMPBuilder.finalize();
4938 
4939   ASSERT_FALSE(verifyModule(*M));
4940 
4941   CallInst *PushNumTeamsCallInst =
4942       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4943   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4944   Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2);
4945   Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3);
4946   Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4);
4947 
4948   // Get the boolean conversion of if expression
4949   ASSERT_EQ(IfExpr->getNumUses(), 1U);
4950   User *IfExprInst = IfExpr->user_back();
4951   ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst);
4952   ASSERT_NE(IfExprCmpInst, nullptr);
4953   EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE);
4954   EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr);
4955   EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0));
4956 
4957   // Check the lower_bound
4958   ASSERT_NE(NumTeamsLowerArg, nullptr);
4959   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg);
4960   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4961   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst);
4962   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower);
4963   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4964 
4965   // Check the upper_bound
4966   ASSERT_NE(NumTeamsUpperArg, nullptr);
4967   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg);
4968   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4969   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst);
4970   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper);
4971   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4972 
4973   // Check thread_limit
4974   EXPECT_EQ(ThreadLimitArg, ThreadLimit);
4975 }
4976 
4977 /// Returns the single instruction of InstTy type in BB that uses the value V.
4978 /// If there is more than one such instruction, returns null.
4979 template <typename InstTy>
4980 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
4981   InstTy *Result = nullptr;
4982   for (User *U : V->users()) {
4983     auto *Inst = dyn_cast<InstTy>(U);
4984     if (!Inst || Inst->getParent() != BB)
4985       continue;
4986     if (Result) {
4987       if (auto *SI = dyn_cast<StoreInst>(Inst)) {
4988         if (V == SI->getValueOperand())
4989           continue;
4990       } else {
4991         return nullptr;
4992       }
4993     }
4994     Result = Inst;
4995   }
4996   return Result;
4997 }
4998 
4999 /// Returns true if BB contains a simple binary reduction that loads a value
5000 /// from Accum, performs some binary operation with it, and stores it back to
5001 /// Accum.
5002 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
5003                                     Instruction::BinaryOps *OpCode = nullptr) {
5004   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
5005   if (!Store)
5006     return false;
5007   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
5008   if (!Stored)
5009     return false;
5010   if (OpCode && *OpCode != Stored->getOpcode())
5011     return false;
5012   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
5013   return Load && Load->getOperand(0) == Accum;
5014 }
5015 
5016 /// Returns true if BB contains a binary reduction that reduces V using a binary
5017 /// operator into an accumulator that is a function argument.
5018 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
5019   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
5020   if (!ReductionOp)
5021     return false;
5022 
5023   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
5024   if (!GlobalLoad)
5025     return false;
5026 
5027   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
5028   if (!Store)
5029     return false;
5030 
5031   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
5032          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
5033 }
5034 
5035 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
5036 /// [0, 1], respectively, and assigns results of these instructions to Zero and
5037 /// One. Returns true on success, false on failure or if such instructions are
5038 /// not unique among the users of Ptr.
5039 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
5040   Zero = nullptr;
5041   One = nullptr;
5042   for (User *U : Ptr->users()) {
5043     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
5044       if (GEP->getNumIndices() != 2)
5045         continue;
5046       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5047       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
5048       EXPECT_NE(FirstIdx, nullptr);
5049       EXPECT_NE(SecondIdx, nullptr);
5050 
5051       EXPECT_TRUE(FirstIdx->isZero());
5052       if (SecondIdx->isZero()) {
5053         if (Zero)
5054           return false;
5055         Zero = GEP;
5056       } else if (SecondIdx->isOne()) {
5057         if (One)
5058           return false;
5059         One = GEP;
5060       } else {
5061         return false;
5062       }
5063     }
5064   }
5065   return Zero != nullptr && One != nullptr;
5066 }
5067 
5068 static OpenMPIRBuilder::InsertPointTy
5069 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
5070              Value *&Result) {
5071   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5072   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
5073   return Builder.saveIP();
5074 }
5075 
5076 static OpenMPIRBuilder::InsertPointTy
5077 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
5078                    Value *RHS) {
5079   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5080   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
5081   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt,
5082                           AtomicOrdering::Monotonic);
5083   return Builder.saveIP();
5084 }
5085 
5086 static OpenMPIRBuilder::InsertPointTy
5087 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
5088              Value *&Result) {
5089   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5090   Result = Builder.CreateXor(LHS, RHS, "red.xor");
5091   return Builder.saveIP();
5092 }
5093 
5094 static OpenMPIRBuilder::InsertPointTy
5095 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
5096                    Value *RHS) {
5097   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5098   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
5099   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt,
5100                           AtomicOrdering::Monotonic);
5101   return Builder.saveIP();
5102 }
5103 
5104 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
5105   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5106   OpenMPIRBuilder OMPBuilder(*M);
5107   OMPBuilder.Config.IsTargetDevice = false;
5108   OMPBuilder.initialize();
5109   F->setName("func");
5110   IRBuilder<> Builder(BB);
5111 
5112   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5113   Builder.CreateBr(EnterBB);
5114   Builder.SetInsertPoint(EnterBB);
5115   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5116 
5117   // Create variables to be reduced.
5118   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5119                               F->getEntryBlock().getFirstInsertionPt());
5120   Type *SumType = Builder.getFloatTy();
5121   Type *XorType = Builder.getInt32Ty();
5122   Value *SumReduced;
5123   Value *XorReduced;
5124   {
5125     IRBuilderBase::InsertPointGuard Guard(Builder);
5126     Builder.restoreIP(OuterAllocaIP);
5127     SumReduced = Builder.CreateAlloca(SumType);
5128     XorReduced = Builder.CreateAlloca(XorType);
5129   }
5130 
5131   // Store initial values of reductions into global variables.
5132   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5133   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5134 
5135   // The loop body computes two reductions:
5136   //   sum of (float) thread-id;
5137   //   xor of thread-id;
5138   // and store the result in global variables.
5139   InsertPointTy BodyIP, BodyAllocaIP;
5140   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
5141     IRBuilderBase::InsertPointGuard Guard(Builder);
5142     Builder.restoreIP(CodeGenIP);
5143 
5144     uint32_t StrSize;
5145     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5146     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5147     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5148     Value *SumLocal =
5149         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5150     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5151     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5152     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5153     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5154     Builder.CreateStore(Sum, SumReduced);
5155     Builder.CreateStore(Xor, XorReduced);
5156 
5157     BodyIP = Builder.saveIP();
5158     BodyAllocaIP = InnerAllocaIP;
5159     return Error::success();
5160   };
5161 
5162   // Privatization for reduction creates local copies of reduction variables and
5163   // initializes them to reduction-neutral values.
5164   Value *SumPrivatized;
5165   Value *XorPrivatized;
5166   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5167                     Value &Original, Value &Inner, Value *&ReplVal) {
5168     IRBuilderBase::InsertPointGuard Guard(Builder);
5169     Builder.restoreIP(InnerAllocaIP);
5170     if (&Original == SumReduced) {
5171       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5172       ReplVal = SumPrivatized;
5173     } else if (&Original == XorReduced) {
5174       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5175       ReplVal = XorPrivatized;
5176     } else {
5177       ReplVal = &Inner;
5178       return CodeGenIP;
5179     }
5180 
5181     Builder.restoreIP(CodeGenIP);
5182     if (&Original == SumReduced)
5183       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5184                           SumPrivatized);
5185     else if (&Original == XorReduced)
5186       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5187 
5188     return Builder.saveIP();
5189   };
5190 
5191   // Do nothing in finalization.
5192   auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
5193 
5194   ASSERT_EXPECTED_INIT(
5195       OpenMPIRBuilder::InsertPointTy, AfterIP,
5196       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
5197                                 /* IfCondition */ nullptr,
5198                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5199                                 /* IsCancellable */ false));
5200   Builder.restoreIP(AfterIP);
5201 
5202   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
5203       {SumType, SumReduced, SumPrivatized,
5204        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5205        /*ReductionGenClang=*/nullptr, sumAtomicReduction},
5206       {XorType, XorReduced, XorPrivatized,
5207        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5208        /*ReductionGenClang=*/nullptr, xorAtomicReduction}};
5209   OMPBuilder.Config.setIsGPU(false);
5210 
5211   bool ReduceVariableByRef[] = {false, false};
5212   ASSERT_THAT_EXPECTED(OMPBuilder.createReductions(BodyIP, BodyAllocaIP,
5213                                                    ReductionInfos,
5214                                                    ReduceVariableByRef),
5215                        Succeeded());
5216 
5217   Builder.restoreIP(AfterIP);
5218   Builder.CreateRetVoid();
5219 
5220   OMPBuilder.finalize(F);
5221 
5222   // The IR must be valid.
5223   EXPECT_FALSE(verifyModule(*M));
5224 
5225   // Outlining must have happened.
5226   SmallVector<CallInst *> ForkCalls;
5227   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5228             ForkCalls);
5229   ASSERT_EQ(ForkCalls.size(), 1u);
5230   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5231   Function *Outlined = dyn_cast<Function>(CalleeVal);
5232   EXPECT_NE(Outlined, nullptr);
5233 
5234   // Check that the lock variable was created with the expected name.
5235   GlobalVariable *LockVar =
5236       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
5237   EXPECT_NE(LockVar, nullptr);
5238 
5239   // Find the allocation of a local array that will be used to call the runtime
  // reduction function.
5241   BasicBlock &AllocBlock = Outlined->getEntryBlock();
5242   Value *LocalArray = nullptr;
5243   for (Instruction &I : AllocBlock) {
5244     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
5245       if (!Alloc->getAllocatedType()->isArrayTy() ||
5246           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
5247         continue;
5248       LocalArray = Alloc;
5249       break;
5250     }
5251   }
5252   ASSERT_NE(LocalArray, nullptr);
5253 
5254   // Find the call to the runtime reduction function.
5255   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
5256   Value *LocalArrayPtr = nullptr;
5257   Value *ReductionFnVal = nullptr;
5258   Value *SwitchArg = nullptr;
5259   for (Instruction &I : *BB) {
5260     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
5261       if (Call->getCalledFunction() !=
5262           OMPBuilder.getOrCreateRuntimeFunctionPtr(
5263               RuntimeFunction::OMPRTL___kmpc_reduce))
5264         continue;
5265       LocalArrayPtr = Call->getOperand(4);
5266       ReductionFnVal = Call->getOperand(5);
5267       SwitchArg = Call;
5268       break;
5269     }
5270   }
5271 
5272   // Check that the local array is passed to the function.
5273   ASSERT_NE(LocalArrayPtr, nullptr);
5274   EXPECT_EQ(LocalArrayPtr, LocalArray);
5275 
5276   // Find the GEP instructions preceding stores to the local array.
5277   Value *FirstArrayElemPtr = nullptr;
5278   Value *SecondArrayElemPtr = nullptr;
5279   EXPECT_EQ(LocalArray->getNumUses(), 3u);
5280   ASSERT_TRUE(
5281       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
5282 
5283   // Check that the values stored into the local array are privatized reduction
5284   // variables.
5285   auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>(
5286       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
5287   auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>(
5288       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
5289   ASSERT_NE(FirstPrivatized, nullptr);
5290   ASSERT_NE(SecondPrivatized, nullptr);
5291   ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr));
5292   EXPECT_TRUE(isSimpleBinaryReduction(
5293       FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5294   EXPECT_TRUE(isSimpleBinaryReduction(
5295       SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5296 
5297   // Check that the result of the runtime reduction call is used for further
5298   // dispatch.
5299   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
5300   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
5301   ASSERT_NE(Switch, nullptr);
5302   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
5303   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
5304   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
5305 
5306   // Non-atomic block contains reductions to the global reduction variable,
5307   // which is passed into the outlined function as an argument.
5308   Value *FirstLoad =
5309       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
5310   Value *SecondLoad =
5311       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
5312   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
5313   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
5314 
  // Atomic block also contains reductions to the global reduction variable.
5316   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
5317   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
5318   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
5319   auto *SecondAtomic =
5320       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
5321   ASSERT_NE(FirstAtomic, nullptr);
5322   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
5323   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5324   ASSERT_NE(SecondAtomic, nullptr);
5325   AtomicStorePointer = SecondAtomic->getPointerOperand();
5326   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5327 
5328   // Check that the separate reduction function also performs (non-atomic)
5329   // reductions after extracting reduction variables from its arguments.
5330   Function *ReductionFn = cast<Function>(ReductionFnVal);
5331   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
5332   Value *FirstLHSPtr;
5333   Value *SecondLHSPtr;
5334   ASSERT_TRUE(
5335       findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr));
5336   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5337   ASSERT_NE(Opaque, nullptr);
5338   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5339   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5340   ASSERT_NE(Opaque, nullptr);
5341   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5342 
5343   Value *FirstRHS;
5344   Value *SecondRHS;
5345   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
5346 }
5347 
5348 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
5349   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5350   OpenMPIRBuilder OMPBuilder(*M);
5351   OMPBuilder.Config.IsTargetDevice = false;
5352   OMPBuilder.initialize();
5353   F->setName("func");
5354   IRBuilder<> Builder(BB);
5355 
5356   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5357   Builder.CreateBr(EnterBB);
5358   Builder.SetInsertPoint(EnterBB);
5359   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5360 
5361   // Create variables to be reduced.
5362   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5363                               F->getEntryBlock().getFirstInsertionPt());
5364   Type *SumType = Builder.getFloatTy();
5365   Type *XorType = Builder.getInt32Ty();
5366   Value *SumReduced;
5367   Value *XorReduced;
5368   {
5369     IRBuilderBase::InsertPointGuard Guard(Builder);
5370     Builder.restoreIP(OuterAllocaIP);
5371     SumReduced = Builder.CreateAlloca(SumType);
5372     XorReduced = Builder.CreateAlloca(XorType);
5373   }
5374 
5375   // Store initial values of reductions into global variables.
5376   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5377   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5378 
5379   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
5380   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5381                             InsertPointTy CodeGenIP) {
5382     IRBuilderBase::InsertPointGuard Guard(Builder);
5383     Builder.restoreIP(CodeGenIP);
5384 
5385     uint32_t StrSize;
5386     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5387     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5388     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5389     Value *SumLocal =
5390         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5391     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5392     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5393     Builder.CreateStore(Sum, SumReduced);
5394 
5395     FirstBodyIP = Builder.saveIP();
5396     FirstBodyAllocaIP = InnerAllocaIP;
5397     return Error::success();
5398   };
5399 
5400   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
5401   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5402                              InsertPointTy CodeGenIP) {
5403     IRBuilderBase::InsertPointGuard Guard(Builder);
5404     Builder.restoreIP(CodeGenIP);
5405 
5406     uint32_t StrSize;
5407     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5408     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5409     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5410     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5411     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5412     Builder.CreateStore(Xor, XorReduced);
5413 
5414     SecondBodyIP = Builder.saveIP();
5415     SecondBodyAllocaIP = InnerAllocaIP;
5416     return Error::success();
5417   };
5418 
5419   // Privatization for reduction creates local copies of reduction variables and
5420   // initializes them to reduction-neutral values. The same privatization
5421   // callback is used for both loops, with dispatch based on the value being
5422   // privatized.
5423   Value *SumPrivatized;
5424   Value *XorPrivatized;
5425   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5426                     Value &Original, Value &Inner, Value *&ReplVal) {
5427     IRBuilderBase::InsertPointGuard Guard(Builder);
5428     Builder.restoreIP(InnerAllocaIP);
5429     if (&Original == SumReduced) {
5430       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5431       ReplVal = SumPrivatized;
5432     } else if (&Original == XorReduced) {
5433       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5434       ReplVal = XorPrivatized;
5435     } else {
5436       ReplVal = &Inner;
5437       return CodeGenIP;
5438     }
5439 
5440     Builder.restoreIP(CodeGenIP);
5441     if (&Original == SumReduced)
5442       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5443                           SumPrivatized);
5444     else if (&Original == XorReduced)
5445       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5446 
5447     return Builder.saveIP();
5448   };
5449 
5450   // Do nothing in finalization.
5451   auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
5452 
5453   ASSERT_EXPECTED_INIT(
5454       OpenMPIRBuilder::InsertPointTy, AfterIP1,
5455       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
5456                                 FiniCB, /* IfCondition */ nullptr,
5457                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5458                                 /* IsCancellable */ false));
5459   Builder.restoreIP(AfterIP1);
5460   ASSERT_EXPECTED_INIT(
5461       OpenMPIRBuilder::InsertPointTy, AfterIP2,
5462       OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP,
5463                                 SecondBodyGenCB, PrivCB, FiniCB,
5464                                 /* IfCondition */ nullptr,
5465                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5466                                 /* IsCancellable */ false));
5467   Builder.restoreIP(AfterIP2);
5468 
5469   OMPBuilder.Config.setIsGPU(false);
5470   bool ReduceVariableByRef[] = {false};
5471 
5472   ASSERT_THAT_EXPECTED(
5473       OMPBuilder.createReductions(
5474           FirstBodyIP, FirstBodyAllocaIP,
5475           {{SumType, SumReduced, SumPrivatized,
5476             /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5477             /*ReductionGenClang=*/nullptr, sumAtomicReduction}},
5478           ReduceVariableByRef),
5479       Succeeded());
5480   ASSERT_THAT_EXPECTED(
5481       OMPBuilder.createReductions(
5482           SecondBodyIP, SecondBodyAllocaIP,
5483           {{XorType, XorReduced, XorPrivatized,
5484             /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5485             /*ReductionGenClang=*/nullptr, xorAtomicReduction}},
5486           ReduceVariableByRef),
5487       Succeeded());
5488 
5489   Builder.restoreIP(AfterIP2);
5490   Builder.CreateRetVoid();
5491 
5492   OMPBuilder.finalize(F);
5493 
5494   // The IR must be valid.
5495   EXPECT_FALSE(verifyModule(*M));
5496 
5497   // Two different outlined functions must have been created.
5498   SmallVector<CallInst *> ForkCalls;
5499   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5500             ForkCalls);
5501   ASSERT_EQ(ForkCalls.size(), 2u);
5502   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5503   Function *FirstCallee = cast<Function>(CalleeVal);
5504   CalleeVal = ForkCalls[1]->getOperand(2);
5505   Function *SecondCallee = cast<Function>(CalleeVal);
5506   EXPECT_NE(FirstCallee, SecondCallee);
5507 
5508   // Two different reduction functions must have been created.
5509   SmallVector<CallInst *> ReduceCalls;
5510   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
5511             ReduceCalls);
5512   ASSERT_EQ(ReduceCalls.size(), 1u);
5513   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5514   ReduceCalls.clear();
5515   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
5516             OMPBuilder, ReduceCalls);
5517   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5518   EXPECT_NE(AddReduction, XorReduction);
5519 
5520   // Each reduction function does its own kind of reduction.
5521   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
5522   Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5523       AddReduction->getArg(0), FnReductionBB);
5524   ASSERT_NE(FirstLHSPtr, nullptr);
5525   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5526   ASSERT_NE(Opaque, nullptr);
5527   Instruction::BinaryOps Opcode = Instruction::FAdd;
5528   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5529 
5530   FnReductionBB = &XorReduction->getEntryBlock();
5531   Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5532       XorReduction->getArg(0), FnReductionBB);
5533   ASSERT_NE(FirstLHSPtr, nullptr);
5534   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5535   ASSERT_NE(Opaque, nullptr);
5536   Opcode = Instruction::Xor;
5537   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5538 }
5539 
5540 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
5541   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5542   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5543   OpenMPIRBuilder OMPBuilder(*M);
5544   OMPBuilder.initialize();
5545   F->setName("func");
5546   IRBuilder<> Builder(BB);
5547 
5548   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5549   Builder.CreateBr(EnterBB);
5550   Builder.SetInsertPoint(EnterBB);
5551   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5552 
5553   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5554   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5555 
5556   auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
5557   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5558     return Error::success();
5559   };
5560   SectionCBVector.push_back(SectionCB);
5561 
5562   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5563                    llvm::Value &, llvm::Value &Val,
5564                    llvm::Value *&ReplVal) { return CodeGenIP; };
5565   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5566                                     F->getEntryBlock().getFirstInsertionPt());
5567   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5568                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5569                                                  PrivCB, FiniCB, false, false));
5570   Builder.restoreIP(AfterIP);
5571   Builder.CreateRetVoid(); // Required at the end of the function
5572   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
5573   EXPECT_FALSE(verifyModule(*M, &errs()));
5574 }
5575 
5576 TEST_F(OpenMPIRBuilderTest, CreateSections) {
5577   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5578   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5579   OpenMPIRBuilder OMPBuilder(*M);
5580   OMPBuilder.initialize();
5581   F->setName("func");
5582   IRBuilder<> Builder(BB);
5583 
5584   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5585   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5586   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5587 
5588   BasicBlock *SwitchBB = nullptr;
5589   AllocaInst *PrivAI = nullptr;
5590   SwitchInst *Switch = nullptr;
5591 
5592   unsigned NumBodiesGenerated = 0;
5593   unsigned NumFiniCBCalls = 0;
5594   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
5595 
5596   auto FiniCB = [&](InsertPointTy IP) {
5597     ++NumFiniCBCalls;
5598     BasicBlock *IPBB = IP.getBlock();
5599     EXPECT_NE(IPBB->end(), IP.getPoint());
5600   };
5601 
5602   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5603     ++NumBodiesGenerated;
5604     CaseBBs.push_back(CodeGenIP.getBlock());
5605     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
5606     Builder.restoreIP(CodeGenIP);
5607     Builder.CreateStore(F->arg_begin(), PrivAI);
5608     Value *PrivLoad =
5609         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
5610     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
5611     return Error::success();
5612   };
5613   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5614                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
5615     // TODO: Privatization not implemented yet
5616     return CodeGenIP;
5617   };
5618 
5619   SectionCBVector.push_back(SectionCB);
5620   SectionCBVector.push_back(SectionCB);
5621 
5622   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5623                                     F->getEntryBlock().getFirstInsertionPt());
5624   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5625                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5626                                                  PrivCB, FINICB_WRAPPER(FiniCB),
5627                                                  false, false));
5628   Builder.restoreIP(AfterIP);
5629   Builder.CreateRetVoid(); // Required at the end of the function
5630 
5631   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
5632   // loop's exit BB
5633   BasicBlock *ForExitBB =
5634       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
5635   EXPECT_NE(ForExitBB, nullptr);
5636 
5637   EXPECT_NE(PrivAI, nullptr);
5638   Function *OutlinedFn = PrivAI->getFunction();
5639   EXPECT_EQ(F, OutlinedFn);
5640   EXPECT_FALSE(verifyModule(*M, &errs()));
5641   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
5642 
5643   BasicBlock *LoopPreheaderBB =
5644       OutlinedFn->getEntryBlock().getSingleSuccessor();
5645   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
5646   // iterator/counter
5647   bool FoundForInit = false;
5648   for (Instruction &Inst : *LoopPreheaderBB) {
5649     if (isa<CallInst>(Inst)) {
5650       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5651           "__kmpc_for_static_init_4u") {
5652         FoundForInit = true;
5653       }
5654     }
5655   }
5656   EXPECT_EQ(FoundForInit, true);
5657 
5658   bool FoundForExit = false;
5659   bool FoundBarrier = false;
5660   for (Instruction &Inst : *ForExitBB) {
5661     if (isa<CallInst>(Inst)) {
5662       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5663           "__kmpc_for_static_fini") {
5664         FoundForExit = true;
5665       }
5666       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5667           "__kmpc_barrier") {
5668         FoundBarrier = true;
5669       }
5670       if (FoundForExit && FoundBarrier)
5671         break;
5672     }
5673   }
5674   EXPECT_EQ(FoundForExit, true);
5675   EXPECT_EQ(FoundBarrier, true);
5676 
5677   EXPECT_NE(SwitchBB, nullptr);
5678   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
5679   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
5680   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
5681   EXPECT_EQ(Switch->getNumCases(), 2U);
5682 
5683   EXPECT_EQ(CaseBBs.size(), 2U);
5684   for (auto *&CaseBB : CaseBBs) {
5685     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
5686   }
5687 
5688   ASSERT_EQ(NumBodiesGenerated, 2U);
5689   ASSERT_EQ(NumFiniCBCalls, 1U);
5690   EXPECT_FALSE(verifyModule(*M, &errs()));
5691 }
5692 
5693 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
5694   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5695   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5696   OpenMPIRBuilder OMPBuilder(*M);
5697   OMPBuilder.initialize();
5698   F->setName("func");
5699   IRBuilder<> Builder(BB);
5700 
5701   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5702   Builder.CreateBr(EnterBB);
5703   Builder.SetInsertPoint(EnterBB);
5704   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5705 
5706   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5707                                     F->getEntryBlock().getFirstInsertionPt());
5708   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5709   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5710                    llvm::Value &, llvm::Value &Val,
5711                    llvm::Value *&ReplVal) { return CodeGenIP; };
5712   auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
5713 
5714   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5715                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5716                                                  PrivCB, FiniCB, false, true));
5717   Builder.restoreIP(AfterIP);
5718   Builder.CreateRetVoid(); // Required at the end of the function
5719   for (auto &Inst : instructions(*F)) {
5720     EXPECT_FALSE(isa<CallInst>(Inst) &&
5721                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5722                      "__kmpc_barrier" &&
5723                  "call to function __kmpc_barrier found with nowait");
5724   }
5725 }
5726 
5727 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
5728   OpenMPIRBuilder OMPBuilder(*M);
5729   OMPBuilder.initialize();
5730 
5731   IRBuilder<> Builder(BB);
5732 
5733   SmallVector<uint64_t> Mappings = {0, 1};
5734   GlobalVariable *OffloadMaptypesGlobal =
5735       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
5736   EXPECT_FALSE(M->global_empty());
5737   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
5738   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5739   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5740   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5741   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5742   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5743   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
5744   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
5745   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
5746   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
5747   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
5748   EXPECT_EQ(MappingInit, CA);
5749 }
5750 
5751 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
5752   OpenMPIRBuilder OMPBuilder(*M);
5753   OMPBuilder.initialize();
5754 
5755   IRBuilder<> Builder(BB);
5756 
5757   uint32_t StrSize;
5758   Constant *Cst1 =
5759       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5760   Constant *Cst2 =
5761       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5762   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5763 
5764   GlobalVariable *OffloadMaptypesGlobal =
5765       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
5766   EXPECT_FALSE(M->global_empty());
5767   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
5768   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5769   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5770   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5771   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5772   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5773   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
5774   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
5775 
5776   GlobalVariable *Name1Gbl =
5777       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
5778   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
5779   ConstantDataArray *Name1GblCA =
5780       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
5781   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
5782 
5783   GlobalVariable *Name2Gbl =
5784       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
5785   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
5786   ConstantDataArray *Name2GblCA =
5787       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
5788   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
5789 
5790   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
5791   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
5792 }
5793 
5794 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
5795   OpenMPIRBuilder OMPBuilder(*M);
5796   OMPBuilder.initialize();
5797   F->setName("func");
5798   IRBuilder<> Builder(BB);
5799 
5800   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5801 
5802   unsigned TotalNbOperand = 2;
5803 
5804   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5805   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5806                                     F->getEntryBlock().getFirstInsertionPt());
5807   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5808   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
5809   EXPECT_NE(MapperAllocas.Args, nullptr);
5810   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
5811   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
5812   ArrayType *ArrType =
5813       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
5814   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5815   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
5816                   ->getArrayElementType()
5817                   ->isPointerTy());
5818 
5819   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
5820   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
5821   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5822   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
5823                   ->getArrayElementType()
5824                   ->isPointerTy());
5825 
5826   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
5827   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
5828   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5829   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
5830                   ->getArrayElementType()
5831                   ->isIntegerTy(64));
5832 }
5833 
5834 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
5835   OpenMPIRBuilder OMPBuilder(*M);
5836   OMPBuilder.initialize();
5837   F->setName("func");
5838   IRBuilder<> Builder(BB);
5839   LLVMContext &Ctx = M->getContext();
5840 
5841   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5842 
5843   unsigned TotalNbOperand = 2;
5844 
5845   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5846   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5847                                     F->getEntryBlock().getFirstInsertionPt());
5848   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5849 
5850   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
5851       omp::OMPRTL___tgt_target_data_begin_mapper);
5852 
5853   SmallVector<uint64_t> Flags = {0, 2};
5854 
5855   uint32_t StrSize;
5856   Constant *SrcLocCst =
5857       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
5858   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
5859 
5860   Constant *Cst1 =
5861       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5862   Constant *Cst2 =
5863       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5864   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5865 
5866   GlobalVariable *Maptypes =
5867       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
5868   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
5869       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
5870       /*Idx0=*/0, /*Idx1=*/0);
5871 
5872   GlobalVariable *Mapnames =
5873       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
5874   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
5875       ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames,
5876       /*Idx0=*/0, /*Idx1=*/0);
5877 
5878   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
5879                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
5880                             TotalNbOperand);
5881 
5882   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
5883   EXPECT_NE(MapperCall, nullptr);
5884   EXPECT_EQ(MapperCall->arg_size(), 9U);
5885   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
5886             "__tgt_target_data_begin_mapper");
5887   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
5888   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
5889   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
5890 
5891   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
5892   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
5893   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
5894 }
5895 
5896 TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
5897   OpenMPIRBuilder OMPBuilder(*M);
5898   OMPBuilder.initialize();
5899   F->setName("func");
5900   IRBuilder<> Builder(BB);
5901   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5902 
5903   int64_t DeviceID = 2;
5904 
5905   AllocaInst *Val1 =
5906       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5907   ASSERT_NE(Val1, nullptr);
5908 
5909   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5910                                     F->getEntryBlock().getFirstInsertionPt());
5911 
5912   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5913   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5914   auto GenMapInfoCB =
5915       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5916     // Get map clause information.
5917     Builder.restoreIP(codeGenIP);
5918 
5919     CombinedInfo.BasePointers.emplace_back(Val1);
5920     CombinedInfo.Pointers.emplace_back(Val1);
5921     CombinedInfo.DevicePointers.emplace_back(
5922         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5923     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5924     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1));
5925     uint32_t temp;
5926     CombinedInfo.Names.emplace_back(
5927         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5928     return CombinedInfo;
5929   };
5930 
5931   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5932       /*RequiresDevicePointerInfo=*/false,
5933       /*SeparateBeginEndCalls=*/true);
5934 
5935   OMPBuilder.Config.setIsGPU(true);
5936 
5937   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper;
5938   ASSERT_EXPECTED_INIT(
5939       OpenMPIRBuilder::InsertPointTy, AfterIP,
5940       OMPBuilder.createTargetData(
5941           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5942           /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5943   Builder.restoreIP(AfterIP);
5944 
5945   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5946   EXPECT_NE(TargetDataCall, nullptr);
5947   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5948   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5949             "__tgt_target_data_begin_mapper");
5950   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5951   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5952   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5953 
5954   Builder.CreateRetVoid();
5955   EXPECT_FALSE(verifyModule(*M, &errs()));
5956 }
5957 
5958 TEST_F(OpenMPIRBuilderTest, TargetExitData) {
5959   OpenMPIRBuilder OMPBuilder(*M);
5960   OMPBuilder.initialize();
5961   F->setName("func");
5962   IRBuilder<> Builder(BB);
5963   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5964 
5965   int64_t DeviceID = 2;
5966 
5967   AllocaInst *Val1 =
5968       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5969   ASSERT_NE(Val1, nullptr);
5970 
5971   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5972                                     F->getEntryBlock().getFirstInsertionPt());
5973 
5974   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5975   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5976   auto GenMapInfoCB =
5977       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5978     // Get map clause information.
5979     Builder.restoreIP(codeGenIP);
5980 
5981     CombinedInfo.BasePointers.emplace_back(Val1);
5982     CombinedInfo.Pointers.emplace_back(Val1);
5983     CombinedInfo.DevicePointers.emplace_back(
5984         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5985     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5986     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2));
5987     uint32_t temp;
5988     CombinedInfo.Names.emplace_back(
5989         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5990     return CombinedInfo;
5991   };
5992 
5993   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5994       /*RequiresDevicePointerInfo=*/false,
5995       /*SeparateBeginEndCalls=*/true);
5996 
5997   OMPBuilder.Config.setIsGPU(true);
5998 
5999   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper;
6000   ASSERT_EXPECTED_INIT(
6001       OpenMPIRBuilder::InsertPointTy, AfterIP,
6002       OMPBuilder.createTargetData(
6003           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6004           /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
6005   Builder.restoreIP(AfterIP);
6006 
6007   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
6008   EXPECT_NE(TargetDataCall, nullptr);
6009   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6010   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6011             "__tgt_target_data_end_mapper");
6012   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6013   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6014   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6015 
6016   Builder.CreateRetVoid();
6017   EXPECT_FALSE(verifyModule(*M, &errs()));
6018 }
6019 
6020 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
6021   OpenMPIRBuilder OMPBuilder(*M);
6022   OMPBuilder.initialize();
6023   F->setName("func");
6024   IRBuilder<> Builder(BB);
6025   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6026 
6027   int64_t DeviceID = 2;
6028 
6029   AllocaInst *Val1 =
6030       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
6031   ASSERT_NE(Val1, nullptr);
6032 
6033   AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy());
6034   ASSERT_NE(Val2, nullptr);
6035 
6036   AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy());
6037   ASSERT_NE(Val3, nullptr);
6038 
6039   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
6040                                     F->getEntryBlock().getFirstInsertionPt());
6041 
6042   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6043   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
6044   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6045   auto GenMapInfoCB =
6046       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
6047     // Get map clause information.
6048     Builder.restoreIP(codeGenIP);
6049     uint32_t temp;
6050 
6051     CombinedInfo.BasePointers.emplace_back(Val1);
6052     CombinedInfo.Pointers.emplace_back(Val1);
6053     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None);
6054     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
6055     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3));
6056     CombinedInfo.Names.emplace_back(
6057         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6058 
6059     CombinedInfo.BasePointers.emplace_back(Val2);
6060     CombinedInfo.Pointers.emplace_back(Val2);
6061     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
6062     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
6063     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
6064     CombinedInfo.Names.emplace_back(
6065         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6066 
6067     CombinedInfo.BasePointers.emplace_back(Val3);
6068     CombinedInfo.Pointers.emplace_back(Val3);
6069     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address);
6070     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
6071     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
6072     CombinedInfo.Names.emplace_back(
6073         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6074     return CombinedInfo;
6075   };
6076 
6077   llvm::OpenMPIRBuilder::TargetDataInfo Info(
6078       /*RequiresDevicePointerInfo=*/true,
6079       /*SeparateBeginEndCalls=*/true);
6080 
6081   OMPBuilder.Config.setIsGPU(true);
6082 
6083   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
6084   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
6085     if (BodyGenType == BodyGenTy::Priv) {
6086       EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u);
6087       Builder.restoreIP(CodeGenIP);
6088       CallInst *TargetDataCall =
6089           dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
6090       EXPECT_NE(TargetDataCall, nullptr);
6091       EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6092       EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6093                 "__tgt_target_data_begin_mapper");
6094       EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6095       EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6096       EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6097 
6098       LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode());
6099       EXPECT_NE(LI, nullptr);
6100       StoreInst *SI = dyn_cast<StoreInst>(&BB->back());
6101       EXPECT_NE(SI, nullptr);
6102       EXPECT_EQ(SI->getValueOperand(), LI);
6103       EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second);
6104       EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second));
6105       EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second));
6106       Builder.CreateStore(Builder.getInt32(99), Val1);
6107     }
6108     return Builder.saveIP();
6109   };
6110 
6111   ASSERT_EXPECTED_INIT(
6112       OpenMPIRBuilder::InsertPointTy, TargetDataIP1,
6113       OMPBuilder.createTargetData(
6114           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6115           /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB));
6116   Builder.restoreIP(TargetDataIP1);
6117 
6118   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
6119   EXPECT_NE(TargetDataCall, nullptr);
6120   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6121   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6122             "__tgt_target_data_end_mapper");
6123   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6124   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6125   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6126 
6127   // Check that BodyGenCB is still made when IsTargetDevice is set to true.
6128   OMPBuilder.Config.setIsTargetDevice(true);
6129   bool CheckDevicePassBodyGen = false;
6130   auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
6131     CheckDevicePassBodyGen = true;
6132     Builder.restoreIP(CodeGenIP);
6133     CallInst *TargetDataCall =
6134         dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
6135     // Make sure no begin_mapper call is present for device pass.
6136     EXPECT_EQ(TargetDataCall, nullptr);
6137     return Builder.saveIP();
6138   };
6139   ASSERT_EXPECTED_INIT(
6140       OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
6141       OMPBuilder.createTargetData(
6142           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6143           /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB));
6144   Builder.restoreIP(TargetDataIP2);
6145   EXPECT_TRUE(CheckDevicePassBodyGen);
6146 
6147   Builder.CreateRetVoid();
6148   EXPECT_FALSE(verifyModule(*M, &errs()));
6149 }
6150 
6151 namespace {
6152 // Some basic handling of argument mapping for the moment
6153 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder,
6154                            llvm::SmallVectorImpl<llvm::Value *> &Args,
6155                            llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) {
6156   for (auto Arg : Args) {
6157     CombinedInfo.BasePointers.emplace_back(Arg);
6158     CombinedInfo.Pointers.emplace_back(Arg);
6159     uint32_t SrcLocStrSize;
6160     CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr(
6161         "Unknown loc - stub implementation", SrcLocStrSize));
6162     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(
6163         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
6164         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
6165         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM));
6166     CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64(
6167         OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType())));
6168   }
6169 }
6170 } // namespace
6171 
6172 TEST_F(OpenMPIRBuilderTest, TargetRegion) {
6173   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6174   OpenMPIRBuilder OMPBuilder(*M);
6175   OMPBuilder.initialize();
6176   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
6177   OMPBuilder.setConfig(Config);
6178   F->setName("func");
6179   F->addFnAttr("target-cpu", "x86-64");
6180   F->addFnAttr("target-features", "+mmx,+sse");
6181   IRBuilder<> Builder(BB);
6182   auto *Int32Ty = Builder.getInt32Ty();
6183 
6184   AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr");
6185   AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr");
6186   AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr");
6187 
6188   Builder.CreateStore(Builder.getInt32(10), APtr);
6189   Builder.CreateStore(Builder.getInt32(20), BPtr);
6190   auto BodyGenCB = [&](InsertPointTy AllocaIP,
6191                        InsertPointTy CodeGenIP) -> InsertPointTy {
6192     Builder.restoreIP(CodeGenIP);
6193     LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr);
6194     LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr);
6195     Value *Sum = Builder.CreateAdd(AVal, BVal);
6196     Builder.CreateStore(Sum, CPtr);
6197     return Builder.saveIP();
6198   };
6199 
6200   llvm::SmallVector<llvm::Value *> Inputs;
6201   Inputs.push_back(APtr);
6202   Inputs.push_back(BPtr);
6203   Inputs.push_back(CPtr);
6204 
6205   auto SimpleArgAccessorCB =
6206       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6207           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6208           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6209         if (!OMPBuilder.Config.isTargetDevice()) {
6210           RetVal = cast<llvm::Value>(&Arg);
6211           return CodeGenIP;
6212         }
6213 
6214         Builder.restoreIP(AllocaIP);
6215 
6216         llvm::Value *Addr = Builder.CreateAlloca(
6217             Arg.getType()->isPointerTy()
6218                 ? Arg.getType()
6219                 : Type::getInt64Ty(Builder.getContext()),
6220             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6221         llvm::Value *AddrAscast =
6222             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6223         Builder.CreateStore(&Arg, AddrAscast);
6224 
6225         Builder.restoreIP(CodeGenIP);
6226 
6227         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6228 
6229         return Builder.saveIP();
6230       };
6231 
6232   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6233   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6234       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6235     CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos);
6236     return CombinedInfos;
6237   };
6238 
6239   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
6240   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
6241   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6242   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6243       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC,
6244       /*MaxTeams=*/{10}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6245   RuntimeAttrs.TargetThreadLimit[0] = Builder.getInt32(20);
6246   RuntimeAttrs.TeamsThreadLimit[0] = Builder.getInt32(30);
6247   RuntimeAttrs.MaxThreads = Builder.getInt32(40);
6248 
6249   ASSERT_EXPECTED_INIT(
6250       OpenMPIRBuilder::InsertPointTy, AfterIP,
6251       OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
6252                               Builder.saveIP(), EntryInfo, DefaultAttrs,
6253                               RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
6254                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6255   Builder.restoreIP(AfterIP);
6256 
6257   OMPBuilder.finalize();
6258   Builder.CreateRetVoid();
6259 
6260   // Check the kernel launch sequence
6261   auto Iter = F->getEntryBlock().rbegin();
6262   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
6263   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
6264   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
6265   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
6266   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
6267 
6268   // Check that the kernel launch function is called
6269   Function *KernelLaunchFunc = Call->getCalledFunction();
6270   EXPECT_NE(KernelLaunchFunc, nullptr);
6271   StringRef FunctionName = KernelLaunchFunc->getName();
6272   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
6273 
6274   // Check num_teams and num_threads in call arguments
6275   EXPECT_TRUE(Call->arg_size() >= 4);
6276   Value *NumTeamsArg = Call->getArgOperand(2);
6277   EXPECT_TRUE(isa<ConstantInt>(NumTeamsArg));
6278   EXPECT_EQ(10U, cast<ConstantInt>(NumTeamsArg)->getZExtValue());
6279   Value *NumThreadsArg = Call->getArgOperand(3);
6280   EXPECT_TRUE(isa<ConstantInt>(NumThreadsArg));
6281   EXPECT_EQ(20U, cast<ConstantInt>(NumThreadsArg)->getZExtValue());
6282 
6283   // Check num_teams and num_threads kernel arguments (use number 5 starting
6284   // from the end and counting the call to __tgt_target_kernel as the first use)
6285   Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1);
6286   EXPECT_TRUE(KernelArgs->getNumUses() >= 4);
6287   Value *NumTeamsGetElemPtr = *std::next(KernelArgs->user_begin(), 3);
6288   EXPECT_TRUE(isa<GetElementPtrInst>(NumTeamsGetElemPtr));
6289   Value *NumTeamsStore = NumTeamsGetElemPtr->getUniqueUndroppableUser();
6290   EXPECT_TRUE(isa<StoreInst>(NumTeamsStore));
6291   Value *NumTeamsStoreArg = cast<StoreInst>(NumTeamsStore)->getValueOperand();
6292   EXPECT_TRUE(isa<ConstantDataSequential>(NumTeamsStoreArg));
6293   auto *NumTeamsStoreValue = cast<ConstantDataSequential>(NumTeamsStoreArg);
6294   EXPECT_EQ(3U, NumTeamsStoreValue->getNumElements());
6295   EXPECT_EQ(10U, NumTeamsStoreValue->getElementAsInteger(0));
6296   EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(1));
6297   EXPECT_EQ(0U, NumTeamsStoreValue->getElementAsInteger(2));
6298   Value *NumThreadsGetElemPtr = *std::next(KernelArgs->user_begin(), 2);
6299   EXPECT_TRUE(isa<GetElementPtrInst>(NumThreadsGetElemPtr));
6300   Value *NumThreadsStore = NumThreadsGetElemPtr->getUniqueUndroppableUser();
6301   EXPECT_TRUE(isa<StoreInst>(NumThreadsStore));
6302   Value *NumThreadsStoreArg =
6303       cast<StoreInst>(NumThreadsStore)->getValueOperand();
6304   EXPECT_TRUE(isa<ConstantDataSequential>(NumThreadsStoreArg));
6305   auto *NumThreadsStoreValue = cast<ConstantDataSequential>(NumThreadsStoreArg);
6306   EXPECT_EQ(3U, NumThreadsStoreValue->getNumElements());
6307   EXPECT_EQ(20U, NumThreadsStoreValue->getElementAsInteger(0));
6308   EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(1));
6309   EXPECT_EQ(0U, NumThreadsStoreValue->getElementAsInteger(2));
6310 
6311   // Check the fallback call
6312   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
6313   Iter = FallbackBlock->rbegin();
6314   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
6315   // 'F' has a dummy DISubprogram which causes OutlinedFunc to also
6316   // have a DISubprogram. In this case, the call to OutlinedFunc needs
6317   // to have a debug loc, otherwise verifier will complain.
6318   FCall->setDebugLoc(DL);
6319   EXPECT_NE(FCall, nullptr);
6320 
6321   // Check that the correct aguments are passed in
6322   for (auto ArgInput : zip(FCall->args(), Inputs)) {
6323     EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput));
6324   }
6325 
6326   // Check that the outlined function exists with the expected prefix
6327   Function *OutlinedFunc = FCall->getCalledFunction();
6328   EXPECT_NE(OutlinedFunc, nullptr);
6329   StringRef FunctionName2 = OutlinedFunc->getName();
6330   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
6331 
6332   // Check that target-cpu and target-features were propagated to the outlined
6333   // function
6334   EXPECT_EQ(OutlinedFunc->getFnAttribute("target-cpu"),
6335             F->getFnAttribute("target-cpu"));
6336   EXPECT_EQ(OutlinedFunc->getFnAttribute("target-features"),
6337             F->getFnAttribute("target-features"));
6338 
6339   EXPECT_FALSE(verifyModule(*M, &errs()));
6340 }
6341 
6342 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
6343   OpenMPIRBuilder OMPBuilder(*M);
6344   OMPBuilder.setConfig(
6345       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6346   OMPBuilder.initialize();
6347 
6348   F->setName("func");
6349   F->addFnAttr("target-cpu", "gfx90a");
6350   F->addFnAttr("target-features", "+gfx9-insts,+wavefrontsize64");
6351   IRBuilder<> Builder(BB);
6352   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6353 
6354   LoadInst *Value = nullptr;
6355   StoreInst *TargetStore = nullptr;
6356   llvm::SmallVector<llvm::Value *, 2> CapturedArgs = {
6357       Constant::getNullValue(PointerType::get(Ctx, 0)),
6358       Constant::getNullValue(PointerType::get(Ctx, 0))};
6359 
6360   auto SimpleArgAccessorCB =
6361       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6362           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6363           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6364         if (!OMPBuilder.Config.isTargetDevice()) {
6365           RetVal = cast<llvm::Value>(&Arg);
6366           return CodeGenIP;
6367         }
6368 
6369         Builder.restoreIP(AllocaIP);
6370 
6371         llvm::Value *Addr = Builder.CreateAlloca(
6372             Arg.getType()->isPointerTy()
6373                 ? Arg.getType()
6374                 : Type::getInt64Ty(Builder.getContext()),
6375             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6376         llvm::Value *AddrAscast =
6377             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6378         Builder.CreateStore(&Arg, AddrAscast);
6379 
6380         Builder.restoreIP(CodeGenIP);
6381 
6382         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6383 
6384         return Builder.saveIP();
6385       };
6386 
6387   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6388   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6389       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6390     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6391     return CombinedInfos;
6392   };
6393 
6394   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6395                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6396       -> OpenMPIRBuilder::InsertPointTy {
6397     Builder.restoreIP(CodeGenIP);
6398     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6399     TargetStore = Builder.CreateStore(Value, CapturedArgs[1]);
6400     return Builder.saveIP();
6401   };
6402 
6403   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6404                                    F->getEntryBlock().getFirstInsertionPt());
6405   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6406                                   /*Line=*/3, /*Count=*/0);
6407   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6408   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6409       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC,
6410       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6411 
6412   ASSERT_EXPECTED_INIT(
6413       OpenMPIRBuilder::InsertPointTy, AfterIP,
6414       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6415                               EntryInfo, DefaultAttrs, RuntimeAttrs,
6416                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
6417                               BodyGenCB, SimpleArgAccessorCB));
6418   Builder.restoreIP(AfterIP);
6419 
6420   Builder.CreateRetVoid();
6421   OMPBuilder.finalize();
6422 
6423   // Check outlined function
6424   EXPECT_FALSE(verifyModule(*M, &errs()));
6425   EXPECT_NE(TargetStore, nullptr);
6426   Function *OutlinedFn = TargetStore->getFunction();
6427   EXPECT_NE(F, OutlinedFn);
6428 
6429   // Check that target-cpu and target-features were propagated to the outlined
6430   // function
6431   EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"),
6432             F->getFnAttribute("target-cpu"));
6433   EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"),
6434             F->getFnAttribute("target-features"));
6435 
6436   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6437   // Account for the "implicit" first argument.
6438   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6439   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
6440   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6441   EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy());
6442 
6443   // Check entry block
6444   auto &EntryBlock = OutlinedFn->getEntryBlock();
6445   Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt();
6446   EXPECT_NE(Alloca1, nullptr);
6447 
6448   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6449   auto *Store1 = Alloca1->getNextNode();
6450   EXPECT_TRUE(isa<StoreInst>(Store1));
6451   auto *Alloca2 = Store1->getNextNode();
6452   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6453   auto *Store2 = Alloca2->getNextNode();
6454   EXPECT_TRUE(isa<StoreInst>(Store2));
6455 
6456   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6457   EXPECT_NE(InitCall, nullptr);
6458   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6459   EXPECT_EQ(InitCall->arg_size(), 2U);
6460   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6461   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6462   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6463   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6464   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6465   auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6466   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6467             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6468   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6469             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6470   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6471             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6472 
6473   auto *EntryBlockBranch = EntryBlock.getTerminator();
6474   EXPECT_NE(EntryBlockBranch, nullptr);
6475   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6476 
6477   // Check user code block
6478   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6479   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6480   Instruction *Load1 = &*UserCodeBlock->getFirstNonPHIIt();
6481   EXPECT_TRUE(isa<LoadInst>(Load1));
6482   auto *Load2 = Load1->getNextNode();
6483   EXPECT_TRUE(isa<LoadInst>(Load2));
6484 
6485   auto *OutlinedBlockBr = Load2->getNextNode();
6486   EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));
6487 
6488   auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
6489   EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");
6490 
6491   Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt();
6492   EXPECT_EQ(Value1, Value);
6493   EXPECT_EQ(Value1->getNextNode(), TargetStore);
6494   auto *Deinit = TargetStore->getNextNode();
6495   EXPECT_NE(Deinit, nullptr);
6496 
6497   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6498   EXPECT_NE(DeinitCall, nullptr);
6499   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6500   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6501 
6502   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6503 
6504   // Check exit block
6505   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6506   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6507   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt()));
6508 
6509   // Check global exec_mode.
6510   GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used");
6511   EXPECT_NE(Used, nullptr);
6512   Constant *UsedInit = Used->getInitializer();
6513   EXPECT_NE(UsedInit, nullptr);
6514   EXPECT_TRUE(isa<ConstantArray>(UsedInit));
6515   auto *UsedInitData = cast<ConstantArray>(UsedInit);
6516   EXPECT_EQ(1U, UsedInitData->getNumOperands());
6517   Constant *ExecMode = UsedInitData->getOperand(0);
6518   EXPECT_TRUE(isa<GlobalVariable>(ExecMode));
6519   Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer();
6520   EXPECT_NE(ExecModeValue, nullptr);
6521   EXPECT_TRUE(isa<ConstantInt>(ExecModeValue));
6522   EXPECT_EQ(OMP_TGT_EXEC_MODE_GENERIC,
6523             cast<ConstantInt>(ExecModeValue)->getZExtValue());
6524 }
6525 
6526 TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
6527   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6528   OpenMPIRBuilder OMPBuilder(*M);
6529   OMPBuilder.initialize();
6530   OpenMPIRBuilderConfig Config(/*IsTargetDevice=*/false, /*IsGPU=*/false,
6531                                /*OpenMPOffloadMandatory=*/false,
6532                                /*HasRequiresReverseOffload=*/false,
6533                                /*HasRequiresUnifiedAddress=*/false,
6534                                /*HasRequiresUnifiedSharedMemory=*/false,
6535                                /*HasRequiresDynamicAllocators=*/false);
6536   OMPBuilder.setConfig(Config);
6537   F->setName("func");
6538   IRBuilder<> Builder(BB);
6539 
6540   auto BodyGenCB = [&](InsertPointTy,
6541                        InsertPointTy CodeGenIP) -> InsertPointTy {
6542     Builder.restoreIP(CodeGenIP);
6543     return Builder.saveIP();
6544   };
6545 
6546   auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&,
6547                                  OpenMPIRBuilder::InsertPointTy,
6548                                  OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6549     Builder.restoreIP(CodeGenIP);
6550     return Builder.saveIP();
6551   };
6552 
6553   SmallVector<Value *> Inputs;
6554   OpenMPIRBuilder::MapInfosTy CombinedInfos;
6555   auto GenMapInfoCB =
6556       [&](OpenMPIRBuilder::InsertPointTy) -> OpenMPIRBuilder::MapInfosTy & {
6557     return CombinedInfos;
6558   };
6559 
6560   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
6561   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
6562   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6563   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6564       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD,
6565       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6566   RuntimeAttrs.LoopTripCount = Builder.getInt64(1000);
6567 
6568   ASSERT_EXPECTED_INIT(
6569       OpenMPIRBuilder::InsertPointTy, AfterIP,
6570       OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
6571                               Builder.saveIP(), EntryInfo, DefaultAttrs,
6572                               RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
6573                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6574   Builder.restoreIP(AfterIP);
6575 
6576   OMPBuilder.finalize();
6577   Builder.CreateRetVoid();
6578 
6579   // Check the kernel launch sequence
6580   auto Iter = F->getEntryBlock().rbegin();
6581   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
6582   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
6583   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
6584   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
6585   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
6586 
6587   // Check that the kernel launch function is called
6588   Function *KernelLaunchFunc = Call->getCalledFunction();
6589   EXPECT_NE(KernelLaunchFunc, nullptr);
6590   StringRef FunctionName = KernelLaunchFunc->getName();
6591   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
6592 
6593   // Check the trip count kernel argument (use number 5 starting from the end
6594   // and counting the call to __tgt_target_kernel as the first use)
6595   Value *KernelArgs = Call->getArgOperand(Call->arg_size() - 1);
6596   EXPECT_TRUE(KernelArgs->getNumUses() >= 6);
6597   Value *TripCountGetElemPtr = *std::next(KernelArgs->user_begin(), 5);
6598   EXPECT_TRUE(isa<GetElementPtrInst>(TripCountGetElemPtr));
6599   Value *TripCountStore = TripCountGetElemPtr->getUniqueUndroppableUser();
6600   EXPECT_TRUE(isa<StoreInst>(TripCountStore));
6601   Value *TripCountStoreArg = cast<StoreInst>(TripCountStore)->getValueOperand();
6602   EXPECT_TRUE(isa<ConstantInt>(TripCountStoreArg));
6603   EXPECT_EQ(1000U, cast<ConstantInt>(TripCountStoreArg)->getZExtValue());
6604 
6605   // Check the fallback call
6606   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
6607   Iter = FallbackBlock->rbegin();
6608   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
6609   // 'F' has a dummy DISubprogram which causes OutlinedFunc to also
6610   // have a DISubprogram. In this case, the call to OutlinedFunc needs
6611   // to have a debug loc, otherwise verifier will complain.
6612   FCall->setDebugLoc(DL);
6613   EXPECT_NE(FCall, nullptr);
6614 
6615   // Check that the outlined function exists with the expected prefix
6616   Function *OutlinedFunc = FCall->getCalledFunction();
6617   EXPECT_NE(OutlinedFunc, nullptr);
6618   StringRef FunctionName2 = OutlinedFunc->getName();
6619   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
6620 
6621   EXPECT_FALSE(verifyModule(*M, &errs()));
6622 }
6623 
6624 TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
6625   OpenMPIRBuilder OMPBuilder(*M);
6626   OMPBuilder.setConfig(
6627       OpenMPIRBuilderConfig(/*IsTargetDevice=*/true, /*IsGPU=*/false,
6628                             /*OpenMPOffloadMandatory=*/false,
6629                             /*HasRequiresReverseOffload=*/false,
6630                             /*HasRequiresUnifiedAddress=*/false,
6631                             /*HasRequiresUnifiedSharedMemory=*/false,
6632                             /*HasRequiresDynamicAllocators=*/false));
6633   OMPBuilder.initialize();
6634   F->setName("func");
6635   IRBuilder<> Builder(BB);
6636   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6637 
6638   Function *OutlinedFn = nullptr;
6639   SmallVector<Value *> CapturedArgs;
6640 
6641   auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&,
6642                                  OpenMPIRBuilder::InsertPointTy,
6643                                  OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6644     Builder.restoreIP(CodeGenIP);
6645     return Builder.saveIP();
6646   };
6647 
6648   OpenMPIRBuilder::MapInfosTy CombinedInfos;
6649   auto GenMapInfoCB =
6650       [&](OpenMPIRBuilder::InsertPointTy) -> OpenMPIRBuilder::MapInfosTy & {
6651     return CombinedInfos;
6652   };
6653 
6654   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy,
6655                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6656       -> OpenMPIRBuilder::InsertPointTy {
6657     Builder.restoreIP(CodeGenIP);
6658     OutlinedFn = CodeGenIP.getBlock()->getParent();
6659     return Builder.saveIP();
6660   };
6661 
6662   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6663                                    F->getEntryBlock().getFirstInsertionPt());
6664   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6665                                   /*Line=*/3, /*Count=*/0);
6666   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6667   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6668       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD,
6669       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6670 
6671   ASSERT_EXPECTED_INIT(
6672       OpenMPIRBuilder::InsertPointTy, AfterIP,
6673       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6674                               EntryInfo, DefaultAttrs, RuntimeAttrs,
6675                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
6676                               BodyGenCB, SimpleArgAccessorCB));
6677   Builder.restoreIP(AfterIP);
6678 
6679   Builder.CreateRetVoid();
6680   OMPBuilder.finalize();
6681 
6682   // Check outlined function
6683   EXPECT_FALSE(verifyModule(*M, &errs()));
6684   EXPECT_NE(OutlinedFn, nullptr);
6685   EXPECT_NE(F, OutlinedFn);
6686 
6687   // Check that target-cpu and target-features were propagated to the outlined
6688   // function
6689   EXPECT_EQ(OutlinedFn->getFnAttribute("target-cpu"),
6690             F->getFnAttribute("target-cpu"));
6691   EXPECT_EQ(OutlinedFn->getFnAttribute("target-features"),
6692             F->getFnAttribute("target-features"));
6693 
6694   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6695   // Account for the "implicit" first argument.
6696   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6697   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
6698 
6699   // Check global exec_mode.
6700   GlobalVariable *Used = M->getGlobalVariable("llvm.compiler.used");
6701   EXPECT_NE(Used, nullptr);
6702   Constant *UsedInit = Used->getInitializer();
6703   EXPECT_NE(UsedInit, nullptr);
6704   EXPECT_TRUE(isa<ConstantArray>(UsedInit));
6705   auto *UsedInitData = cast<ConstantArray>(UsedInit);
6706   EXPECT_EQ(1U, UsedInitData->getNumOperands());
6707   Constant *ExecMode = UsedInitData->getOperand(0);
6708   EXPECT_TRUE(isa<GlobalVariable>(ExecMode));
6709   Constant *ExecModeValue = cast<GlobalVariable>(ExecMode)->getInitializer();
6710   EXPECT_NE(ExecModeValue, nullptr);
6711   EXPECT_TRUE(isa<ConstantInt>(ExecModeValue));
6712   EXPECT_EQ(OMP_TGT_EXEC_MODE_SPMD,
6713             cast<ConstantInt>(ExecModeValue)->getZExtValue());
6714 }
6715 
6716 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
6717   OpenMPIRBuilder OMPBuilder(*M);
6718   OMPBuilder.setConfig(
6719       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6720   OMPBuilder.initialize();
6721 
6722   F->setName("func");
6723   IRBuilder<> Builder(BB);
6724   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6725 
6726   LoadInst *Value = nullptr;
6727   StoreInst *TargetStore = nullptr;
6728   llvm::SmallVector<llvm::Value *, 1> CapturedArgs = {
6729       Constant::getNullValue(PointerType::get(Ctx, 0))};
6730 
6731   auto SimpleArgAccessorCB =
6732       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6733           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6734           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6735         if (!OMPBuilder.Config.isTargetDevice()) {
6736           RetVal = cast<llvm::Value>(&Arg);
6737           return CodeGenIP;
6738         }
6739 
6740         Builder.restoreIP(AllocaIP);
6741 
6742         llvm::Value *Addr = Builder.CreateAlloca(
6743             Arg.getType()->isPointerTy()
6744                 ? Arg.getType()
6745                 : Type::getInt64Ty(Builder.getContext()),
6746             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6747         llvm::Value *AddrAscast =
6748             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6749         Builder.CreateStore(&Arg, AddrAscast);
6750 
6751         Builder.restoreIP(CodeGenIP);
6752 
6753         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6754 
6755         return Builder.saveIP();
6756       };
6757 
6758   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6759   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6760       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6761     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6762     return CombinedInfos;
6763   };
6764 
6765   llvm::Value *RaiseAlloca = nullptr;
6766 
6767   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6768                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6769       -> OpenMPIRBuilder::InsertPointTy {
6770     Builder.restoreIP(CodeGenIP);
6771     RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty());
6772     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6773     TargetStore = Builder.CreateStore(Value, RaiseAlloca);
6774     return Builder.saveIP();
6775   };
6776 
6777   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6778                                    F->getEntryBlock().getFirstInsertionPt());
6779   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6780                                   /*Line=*/3, /*Count=*/0);
6781   OpenMPIRBuilder::TargetKernelRuntimeAttrs RuntimeAttrs;
6782   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6783       /*ExecFlags=*/omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC,
6784       /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
6785 
6786   ASSERT_EXPECTED_INIT(
6787       OpenMPIRBuilder::InsertPointTy, AfterIP,
6788       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6789                               EntryInfo, DefaultAttrs, RuntimeAttrs,
6790                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
6791                               BodyGenCB, SimpleArgAccessorCB));
6792   Builder.restoreIP(AfterIP);
6793 
6794   Builder.CreateRetVoid();
6795   OMPBuilder.finalize();
6796 
6797   // Check outlined function
6798   EXPECT_FALSE(verifyModule(*M, &errs()));
6799   EXPECT_NE(TargetStore, nullptr);
6800   Function *OutlinedFn = TargetStore->getFunction();
6801   EXPECT_NE(F, OutlinedFn);
6802 
6803   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6804   // Account for the "implicit" first argument.
6805   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6806   EXPECT_EQ(OutlinedFn->arg_size(), 2U);
6807   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6808 
6809   // Check entry block, to see if we have raised our alloca
6810   // from the body to the entry block.
6811   auto &EntryBlock = OutlinedFn->getEntryBlock();
6812 
6813   // Check that we have moved our alloca created in the
6814   // BodyGenCB function, to the top of the function.
6815   Instruction *Alloca1 = &*EntryBlock.getFirstNonPHIIt();
6816   EXPECT_NE(Alloca1, nullptr);
6817   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6818   EXPECT_EQ(Alloca1, RaiseAlloca);
6819 
6820   // Verify we have not altered the rest of the function
6821   // inappropriately with our alloca movement.
6822   auto *Alloca2 = Alloca1->getNextNode();
6823   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6824   auto *Store2 = Alloca2->getNextNode();
6825   EXPECT_TRUE(isa<StoreInst>(Store2));
6826 
6827   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6828   EXPECT_NE(InitCall, nullptr);
6829   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6830   EXPECT_EQ(InitCall->arg_size(), 2U);
6831   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6832   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6833   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6834   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6835   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6836   auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6837   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6838             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6839   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6840             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6841   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6842             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6843 
6844   auto *EntryBlockBranch = EntryBlock.getTerminator();
6845   EXPECT_NE(EntryBlockBranch, nullptr);
6846   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6847 
6848   // Check user code block
6849   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6850   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6851   BasicBlock::iterator Load1 = UserCodeBlock->getFirstNonPHIIt();
6852   EXPECT_TRUE(isa<LoadInst>(Load1));
6853 
6854   auto *OutlinedBlockBr = Load1->getNextNode();
6855   EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));
6856 
6857   auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
6858   EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");
6859 
6860   Instruction *Load2 = &*OutlinedBlock->getFirstNonPHIIt();
6861   EXPECT_TRUE(isa<LoadInst>(Load2));
6862   EXPECT_EQ(Load2, Value);
6863   EXPECT_EQ(Load2->getNextNode(), TargetStore);
6864   auto *Deinit = TargetStore->getNextNode();
6865   EXPECT_NE(Deinit, nullptr);
6866 
6867   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6868   EXPECT_NE(DeinitCall, nullptr);
6869   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6870   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6871 
6872   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6873 
6874   // Check exit block
6875   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6876   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6877   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHIIt()));
6878 }
6879 
6880 TEST_F(OpenMPIRBuilderTest, CreateTask) {
6881   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6882   OpenMPIRBuilder OMPBuilder(*M);
6883   OMPBuilder.Config.IsTargetDevice = false;
6884   OMPBuilder.initialize();
6885   F->setName("func");
6886   IRBuilder<> Builder(BB);
6887 
6888   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6889   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6890   Value *Val128 =
6891       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6892 
6893   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6894     Builder.restoreIP(AllocaIP);
6895     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6896                                                 "bodygen.alloca128");
6897 
6898     Builder.restoreIP(CodeGenIP);
6899     // Loading and storing captured pointer and values
6900     Builder.CreateStore(Val128, Local128);
6901     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6902                                       "bodygen.load32");
6903 
6904     LoadInst *PrivLoad128 = Builder.CreateLoad(
6905         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
6906     Value *Cmp = Builder.CreateICmpNE(
6907         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
6908     Instruction *ThenTerm, *ElseTerm;
6909     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
6910                                   &ThenTerm, &ElseTerm);
6911     return Error::success();
6912   };
6913 
6914   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6915   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6916   OpenMPIRBuilder::LocationDescription Loc(
6917       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6918   ASSERT_EXPECTED_INIT(
6919       OpenMPIRBuilder::InsertPointTy, AfterIP,
6920       OMPBuilder.createTask(
6921           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6922           BodyGenCB));
6923   Builder.restoreIP(AfterIP);
6924   OMPBuilder.finalize();
6925   Builder.CreateRetVoid();
6926 
6927   EXPECT_FALSE(verifyModule(*M, &errs()));
6928 
6929   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6930       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6931           ->user_back());
6932 
6933   // Verify the Ident argument
6934   GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
6935   ASSERT_NE(Ident, nullptr);
6936   EXPECT_TRUE(Ident->hasInitializer());
6937   Constant *Initializer = Ident->getInitializer();
6938   GlobalVariable *SrcStrGlob =
6939       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6940   ASSERT_NE(SrcStrGlob, nullptr);
6941   ConstantDataArray *SrcSrc =
6942       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6943   ASSERT_NE(SrcSrc, nullptr);
6944 
6945   // Verify the num_threads argument.
6946   CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
6947   ASSERT_NE(GTID, nullptr);
6948   EXPECT_EQ(GTID->arg_size(), 1U);
6949   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
6950 
6951   // Verify the flags
6952   // TODO: Check for others flags. Currently testing only for tiedness.
6953   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6954   ASSERT_NE(Flags, nullptr);
6955   EXPECT_EQ(Flags->getSExtValue(), 1);
6956 
6957   // Verify the data size
6958   ConstantInt *DataSize =
6959       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6960   ASSERT_NE(DataSize, nullptr);
6961   EXPECT_EQ(DataSize->getSExtValue(), 40);
6962 
6963   ConstantInt *SharedsSize =
6964       dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4));
6965   EXPECT_EQ(SharedsSize->getSExtValue(),
6966             24); // 64-bit pointer + 128-bit integer
6967 
6968   // Verify Wrapper function
6969   Function *OutlinedFn =
6970       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6971   ASSERT_NE(OutlinedFn, nullptr);
6972 
6973   LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin());
6974   ASSERT_NE(SharedsLoad, nullptr);
6975   EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1));
6976 
6977   EXPECT_FALSE(OutlinedFn->isDeclaration());
6978   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty());
6979 
6980   // Verify that the data argument is used only once, and that too in the load
6981   // instruction that is then used for accessing shared data.
6982   Value *DataPtr = OutlinedFn->getArg(1);
6983   EXPECT_EQ(DataPtr->getNumUses(), 1U);
6984   EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser()));
6985   Value *Data = DataPtr->uses().begin()->getUser();
6986   EXPECT_TRUE(all_of(Data->uses(), [](Use &U) {
6987     return isa<GetElementPtrInst>(U.getUser());
6988   }));
6989 
6990   // Verify the presence of `trunc` and `icmp` instructions in Outlined function
6991   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6992                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
6993   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6994                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
6995 
6996   // Verify the execution of the task
6997   CallInst *TaskCall = dyn_cast<CallInst>(
6998       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6999           ->user_back());
7000   ASSERT_NE(TaskCall, nullptr);
7001   EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
7002   EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
7003   EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);
7004 
7005   // Verify that the argument data has been copied
7006   for (User *in : TaskAllocCall->users()) {
7007     if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
7008       EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
7009     }
7010   }
7011 }
7012 
7013 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
7014   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7015   OpenMPIRBuilder OMPBuilder(*M);
7016   OMPBuilder.Config.IsTargetDevice = false;
7017   OMPBuilder.initialize();
7018   F->setName("func");
7019   IRBuilder<> Builder(BB);
7020 
7021   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7022     return Error::success();
7023   };
7024 
7025   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7026   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7027   OpenMPIRBuilder::LocationDescription Loc(
7028       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7029   ASSERT_EXPECTED_INIT(
7030       OpenMPIRBuilder::InsertPointTy, AfterIP,
7031       OMPBuilder.createTask(
7032           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7033           BodyGenCB));
7034   Builder.restoreIP(AfterIP);
7035   OMPBuilder.finalize();
7036   Builder.CreateRetVoid();
7037 
7038   EXPECT_FALSE(verifyModule(*M, &errs()));
7039 
7040   // Check that the outlined function has only one argument.
7041   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7042       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7043           ->user_back());
7044   Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5));
7045   ASSERT_NE(OutlinedFn, nullptr);
7046   ASSERT_EQ(OutlinedFn->arg_size(), 1U);
7047 }
7048 
7049 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
7050   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7051   OpenMPIRBuilder OMPBuilder(*M);
7052   OMPBuilder.Config.IsTargetDevice = false;
7053   OMPBuilder.initialize();
7054   F->setName("func");
7055   IRBuilder<> Builder(BB);
7056   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7057     return Error::success();
7058   };
7059   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7060   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7061   OpenMPIRBuilder::LocationDescription Loc(
7062       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7063   ASSERT_EXPECTED_INIT(
7064       OpenMPIRBuilder::InsertPointTy, AfterIP,
7065       OMPBuilder.createTask(
7066           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7067           BodyGenCB,
7068           /*Tied=*/false));
7069   Builder.restoreIP(AfterIP);
7070   OMPBuilder.finalize();
7071   Builder.CreateRetVoid();
7072 
7073   // Check for the `Tied` argument
7074   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7075       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7076           ->user_back());
7077   ASSERT_NE(TaskAllocCall, nullptr);
7078   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
7079   ASSERT_NE(Flags, nullptr);
7080   EXPECT_EQ(Flags->getZExtValue() & 1U, 0U);
7081 
7082   EXPECT_FALSE(verifyModule(*M, &errs()));
7083 }
7084 
7085 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
7086   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7087   OpenMPIRBuilder OMPBuilder(*M);
7088   OMPBuilder.Config.IsTargetDevice = false;
7089   OMPBuilder.initialize();
7090   F->setName("func");
7091   IRBuilder<> Builder(BB);
7092   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7093     return Error::success();
7094   };
7095   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7096   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7097   OpenMPIRBuilder::LocationDescription Loc(
7098       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7099   AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext()));
7100   SmallVector<OpenMPIRBuilder::DependData> DDS;
7101   {
7102     OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn,
7103                                      Type::getInt32Ty(M->getContext()), InDep);
7104     DDS.push_back(DDIn);
7105   }
7106   ASSERT_EXPECTED_INIT(
7107       OpenMPIRBuilder::InsertPointTy, AfterIP,
7108       OMPBuilder.createTask(
7109           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7110           BodyGenCB,
7111           /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
7112   Builder.restoreIP(AfterIP);
7113   OMPBuilder.finalize();
7114   Builder.CreateRetVoid();
7115 
7116   // Check for the `NumDeps` argument
7117   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7118       OMPBuilder
7119           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps)
7120           ->user_back());
7121   ASSERT_NE(TaskAllocCall, nullptr);
7122   ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
7123   ASSERT_NE(NumDeps, nullptr);
7124   EXPECT_EQ(NumDeps->getZExtValue(), 1U);
7125 
7126   // Check for the `DepInfo` array argument
7127   AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4));
7128   ASSERT_NE(DepArray, nullptr);
7129   Value::user_iterator DepArrayI = DepArray->user_begin();
7130   ++DepArrayI;
7131   Value::user_iterator DepInfoI = DepArrayI->user_begin();
7132   // Check for the `DependKind` flag in the `DepInfo` array
7133   Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI);
7134   ASSERT_NE(Flag, nullptr);
7135   ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag);
7136   ASSERT_NE(FlagInt, nullptr);
7137   EXPECT_EQ(FlagInt->getZExtValue(),
7138             static_cast<unsigned int>(RTLDependenceKindTy::DepIn));
7139   ++DepInfoI;
7140   // Check for the size in the `DepInfo` array
7141   Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI);
7142   ASSERT_NE(Size, nullptr);
7143   ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size);
7144   ASSERT_NE(SizeInt, nullptr);
7145   EXPECT_EQ(SizeInt->getZExtValue(), 4U);
7146   ++DepInfoI;
7147   // Check for the variable address in the `DepInfo` array
7148   Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI);
7149   ASSERT_NE(AddrStored, nullptr);
7150   PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored);
7151   ASSERT_NE(AddrInt, nullptr);
7152   Value *Addr = AddrInt->getPointerOperand();
7153   EXPECT_EQ(Addr, InDep);
7154 
7155   ConstantInt *NumDepsNoAlias =
7156       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5));
7157   ASSERT_NE(NumDepsNoAlias, nullptr);
7158   EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U);
7159   EXPECT_EQ(TaskAllocCall->getOperand(6),
7160             ConstantPointerNull::get(PointerType::getUnqual(M->getContext())));
7161 
7162   EXPECT_FALSE(verifyModule(*M, &errs()));
7163 }
7164 
7165 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
7166   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7167   OpenMPIRBuilder OMPBuilder(*M);
7168   OMPBuilder.Config.IsTargetDevice = false;
7169   OMPBuilder.initialize();
7170   F->setName("func");
7171   IRBuilder<> Builder(BB);
7172   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7173     return Error::success();
7174   };
7175   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7176   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
7177   Builder.SetInsertPoint(BodyBB);
7178   Value *Final = Builder.CreateICmp(
7179       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
7180       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
7181   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7182   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
7183                        OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
7184                                              /*Tied=*/false, Final));
7185   Builder.restoreIP(AfterIP);
7186   OMPBuilder.finalize();
7187   Builder.CreateRetVoid();
7188 
7189   // Check for the `Tied` argument
7190   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7191       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7192           ->user_back());
7193   ASSERT_NE(TaskAllocCall, nullptr);
7194   BinaryOperator *OrInst =
7195       dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
7196   ASSERT_NE(OrInst, nullptr);
7197   EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);
7198 
7199   // One of the arguments to `or` instruction is the tied flag, which is equal
7200   // to zero.
7201   EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
7202     if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
7203       return TiedValue->getSExtValue() == 0;
7204     return false;
7205   }));
7206 
7207   // One of the arguments to `or` instruction is the final condition.
7208   EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
7209     if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
7210       ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
7211       ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
7212       if (!TrueValue || !FalseValue)
7213         return false;
7214       return Select->getCondition() == Final &&
7215              TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
7216     }
7217     return false;
7218   }));
7219 
7220   EXPECT_FALSE(verifyModule(*M, &errs()));
7221 }
7222 
7223 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
7224   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7225   OpenMPIRBuilder OMPBuilder(*M);
7226   OMPBuilder.Config.IsTargetDevice = false;
7227   OMPBuilder.initialize();
7228   F->setName("func");
7229   IRBuilder<> Builder(BB);
7230   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7231     return Error::success();
7232   };
7233   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7234   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
7235   Builder.SetInsertPoint(BodyBB);
7236   Value *IfCondition = Builder.CreateICmp(
7237       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
7238       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
7239   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7240   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
7241                        OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
7242                                              /*Tied=*/false, /*Final=*/nullptr,
7243                                              IfCondition));
7244   Builder.restoreIP(AfterIP);
7245   OMPBuilder.finalize();
7246   Builder.CreateRetVoid();
7247 
7248   EXPECT_FALSE(verifyModule(*M, &errs()));
7249 
7250   CallInst *TaskAllocCall = dyn_cast<CallInst>(
7251       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
7252           ->user_back());
7253   ASSERT_NE(TaskAllocCall, nullptr);
7254 
7255   // Check the branching is based on the if condition argument.
7256   BranchInst *IfConditionBranchInst =
7257       dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator());
7258   ASSERT_NE(IfConditionBranchInst, nullptr);
7259   ASSERT_TRUE(IfConditionBranchInst->isConditional());
7260   EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition);
7261 
7262   // Check that the `__kmpc_omp_task` executes only in the then branch.
7263   CallInst *TaskCall = dyn_cast<CallInst>(
7264       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
7265           ->user_back());
7266   ASSERT_NE(TaskCall, nullptr);
7267   EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0));
7268 
7269   // Check that the OpenMP Runtime Functions specific to `if` clause execute
7270   // only in the else branch. Also check that the function call is between the
7271   // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls.
7272   CallInst *TaskBeginIfCall = dyn_cast<CallInst>(
7273       OMPBuilder
7274           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0)
7275           ->user_back());
7276   CallInst *TaskCompleteCall = dyn_cast<CallInst>(
7277       OMPBuilder
7278           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0)
7279           ->user_back());
7280   ASSERT_NE(TaskBeginIfCall, nullptr);
7281   ASSERT_NE(TaskCompleteCall, nullptr);
7282   Function *OulinedFn =
7283       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
7284   ASSERT_NE(OulinedFn, nullptr);
7285   CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back());
7286   ASSERT_NE(OulinedFnCall, nullptr);
7287   EXPECT_EQ(TaskBeginIfCall->getParent(),
7288             IfConditionBranchInst->getSuccessor(1));
7289 
7290   EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall);
7291   EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall);
7292 }
7293 
7294 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
7295   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7296   OpenMPIRBuilder OMPBuilder(*M);
7297   OMPBuilder.initialize();
7298   F->setName("func");
7299   IRBuilder<> Builder(BB);
7300 
7301   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
7302   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
7303   Value *Val128 =
7304       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
7305   Instruction *ThenTerm, *ElseTerm;
7306 
7307   Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
7308 
7309   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7310     Builder.restoreIP(AllocaIP);
7311     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
7312                                                 "bodygen.alloca128");
7313 
7314     Builder.restoreIP(CodeGenIP);
7315     // Loading and storing captured pointer and values
7316     InternalStoreInst = Builder.CreateStore(Val128, Local128);
7317     InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
7318                                         "bodygen.load32");
7319 
7320     InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128,
7321                                          "bodygen.local.load128");
7322     InternalIfCmp = Builder.CreateICmpNE(
7323         InternalLoad32,
7324         Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType()));
7325     SplitBlockAndInsertIfThenElse(InternalIfCmp,
7326                                   CodeGenIP.getBlock()->getTerminator(),
7327                                   &ThenTerm, &ElseTerm);
7328     return Error::success();
7329   };
7330 
7331   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7332   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7333   OpenMPIRBuilder::LocationDescription Loc(
7334       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7335   ASSERT_EXPECTED_INIT(
7336       OpenMPIRBuilder::InsertPointTy, AfterIP,
7337       OMPBuilder.createTaskgroup(
7338           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7339           BodyGenCB));
7340   Builder.restoreIP(AfterIP);
7341   OMPBuilder.finalize();
7342   Builder.CreateRetVoid();
7343 
7344   EXPECT_FALSE(verifyModule(*M, &errs()));
7345 
7346   CallInst *TaskgroupCall = dyn_cast<CallInst>(
7347       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
7348           ->user_back());
7349   ASSERT_NE(TaskgroupCall, nullptr);
7350   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
7351       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
7352           ->user_back());
7353   ASSERT_NE(EndTaskgroupCall, nullptr);
7354 
7355   // Verify the Ident argument
7356   GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0));
7357   ASSERT_NE(Ident, nullptr);
7358   EXPECT_TRUE(Ident->hasInitializer());
7359   Constant *Initializer = Ident->getInitializer();
7360   GlobalVariable *SrcStrGlob =
7361       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
7362   ASSERT_NE(SrcStrGlob, nullptr);
7363   ConstantDataArray *SrcSrc =
7364       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
7365   ASSERT_NE(SrcSrc, nullptr);
7366 
7367   // Verify the num_threads argument.
7368   CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1));
7369   ASSERT_NE(GTID, nullptr);
7370   EXPECT_EQ(GTID->arg_size(), 1U);
7371   EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr(
7372                                            OMPRTL___kmpc_global_thread_num));
7373 
7374   // Checking the general structure of the IR generated is same as expected.
7375   Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction();
7376   EXPECT_EQ(GeneratedStoreInst, InternalStoreInst);
7377   Instruction *GeneratedLoad32 =
7378       GeneratedStoreInst->getNextNonDebugInstruction();
7379   EXPECT_EQ(GeneratedLoad32, InternalLoad32);
7380   Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction();
7381   EXPECT_EQ(GeneratedLoad128, InternalLoad128);
7382 
7383   // Checking the ordering because of the if statements and that
7384   // `__kmp_end_taskgroup` call is after the if branching.
7385   BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(),
7386                             ThenTerm->getSuccessor(0),
7387                             EndTaskgroupCall->getParent(),
7388                             ElseTerm->getParent()};
7389   verifyDFSOrder(F, RefOrder);
7390 }
7391 
7392 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
7393   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7394   OpenMPIRBuilder OMPBuilder(*M);
7395   OMPBuilder.Config.IsTargetDevice = false;
7396   OMPBuilder.initialize();
7397   F->setName("func");
7398   IRBuilder<> Builder(BB);
7399 
7400   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7401     Builder.restoreIP(AllocaIP);
7402     AllocaInst *Alloca32 =
7403         Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
7404     AllocaInst *Alloca64 =
7405         Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
7406     Builder.restoreIP(CodeGenIP);
7407     auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7408       Builder.restoreIP(CodeGenIP);
7409       LoadInst *LoadValue =
7410           Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
7411       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64));
7412       Builder.CreateStore(AddInst, Alloca64);
7413       return Error::success();
7414     };
7415     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7416     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1,
7417                          OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
7418     Builder.restoreIP(TaskIP1);
7419 
7420     auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7421       Builder.restoreIP(CodeGenIP);
7422       LoadInst *LoadValue =
7423           Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
7424       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32));
7425       Builder.CreateStore(AddInst, Alloca32);
7426       return Error::success();
7427     };
7428     OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
7429     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2,
7430                          OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
7431     Builder.restoreIP(TaskIP2);
7432   };
7433 
7434   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7435   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7436   OpenMPIRBuilder::LocationDescription Loc(
7437       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7438   ASSERT_EXPECTED_INIT(
7439       OpenMPIRBuilder::InsertPointTy, AfterIP,
7440       OMPBuilder.createTaskgroup(
7441           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7442           BODYGENCB_WRAPPER(BodyGenCB)));
7443   Builder.restoreIP(AfterIP);
7444   OMPBuilder.finalize();
7445   Builder.CreateRetVoid();
7446 
7447   EXPECT_FALSE(verifyModule(*M, &errs()));
7448 
7449   CallInst *TaskgroupCall = dyn_cast<CallInst>(
7450       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
7451           ->user_back());
7452   ASSERT_NE(TaskgroupCall, nullptr);
7453   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
7454       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
7455           ->user_back());
7456   ASSERT_NE(EndTaskgroupCall, nullptr);
7457 
7458   Function *TaskAllocFn =
7459       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
7460   ASSERT_EQ(TaskAllocFn->getNumUses(), 2u);
7461 
7462   CallInst *FirstTaskAllocCall =
7463       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin());
7464   CallInst *SecondTaskAllocCall =
7465       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++);
7466   ASSERT_NE(FirstTaskAllocCall, nullptr);
7467   ASSERT_NE(SecondTaskAllocCall, nullptr);
7468 
7469   // Verify that the tasks have been generated in order and inside taskgroup
7470   // construct.
7471   BasicBlock *RefOrder[] = {
7472       TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(),
7473       SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()};
7474   verifyDFSOrder(F, RefOrder);
7475 }
7476 
7477 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
7478   OpenMPIRBuilder OMPBuilder(*M);
7479   OMPBuilder.initialize();
7480 
7481   IRBuilder<> Builder(BB);
7482 
7483   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
7484   OpenMPIRBuilder::TargetDataInfo Info(true, false);
7485 
7486   auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext());
7487   auto Int64PtrTy = PointerType::getUnqual(Builder.getContext());
7488 
7489   Info.RTArgs.BasePointersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7490   Info.RTArgs.PointersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7491   Info.RTArgs.SizesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7492   Info.RTArgs.MapTypesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7493   Info.RTArgs.MapNamesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7494   Info.RTArgs.MappersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7495   Info.NumberOfPtrs = 4;
7496   Info.EmitDebug = false;
7497   OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
7498 
7499   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
7500   EXPECT_NE(RTArgs.PointersArray, nullptr);
7501   EXPECT_NE(RTArgs.SizesArray, nullptr);
7502   EXPECT_NE(RTArgs.MapTypesArray, nullptr);
7503   EXPECT_NE(RTArgs.MappersArray, nullptr);
7504   EXPECT_NE(RTArgs.MapNamesArray, nullptr);
7505   EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr);
7506 
7507   EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy);
7508   EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy);
7509   EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy);
7510   EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy);
7511   EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy);
7512   EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy);
7513 }
7514 
7515 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
7516   OpenMPIRBuilder OMPBuilder(*M);
7517   OMPBuilder.setConfig(
7518       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
7519   OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager;
7520   TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0);
7521   InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0);
7522   EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo));
7523   InfoManager.initializeDeviceGlobalVarEntryInfo(
7524       "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0);
7525   InfoManager.registerTargetRegionEntryInfo(
7526       EntryInfo, nullptr, nullptr,
7527       OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7528   InfoManager.registerDeviceGlobalVarEntryInfo(
7529       "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
7530       GlobalValue::WeakAnyLinkage);
7531   EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar"));
7532 }
7533 
7534 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they
7535 // call each other (recursively in some cases). The test case test these
7536 // functions by utilising them for host code generation for declare target
7537 // global variables
7538 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) {
7539   OpenMPIRBuilder OMPBuilder(*M);
7540   OMPBuilder.initialize();
7541   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
7542   OMPBuilder.setConfig(Config);
7543 
7544   std::vector<llvm::Triple> TargetTriple;
7545   TargetTriple.emplace_back("amdgcn-amd-amdhsa");
7546 
7547   TargetRegionEntryInfo EntryInfo("", 42, 4711, 17);
7548   std::vector<GlobalVariable *> RefsGathered;
7549 
7550   std::vector<Constant *> Globals;
7551   auto *IntTy = Type::getInt32Ty(Ctx);
7552   for (int I = 0; I < 2; ++I) {
7553     Globals.push_back(M->getOrInsertGlobal(
7554         "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * {
7555           return new GlobalVariable(
7556               *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage,
7557               ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I));
7558         }));
7559   }
7560 
7561   OMPBuilder.registerTargetGlobalVariable(
7562       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
7563       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
7564       EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple,
7565       nullptr, nullptr, Globals[0]->getType(), Globals[0]);
7566 
7567   OMPBuilder.registerTargetGlobalVariable(
7568       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink,
7569       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
7570       EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple,
7571       nullptr, nullptr, Globals[1]->getType(), Globals[1]);
7572 
7573   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn =
7574       [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
7575          const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
7576     // If this is invoked, then we want to emit an error, even if it is not
7577     // neccesarily the most readable, as something has went wrong. The
7578     // test-suite unfortunately eats up all error output
7579     ASSERT_EQ(Kind, Kind);
7580   };
7581 
7582   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn);
7583 
7584   // Clauses for data_int_0 with To + Any clauses for the host
7585   std::vector<GlobalVariable *> OffloadEntries;
7586   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name"));
7587   OffloadEntries.push_back(
7588       M->getNamedGlobal(".offloading.entry.test_data_int_0"));
7589 
7590   // Clauses for data_int_1 with Link + Any clauses for the host
7591   OffloadEntries.push_back(
7592       M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr"));
7593   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1"));
7594   OffloadEntries.push_back(
7595       M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr"));
7596 
7597   for (unsigned I = 0; I < OffloadEntries.size(); ++I)
7598     EXPECT_NE(OffloadEntries[I], nullptr);
7599 
7600   // Metadata generated for the host offload module
7601   NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info");
7602   ASSERT_THAT(OffloadMetadata, testing::NotNull());
7603   StringRef Nodes[2] = {
7604       cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1))
7605           ->getString(),
7606       cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1))
7607           ->getString()};
7608   EXPECT_THAT(
7609       Nodes, testing::UnorderedElementsAre("test_data_int_0",
7610                                            "test_data_int_1_decl_tgt_ref_ptr"));
7611 }
7612 
7613 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) {
7614   OpenMPIRBuilder OMPBuilder(*M);
7615   OMPBuilder.initialize();
7616   OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true,
7617                                /* IsGPU = */ true,
7618                                /* OpenMPOffloadMandatory = */ false,
7619                                /* HasRequiresReverseOffload = */ false,
7620                                /* HasRequiresUnifiedAddress = */ false,
7621                                /* HasRequiresUnifiedSharedMemory = */ false,
7622                                /* HasRequiresDynamicAllocators = */ false);
7623   OMPBuilder.setConfig(Config);
7624 
7625   FunctionCallee FnTypeAndCallee =
7626       M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx));
7627 
7628   auto *Fn = cast<Function>(FnTypeAndCallee.getCallee());
7629   OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn,
7630                                 /* Size = */ 0,
7631                                 /* Flags = */ 0, GlobalValue::WeakAnyLinkage);
7632 
7633   // Check kernel attributes
7634   EXPECT_TRUE(Fn->hasFnAttribute("kernel"));
7635   EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress));
7636 }
7637 
7638 } // namespace
7639