xref: /llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (revision 27bc6bdaba1138d611e256e890023eefee677edc)
1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
12 #include "llvm/IR/BasicBlock.h"
13 #include "llvm/IR/DIBuilder.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/InstIterator.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/Verifier.h"
20 #include "llvm/Passes/PassBuilder.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Testing/Support/Error.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 #include "gmock/gmock.h"
25 #include "gtest/gtest.h"
26 #include <optional>
27 
28 using namespace llvm;
29 using namespace omp;
30 
// Helper that intends to be functionally equivalent to `VarType VarName = Init`
// for an `Init` that returns an `Expected<VarType>` value. It produces an error
// message and returns if `Init` didn't produce a valid result.
//
// The macro expands to several statements and declares a helper variable named
// `Expected<VarName>Result` in the enclosing scope, so it must be used at
// statement level and only once per `VarName` per scope. The helper name
// deliberately avoids double underscores, which are reserved identifiers.
#define ASSERT_EXPECTED_INIT(VarType, VarName, Init)                           \
  auto Expected##VarName##Result = Init;                                       \
  ASSERT_THAT_EXPECTED(Expected##VarName##Result, Succeeded());                \
  VarType VarName = *Expected##VarName##Result
38 
// Similar to ASSERT_EXPECTED_INIT, but returns a given expression in case of
// error after printing the error message.
//
// This variant is meant for helpers that do not return void, where the
// ASSERT_*() family cannot be used. It declares a helper variable named
// `Expected<VarName>Result` in the enclosing scope; the name deliberately
// avoids double underscores, which are reserved identifiers.
#define ASSERT_EXPECTED_INIT_RETURN(VarType, VarName, Init, Return)            \
  auto Expected##VarName##Result = Init;                                       \
  EXPECT_THAT_EXPECTED(Expected##VarName##Result, Succeeded());                \
  if (!Expected##VarName##Result)                                              \
    return Return;                                                             \
  VarType VarName = *Expected##VarName##Result
47 
// Adapters that turn a callback whose result is ignored into an
// Error-returning lambda. Since the wrapped callback does not have to return
// an Error itself, EXPECT*()/ASSERT*() macros can be used inside of it.
//
// Each adapter captures \p cb by reference, so \p cb must name a variable that
// outlives the adapter. The adapter forwards its arguments unchanged and always
// reports success.

// Adapter for callbacks taking a single insertion point.
#define FINICB_WRAPPER(cb)                                                     \
  [&cb](InsertPointTy FiniIP) -> Error {                                       \
    cb(FiniIP);                                                                \
    return Error::success();                                                   \
  }

// Adapter for callbacks taking an alloca and a code generation insertion point.
#define BODYGENCB_WRAPPER(cb)                                                  \
  [&cb](InsertPointTy AllocIP, InsertPointTy BodyIP) -> Error {                \
    cb(AllocIP, BodyIP);                                                       \
    return Error::success();                                                   \
  }

// Adapter for callbacks taking an insertion point and a loop counter value.
#define LOOP_BODYGENCB_WRAPPER(cb)                                             \
  [&cb](InsertPointTy BodyIP, Value *Counter) -> Error {                       \
    cb(BodyIP, Counter);                                                       \
    return Error::success();                                                   \
  }
67 
68 namespace {
69 
70 /// Create an instruction that uses the values in \p Values. We use "printf"
71 /// just because it is often used for this purpose in test code, but it is never
72 /// executed here.
73 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
74                                   ArrayRef<Value *> Values) {
75   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
76 
77   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
78   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
79   Constant *Indices[] = {Zero, Zero};
80   Constant *FormatStrConst =
81       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
82 
83   Function *PrintfDecl = M->getFunction("printf");
84   if (!PrintfDecl) {
85     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
86     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
87     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
88   }
89 
90   SmallVector<Value *, 4> Args;
91   Args.push_back(FormatStrConst);
92   Args.append(Values.begin(), Values.end());
93   return Builder.CreateCall(PrintfDecl, Args);
94 }
95 
96 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
97 /// order the control flow of \p F.
98 ///
99 /// This is an easy way to verify the branching structure of the CFG without
100 /// checking every branch instruction individually. For the CFG of a
101 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
102 /// the body, i.e. the DFS order corresponds to the execution order with one
103 /// loop iteration.
104 static testing::AssertionResult
105 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
106   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
107   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
108 
109   df_iterator_default_set<BasicBlock *, 16> Visited;
110   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
111 
112   BasicBlock *Prev = nullptr;
113   for (BasicBlock *BB : DFS) {
114     if (It != E && BB == *It) {
115       Prev = *It;
116       ++It;
117     }
118   }
119 
120   if (It == E)
121     return testing::AssertionSuccess();
122   if (!Prev)
123     return testing::AssertionFailure()
124            << "Did not find " << (*It)->getName() << " in control flow";
125   return testing::AssertionFailure()
126          << "Expected " << Prev->getName() << " before " << (*It)->getName()
127          << " in control flow";
128 }
129 
130 /// Verify that blocks in \p RefOrder are in the same relative order in the
131 /// linked lists of blocks in \p F. The linked list may contain additional
132 /// blocks in-between.
133 ///
134 /// While the order in the linked list is not relevant for semantics, keeping
135 /// the order roughly in execution order makes its printout easier to read.
136 static testing::AssertionResult
137 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
138   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
139   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
140 
141   BasicBlock *Prev = nullptr;
142   for (BasicBlock &BB : *F) {
143     if (It != E && &BB == *It) {
144       Prev = *It;
145       ++It;
146     }
147   }
148 
149   if (It == E)
150     return testing::AssertionSuccess();
151   if (!Prev)
152     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
153                                        << " in function " << F->getName();
154   return testing::AssertionFailure()
155          << "Expected " << Prev->getName() << " before " << (*It)->getName()
156          << " in function " << F->getName();
157 }
158 
159 /// Populate Calls with call instructions calling the function with the given
160 /// FnID from the given function F.
161 static void findCalls(Function *F, omp::RuntimeFunction FnID,
162                       OpenMPIRBuilder &OMPBuilder,
163                       SmallVectorImpl<CallInst *> &Calls) {
164   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
165   for (BasicBlock &BB : *F) {
166     for (Instruction &I : BB) {
167       auto *Call = dyn_cast<CallInst>(&I);
168       if (Call && Call->getCalledFunction() == Fn)
169         Calls.push_back(Call);
170     }
171   }
172 }
173 
174 /// Assuming \p F contains only one call to the function with the given \p FnID,
175 /// return that call.
176 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
177                                 OpenMPIRBuilder &OMPBuilder) {
178   SmallVector<CallInst *, 1> Calls;
179   findCalls(F, FnID, OMPBuilder, Calls);
180   EXPECT_EQ(1u, Calls.size());
181   if (Calls.size() != 1)
182     return nullptr;
183   return Calls.front();
184 }
185 
186 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
187   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
188   case omp::OMPScheduleType::BaseDynamicChunked:
189     return omp::OMP_SCHEDULE_Dynamic;
190   case omp::OMPScheduleType::BaseGuidedChunked:
191     return omp::OMP_SCHEDULE_Guided;
192   case omp::OMPScheduleType::BaseAuto:
193     return omp::OMP_SCHEDULE_Auto;
194   case omp::OMPScheduleType::BaseRuntime:
195     return omp::OMP_SCHEDULE_Runtime;
196   default:
197     llvm_unreachable("unknown type for this test");
198   }
199 }
200 
201 class OpenMPIRBuilderTest : public testing::Test {
202 protected:
203   void SetUp() override {
204     M.reset(new Module("MyModule", Ctx));
205     FunctionType *FTy =
206         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
207                           /*isVarArg=*/false);
208     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
209     BB = BasicBlock::Create(Ctx, "", F);
210 
211     DIBuilder DIB(*M);
212     auto File = DIB.createFile("test.dbg", "/src", std::nullopt,
213                                std::optional<StringRef>("/src/test.dbg"));
214     auto CU =
215         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
216     auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray({}));
217     auto SP = DIB.createFunction(
218         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
219         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
220     F->setSubprogram(SP);
221     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
222     DIB.finalize();
223     DL = DILocation::get(Ctx, 3, 7, Scope);
224   }
225 
226   void TearDown() override {
227     BB = nullptr;
228     M.reset();
229   }
230 
231   /// Create a function with a simple loop that calls printf using the logical
232   /// loop counter for use with tests that need a CanonicalLoopInfo object.
233   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
234                                              OpenMPIRBuilder &OMPBuilder,
235                                              int UseIVBits,
236                                              CallInst **Call = nullptr,
237                                              BasicBlock **BodyCode = nullptr) {
238     OMPBuilder.initialize();
239     F->setName("func");
240 
241     IRBuilder<> Builder(BB);
242     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
243     Value *TripCount = F->getArg(0);
244 
245     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
246     Value *CastedTripCount =
247         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
248 
249     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
250                              llvm::Value *LC) {
251       Builder.restoreIP(CodeGenIP);
252       if (BodyCode)
253         *BodyCode = Builder.GetInsertBlock();
254 
255       // Add something that consumes the induction variable to the body.
256       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
257       if (Call)
258         *Call = CallInst;
259 
260       return Error::success();
261     };
262 
263     ASSERT_EXPECTED_INIT_RETURN(
264         CanonicalLoopInfo *, Loop,
265         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount),
266         nullptr);
267 
268     // Finalize the function.
269     Builder.restoreIP(Loop->getAfterIP());
270     Builder.CreateRetVoid();
271 
272     return Loop;
273   }
274 
275   LLVMContext Ctx;
276   std::unique_ptr<Module> M;
277   Function *F;
278   BasicBlock *BB;
279   DebugLoc DL;
280 };
281 
282 class OpenMPIRBuilderTestWithParams
283     : public OpenMPIRBuilderTest,
284       public ::testing::WithParamInterface<omp::OMPScheduleType> {};
285 
286 class OpenMPIRBuilderTestWithIVBits
287     : public OpenMPIRBuilderTest,
288       public ::testing::WithParamInterface<int> {};
289 
290 // Returns the value stored in the given allocation. Returns null if the given
291 // value is not a result of an InstTy instruction, if no value is stored or if
292 // there is more than one store.
293 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
294   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
295   if (!Inst)
296     return nullptr;
297   StoreInst *Store = nullptr;
298   for (Use &U : Inst->uses()) {
299     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
300       EXPECT_EQ(Store, nullptr);
301       Store = CandidateStore;
302     }
303   }
304   if (!Store)
305     return nullptr;
306   return Store->getValueOperand();
307 }
308 
309 // Returns the value stored in the aggregate argument of an outlined function,
310 // or nullptr if it is not found.
311 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
312                                            unsigned Idx) {
313   GetElementPtrInst *GEPAtIdx = nullptr;
314   // Find GEP instruction at that index.
315   for (User *Usr : Aggregate->users()) {
316     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
317     if (!GEP)
318       continue;
319 
320     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
321       continue;
322 
323     EXPECT_EQ(GEPAtIdx, nullptr);
324     GEPAtIdx = GEP;
325   }
326 
327   EXPECT_NE(GEPAtIdx, nullptr);
328   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
329 
330   // Find the value stored to the aggregate.
331   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
332   Value *StoredAggValue = StoreToAgg->getValueOperand();
333 
334   Value *StoredValue = nullptr;
335 
336   // Find the value stored to the value stored in the aggregate.
337   for (User *Usr : StoredAggValue->users()) {
338     StoreInst *Store = dyn_cast<StoreInst>(Usr);
339     if (!Store)
340       continue;
341 
342     if (Store->getPointerOperand() != StoredAggValue)
343       continue;
344 
345     EXPECT_EQ(StoredValue, nullptr);
346     StoredValue = Store->getValueOperand();
347   }
348 
349   return StoredValue;
350 }
351 
352 // Returns the aggregate that the value is originating from.
353 static Value *findAggregateFromValue(Value *V) {
354   // Expects a load instruction that loads from the aggregate.
355   LoadInst *Load = dyn_cast<LoadInst>(V);
356   EXPECT_NE(Load, nullptr);
357   // Find the GEP instruction used in the load instruction.
358   GetElementPtrInst *GEP =
359       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
360   EXPECT_NE(GEP, nullptr);
361   // Find the aggregate used in the GEP instruction.
362   Value *Aggregate = GEP->getPointerOperand();
363 
364   return Aggregate;
365 }
366 
367 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
368   OpenMPIRBuilder OMPBuilder(*M);
369   OMPBuilder.initialize();
370 
371   IRBuilder<> Builder(BB);
372 
373   ASSERT_THAT_EXPECTED(
374       OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for),
375       Succeeded());
376   EXPECT_TRUE(M->global_empty());
377   EXPECT_EQ(M->size(), 1U);
378   EXPECT_EQ(F->size(), 1U);
379   EXPECT_EQ(BB->size(), 0U);
380 
381   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
382   ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded());
383   EXPECT_FALSE(M->global_empty());
384   EXPECT_EQ(M->size(), 3U);
385   EXPECT_EQ(F->size(), 1U);
386   EXPECT_EQ(BB->size(), 2U);
387 
388   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
389   EXPECT_NE(GTID, nullptr);
390   EXPECT_EQ(GTID->arg_size(), 1U);
391   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
392   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
393   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
394 
395   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
396   EXPECT_NE(Barrier, nullptr);
397   EXPECT_EQ(Barrier->arg_size(), 2U);
398   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
399   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
400   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
401 
402   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
403 
404   Builder.CreateUnreachable();
405   EXPECT_FALSE(verifyModule(*M, &errs()));
406 }
407 
408 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
409   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
410   OpenMPIRBuilder OMPBuilder(*M);
411   OMPBuilder.initialize();
412 
413   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
414   new UnreachableInst(Ctx, CBB);
415   auto FiniCB = [&](InsertPointTy IP) {
416     ASSERT_NE(IP.getBlock(), nullptr);
417     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
418     BranchInst::Create(CBB, IP.getBlock());
419   };
420   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
421 
422   IRBuilder<> Builder(BB);
423 
424   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
425   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP,
426                        OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel));
427   Builder.restoreIP(NewIP);
428   EXPECT_FALSE(M->global_empty());
429   EXPECT_EQ(M->size(), 4U);
430   EXPECT_EQ(F->size(), 4U);
431   EXPECT_EQ(BB->size(), 4U);
432 
433   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
434   EXPECT_NE(GTID, nullptr);
435   EXPECT_EQ(GTID->arg_size(), 1U);
436   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
437   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
438   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
439 
440   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
441   EXPECT_NE(Cancel, nullptr);
442   EXPECT_EQ(Cancel->arg_size(), 3U);
443   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
444   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
445   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
446   EXPECT_EQ(Cancel->getNumUses(), 1U);
447   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
448   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
449   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
450   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
451   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
452   EXPECT_NE(GTID1, nullptr);
453   EXPECT_EQ(GTID1->arg_size(), 1U);
454   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
455   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
456   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
457   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
458   EXPECT_NE(Barrier, nullptr);
459   EXPECT_EQ(Barrier->arg_size(), 2U);
460   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
461   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
462   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
463   EXPECT_EQ(Barrier->getNumUses(), 0U);
464   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
465             1U);
466   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
467 
468   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
469 
470   OMPBuilder.popFinalizationCB();
471 
472   Builder.CreateUnreachable();
473   EXPECT_FALSE(verifyModule(*M, &errs()));
474 }
475 
476 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
477   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
478   OpenMPIRBuilder OMPBuilder(*M);
479   OMPBuilder.initialize();
480 
481   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
482   new UnreachableInst(Ctx, CBB);
483   auto FiniCB = [&](InsertPointTy IP) {
484     ASSERT_NE(IP.getBlock(), nullptr);
485     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
486     BranchInst::Create(CBB, IP.getBlock());
487   };
488   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
489 
490   IRBuilder<> Builder(BB);
491 
492   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
493   ASSERT_EXPECTED_INIT(
494       OpenMPIRBuilder::InsertPointTy, NewIP,
495       OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel));
496   Builder.restoreIP(NewIP);
497   EXPECT_FALSE(M->global_empty());
498   EXPECT_EQ(M->size(), 4U);
499   EXPECT_EQ(F->size(), 7U);
500   EXPECT_EQ(BB->size(), 1U);
501   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
502   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
503   BB = BB->getTerminator()->getSuccessor(0);
504   EXPECT_EQ(BB->size(), 4U);
505 
506   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
507   EXPECT_NE(GTID, nullptr);
508   EXPECT_EQ(GTID->arg_size(), 1U);
509   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
510   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
511   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
512 
513   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
514   EXPECT_NE(Cancel, nullptr);
515   EXPECT_EQ(Cancel->arg_size(), 3U);
516   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
517   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
518   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
519   EXPECT_EQ(Cancel->getNumUses(), 1U);
520   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
521   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
522   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
523   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
524             NewIP.getBlock());
525   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
526   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
527   EXPECT_NE(GTID1, nullptr);
528   EXPECT_EQ(GTID1->arg_size(), 1U);
529   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
530   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
531   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
532   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
533   EXPECT_NE(Barrier, nullptr);
534   EXPECT_EQ(Barrier->arg_size(), 2U);
535   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
536   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
537   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
538   EXPECT_EQ(Barrier->getNumUses(), 0U);
539   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
540             1U);
541   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
542 
543   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
544 
545   OMPBuilder.popFinalizationCB();
546 
547   Builder.CreateUnreachable();
548   EXPECT_FALSE(verifyModule(*M, &errs()));
549 }
550 
551 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
552   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
553   OpenMPIRBuilder OMPBuilder(*M);
554   OMPBuilder.initialize();
555 
556   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
557   new UnreachableInst(Ctx, CBB);
558   auto FiniCB = [&](InsertPointTy IP) {
559     ASSERT_NE(IP.getBlock(), nullptr);
560     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
561     BranchInst::Create(CBB, IP.getBlock());
562   };
563   OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
564 
565   IRBuilder<> Builder(BB);
566 
567   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
568   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, NewIP,
569                        OMPBuilder.createBarrier(Loc, OMPD_for));
570   Builder.restoreIP(NewIP);
571   EXPECT_FALSE(M->global_empty());
572   EXPECT_EQ(M->size(), 3U);
573   EXPECT_EQ(F->size(), 4U);
574   EXPECT_EQ(BB->size(), 4U);
575 
576   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
577   EXPECT_NE(GTID, nullptr);
578   EXPECT_EQ(GTID->arg_size(), 1U);
579   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
580   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
581   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
582 
583   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
584   EXPECT_NE(Barrier, nullptr);
585   EXPECT_EQ(Barrier->arg_size(), 2U);
586   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
587   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
588   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
589   EXPECT_EQ(Barrier->getNumUses(), 1U);
590   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
591   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
592   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
593   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
594   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
595             1U);
596   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
597             CBB);
598 
599   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
600 
601   OMPBuilder.popFinalizationCB();
602 
603   Builder.CreateUnreachable();
604   EXPECT_FALSE(verifyModule(*M, &errs()));
605 }
606 
607 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
608   OpenMPIRBuilder OMPBuilder(*M);
609   OMPBuilder.initialize();
610   F->setName("func");
611 
612   IRBuilder<> Builder(BB);
613 
614   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
615   ASSERT_THAT_EXPECTED(OMPBuilder.createBarrier(Loc, OMPD_for), Succeeded());
616   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
617   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
618   EXPECT_EQ(GTID->getDebugLoc(), DL);
619   EXPECT_EQ(Barrier->getDebugLoc(), DL);
620   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
621   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
622     return;
623   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
624   EXPECT_TRUE(Ident->hasInitializer());
625   if (!Ident->hasInitializer())
626     return;
627   Constant *Initializer = Ident->getInitializer();
628   EXPECT_TRUE(
629       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
630   GlobalVariable *SrcStrGlob =
631       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
632   if (!SrcStrGlob)
633     return;
634   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
635   ConstantDataArray *SrcSrc =
636       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
637   if (!SrcSrc)
638     return;
639   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
640 }
641 
642 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
643   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
644   std::string oldDLStr = M->getDataLayoutStr();
645   M->setDataLayout(
646       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
647       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
648       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
649   OpenMPIRBuilder OMPBuilder(*M);
650   OMPBuilder.Config.IsTargetDevice = true;
651   OMPBuilder.initialize();
652   F->setName("func");
653   IRBuilder<> Builder(BB);
654   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
655   Builder.CreateBr(EnterBB);
656   Builder.SetInsertPoint(EnterBB);
657   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
658 
659   AllocaInst *PrivAI = nullptr;
660 
661   unsigned NumBodiesGenerated = 0;
662   unsigned NumPrivatizedVars = 0;
663   unsigned NumFinalizationPoints = 0;
664 
665   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
666     ++NumBodiesGenerated;
667 
668     Builder.restoreIP(AllocaIP);
669     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
670     Builder.CreateStore(F->arg_begin(), PrivAI);
671 
672     Builder.restoreIP(CodeGenIP);
673     Value *PrivLoad =
674         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
675     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
676     Instruction *ThenTerm, *ElseTerm;
677     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
678                                   &ThenTerm, &ElseTerm);
679     return Error::success();
680   };
681 
682   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
683                     Value &Orig, Value &Inner,
684                     Value *&ReplacementValue) -> InsertPointTy {
685     ++NumPrivatizedVars;
686 
687     if (!isa<AllocaInst>(Orig)) {
688       EXPECT_EQ(&Orig, F->arg_begin());
689       ReplacementValue = &Inner;
690       return CodeGenIP;
691     }
692 
693     // Since the original value is an allocation, it has a pointer type and
694     // therefore no additional wrapping should happen.
695     EXPECT_EQ(&Orig, &Inner);
696 
697     // Trivial copy (=firstprivate).
698     Builder.restoreIP(AllocaIP);
699     Type *VTy = ReplacementValue->getType();
700     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
701     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
702     Builder.restoreIP(CodeGenIP);
703     Builder.CreateStore(V, ReplacementValue);
704     return CodeGenIP;
705   };
706 
707   auto FiniCB = [&](InsertPointTy CodeGenIP) {
708     ++NumFinalizationPoints;
709     return Error::success();
710   };
711 
712   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
713                                     F->getEntryBlock().getFirstInsertionPt());
714   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
715                        OMPBuilder.createParallel(
716                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
717                            nullptr, OMP_PROC_BIND_default, false));
718 
719   EXPECT_EQ(NumBodiesGenerated, 1U);
720   EXPECT_EQ(NumPrivatizedVars, 1U);
721   EXPECT_EQ(NumFinalizationPoints, 1U);
722 
723   Builder.restoreIP(AfterIP);
724   Builder.CreateRetVoid();
725 
726   OMPBuilder.finalize();
727   Function *OutlinedFn = PrivAI->getFunction();
728   EXPECT_FALSE(verifyModule(*M, &errs()));
729   EXPECT_NE(OutlinedFn, F);
730   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
731   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
732   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
733 
734   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
735   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
736   // Make sure that arguments are pointers in 0 address address space
737   EXPECT_EQ(OutlinedFn->getArg(0)->getType(),
738             PointerType::get(M->getContext(), 0));
739   EXPECT_EQ(OutlinedFn->getArg(1)->getType(),
740             PointerType::get(M->getContext(), 0));
741   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
742             PointerType::get(M->getContext(), 0));
743   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
744   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
745   User *Usr = OutlinedFn->user_back();
746   ASSERT_TRUE(isa<CallInst>(Usr));
747   CallInst *Parallel51CI = dyn_cast<CallInst>(Usr);
748   ASSERT_NE(Parallel51CI, nullptr);
749 
750   EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51");
751   EXPECT_EQ(Parallel51CI->arg_size(), 9U);
752   EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn);
753   EXPECT_TRUE(
754       isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts()));
755   EXPECT_EQ(Parallel51CI, Usr);
756   M->setDataLayout(oldDLStr);
757 }
758 
759 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
760   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
761   OpenMPIRBuilder OMPBuilder(*M);
762   OMPBuilder.Config.IsTargetDevice = false;
763   OMPBuilder.initialize();
764   F->setName("func");
765   IRBuilder<> Builder(BB);
766 
767   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
768   Builder.CreateBr(EnterBB);
769   Builder.SetInsertPoint(EnterBB);
770   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
771 
772   AllocaInst *PrivAI = nullptr;
773 
774   unsigned NumBodiesGenerated = 0;
775   unsigned NumPrivatizedVars = 0;
776   unsigned NumFinalizationPoints = 0;
777 
778   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
779     ++NumBodiesGenerated;
780 
781     Builder.restoreIP(AllocaIP);
782     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
783     Builder.CreateStore(F->arg_begin(), PrivAI);
784 
785     Builder.restoreIP(CodeGenIP);
786     Value *PrivLoad =
787         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
788     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
789     Instruction *ThenTerm, *ElseTerm;
790     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
791                                   &ThenTerm, &ElseTerm);
792     return Error::success();
793   };
794 
795   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
796                     Value &Orig, Value &Inner,
797                     Value *&ReplacementValue) -> InsertPointTy {
798     ++NumPrivatizedVars;
799 
800     if (!isa<AllocaInst>(Orig)) {
801       EXPECT_EQ(&Orig, F->arg_begin());
802       ReplacementValue = &Inner;
803       return CodeGenIP;
804     }
805 
806     // Since the original value is an allocation, it has a pointer type and
807     // therefore no additional wrapping should happen.
808     EXPECT_EQ(&Orig, &Inner);
809 
810     // Trivial copy (=firstprivate).
811     Builder.restoreIP(AllocaIP);
812     Type *VTy = ReplacementValue->getType();
813     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
814     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
815     Builder.restoreIP(CodeGenIP);
816     Builder.CreateStore(V, ReplacementValue);
817     return CodeGenIP;
818   };
819 
820   auto FiniCB = [&](InsertPointTy CodeGenIP) {
821     ++NumFinalizationPoints;
822     return Error::success();
823   };
824 
825   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
826                                     F->getEntryBlock().getFirstInsertionPt());
827   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
828                        OMPBuilder.createParallel(
829                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
830                            nullptr, OMP_PROC_BIND_default, false));
831   EXPECT_EQ(NumBodiesGenerated, 1U);
832   EXPECT_EQ(NumPrivatizedVars, 1U);
833   EXPECT_EQ(NumFinalizationPoints, 1U);
834 
835   Builder.restoreIP(AfterIP);
836   Builder.CreateRetVoid();
837 
838   OMPBuilder.finalize();
839 
840   EXPECT_NE(PrivAI, nullptr);
841   Function *OutlinedFn = PrivAI->getFunction();
842   EXPECT_NE(F, OutlinedFn);
843   EXPECT_FALSE(verifyModule(*M, &errs()));
844   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
845   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
846   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
847 
848   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
849   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
850 
851   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
852   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
853   User *Usr = OutlinedFn->user_back();
854   ASSERT_TRUE(isa<CallInst>(Usr));
855   CallInst *ForkCI = dyn_cast<CallInst>(Usr);
856   ASSERT_NE(ForkCI, nullptr);
857 
858   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
859   EXPECT_EQ(ForkCI->arg_size(), 4U);
860   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
861   EXPECT_EQ(ForkCI->getArgOperand(1),
862             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
863   EXPECT_EQ(ForkCI, Usr);
864   Value *StoredValue =
865       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
866   EXPECT_EQ(StoredValue, F->arg_begin());
867 }
868 
869 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
870   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
871   OpenMPIRBuilder OMPBuilder(*M);
872   OMPBuilder.Config.IsTargetDevice = false;
873   OMPBuilder.initialize();
874   F->setName("func");
875   IRBuilder<> Builder(BB);
876 
877   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
878   Builder.CreateBr(EnterBB);
879   Builder.SetInsertPoint(EnterBB);
880   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
881 
882   unsigned NumInnerBodiesGenerated = 0;
883   unsigned NumOuterBodiesGenerated = 0;
884   unsigned NumFinalizationPoints = 0;
885 
886   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
887     ++NumInnerBodiesGenerated;
888     return Error::success();
889   };
890 
891   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
892                     Value &Orig, Value &Inner,
893                     Value *&ReplacementValue) -> InsertPointTy {
894     // Trivial copy (=firstprivate).
895     Builder.restoreIP(AllocaIP);
896     Type *VTy = ReplacementValue->getType();
897     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
898     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
899     Builder.restoreIP(CodeGenIP);
900     Builder.CreateStore(V, ReplacementValue);
901     return CodeGenIP;
902   };
903 
904   auto FiniCB = [&](InsertPointTy CodeGenIP) {
905     ++NumFinalizationPoints;
906     return Error::success();
907   };
908 
909   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
910     ++NumOuterBodiesGenerated;
911     Builder.restoreIP(CodeGenIP);
912     BasicBlock *CGBB = CodeGenIP.getBlock();
913     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
914     CGBB->getTerminator()->eraseFromParent();
915 
916     ASSERT_EXPECTED_INIT(
917         OpenMPIRBuilder::InsertPointTy, AfterIP,
918         OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
919                                   InnerBodyGenCB, PrivCB, FiniCB, nullptr,
920                                   nullptr, OMP_PROC_BIND_default, false));
921 
922     Builder.restoreIP(AfterIP);
923     Builder.CreateBr(NewBB);
924   };
925 
926   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
927                                     F->getEntryBlock().getFirstInsertionPt());
928   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
929                        OMPBuilder.createParallel(
930                            Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
931                            PrivCB, FiniCB, nullptr, nullptr,
932                            OMP_PROC_BIND_default, false));
933 
934   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
935   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
936   EXPECT_EQ(NumFinalizationPoints, 2U);
937 
938   Builder.restoreIP(AfterIP);
939   Builder.CreateRetVoid();
940 
941   OMPBuilder.finalize();
942 
943   EXPECT_EQ(M->size(), 5U);
944   for (Function &OutlinedFn : *M) {
945     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
946       continue;
947     EXPECT_FALSE(verifyModule(*M, &errs()));
948     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
949     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
950     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
951 
952     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
953     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
954 
955     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
956     User *Usr = OutlinedFn.user_back();
957     ASSERT_TRUE(isa<CallInst>(Usr));
958     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
959     ASSERT_NE(ForkCI, nullptr);
960 
961     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
962     EXPECT_EQ(ForkCI->arg_size(), 3U);
963     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
964     EXPECT_EQ(ForkCI->getArgOperand(1),
965               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
966     EXPECT_EQ(ForkCI, Usr);
967   }
968 }
969 
970 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
971   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
972   OpenMPIRBuilder OMPBuilder(*M);
973   OMPBuilder.Config.IsTargetDevice = false;
974   OMPBuilder.initialize();
975   F->setName("func");
976   IRBuilder<> Builder(BB);
977 
978   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
979   Builder.CreateBr(EnterBB);
980   Builder.SetInsertPoint(EnterBB);
981   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
982 
983   unsigned NumInnerBodiesGenerated = 0;
984   unsigned NumOuterBodiesGenerated = 0;
985   unsigned NumFinalizationPoints = 0;
986 
987   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
988     ++NumInnerBodiesGenerated;
989     return Error::success();
990   };
991 
992   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
993                     Value &Orig, Value &Inner,
994                     Value *&ReplacementValue) -> InsertPointTy {
995     // Trivial copy (=firstprivate).
996     Builder.restoreIP(AllocaIP);
997     Type *VTy = ReplacementValue->getType();
998     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
999     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1000     Builder.restoreIP(CodeGenIP);
1001     Builder.CreateStore(V, ReplacementValue);
1002     return CodeGenIP;
1003   };
1004 
1005   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1006     ++NumFinalizationPoints;
1007     return Error::success();
1008   };
1009 
1010   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1011     ++NumOuterBodiesGenerated;
1012     Builder.restoreIP(CodeGenIP);
1013     BasicBlock *CGBB = CodeGenIP.getBlock();
1014     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
1015     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
1016     CGBB->getTerminator()->eraseFromParent();
1017     ;
1018     NewBB1->getTerminator()->eraseFromParent();
1019     ;
1020 
1021     ASSERT_EXPECTED_INIT(
1022         OpenMPIRBuilder::InsertPointTy, AfterIP1,
1023         OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
1024                                   InnerBodyGenCB, PrivCB, FiniCB, nullptr,
1025                                   nullptr, OMP_PROC_BIND_default, false));
1026 
1027     Builder.restoreIP(AfterIP1);
1028     Builder.CreateBr(NewBB1);
1029 
1030     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2,
1031                          OMPBuilder.createParallel(
1032                              InsertPointTy(NewBB1, NewBB1->end()), AllocaIP,
1033                              InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1034                              OMP_PROC_BIND_default, false));
1035 
1036     Builder.restoreIP(AfterIP2);
1037     Builder.CreateBr(NewBB2);
1038   };
1039 
1040   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1041                                     F->getEntryBlock().getFirstInsertionPt());
1042   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1043                        OMPBuilder.createParallel(
1044                            Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
1045                            PrivCB, FiniCB, nullptr, nullptr,
1046                            OMP_PROC_BIND_default, false));
1047 
1048   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
1049   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
1050   EXPECT_EQ(NumFinalizationPoints, 3U);
1051 
1052   Builder.restoreIP(AfterIP);
1053   Builder.CreateRetVoid();
1054 
1055   OMPBuilder.finalize();
1056 
1057   EXPECT_EQ(M->size(), 6U);
1058   for (Function &OutlinedFn : *M) {
1059     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
1060       continue;
1061     EXPECT_FALSE(verifyModule(*M, &errs()));
1062     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
1063     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
1064     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
1065 
1066     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
1067     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
1068 
1069     unsigned NumAllocas = 0;
1070     for (Instruction &I : instructions(OutlinedFn))
1071       NumAllocas += isa<AllocaInst>(I);
1072     EXPECT_EQ(NumAllocas, 1U);
1073 
1074     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
1075     User *Usr = OutlinedFn.user_back();
1076     ASSERT_TRUE(isa<CallInst>(Usr));
1077     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
1078     ASSERT_NE(ForkCI, nullptr);
1079 
1080     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1081     EXPECT_EQ(ForkCI->arg_size(), 3U);
1082     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1083     EXPECT_EQ(ForkCI->getArgOperand(1),
1084               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
1085     EXPECT_EQ(ForkCI, Usr);
1086   }
1087 }
1088 
1089 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
1090   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1091   OpenMPIRBuilder OMPBuilder(*M);
1092   OMPBuilder.Config.IsTargetDevice = false;
1093   OMPBuilder.initialize();
1094   F->setName("func");
1095   IRBuilder<> Builder(BB);
1096 
1097   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1098   Builder.CreateBr(EnterBB);
1099   Builder.SetInsertPoint(EnterBB);
1100   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1101 
1102   AllocaInst *PrivAI = nullptr;
1103 
1104   unsigned NumBodiesGenerated = 0;
1105   unsigned NumPrivatizedVars = 0;
1106   unsigned NumFinalizationPoints = 0;
1107 
1108   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1109     ++NumBodiesGenerated;
1110 
1111     Builder.restoreIP(AllocaIP);
1112     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
1113     Builder.CreateStore(F->arg_begin(), PrivAI);
1114 
1115     Builder.restoreIP(CodeGenIP);
1116     Value *PrivLoad =
1117         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
1118     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
1119     Instruction *ThenTerm, *ElseTerm;
1120     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
1121                                   &ElseTerm);
1122     return Error::success();
1123   };
1124 
1125   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1126                     Value &Orig, Value &Inner,
1127                     Value *&ReplacementValue) -> InsertPointTy {
1128     ++NumPrivatizedVars;
1129 
1130     if (!isa<AllocaInst>(Orig)) {
1131       EXPECT_EQ(&Orig, F->arg_begin());
1132       ReplacementValue = &Inner;
1133       return CodeGenIP;
1134     }
1135 
1136     // Since the original value is an allocation, it has a pointer type and
1137     // therefore no additional wrapping should happen.
1138     EXPECT_EQ(&Orig, &Inner);
1139 
1140     // Trivial copy (=firstprivate).
1141     Builder.restoreIP(AllocaIP);
1142     Type *VTy = ReplacementValue->getType();
1143     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
1144     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1145     Builder.restoreIP(CodeGenIP);
1146     Builder.CreateStore(V, ReplacementValue);
1147     return CodeGenIP;
1148   };
1149 
1150   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1151     ++NumFinalizationPoints;
1152     // No destructors.
1153     return Error::success();
1154   };
1155 
1156   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1157                                     F->getEntryBlock().getFirstInsertionPt());
1158   ASSERT_EXPECTED_INIT(
1159       OpenMPIRBuilder::InsertPointTy, AfterIP,
1160       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1161                                 Builder.CreateIsNotNull(F->arg_begin()),
1162                                 nullptr, OMP_PROC_BIND_default, false));
1163 
1164   EXPECT_EQ(NumBodiesGenerated, 1U);
1165   EXPECT_EQ(NumPrivatizedVars, 1U);
1166   EXPECT_EQ(NumFinalizationPoints, 1U);
1167 
1168   Builder.restoreIP(AfterIP);
1169   Builder.CreateRetVoid();
1170   OMPBuilder.finalize();
1171 
1172   EXPECT_NE(PrivAI, nullptr);
1173   Function *OutlinedFn = PrivAI->getFunction();
1174   EXPECT_NE(F, OutlinedFn);
1175   EXPECT_FALSE(verifyModule(*M, &errs()));
1176 
1177   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
1178   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
1179 
1180   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
1181   ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
1182 
1183   CallInst *ForkCI = nullptr;
1184   for (User *Usr : OutlinedFn->users()) {
1185     ASSERT_TRUE(isa<CallInst>(Usr));
1186     ForkCI = cast<CallInst>(Usr);
1187   }
1188 
1189   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if");
1190   EXPECT_EQ(ForkCI->arg_size(), 5U);
1191   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1192   EXPECT_EQ(ForkCI->getArgOperand(1),
1193             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1194   EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx));
1195 }
1196 
1197 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1198   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1199   OpenMPIRBuilder OMPBuilder(*M);
1200   OMPBuilder.Config.IsTargetDevice = false;
1201   OMPBuilder.initialize();
1202   F->setName("func");
1203   IRBuilder<> Builder(BB);
1204 
1205   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1206   Builder.CreateBr(EnterBB);
1207   Builder.SetInsertPoint(EnterBB);
1208   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1209 
1210   unsigned NumBodiesGenerated = 0;
1211   unsigned NumPrivatizedVars = 0;
1212   unsigned NumFinalizationPoints = 0;
1213 
1214   CallInst *CheckedBarrier = nullptr;
1215   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1216     ++NumBodiesGenerated;
1217 
1218     Builder.restoreIP(CodeGenIP);
1219 
1220     // Create three barriers, two cancel barriers but only one checked.
1221     Function *CBFn, *BFn;
1222 
1223     ASSERT_EXPECTED_INIT(
1224         OpenMPIRBuilder::InsertPointTy, BarrierIP1,
1225         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1226     Builder.restoreIP(BarrierIP1);
1227 
1228     CBFn = M->getFunction("__kmpc_cancel_barrier");
1229     BFn = M->getFunction("__kmpc_barrier");
1230     ASSERT_NE(CBFn, nullptr);
1231     ASSERT_EQ(BFn, nullptr);
1232     ASSERT_EQ(CBFn->getNumUses(), 1U);
1233     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1234     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1235     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1236 
1237     ASSERT_EXPECTED_INIT(
1238         OpenMPIRBuilder::InsertPointTy, BarrierIP2,
1239         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1240     Builder.restoreIP(BarrierIP2);
1241     CBFn = M->getFunction("__kmpc_cancel_barrier");
1242     BFn = M->getFunction("__kmpc_barrier");
1243     ASSERT_NE(CBFn, nullptr);
1244     ASSERT_NE(BFn, nullptr);
1245     ASSERT_EQ(CBFn->getNumUses(), 1U);
1246     ASSERT_EQ(BFn->getNumUses(), 1U);
1247     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1248     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1249 
1250     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, BarrierIP3,
1251                          OMPBuilder.createBarrier(Builder.saveIP(),
1252                                                   OMPD_parallel, false, false));
1253     Builder.restoreIP(BarrierIP3);
1254     ASSERT_EQ(CBFn->getNumUses(), 2U);
1255     ASSERT_EQ(BFn->getNumUses(), 1U);
1256     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1257     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1258     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1259   };
1260 
1261   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1262                     Value *&) -> InsertPointTy {
1263     ++NumPrivatizedVars;
1264     llvm_unreachable("No privatization callback call expected!");
1265   };
1266 
1267   FunctionType *FakeDestructorTy =
1268       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1269                         /*isVarArg=*/false);
1270   auto *FakeDestructor = Function::Create(
1271       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1272 
1273   auto FiniCB = [&](InsertPointTy IP) {
1274     ++NumFinalizationPoints;
1275     Builder.restoreIP(IP);
1276     Builder.CreateCall(FakeDestructor,
1277                        {Builder.getInt32(NumFinalizationPoints)});
1278     return Error::success();
1279   };
1280 
1281   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1282                                     F->getEntryBlock().getFirstInsertionPt());
1283   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1284                        OMPBuilder.createParallel(
1285                            Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB,
1286                            FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
1287                            nullptr, OMP_PROC_BIND_default, true));
1288 
1289   EXPECT_EQ(NumBodiesGenerated, 1U);
1290   EXPECT_EQ(NumPrivatizedVars, 0U);
1291   EXPECT_EQ(NumFinalizationPoints, 2U);
1292   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1293 
1294   Builder.restoreIP(AfterIP);
1295   Builder.CreateRetVoid();
1296   OMPBuilder.finalize();
1297 
1298   EXPECT_FALSE(verifyModule(*M, &errs()));
1299 
1300   BasicBlock *ExitBB = nullptr;
1301   for (const User *Usr : FakeDestructor->users()) {
1302     const CallInst *CI = dyn_cast<CallInst>(Usr);
1303     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1304     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1305     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1306     if (ExitBB)
1307       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1308     else
1309       ExitBB = CI->getNextNode()->getSuccessor(0);
1310     ASSERT_EQ(ExitBB->size(), 1U);
1311     if (!isa<ReturnInst>(ExitBB->front())) {
1312       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1313       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1314       ASSERT_TRUE(isa<ReturnInst>(
1315           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1316     }
1317   }
1318 }
1319 
1320 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1321   OpenMPIRBuilder OMPBuilder(*M);
1322   OMPBuilder.Config.IsTargetDevice = false;
1323   OMPBuilder.initialize();
1324   F->setName("func");
1325   IRBuilder<> Builder(BB);
1326   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1327   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1328 
1329   Type *I32Ty = Type::getInt32Ty(M->getContext());
1330   Type *PtrTy = PointerType::get(M->getContext(), 0);
1331   Type *StructTy = StructType::get(I32Ty, PtrTy);
1332   Type *VoidTy = Type::getVoidTy(M->getContext());
1333   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1334   FunctionCallee TakeI32Func =
1335       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1336   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy);
1337   FunctionCallee TakeI32PtrFunc =
1338       M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy);
1339   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1340   FunctionCallee TakeStructFunc =
1341       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1342   FunctionCallee RetStructPtrFunc =
1343       M->getOrInsertFunction("ret_structptr", PtrTy);
1344   FunctionCallee TakeStructPtrFunc =
1345       M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy);
1346   Value *I32Val = Builder.CreateCall(RetI32Func);
1347   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1348   Value *StructVal = Builder.CreateCall(RetStructFunc);
1349   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1350 
1351   Instruction *Internal;
1352   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1353     IRBuilder<>::InsertPointGuard Guard(Builder);
1354     Builder.restoreIP(CodeGenIP);
1355     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1356     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1357     Builder.CreateCall(TakeStructFunc, StructVal);
1358     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1359     return Error::success();
1360   };
1361   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1362                     Value &Inner, Value *&ReplacementValue) {
1363     ReplacementValue = &Inner;
1364     return CodeGenIP;
1365   };
1366   auto FiniCB = [](InsertPointTy) { return Error::success(); };
1367 
1368   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1369                                     F->getEntryBlock().getFirstInsertionPt());
1370   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
1371                        OMPBuilder.createParallel(
1372                            Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
1373                            nullptr, OMP_PROC_BIND_default, false));
1374   Builder.restoreIP(AfterIP);
1375   Builder.CreateRetVoid();
1376 
1377   OMPBuilder.finalize();
1378 
1379   EXPECT_FALSE(verifyModule(*M, &errs()));
1380   Function *OutlinedFn = Internal->getFunction();
1381 
1382   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1383   EXPECT_TRUE(Arg2Type->isPointerTy());
1384 }
1385 
1386 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1387   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1388   OpenMPIRBuilder OMPBuilder(*M);
1389   OMPBuilder.initialize();
1390   IRBuilder<> Builder(BB);
1391   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1392   Value *TripCount = F->getArg(0);
1393 
1394   unsigned NumBodiesGenerated = 0;
1395   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1396     NumBodiesGenerated += 1;
1397 
1398     Builder.restoreIP(CodeGenIP);
1399 
1400     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1401     Instruction *ThenTerm, *ElseTerm;
1402     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1403                                   &ThenTerm, &ElseTerm);
1404     return Error::success();
1405   };
1406 
1407   ASSERT_EXPECTED_INIT(
1408       CanonicalLoopInfo *, Loop,
1409       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount));
1410 
1411   Builder.restoreIP(Loop->getAfterIP());
1412   ReturnInst *RetInst = Builder.CreateRetVoid();
1413   OMPBuilder.finalize();
1414 
1415   Loop->assertOK();
1416   EXPECT_FALSE(verifyModule(*M, &errs()));
1417 
1418   EXPECT_EQ(NumBodiesGenerated, 1U);
1419 
1420   // Verify control flow structure (in addition to Loop->assertOK()).
1421   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1422   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1423 
1424   Instruction *IndVar = Loop->getIndVar();
1425   EXPECT_TRUE(isa<PHINode>(IndVar));
1426   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1427   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1428 
1429   EXPECT_EQ(Loop->getTripCount(), TripCount);
1430 
1431   BasicBlock *Body = Loop->getBody();
1432   Instruction *CmpInst = &Body->front();
1433   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1434   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1435 
1436   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1437   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1438     return SuccBB->getSingleSuccessor() == LatchPred;
1439   }));
1440 
1441   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1442 }
1443 
1444 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1445   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1446   OpenMPIRBuilder OMPBuilder(*M);
1447   OMPBuilder.initialize();
1448   IRBuilder<> Builder(BB);
1449 
1450   // Check the trip count is computed correctly. We generate the canonical loop
1451   // but rely on the IRBuilder's constant folder to compute the final result
1452   // since all inputs are constant. To verify overflow situations, limit the
1453   // trip count / loop counter widths to 16 bits.
1454   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1455                            bool IsSigned, bool InclusiveStop) -> int64_t {
1456     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1457     Type *LCTy = Type::getInt16Ty(Ctx);
1458     Value *StartVal = ConstantInt::get(LCTy, Start);
1459     Value *StopVal = ConstantInt::get(LCTy, Stop);
1460     Value *StepVal = ConstantInt::get(LCTy, Step);
1461     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1462       return Error::success();
1463     };
1464     ASSERT_EXPECTED_INIT_RETURN(
1465         CanonicalLoopInfo *, Loop,
1466         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1467                                        StepVal, IsSigned, InclusiveStop),
1468         -1);
1469     Loop->assertOK();
1470     Builder.restoreIP(Loop->getAfterIP());
1471     Value *TripCount = Loop->getTripCount();
1472     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1473   };
1474 
1475   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1476   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1477   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1478   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1479   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1480   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1481   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1482   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1483   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1484   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1485   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1486   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1487   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1488 
1489   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1490   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1491   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1492   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1493   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1494   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1495 
1496   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1497   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1498   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1499   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1500   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1501   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1502   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1503 
1504   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1505   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1506   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1507   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1508   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1509   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1510   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1511   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1512   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1513   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1514   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1515 
1516   // Finalize the function and verify it.
1517   Builder.CreateRetVoid();
1518   OMPBuilder.finalize();
1519   EXPECT_FALSE(verifyModule(*M, &errs()));
1520 }
1521 
1522 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1523   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1524   OpenMPIRBuilder OMPBuilder(*M);
1525   OMPBuilder.initialize();
1526   F->setName("func");
1527 
1528   IRBuilder<> Builder(BB);
1529 
1530   Type *LCTy = F->getArg(0)->getType();
1531   Constant *One = ConstantInt::get(LCTy, 1);
1532   Constant *Two = ConstantInt::get(LCTy, 2);
1533   Value *OuterTripCount =
1534       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1535   Value *InnerTripCount =
1536       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1537 
1538   // Fix an insertion point for ComputeIP.
1539   BasicBlock *LoopNextEnter =
1540       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1541                          Builder.GetInsertBlock()->getNextNode());
1542   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1543   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1544 
1545   Builder.SetInsertPoint(LoopNextEnter);
1546   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1547 
1548   CanonicalLoopInfo *InnerLoop = nullptr;
1549   CallInst *InbetweenLead = nullptr;
1550   CallInst *InbetweenTrail = nullptr;
1551   CallInst *Call = nullptr;
1552   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1553     Builder.restoreIP(OuterCodeGenIP);
1554     InbetweenLead =
1555         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1556 
1557     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1558                                   Value *InnerLC) {
1559       Builder.restoreIP(InnerCodeGenIP);
1560       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1561       return Error::success();
1562     };
1563     ASSERT_EXPECTED_INIT(
1564         CanonicalLoopInfo *, InnerLoopResult,
1565         OMPBuilder.createCanonicalLoop(Builder.saveIP(), InnerLoopBodyGenCB,
1566                                        InnerTripCount, "inner"));
1567     InnerLoop = InnerLoopResult;
1568 
1569     Builder.restoreIP(InnerLoop->getAfterIP());
1570     InbetweenTrail =
1571         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1572   };
1573   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop,
1574                        OMPBuilder.createCanonicalLoop(
1575                            OuterLoc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB),
1576                            OuterTripCount, "outer"));
1577 
1578   // Finish the function.
1579   Builder.restoreIP(OuterLoop->getAfterIP());
1580   Builder.CreateRetVoid();
1581 
1582   CanonicalLoopInfo *Collapsed =
1583       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1584 
1585   OMPBuilder.finalize();
1586   EXPECT_FALSE(verifyModule(*M, &errs()));
1587 
1588   // Verify control flow and BB order.
1589   BasicBlock *RefOrder[] = {
1590       Collapsed->getPreheader(),   Collapsed->getHeader(),
1591       Collapsed->getCond(),        Collapsed->getBody(),
1592       InbetweenLead->getParent(),  Call->getParent(),
1593       InbetweenTrail->getParent(), Collapsed->getLatch(),
1594       Collapsed->getExit(),        Collapsed->getAfter(),
1595   };
1596   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1597   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1598 
1599   // Verify the total trip count.
1600   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1601   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1602   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1603 
1604   // Verify the changed indvar.
1605   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1606   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1607   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1608   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1609   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1610 
1611   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1612   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1613   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1614   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1615   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1616 
1617   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1618   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1619 }
1620 
1621 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1622   OpenMPIRBuilder OMPBuilder(*M);
1623   CallInst *Call;
1624   BasicBlock *BodyCode;
1625   CanonicalLoopInfo *Loop =
1626       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1627   ASSERT_NE(Loop, nullptr);
1628 
1629   Instruction *OrigIndVar = Loop->getIndVar();
1630   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1631 
1632   // Tile the loop.
1633   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1634   std::vector<CanonicalLoopInfo *> GenLoops =
1635       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1636 
1637   OMPBuilder.finalize();
1638   EXPECT_FALSE(verifyModule(*M, &errs()));
1639 
1640   EXPECT_EQ(GenLoops.size(), 2u);
1641   CanonicalLoopInfo *Floor = GenLoops[0];
1642   CanonicalLoopInfo *Tile = GenLoops[1];
1643 
1644   BasicBlock *RefOrder[] = {
1645       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1646       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1647       Tile->getCond(),       Tile->getBody(),      BodyCode,
1648       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1649       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1650   };
1651   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1652   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1653 
1654   // Check the induction variable.
1655   EXPECT_EQ(Call->getParent(), BodyCode);
1656   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1657   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1658   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1659   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1660   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1661   EXPECT_EQ(Scale->getOperand(0), TileSize);
1662   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1663 }
1664 
1665 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1666   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1667   OpenMPIRBuilder OMPBuilder(*M);
1668   OMPBuilder.initialize();
1669   F->setName("func");
1670 
1671   IRBuilder<> Builder(BB);
1672   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1673   Value *TripCount = F->getArg(0);
1674   Type *LCTy = TripCount->getType();
1675 
1676   BasicBlock *BodyCode = nullptr;
1677   CanonicalLoopInfo *InnerLoop = nullptr;
1678   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1679                                 llvm::Value *OuterLC) {
1680     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1681                                   llvm::Value *InnerLC) {
1682       Builder.restoreIP(InnerCodeGenIP);
1683       BodyCode = Builder.GetInsertBlock();
1684 
1685       // Add something that consumes the induction variables to the body.
1686       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1687       return Error::success();
1688     };
1689     ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, InnerLoopResult,
1690                          OMPBuilder.createCanonicalLoop(OuterCodeGenIP,
1691                                                         InnerLoopBodyGenCB,
1692                                                         TripCount, "inner"));
1693     InnerLoop = InnerLoopResult;
1694   };
1695   ASSERT_EXPECTED_INIT(
1696       CanonicalLoopInfo *, OuterLoop,
1697       OMPBuilder.createCanonicalLoop(
1698           Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB), TripCount, "outer"));
1699 
1700   // Finalize the function.
1701   Builder.restoreIP(OuterLoop->getAfterIP());
1702   Builder.CreateRetVoid();
1703 
1704   // Tile to loop nest.
1705   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1706   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1707   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1708       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1709 
1710   OMPBuilder.finalize();
1711   EXPECT_FALSE(verifyModule(*M, &errs()));
1712 
1713   EXPECT_EQ(GenLoops.size(), 4u);
1714   CanonicalLoopInfo *Floor1 = GenLoops[0];
1715   CanonicalLoopInfo *Floor2 = GenLoops[1];
1716   CanonicalLoopInfo *Tile1 = GenLoops[2];
1717   CanonicalLoopInfo *Tile2 = GenLoops[3];
1718 
1719   BasicBlock *RefOrder[] = {
1720       Floor1->getPreheader(),
1721       Floor1->getHeader(),
1722       Floor1->getCond(),
1723       Floor1->getBody(),
1724       Floor2->getPreheader(),
1725       Floor2->getHeader(),
1726       Floor2->getCond(),
1727       Floor2->getBody(),
1728       Tile1->getPreheader(),
1729       Tile1->getHeader(),
1730       Tile1->getCond(),
1731       Tile1->getBody(),
1732       Tile2->getPreheader(),
1733       Tile2->getHeader(),
1734       Tile2->getCond(),
1735       Tile2->getBody(),
1736       BodyCode,
1737       Tile2->getLatch(),
1738       Tile2->getExit(),
1739       Tile2->getAfter(),
1740       Tile1->getLatch(),
1741       Tile1->getExit(),
1742       Tile1->getAfter(),
1743       Floor2->getLatch(),
1744       Floor2->getExit(),
1745       Floor2->getAfter(),
1746       Floor1->getLatch(),
1747       Floor1->getExit(),
1748       Floor1->getAfter(),
1749   };
1750   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1751   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1752 }
1753 
1754 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1755   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1756   OpenMPIRBuilder OMPBuilder(*M);
1757   OMPBuilder.initialize();
1758   F->setName("func");
1759 
1760   IRBuilder<> Builder(BB);
1761   Value *TripCount = F->getArg(0);
1762   Type *LCTy = TripCount->getType();
1763 
1764   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1765   Value *OuterStopVal = TripCount;
1766   Value *OuterStep = ConstantInt::get(LCTy, 5);
1767   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1768   Value *InnerStopVal = TripCount;
1769   Value *InnerStep = ConstantInt::get(LCTy, 3);
1770 
1771   // Fix an insertion point for ComputeIP.
1772   BasicBlock *LoopNextEnter =
1773       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1774                          Builder.GetInsertBlock()->getNextNode());
1775   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1776   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1777 
1778   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1779   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1780 
1781   BasicBlock *BodyCode = nullptr;
1782   CanonicalLoopInfo *InnerLoop = nullptr;
1783   CallInst *Call = nullptr;
1784   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1785                                 llvm::Value *OuterLC) {
1786     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1787                                   llvm::Value *InnerLC) {
1788       Builder.restoreIP(InnerCodeGenIP);
1789       BodyCode = Builder.GetInsertBlock();
1790 
1791       // Add something that consumes the induction variable to the body.
1792       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1793       return Error::success();
1794     };
1795     ASSERT_EXPECTED_INIT(
1796         CanonicalLoopInfo *, InnerLoopResult,
1797         OMPBuilder.createCanonicalLoop(OuterCodeGenIP, InnerLoopBodyGenCB,
1798                                        InnerStartVal, InnerStopVal, InnerStep,
1799                                        false, false, ComputeIP, "inner"));
1800     InnerLoop = InnerLoopResult;
1801   };
1802   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, OuterLoop,
1803                        OMPBuilder.createCanonicalLoop(
1804                            Loc, LOOP_BODYGENCB_WRAPPER(OuterLoopBodyGenCB),
1805                            OuterStartVal, OuterStopVal, OuterStep, false, false,
1806                            ComputeIP, "outer"));
1807 
1808   // Finalize the function
1809   Builder.restoreIP(OuterLoop->getAfterIP());
1810   Builder.CreateRetVoid();
1811 
1812   // Tile the loop nest.
1813   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1814   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1815   std::vector<CanonicalLoopInfo *> GenLoops =
1816       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1817 
1818   OMPBuilder.finalize();
1819   EXPECT_FALSE(verifyModule(*M, &errs()));
1820 
1821   EXPECT_EQ(GenLoops.size(), 4u);
1822   CanonicalLoopInfo *Floor0 = GenLoops[0];
1823   CanonicalLoopInfo *Floor1 = GenLoops[1];
1824   CanonicalLoopInfo *Tile0 = GenLoops[2];
1825   CanonicalLoopInfo *Tile1 = GenLoops[3];
1826 
1827   BasicBlock *RefOrder[] = {
1828       Floor0->getPreheader(),
1829       Floor0->getHeader(),
1830       Floor0->getCond(),
1831       Floor0->getBody(),
1832       Floor1->getPreheader(),
1833       Floor1->getHeader(),
1834       Floor1->getCond(),
1835       Floor1->getBody(),
1836       Tile0->getPreheader(),
1837       Tile0->getHeader(),
1838       Tile0->getCond(),
1839       Tile0->getBody(),
1840       Tile1->getPreheader(),
1841       Tile1->getHeader(),
1842       Tile1->getCond(),
1843       Tile1->getBody(),
1844       BodyCode,
1845       Tile1->getLatch(),
1846       Tile1->getExit(),
1847       Tile1->getAfter(),
1848       Tile0->getLatch(),
1849       Tile0->getExit(),
1850       Tile0->getAfter(),
1851       Floor1->getLatch(),
1852       Floor1->getExit(),
1853       Floor1->getAfter(),
1854       Floor0->getLatch(),
1855       Floor0->getExit(),
1856       Floor0->getAfter(),
1857   };
1858   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1859   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1860 
1861   EXPECT_EQ(Call->getParent(), BodyCode);
1862 
1863   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1864   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1865   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1866   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1867   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1868   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1869   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1870   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1871   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1872   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1873   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1874 
1875   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1876   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1877   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1878   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1879   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1880   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1881   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1882   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1883   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1884   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1885   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1886   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1887   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1888 }
1889 
1890 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1891   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1892   OpenMPIRBuilder OMPBuilder(*M);
1893   OMPBuilder.initialize();
1894   IRBuilder<> Builder(BB);
1895 
1896   // Create a loop, tile it, and extract its trip count. All input values are
1897   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1898   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1899   // do the same for the tile loop.
1900   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1901                            bool IsSigned, bool InclusiveStop,
1902                            int64_t TileSize) -> uint64_t {
1903     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1904     Type *LCTy = Type::getInt16Ty(Ctx);
1905     Value *StartVal = ConstantInt::get(LCTy, Start);
1906     Value *StopVal = ConstantInt::get(LCTy, Stop);
1907     Value *StepVal = ConstantInt::get(LCTy, Step);
1908 
1909     // Generate a loop.
1910     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1911       return Error::success();
1912     };
1913     ASSERT_EXPECTED_INIT_RETURN(
1914         CanonicalLoopInfo *, Loop,
1915         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1916                                        StepVal, IsSigned, InclusiveStop),
1917         (unsigned)-1);
1918     InsertPointTy AfterIP = Loop->getAfterIP();
1919 
1920     // Tile the loop.
1921     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1922     std::vector<CanonicalLoopInfo *> GenLoops =
1923         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1924 
1925     // Set the insertion pointer to after loop, where the next loop will be
1926     // emitted.
1927     Builder.restoreIP(AfterIP);
1928 
1929     // Extract the trip count.
1930     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1931     Value *FloorTripCount = FloorLoop->getTripCount();
1932     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1933   };
1934 
1935   // Empty iteration domain.
1936   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1937   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1938   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1939   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1940   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1941 
1942   // Only complete tiles.
1943   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1944   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1945   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1946   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1947   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1948   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1949 
1950   // Only a partial tile.
1951   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1952   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1953   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1954   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1955   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1956 
1957   // Complete and partial tiles.
1958   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1959   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1960   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1961   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1962   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1963 
1964   // Close to 16-bit integer range.
1965   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1966   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1967   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1968   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1969   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1970   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1971   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1972 
1973   // Finalize the function.
1974   Builder.CreateRetVoid();
1975   OMPBuilder.finalize();
1976 
1977   EXPECT_FALSE(verifyModule(*M, &errs()));
1978 }
1979 
1980 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1981   OpenMPIRBuilder OMPBuilder(*M);
1982   MapVector<Value *, Value *> AlignedVars;
1983   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1984   ASSERT_NE(CLI, nullptr);
1985 
1986   // Simd-ize the loop.
1987   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1988                        OrderKind::OMP_ORDER_unknown,
1989                        /* Simdlen */ nullptr,
1990                        /* Safelen */ nullptr);
1991 
1992   OMPBuilder.finalize();
1993   EXPECT_FALSE(verifyModule(*M, &errs()));
1994 
1995   PassBuilder PB;
1996   FunctionAnalysisManager FAM;
1997   PB.registerFunctionAnalyses(FAM);
1998   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1999 
2000   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2001   EXPECT_EQ(TopLvl.size(), 1u);
2002 
2003   Loop *L = TopLvl.front();
2004   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2005   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2006 
2007   // Check for llvm.access.group metadata attached to the printf
2008   // function in the loop body.
2009   BasicBlock *LoopBody = CLI->getBody();
2010   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2011     return I.getMetadata("llvm.access.group") != nullptr;
2012   }));
2013 }
2014 
2015 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
2016   OpenMPIRBuilder OMPBuilder(*M);
2017   IRBuilder<> Builder(BB);
2018   const int AlignmentValue = 32;
2019   llvm::BasicBlock *sourceBlock = Builder.GetInsertBlock();
2020   AllocaInst *Alloc1 =
2021       Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1));
2022   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2023   MapVector<Value *, Value *> AlignedVars;
2024   AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
2025 
2026   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2027   ASSERT_NE(CLI, nullptr);
2028 
2029   // Simd-ize the loop.
2030   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2031                        OrderKind::OMP_ORDER_unknown,
2032                        /* Simdlen */ nullptr,
2033                        /* Safelen */ nullptr);
2034 
2035   OMPBuilder.finalize();
2036   EXPECT_FALSE(verifyModule(*M, &errs()));
2037 
2038   PassBuilder PB;
2039   FunctionAnalysisManager FAM;
2040   PB.registerFunctionAnalyses(FAM);
2041   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2042 
2043   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2044   EXPECT_EQ(TopLvl.size(), 1u);
2045 
2046   Loop *L = TopLvl.front();
2047   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2048   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2049 
2050   // Check for llvm.access.group metadata attached to the printf
2051   // function in the loop body.
2052   BasicBlock *LoopBody = CLI->getBody();
2053   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2054     return I.getMetadata("llvm.access.group") != nullptr;
2055   }));
2056 
2057   // Check if number of assumption instructions is equal to number of aligned
2058   // variables
2059   size_t NumAssummptionCallsInPreheader =
2060       count_if(*sourceBlock, [](Instruction &I) { return isa<AssumeInst>(I); });
2061   EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
2062 
2063   // Check if variables are correctly aligned
2064   for (Instruction &Instr : *sourceBlock) {
2065     if (!isa<AssumeInst>(Instr))
2066       continue;
2067     AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
2068     if (AssumeInstruction->getNumTotalBundleOperands()) {
2069       auto Bundle = AssumeInstruction->getOperandBundleAt(0);
2070       if (Bundle.getTagName() == "align") {
2071         EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
2072         auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
2073         EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
2074       }
2075     }
2076   }
2077 }
2078 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
2079   OpenMPIRBuilder OMPBuilder(*M);
2080   MapVector<Value *, Value *> AlignedVars;
2081   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2082   ASSERT_NE(CLI, nullptr);
2083 
2084   // Simd-ize the loop.
2085   OMPBuilder.applySimd(CLI, AlignedVars,
2086                        /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2087                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2088                        /* Safelen */ nullptr);
2089 
2090   OMPBuilder.finalize();
2091   EXPECT_FALSE(verifyModule(*M, &errs()));
2092 
2093   PassBuilder PB;
2094   FunctionAnalysisManager FAM;
2095   PB.registerFunctionAnalyses(FAM);
2096   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2097 
2098   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2099   EXPECT_EQ(TopLvl.size(), 1u);
2100 
2101   Loop *L = TopLvl.front();
2102   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2103   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2104   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2105 
2106   // Check for llvm.access.group metadata attached to the printf
2107   // function in the loop body.
2108   BasicBlock *LoopBody = CLI->getBody();
2109   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2110     return I.getMetadata("llvm.access.group") != nullptr;
2111   }));
2112 }
2113 
2114 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
2115   OpenMPIRBuilder OMPBuilder(*M);
2116   MapVector<Value *, Value *> AlignedVars;
2117 
2118   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2119   ASSERT_NE(CLI, nullptr);
2120 
2121   // Simd-ize the loop.
2122   OMPBuilder.applySimd(
2123       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
2124       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2125 
2126   OMPBuilder.finalize();
2127   EXPECT_FALSE(verifyModule(*M, &errs()));
2128 
2129   PassBuilder PB;
2130   FunctionAnalysisManager FAM;
2131   PB.registerFunctionAnalyses(FAM);
2132   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2133 
2134   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2135   EXPECT_EQ(TopLvl.size(), 1u);
2136 
2137   Loop *L = TopLvl.front();
2138   // Parallel metadata shoudl be attached because of presence of
2139   // the order(concurrent) OpenMP clause
2140   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2141   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2142   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2143 
2144   // Check for llvm.access.group metadata attached to the printf
2145   // function in the loop body.
2146   BasicBlock *LoopBody = CLI->getBody();
2147   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2148     return I.getMetadata("llvm.access.group") != nullptr;
2149   }));
2150 }
2151 
2152 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
2153   OpenMPIRBuilder OMPBuilder(*M);
2154   MapVector<Value *, Value *> AlignedVars;
2155 
2156   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2157   ASSERT_NE(CLI, nullptr);
2158 
2159   OMPBuilder.applySimd(
2160       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2161       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2162 
2163   OMPBuilder.finalize();
2164   EXPECT_FALSE(verifyModule(*M, &errs()));
2165 
2166   PassBuilder PB;
2167   FunctionAnalysisManager FAM;
2168   PB.registerFunctionAnalyses(FAM);
2169   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2170 
2171   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2172   EXPECT_EQ(TopLvl.size(), 1u);
2173 
2174   Loop *L = TopLvl.front();
2175   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2176   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2177   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2178 
2179   // Check for llvm.access.group metadata attached to the printf
2180   // function in the loop body.
2181   BasicBlock *LoopBody = CLI->getBody();
2182   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2183     return I.getMetadata("llvm.access.group") != nullptr;
2184   }));
2185 }
2186 
2187 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
2188   OpenMPIRBuilder OMPBuilder(*M);
2189   MapVector<Value *, Value *> AlignedVars;
2190 
2191   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2192   ASSERT_NE(CLI, nullptr);
2193 
2194   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2195                        OrderKind::OMP_ORDER_unknown,
2196                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
2197                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2198 
2199   OMPBuilder.finalize();
2200   EXPECT_FALSE(verifyModule(*M, &errs()));
2201 
2202   PassBuilder PB;
2203   FunctionAnalysisManager FAM;
2204   PB.registerFunctionAnalyses(FAM);
2205   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2206 
2207   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2208   EXPECT_EQ(TopLvl.size(), 1u);
2209 
2210   Loop *L = TopLvl.front();
2211   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2212   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2213   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2);
2214 
2215   // Check for llvm.access.group metadata attached to the printf
2216   // function in the loop body.
2217   BasicBlock *LoopBody = CLI->getBody();
2218   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2219     return I.getMetadata("llvm.access.group") != nullptr;
2220   }));
2221 }
2222 
2223 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) {
2224   OpenMPIRBuilder OMPBuilder(*M);
2225   IRBuilder<> Builder(BB);
2226   MapVector<Value *, Value *> AlignedVars;
2227   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
2228   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
2229 
2230   // Generation of if condition
2231   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1);
2232   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2);
2233   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2234   LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2);
2235 
2236   Value *IfCmp = Builder.CreateICmpNE(Load1, Load2);
2237 
2238   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2239   ASSERT_NE(CLI, nullptr);
2240 
2241   // Simd-ize the loop with if condition
2242   OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
2243                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2244                        /* Safelen */ nullptr);
2245 
2246   OMPBuilder.finalize();
2247   EXPECT_FALSE(verifyModule(*M, &errs()));
2248 
2249   PassBuilder PB;
2250   FunctionAnalysisManager FAM;
2251   PB.registerFunctionAnalyses(FAM);
2252   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2253 
2254   // Check if there are two loops (one with enabled vectorization)
2255   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2256   EXPECT_EQ(TopLvl.size(), 2u);
2257 
2258   Loop *L = TopLvl[0];
2259   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2260   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2261   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2262 
2263   // The second loop should have disabled vectorization
2264   L = TopLvl[1];
2265   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2266   EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2267   // Check for llvm.access.group metadata attached to the printf
2268   // function in the loop body.
2269   BasicBlock *LoopBody = CLI->getBody();
2270   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2271     return I.getMetadata("llvm.access.group") != nullptr;
2272   }));
2273 }
2274 
2275 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
2276   OpenMPIRBuilder OMPBuilder(*M);
2277 
2278   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2279   ASSERT_NE(CLI, nullptr);
2280 
2281   // Unroll the loop.
2282   OMPBuilder.unrollLoopFull(DL, CLI);
2283 
2284   OMPBuilder.finalize();
2285   EXPECT_FALSE(verifyModule(*M, &errs()));
2286 
2287   PassBuilder PB;
2288   FunctionAnalysisManager FAM;
2289   PB.registerFunctionAnalyses(FAM);
2290   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2291 
2292   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2293   EXPECT_EQ(TopLvl.size(), 1u);
2294 
2295   Loop *L = TopLvl.front();
2296   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2297   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
2298 }
2299 
2300 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
2301   OpenMPIRBuilder OMPBuilder(*M);
2302   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2303   ASSERT_NE(CLI, nullptr);
2304 
2305   // Unroll the loop.
2306   CanonicalLoopInfo *UnrolledLoop = nullptr;
2307   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
2308   ASSERT_NE(UnrolledLoop, nullptr);
2309 
2310   OMPBuilder.finalize();
2311   EXPECT_FALSE(verifyModule(*M, &errs()));
2312   UnrolledLoop->assertOK();
2313 
2314   PassBuilder PB;
2315   FunctionAnalysisManager FAM;
2316   PB.registerFunctionAnalyses(FAM);
2317   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2318 
2319   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2320   EXPECT_EQ(TopLvl.size(), 1u);
2321   Loop *Outer = TopLvl.front();
2322   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
2323   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
2324   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
2325   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
2326 
2327   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
2328   Loop *Inner = Outer->getSubLoops().front();
2329 
2330   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
2331   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
2332 }
2333 
2334 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
2335   OpenMPIRBuilder OMPBuilder(*M);
2336 
2337   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2338   ASSERT_NE(CLI, nullptr);
2339 
2340   // Unroll the loop.
2341   OMPBuilder.unrollLoopHeuristic(DL, CLI);
2342 
2343   OMPBuilder.finalize();
2344   EXPECT_FALSE(verifyModule(*M, &errs()));
2345 
2346   PassBuilder PB;
2347   FunctionAnalysisManager FAM;
2348   PB.registerFunctionAnalyses(FAM);
2349   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2350 
2351   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2352   EXPECT_EQ(TopLvl.size(), 1u);
2353 
2354   Loop *L = TopLvl.front();
2355   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2356 }
2357 
2358 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
2359   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2360   std::string oldDLStr = M->getDataLayoutStr();
2361   M->setDataLayout(
2362       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
2363       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
2364       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
2365   OpenMPIRBuilder OMPBuilder(*M);
2366   OMPBuilder.Config.IsTargetDevice = true;
2367   OMPBuilder.initialize();
2368   IRBuilder<> Builder(BB);
2369   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2370   InsertPointTy AllocaIP = Builder.saveIP();
2371 
2372   Type *LCTy = Type::getInt32Ty(Ctx);
2373   Value *StartVal = ConstantInt::get(LCTy, 10);
2374   Value *StopVal = ConstantInt::get(LCTy, 52);
2375   Value *StepVal = ConstantInt::get(LCTy, 2);
2376   auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); };
2377 
2378   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2379                        OMPBuilder.createCanonicalLoop(Loc, LoopBodyGen,
2380                                                       StartVal, StopVal,
2381                                                       StepVal, false, false));
2382   BasicBlock *Preheader = CLI->getPreheader();
2383   Value *TripCount = CLI->getTripCount();
2384 
2385   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2386 
2387   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2388                        OMPBuilder.applyWorkshareLoop(
2389                            DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static,
2390                            nullptr, false, false, false, false,
2391                            WorksharingLoopType::ForStaticLoop));
2392   Builder.restoreIP(AfterIP);
2393   Builder.CreateRetVoid();
2394 
2395   OMPBuilder.finalize();
2396   EXPECT_FALSE(verifyModule(*M, &errs()));
2397 
2398   CallInst *WorkshareLoopRuntimeCall = nullptr;
2399   int WorkshareLoopRuntimeCallCnt = 0;
2400   for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) {
2401     CallInst *Call = dyn_cast<CallInst>(Inst);
2402     if (!Call)
2403       continue;
2404     if (!Call->getCalledFunction())
2405       continue;
2406 
2407     if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") {
2408       WorkshareLoopRuntimeCall = Call;
2409       WorkshareLoopRuntimeCallCnt++;
2410     }
2411   }
2412   EXPECT_NE(WorkshareLoopRuntimeCall, nullptr);
2413   // Verify that there is only one call to workshare loop function
2414   EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1);
2415   // Check that pointer to loop body function is passed as second argument
2416   Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1);
2417   EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType());
2418   Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg);
2419   EXPECT_NE(ArgFunction, nullptr);
2420   EXPECT_EQ(ArgFunction->arg_size(), 1u);
2421   EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType());
2422   // Check that no variables except for loop counter are used in loop body
2423   EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()),
2424             WorkshareLoopRuntimeCall->getArgOperand(2));
2425   // Check loop trip count argument
2426   EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3));
2427 }
2428 
2429 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
2430   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2431   OpenMPIRBuilder OMPBuilder(*M);
2432   OMPBuilder.Config.IsTargetDevice = false;
2433   OMPBuilder.initialize();
2434   IRBuilder<> Builder(BB);
2435   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2436 
2437   Type *LCTy = Type::getInt32Ty(Ctx);
2438   Value *StartVal = ConstantInt::get(LCTy, 10);
2439   Value *StopVal = ConstantInt::get(LCTy, 52);
2440   Value *StepVal = ConstantInt::get(LCTy, 2);
2441   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2442     return Error::success();
2443   };
2444   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2445                        OMPBuilder.createCanonicalLoop(
2446                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2447                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2448   BasicBlock *Preheader = CLI->getPreheader();
2449   BasicBlock *Body = CLI->getBody();
2450   Value *IV = CLI->getIndVar();
2451   BasicBlock *ExitBlock = CLI->getExit();
2452 
2453   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2454   InsertPointTy AllocaIP = Builder.saveIP();
2455 
2456   ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP,
2457                                                      /*NeedsBarrier=*/true,
2458                                                      OMP_SCHEDULE_Static),
2459                        Succeeded());
2460 
2461   BasicBlock *Cond = Body->getSinglePredecessor();
2462   Instruction *Cmp = &*Cond->begin();
2463   Value *TripCount = Cmp->getOperand(1);
2464 
2465   auto AllocaIter = BB->begin();
2466   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2467   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2468   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2469   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2470   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2471   EXPECT_NE(PLastIter, nullptr);
2472   EXPECT_NE(PLowerBound, nullptr);
2473   EXPECT_NE(PUpperBound, nullptr);
2474   EXPECT_NE(PStride, nullptr);
2475 
2476   auto PreheaderIter = Preheader->begin();
2477   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
2478   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2479   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2480   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2481   ASSERT_NE(LowerBoundStore, nullptr);
2482   ASSERT_NE(UpperBoundStore, nullptr);
2483   ASSERT_NE(StrideStore, nullptr);
2484 
2485   auto *OrigLowerBound =
2486       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2487   auto *OrigUpperBound =
2488       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2489   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2490   ASSERT_NE(OrigLowerBound, nullptr);
2491   ASSERT_NE(OrigUpperBound, nullptr);
2492   ASSERT_NE(OrigStride, nullptr);
2493   EXPECT_EQ(OrigLowerBound->getValue(), 0);
2494   EXPECT_EQ(OrigUpperBound->getValue(), 20);
2495   EXPECT_EQ(OrigStride->getValue(), 1);
2496 
2497   // Check that the loop IV is updated to account for the lower bound returned
2498   // by the OpenMP runtime call.
2499   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
2500   EXPECT_EQ(Add->getOperand(0), IV);
2501   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
2502   ASSERT_NE(LoadedLowerBound, nullptr);
2503   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
2504 
2505   // Check that the trip count is updated to account for the lower and upper
2506   // bounds return by the OpenMP runtime call.
2507   auto *AddOne = dyn_cast<Instruction>(TripCount);
2508   ASSERT_NE(AddOne, nullptr);
2509   ASSERT_TRUE(AddOne->isBinaryOp());
2510   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
2511   ASSERT_NE(One, nullptr);
2512   EXPECT_EQ(One->getValue(), 1);
2513   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
2514   ASSERT_NE(Difference, nullptr);
2515   ASSERT_TRUE(Difference->isBinaryOp());
2516   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
2517   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
2518   ASSERT_NE(LoadedUpperBound, nullptr);
2519   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
2520 
2521   // The original loop iterator should only be used in the condition, in the
2522   // increment and in the statement that adds the lower bound to it.
2523   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2524 
2525   // The exit block should contain the "fini" call and the barrier call,
2526   // plus the call to obtain the thread ID.
2527   size_t NumCallsInExitBlock =
2528       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2529   EXPECT_EQ(NumCallsInExitBlock, 3u);
2530 }
2531 
2532 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
2533   unsigned IVBits = GetParam();
2534 
2535   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2536   OpenMPIRBuilder OMPBuilder(*M);
2537   OMPBuilder.Config.IsTargetDevice = false;
2538 
2539   BasicBlock *Body;
2540   CallInst *Call;
2541   CanonicalLoopInfo *CLI =
2542       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
2543   ASSERT_NE(CLI, nullptr);
2544 
2545   Instruction *OrigIndVar = CLI->getIndVar();
2546   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
2547 
2548   Type *LCTy = Type::getInt32Ty(Ctx);
2549   Value *ChunkSize = ConstantInt::get(LCTy, 5);
2550   InsertPointTy AllocaIP{&F->getEntryBlock(),
2551                          F->getEntryBlock().getFirstInsertionPt()};
2552   ASSERT_THAT_EXPECTED(OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP,
2553                                                      /*NeedsBarrier=*/true,
2554                                                      OMP_SCHEDULE_Static,
2555                                                      ChunkSize),
2556                        Succeeded());
2557 
2558   OMPBuilder.finalize();
2559   EXPECT_FALSE(verifyModule(*M, &errs()));
2560 
2561   BasicBlock *Entry = &F->getEntryBlock();
2562   BasicBlock *Preheader = Entry->getSingleSuccessor();
2563 
2564   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2565   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2566   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2567   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2568   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2569   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2570   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2571 
2572   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2573   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2574   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2575   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2576   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2577   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2578   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2579 
2580   BasicBlock *DispatchInc = ChunkAfter;
2581 
2582   EXPECT_EQ(ChunkBody, Body);
2583   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2584   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2585 
2586   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2587 
2588   Value *NewIV = Call->getOperand(1);
2589   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2590 
2591   CallInst *InitCall = findSingleCall(
2592       F,
2593       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2594                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2595       OMPBuilder);
2596   EXPECT_EQ(InitCall->getParent(), Preheader);
2597   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2598   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2599   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2600 
2601   CallInst *FiniCall = findSingleCall(
2602       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2603   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2604 
2605   CallInst *BarrierCall = findSingleCall(
2606       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2607   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2608 }
2609 
2610 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
2611                          ::testing::Values(8, 16, 32, 64));
2612 
2613 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2614   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2615   OpenMPIRBuilder OMPBuilder(*M);
2616   OMPBuilder.Config.IsTargetDevice = false;
2617   OMPBuilder.initialize();
2618   IRBuilder<> Builder(BB);
2619   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2620 
2621   omp::OMPScheduleType SchedType = GetParam();
2622   uint32_t ChunkSize = 1;
2623   switch (SchedType & ~OMPScheduleType::ModifierMask) {
2624   case omp::OMPScheduleType::BaseDynamicChunked:
2625   case omp::OMPScheduleType::BaseGuidedChunked:
2626     ChunkSize = 7;
2627     break;
2628   case omp::OMPScheduleType::BaseAuto:
2629   case omp::OMPScheduleType::BaseRuntime:
2630     ChunkSize = 1;
2631     break;
2632   default:
2633     assert(0 && "unknown type for this test");
2634     break;
2635   }
2636 
2637   Type *LCTy = Type::getInt32Ty(Ctx);
2638   Value *StartVal = ConstantInt::get(LCTy, 10);
2639   Value *StopVal = ConstantInt::get(LCTy, 52);
2640   Value *StepVal = ConstantInt::get(LCTy, 2);
2641   Value *ChunkVal =
2642       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
2643   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2644     return Error::success();
2645   };
2646 
2647   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2648                        OMPBuilder.createCanonicalLoop(
2649                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2650                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2651 
2652   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2653   InsertPointTy AllocaIP = Builder.saveIP();
2654 
2655   // Collect all the info from CLI, as it isn't usable after the call to
2656   // createDynamicWorkshareLoop.
2657   InsertPointTy AfterIP = CLI->getAfterIP();
2658   BasicBlock *Preheader = CLI->getPreheader();
2659   BasicBlock *ExitBlock = CLI->getExit();
2660   BasicBlock *LatchBlock = CLI->getLatch();
2661   Value *IV = CLI->getIndVar();
2662 
2663   ASSERT_EXPECTED_INIT(
2664       OpenMPIRBuilder::InsertPointTy, EndIP,
2665       OMPBuilder.applyWorkshareLoop(
2666           DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
2667           ChunkVal, /*Simd=*/false,
2668           (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
2669               omp::OMPScheduleType::ModifierMonotonic,
2670           (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
2671               omp::OMPScheduleType::ModifierNonmonotonic,
2672           /*Ordered=*/false));
2673 
2674   // The returned value should be the "after" point.
2675   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2676   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2677 
2678   auto AllocaIter = BB->begin();
2679   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2680   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2681   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2682   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2683   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2684   EXPECT_NE(PLastIter, nullptr);
2685   EXPECT_NE(PLowerBound, nullptr);
2686   EXPECT_NE(PUpperBound, nullptr);
2687   EXPECT_NE(PStride, nullptr);
2688 
2689   auto PreheaderIter = Preheader->begin();
2690   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2691   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2692   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2693   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2694   ASSERT_NE(LowerBoundStore, nullptr);
2695   ASSERT_NE(UpperBoundStore, nullptr);
2696   ASSERT_NE(StrideStore, nullptr);
2697 
2698   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2699   ASSERT_NE(ThreadIdCall, nullptr);
2700   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2701             "__kmpc_global_thread_num");
2702 
2703   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2704 
2705   ASSERT_NE(InitCall, nullptr);
2706   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2707             "__kmpc_dispatch_init_4u");
2708   EXPECT_EQ(InitCall->arg_size(), 7U);
2709   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2710   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2711   if ((SchedType & OMPScheduleType::MonotonicityMask) ==
2712       OMPScheduleType::None) {
2713     // Implementation is allowed to add default nonmonotonicity flag
2714     EXPECT_EQ(
2715         static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
2716             OMPScheduleType::ModifierNonmonotonic,
2717         SchedType | OMPScheduleType::ModifierNonmonotonic);
2718   } else {
2719     EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
2720               SchedType);
2721   }
2722 
2723   ConstantInt *OrigLowerBound =
2724       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2725   ConstantInt *OrigUpperBound =
2726       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2727   ConstantInt *OrigStride =
2728       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2729   ASSERT_NE(OrigLowerBound, nullptr);
2730   ASSERT_NE(OrigUpperBound, nullptr);
2731   ASSERT_NE(OrigStride, nullptr);
2732   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2733   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2734   EXPECT_EQ(OrigStride->getValue(), 1);
2735 
2736   CallInst *FiniCall = dyn_cast<CallInst>(
2737       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2738   EXPECT_EQ(FiniCall, nullptr);
2739 
2740   // The original loop iterator should only be used in the condition, in the
2741   // increment and in the statement that adds the lower bound to it.
2742   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2743 
2744   // The exit block should contain the barrier call, plus the call to obtain
2745   // the thread ID.
2746   size_t NumCallsInExitBlock =
2747       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2748   EXPECT_EQ(NumCallsInExitBlock, 2u);
2749 
2750   // Add a termination to our block and check that it is internally consistent.
2751   Builder.restoreIP(EndIP);
2752   Builder.CreateRetVoid();
2753   OMPBuilder.finalize();
2754   EXPECT_FALSE(verifyModule(*M, &errs()));
2755 }
2756 
2757 INSTANTIATE_TEST_SUITE_P(
2758     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
2759     ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
2760                       omp::OMPScheduleType::UnorderedGuidedChunked,
2761                       omp::OMPScheduleType::UnorderedAuto,
2762                       omp::OMPScheduleType::UnorderedRuntime,
2763                       omp::OMPScheduleType::UnorderedDynamicChunked |
2764                           omp::OMPScheduleType::ModifierMonotonic,
2765                       omp::OMPScheduleType::UnorderedDynamicChunked |
2766                           omp::OMPScheduleType::ModifierNonmonotonic,
2767                       omp::OMPScheduleType::UnorderedGuidedChunked |
2768                           omp::OMPScheduleType::ModifierMonotonic,
2769                       omp::OMPScheduleType::UnorderedGuidedChunked |
2770                           omp::OMPScheduleType::ModifierNonmonotonic,
2771                       omp::OMPScheduleType::UnorderedAuto |
2772                           omp::OMPScheduleType::ModifierMonotonic,
2773                       omp::OMPScheduleType::UnorderedRuntime |
2774                           omp::OMPScheduleType::ModifierMonotonic));
2775 
2776 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2777   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2778   OpenMPIRBuilder OMPBuilder(*M);
2779   OMPBuilder.Config.IsTargetDevice = false;
2780   OMPBuilder.initialize();
2781   IRBuilder<> Builder(BB);
2782   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2783 
2784   uint32_t ChunkSize = 1;
2785   Type *LCTy = Type::getInt32Ty(Ctx);
2786   Value *StartVal = ConstantInt::get(LCTy, 10);
2787   Value *StopVal = ConstantInt::get(LCTy, 52);
2788   Value *StepVal = ConstantInt::get(LCTy, 2);
2789   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2790   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
2791     return llvm::Error::success();
2792   };
2793 
2794   ASSERT_EXPECTED_INIT(CanonicalLoopInfo *, CLI,
2795                        OMPBuilder.createCanonicalLoop(
2796                            Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2797                            /*IsSigned=*/false, /*InclusiveStop=*/false));
2798 
2799   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2800   InsertPointTy AllocaIP = Builder.saveIP();
2801 
2802   // Collect all the info from CLI, as it isn't usable after the call to
2803   // createDynamicWorkshareLoop.
2804   BasicBlock *Preheader = CLI->getPreheader();
2805   BasicBlock *ExitBlock = CLI->getExit();
2806   BasicBlock *LatchBlock = CLI->getLatch();
2807   Value *IV = CLI->getIndVar();
2808 
2809   ASSERT_EXPECTED_INIT(
2810       OpenMPIRBuilder::InsertPointTy, EndIP,
2811       OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2812                                     OMP_SCHEDULE_Static, ChunkVal,
2813                                     /*HasSimdModifier=*/false,
2814                                     /*HasMonotonicModifier=*/false,
2815                                     /*HasNonmonotonicModifier=*/false,
2816                                     /*HasOrderedClause=*/true));
2817 
2818   // Add a termination to our block and check that it is internally consistent.
2819   Builder.restoreIP(EndIP);
2820   Builder.CreateRetVoid();
2821   OMPBuilder.finalize();
2822   EXPECT_FALSE(verifyModule(*M, &errs()));
2823 
2824   CallInst *InitCall = nullptr;
2825   for (Instruction &EI : *Preheader) {
2826     Instruction *Cur = &EI;
2827     if (isa<CallInst>(Cur)) {
2828       InitCall = cast<CallInst>(Cur);
2829       if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2830         break;
2831       InitCall = nullptr;
2832     }
2833   }
2834   EXPECT_NE(InitCall, nullptr);
2835   EXPECT_EQ(InitCall->arg_size(), 7U);
2836   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2837   EXPECT_EQ(SchedVal->getValue(),
2838             static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));
2839 
2840   CallInst *FiniCall = dyn_cast<CallInst>(
2841       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2842   ASSERT_NE(FiniCall, nullptr);
2843   EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2844             "__kmpc_dispatch_fini_4u");
2845   EXPECT_EQ(FiniCall->arg_size(), 2U);
2846   EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2847   EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2848 
2849   // The original loop iterator should only be used in the condition, in the
2850   // increment and in the statement that adds the lower bound to it.
2851   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2852 
2853   // The exit block should contain the barrier call, plus the call to obtain
2854   // the thread ID.
2855   size_t NumCallsInExitBlock =
2856       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2857   EXPECT_EQ(NumCallsInExitBlock, 2u);
2858 }
2859 
2860 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2861   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2862   OpenMPIRBuilder OMPBuilder(*M);
2863   OMPBuilder.initialize();
2864   F->setName("func");
2865   IRBuilder<> Builder(BB);
2866 
2867   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2868 
2869   AllocaInst *PrivAI = nullptr;
2870 
2871   BasicBlock *EntryBB = nullptr;
2872   BasicBlock *ThenBB = nullptr;
2873 
2874   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2875     if (AllocaIP.isSet())
2876       Builder.restoreIP(AllocaIP);
2877     else
2878       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2879     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2880     Builder.CreateStore(F->arg_begin(), PrivAI);
2881 
2882     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2883     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2884     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2885 
2886     Builder.restoreIP(CodeGenIP);
2887 
2888     // collect some info for checks later
2889     ThenBB = Builder.GetInsertBlock();
2890     EntryBB = ThenBB->getUniquePredecessor();
2891 
2892     // simple instructions for body
2893     Value *PrivLoad =
2894         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2895     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2896   };
2897 
2898   auto FiniCB = [&](InsertPointTy IP) {
2899     BasicBlock *IPBB = IP.getBlock();
2900     EXPECT_NE(IPBB->end(), IP.getPoint());
2901   };
2902 
2903   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2904                        OMPBuilder.createMaster(Builder,
2905                                                BODYGENCB_WRAPPER(BodyGenCB),
2906                                                FINICB_WRAPPER(FiniCB)));
2907   Builder.restoreIP(AfterIP);
2908   Value *EntryBBTI = EntryBB->getTerminator();
2909   EXPECT_NE(EntryBBTI, nullptr);
2910   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2911   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2912   EXPECT_TRUE(EntryBr->isConditional());
2913   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2914   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2915   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2916 
2917   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2918   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2919 
2920   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2921   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2922   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2923   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2924 
2925   CallInst *MasterEndCI = nullptr;
2926   for (auto &FI : *ThenBB) {
2927     Instruction *cur = &FI;
2928     if (isa<CallInst>(cur)) {
2929       MasterEndCI = cast<CallInst>(cur);
2930       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2931         break;
2932       MasterEndCI = nullptr;
2933     }
2934   }
2935   EXPECT_NE(MasterEndCI, nullptr);
2936   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2937   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2938   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2939 }
2940 
2941 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2942   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2943   OpenMPIRBuilder OMPBuilder(*M);
2944   OMPBuilder.initialize();
2945   F->setName("func");
2946   IRBuilder<> Builder(BB);
2947 
2948   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2949 
2950   AllocaInst *PrivAI = nullptr;
2951 
2952   BasicBlock *EntryBB = nullptr;
2953   BasicBlock *ThenBB = nullptr;
2954 
2955   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2956     if (AllocaIP.isSet())
2957       Builder.restoreIP(AllocaIP);
2958     else
2959       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2960     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2961     Builder.CreateStore(F->arg_begin(), PrivAI);
2962 
2963     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2964     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2965     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2966 
2967     Builder.restoreIP(CodeGenIP);
2968 
2969     // collect some info for checks later
2970     ThenBB = Builder.GetInsertBlock();
2971     EntryBB = ThenBB->getUniquePredecessor();
2972 
2973     // simple instructions for body
2974     Value *PrivLoad =
2975         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2976     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2977   };
2978 
2979   auto FiniCB = [&](InsertPointTy IP) {
2980     BasicBlock *IPBB = IP.getBlock();
2981     EXPECT_NE(IPBB->end(), IP.getPoint());
2982   };
2983 
2984   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2985   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
2986                        OMPBuilder.createMasked(Builder,
2987                                                BODYGENCB_WRAPPER(BodyGenCB),
2988                                                FINICB_WRAPPER(FiniCB), Filter));
2989   Builder.restoreIP(AfterIP);
2990   Value *EntryBBTI = EntryBB->getTerminator();
2991   EXPECT_NE(EntryBBTI, nullptr);
2992   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2993   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2994   EXPECT_TRUE(EntryBr->isConditional());
2995   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2996   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2997   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2998 
2999   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3000   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3001 
3002   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
3003   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
3004   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
3005   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
3006 
3007   CallInst *MaskedEndCI = nullptr;
3008   for (auto &FI : *ThenBB) {
3009     Instruction *cur = &FI;
3010     if (isa<CallInst>(cur)) {
3011       MaskedEndCI = cast<CallInst>(cur);
3012       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
3013         break;
3014       MaskedEndCI = nullptr;
3015     }
3016   }
3017   EXPECT_NE(MaskedEndCI, nullptr);
3018   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
3019   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
3020   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
3021 }
3022 
3023 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
3024   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3025   OpenMPIRBuilder OMPBuilder(*M);
3026   OMPBuilder.initialize();
3027   F->setName("func");
3028   IRBuilder<> Builder(BB);
3029 
3030   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3031 
3032   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3033 
3034   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3035     // actual start for bodyCB
3036     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3037     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3038     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3039 
3040     // body begin
3041     Builder.restoreIP(CodeGenIP);
3042     Builder.CreateStore(F->arg_begin(), PrivAI);
3043     Value *PrivLoad =
3044         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3045     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3046   };
3047 
3048   auto FiniCB = [&](InsertPointTy IP) {
3049     BasicBlock *IPBB = IP.getBlock();
3050     EXPECT_NE(IPBB->end(), IP.getPoint());
3051   };
3052   BasicBlock *EntryBB = Builder.GetInsertBlock();
3053 
3054   ASSERT_EXPECTED_INIT(
3055       OpenMPIRBuilder::InsertPointTy, AfterIP,
3056       OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3057                                 FINICB_WRAPPER(FiniCB), "testCRT", nullptr));
3058   Builder.restoreIP(AfterIP);
3059 
3060   CallInst *CriticalEntryCI = nullptr;
3061   for (auto &EI : *EntryBB) {
3062     Instruction *cur = &EI;
3063     if (isa<CallInst>(cur)) {
3064       CriticalEntryCI = cast<CallInst>(cur);
3065       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
3066         break;
3067       CriticalEntryCI = nullptr;
3068     }
3069   }
3070   EXPECT_NE(CriticalEntryCI, nullptr);
3071   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
3072   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
3073   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
3074 
3075   CallInst *CriticalEndCI = nullptr;
3076   for (auto &FI : *EntryBB) {
3077     Instruction *cur = &FI;
3078     if (isa<CallInst>(cur)) {
3079       CriticalEndCI = cast<CallInst>(cur);
3080       if (CriticalEndCI->getCalledFunction()->getName() ==
3081           "__kmpc_end_critical")
3082         break;
3083       CriticalEndCI = nullptr;
3084     }
3085   }
3086   EXPECT_NE(CriticalEndCI, nullptr);
3087   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
3088   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
3089   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
3090   PointerType *CriticalNamePtrTy =
3091       PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8));
3092   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
3093   GlobalVariable *GV =
3094       dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2));
3095   ASSERT_NE(GV, nullptr);
3096   EXPECT_EQ(GV->getType(), CriticalNamePtrTy);
3097   const DataLayout &DL = M->getDataLayout();
3098   const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy);
3099   const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace());
3100   if (const llvm::MaybeAlign Alignment = GV->getAlign())
3101     EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign));
3102 }
3103 
3104 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
3105   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3106   OpenMPIRBuilder OMPBuilder(*M);
3107   OMPBuilder.initialize();
3108   F->setName("func");
3109   IRBuilder<> Builder(BB);
3110   LLVMContext &Ctx = M->getContext();
3111 
3112   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3113 
3114   InsertPointTy AllocaIP(&F->getEntryBlock(),
3115                          F->getEntryBlock().getFirstInsertionPt());
3116 
3117   unsigned NumLoops = 2;
3118   SmallVector<Value *, 2> StoreValues;
3119   Type *LCTy = Type::getInt64Ty(Ctx);
3120   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3121   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3122 
3123   // Test for "#omp ordered depend(source)"
3124   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3125                                                    StoreValues, ".cnt.addr",
3126                                                    /*IsDependSource=*/true));
3127 
3128   Builder.CreateRetVoid();
3129   OMPBuilder.finalize();
3130   EXPECT_FALSE(verifyModule(*M, &errs()));
3131 
3132   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3133   ASSERT_NE(AllocInst, nullptr);
3134   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3135   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3136   EXPECT_TRUE(
3137       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3138 
3139   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3140   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3141     GetElementPtrInst *DependAddrGEPIter =
3142         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3143     ASSERT_NE(DependAddrGEPIter, nullptr);
3144     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3145     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3146     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3147     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3148     ASSERT_NE(FirstIdx, nullptr);
3149     ASSERT_NE(SecondIdx, nullptr);
3150     EXPECT_EQ(FirstIdx->getValue(), 0);
3151     EXPECT_EQ(SecondIdx->getValue(), Iter);
3152     StoreInst *StoreValue =
3153         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3154     ASSERT_NE(StoreValue, nullptr);
3155     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3156     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3157     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3158     IterInst = dyn_cast<Instruction>(StoreValue);
3159   }
3160 
3161   GetElementPtrInst *DependBaseAddrGEP =
3162       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3163   ASSERT_NE(DependBaseAddrGEP, nullptr);
3164   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3165   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3166   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3167   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3168   ASSERT_NE(FirstIdx, nullptr);
3169   ASSERT_NE(SecondIdx, nullptr);
3170   EXPECT_EQ(FirstIdx->getValue(), 0);
3171   EXPECT_EQ(SecondIdx->getValue(), 0);
3172 
3173   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3174   ASSERT_NE(GTID, nullptr);
3175   EXPECT_EQ(GTID->arg_size(), 1U);
3176   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3177   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3178   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3179 
3180   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3181   ASSERT_NE(Depend, nullptr);
3182   EXPECT_EQ(Depend->arg_size(), 3U);
3183   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
3184   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3185   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3186   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3187 }
3188 
3189 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
3190   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3191   OpenMPIRBuilder OMPBuilder(*M);
3192   OMPBuilder.initialize();
3193   F->setName("func");
3194   IRBuilder<> Builder(BB);
3195   LLVMContext &Ctx = M->getContext();
3196 
3197   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3198 
3199   InsertPointTy AllocaIP(&F->getEntryBlock(),
3200                          F->getEntryBlock().getFirstInsertionPt());
3201 
3202   unsigned NumLoops = 2;
3203   SmallVector<Value *, 2> StoreValues;
3204   Type *LCTy = Type::getInt64Ty(Ctx);
3205   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3206   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3207 
3208   // Test for "#omp ordered depend(sink: vec)"
3209   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3210                                                    StoreValues, ".cnt.addr",
3211                                                    /*IsDependSource=*/false));
3212 
3213   Builder.CreateRetVoid();
3214   OMPBuilder.finalize();
3215   EXPECT_FALSE(verifyModule(*M, &errs()));
3216 
3217   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3218   ASSERT_NE(AllocInst, nullptr);
3219   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3220   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3221   EXPECT_TRUE(
3222       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3223 
3224   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3225   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3226     GetElementPtrInst *DependAddrGEPIter =
3227         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3228     ASSERT_NE(DependAddrGEPIter, nullptr);
3229     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3230     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3231     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3232     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3233     ASSERT_NE(FirstIdx, nullptr);
3234     ASSERT_NE(SecondIdx, nullptr);
3235     EXPECT_EQ(FirstIdx->getValue(), 0);
3236     EXPECT_EQ(SecondIdx->getValue(), Iter);
3237     StoreInst *StoreValue =
3238         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3239     ASSERT_NE(StoreValue, nullptr);
3240     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3241     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3242     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3243     IterInst = dyn_cast<Instruction>(StoreValue);
3244   }
3245 
3246   GetElementPtrInst *DependBaseAddrGEP =
3247       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3248   ASSERT_NE(DependBaseAddrGEP, nullptr);
3249   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3250   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3251   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3252   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3253   ASSERT_NE(FirstIdx, nullptr);
3254   ASSERT_NE(SecondIdx, nullptr);
3255   EXPECT_EQ(FirstIdx->getValue(), 0);
3256   EXPECT_EQ(SecondIdx->getValue(), 0);
3257 
3258   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3259   ASSERT_NE(GTID, nullptr);
3260   EXPECT_EQ(GTID->arg_size(), 1U);
3261   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3262   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3263   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3264 
3265   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3266   ASSERT_NE(Depend, nullptr);
3267   EXPECT_EQ(Depend->arg_size(), 3U);
3268   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
3269   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3270   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3271   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3272 }
3273 
3274 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
3275   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3276   OpenMPIRBuilder OMPBuilder(*M);
3277   OMPBuilder.initialize();
3278   F->setName("func");
3279   IRBuilder<> Builder(BB);
3280 
3281   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3282 
3283   AllocaInst *PrivAI =
3284       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3285 
3286   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3287     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3288     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3289     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3290 
3291     Builder.restoreIP(CodeGenIP);
3292     Builder.CreateStore(F->arg_begin(), PrivAI);
3293     Value *PrivLoad =
3294         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3295     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3296   };
3297 
3298   auto FiniCB = [&](InsertPointTy IP) {
3299     BasicBlock *IPBB = IP.getBlock();
3300     EXPECT_NE(IPBB->end(), IP.getPoint());
3301   };
3302 
3303   // Test for "#omp ordered [threads]"
3304   BasicBlock *EntryBB = Builder.GetInsertBlock();
3305   ASSERT_EXPECTED_INIT(
3306       OpenMPIRBuilder::InsertPointTy, AfterIP,
3307       OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3308                                           FINICB_WRAPPER(FiniCB), true));
3309   Builder.restoreIP(AfterIP);
3310 
3311   Builder.CreateRetVoid();
3312   OMPBuilder.finalize();
3313   EXPECT_FALSE(verifyModule(*M, &errs()));
3314 
3315   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3316 
3317   CallInst *OrderedEntryCI = nullptr;
3318   for (auto &EI : *EntryBB) {
3319     Instruction *Cur = &EI;
3320     if (isa<CallInst>(Cur)) {
3321       OrderedEntryCI = cast<CallInst>(Cur);
3322       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3323         break;
3324       OrderedEntryCI = nullptr;
3325     }
3326   }
3327   EXPECT_NE(OrderedEntryCI, nullptr);
3328   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
3329   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
3330   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
3331 
3332   CallInst *OrderedEndCI = nullptr;
3333   for (auto &FI : *EntryBB) {
3334     Instruction *Cur = &FI;
3335     if (isa<CallInst>(Cur)) {
3336       OrderedEndCI = cast<CallInst>(Cur);
3337       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3338         break;
3339       OrderedEndCI = nullptr;
3340     }
3341   }
3342   EXPECT_NE(OrderedEndCI, nullptr);
3343   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
3344   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
3345   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
3346 }
3347 
3348 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
3349   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3350   OpenMPIRBuilder OMPBuilder(*M);
3351   OMPBuilder.initialize();
3352   F->setName("func");
3353   IRBuilder<> Builder(BB);
3354 
3355   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3356 
3357   AllocaInst *PrivAI =
3358       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3359 
3360   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3361     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3362     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3363     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3364 
3365     Builder.restoreIP(CodeGenIP);
3366     Builder.CreateStore(F->arg_begin(), PrivAI);
3367     Value *PrivLoad =
3368         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3369     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3370   };
3371 
3372   auto FiniCB = [&](InsertPointTy IP) {
3373     BasicBlock *IPBB = IP.getBlock();
3374     EXPECT_NE(IPBB->end(), IP.getPoint());
3375   };
3376 
3377   // Test for "#omp ordered simd"
3378   BasicBlock *EntryBB = Builder.GetInsertBlock();
3379   ASSERT_EXPECTED_INIT(
3380       OpenMPIRBuilder::InsertPointTy, AfterIP,
3381       OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3382                                           FINICB_WRAPPER(FiniCB), false));
3383   Builder.restoreIP(AfterIP);
3384 
3385   Builder.CreateRetVoid();
3386   OMPBuilder.finalize();
3387   EXPECT_FALSE(verifyModule(*M, &errs()));
3388 
3389   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3390 
3391   CallInst *OrderedEntryCI = nullptr;
3392   for (auto &EI : *EntryBB) {
3393     Instruction *Cur = &EI;
3394     if (isa<CallInst>(Cur)) {
3395       OrderedEntryCI = cast<CallInst>(Cur);
3396       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3397         break;
3398       OrderedEntryCI = nullptr;
3399     }
3400   }
3401   EXPECT_EQ(OrderedEntryCI, nullptr);
3402 
3403   CallInst *OrderedEndCI = nullptr;
3404   for (auto &FI : *EntryBB) {
3405     Instruction *Cur = &FI;
3406     if (isa<CallInst>(Cur)) {
3407       OrderedEndCI = cast<CallInst>(Cur);
3408       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3409         break;
3410       OrderedEndCI = nullptr;
3411     }
3412   }
3413   EXPECT_EQ(OrderedEndCI, nullptr);
3414 }
3415 
3416 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
3417   OpenMPIRBuilder OMPBuilder(*M);
3418   OMPBuilder.initialize();
3419   F->setName("func");
3420   IRBuilder<> Builder(BB);
3421 
3422   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3423 
3424   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3425   AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy());
3426   AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy());
3427 
3428   BasicBlock *EntryBB = BB;
3429 
3430   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
3431                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
3432 
3433   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
3434 
3435   EXPECT_NE(EntryBr, nullptr);
3436   EXPECT_TRUE(EntryBr->isConditional());
3437 
3438   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
3439   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
3440   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
3441 
3442   EXPECT_NE(CMP, nullptr);
3443   EXPECT_NE(NotMasterBB, nullptr);
3444   EXPECT_NE(CopyinEnd, nullptr);
3445 
3446   BranchInst *NotMasterBr =
3447       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
3448   EXPECT_NE(NotMasterBr, nullptr);
3449   EXPECT_FALSE(NotMasterBr->isConditional());
3450   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
3451 }
3452 
3453 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
3454   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3455   OpenMPIRBuilder OMPBuilder(*M);
3456   OMPBuilder.initialize();
3457   F->setName("func");
3458   IRBuilder<> Builder(BB);
3459 
3460   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3461 
3462   AllocaInst *PrivAI = nullptr;
3463 
3464   BasicBlock *EntryBB = nullptr;
3465   BasicBlock *ThenBB = nullptr;
3466 
3467   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3468     if (AllocaIP.isSet())
3469       Builder.restoreIP(AllocaIP);
3470     else
3471       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3472     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3473     Builder.CreateStore(F->arg_begin(), PrivAI);
3474 
3475     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3476     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3477     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3478 
3479     Builder.restoreIP(CodeGenIP);
3480 
3481     // collect some info for checks later
3482     ThenBB = Builder.GetInsertBlock();
3483     EntryBB = ThenBB->getUniquePredecessor();
3484 
3485     // simple instructions for body
3486     Value *PrivLoad =
3487         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3488     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3489   };
3490 
3491   auto FiniCB = [&](InsertPointTy IP) {
3492     BasicBlock *IPBB = IP.getBlock();
3493     EXPECT_NE(IPBB->end(), IP.getPoint());
3494   };
3495 
3496   ASSERT_EXPECTED_INIT(
3497       OpenMPIRBuilder::InsertPointTy, AfterIP,
3498       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3499                               FINICB_WRAPPER(FiniCB), /*IsNowait*/ false));
3500   Builder.restoreIP(AfterIP);
3501   Value *EntryBBTI = EntryBB->getTerminator();
3502   EXPECT_NE(EntryBBTI, nullptr);
3503   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3504   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3505   EXPECT_TRUE(EntryBr->isConditional());
3506   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3507   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3508   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3509 
3510   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3511   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3512 
3513   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3514   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3515   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3516   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3517 
3518   CallInst *SingleEndCI = nullptr;
3519   for (auto &FI : *ThenBB) {
3520     Instruction *cur = &FI;
3521     if (isa<CallInst>(cur)) {
3522       SingleEndCI = cast<CallInst>(cur);
3523       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3524         break;
3525       SingleEndCI = nullptr;
3526     }
3527   }
3528   EXPECT_NE(SingleEndCI, nullptr);
3529   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3530   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3531   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3532 
3533   bool FoundBarrier = false;
3534   for (auto &FI : *ExitBB) {
3535     Instruction *cur = &FI;
3536     if (auto CI = dyn_cast<CallInst>(cur)) {
3537       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3538         FoundBarrier = true;
3539         break;
3540       }
3541     }
3542   }
3543   EXPECT_TRUE(FoundBarrier);
3544 }
3545 
3546 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
3547   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3548   OpenMPIRBuilder OMPBuilder(*M);
3549   OMPBuilder.initialize();
3550   F->setName("func");
3551   IRBuilder<> Builder(BB);
3552 
3553   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3554 
3555   AllocaInst *PrivAI = nullptr;
3556 
3557   BasicBlock *EntryBB = nullptr;
3558   BasicBlock *ThenBB = nullptr;
3559 
3560   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3561     if (AllocaIP.isSet())
3562       Builder.restoreIP(AllocaIP);
3563     else
3564       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3565     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3566     Builder.CreateStore(F->arg_begin(), PrivAI);
3567 
3568     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3569     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3570     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3571 
3572     Builder.restoreIP(CodeGenIP);
3573 
3574     // collect some info for checks later
3575     ThenBB = Builder.GetInsertBlock();
3576     EntryBB = ThenBB->getUniquePredecessor();
3577 
3578     // simple instructions for body
3579     Value *PrivLoad =
3580         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3581     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3582   };
3583 
3584   auto FiniCB = [&](InsertPointTy IP) {
3585     BasicBlock *IPBB = IP.getBlock();
3586     EXPECT_NE(IPBB->end(), IP.getPoint());
3587   };
3588 
3589   ASSERT_EXPECTED_INIT(
3590       OpenMPIRBuilder::InsertPointTy, AfterIP,
3591       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3592                               FINICB_WRAPPER(FiniCB), /*IsNowait*/ true));
3593   Builder.restoreIP(AfterIP);
3594   Value *EntryBBTI = EntryBB->getTerminator();
3595   EXPECT_NE(EntryBBTI, nullptr);
3596   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3597   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3598   EXPECT_TRUE(EntryBr->isConditional());
3599   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3600   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3601   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3602 
3603   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3604   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3605 
3606   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3607   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3608   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3609   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3610 
3611   CallInst *SingleEndCI = nullptr;
3612   for (auto &FI : *ThenBB) {
3613     Instruction *cur = &FI;
3614     if (isa<CallInst>(cur)) {
3615       SingleEndCI = cast<CallInst>(cur);
3616       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3617         break;
3618       SingleEndCI = nullptr;
3619     }
3620   }
3621   EXPECT_NE(SingleEndCI, nullptr);
3622   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3623   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3624   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3625 
3626   CallInst *ExitBarrier = nullptr;
3627   for (auto &FI : *ExitBB) {
3628     Instruction *cur = &FI;
3629     if (auto CI = dyn_cast<CallInst>(cur)) {
3630       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3631         ExitBarrier = CI;
3632         break;
3633       }
3634     }
3635   }
3636   EXPECT_EQ(ExitBarrier, nullptr);
3637 }
3638 
3639 // Helper class to check each instruction of a BB.
3640 class BBInstIter {
3641   BasicBlock *BB;
3642   BasicBlock::iterator BBI;
3643 
3644 public:
3645   BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {}
3646 
3647   bool hasNext() const { return BBI != BB->end(); }
3648 
3649   template <typename InstTy> InstTy *next() {
3650     if (!hasNext())
3651       return nullptr;
3652     Instruction *Cur = &*BBI++;
3653     if (!isa<InstTy>(Cur))
3654       return nullptr;
3655     return cast<InstTy>(Cur);
3656   }
3657 };
3658 
3659 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
3660   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3661   OpenMPIRBuilder OMPBuilder(*M);
3662   OMPBuilder.initialize();
3663   F->setName("func");
3664   IRBuilder<> Builder(BB);
3665 
3666   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3667 
3668   AllocaInst *PrivAI = nullptr;
3669 
3670   BasicBlock *EntryBB = nullptr;
3671   BasicBlock *ThenBB = nullptr;
3672 
3673   Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType());
3674   Builder.CreateStore(F->arg_begin(), CPVar);
3675 
3676   FunctionType *CopyFuncTy = FunctionType::get(
3677       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false);
3678   Function *CopyFunc =
3679       Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
3680 
3681   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3682     if (AllocaIP.isSet())
3683       Builder.restoreIP(AllocaIP);
3684     else
3685       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3686     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3687     Builder.CreateStore(F->arg_begin(), PrivAI);
3688 
3689     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3690     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3691     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3692 
3693     Builder.restoreIP(CodeGenIP);
3694 
3695     // collect some info for checks later
3696     ThenBB = Builder.GetInsertBlock();
3697     EntryBB = ThenBB->getUniquePredecessor();
3698 
3699     // simple instructions for body
3700     Value *PrivLoad =
3701         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3702     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3703   };
3704 
3705   auto FiniCB = [&](InsertPointTy IP) {
3706     BasicBlock *IPBB = IP.getBlock();
3707     // IP must be before the unconditional branch to ExitBB
3708     EXPECT_NE(IPBB->end(), IP.getPoint());
3709   };
3710 
3711   ASSERT_EXPECTED_INIT(
3712       OpenMPIRBuilder::InsertPointTy, AfterIP,
3713       OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
3714                               FINICB_WRAPPER(FiniCB),
3715                               /*IsNowait*/ false, {CPVar}, {CopyFunc}));
3716   Builder.restoreIP(AfterIP);
3717   Value *EntryBBTI = EntryBB->getTerminator();
3718   EXPECT_NE(EntryBBTI, nullptr);
3719   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3720   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3721   EXPECT_TRUE(EntryBr->isConditional());
3722   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3723   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3724   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3725 
3726   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3727   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3728 
3729   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3730   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3731   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3732   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3733 
3734   // check ThenBB
3735   BBInstIter ThenBBI(ThenBB);
3736   // load PrivAI
3737   auto *PrivLI = ThenBBI.next<LoadInst>();
3738   EXPECT_NE(PrivLI, nullptr);
3739   EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI);
3740   // icmp
3741   EXPECT_TRUE(ThenBBI.next<ICmpInst>());
3742   // store 1, DidIt
3743   auto *DidItSI = ThenBBI.next<StoreInst>();
3744   EXPECT_NE(DidItSI, nullptr);
3745   EXPECT_EQ(DidItSI->getValueOperand(),
3746             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
3747   Value *DidIt = DidItSI->getPointerOperand();
3748   // call __kmpc_end_single
3749   auto *SingleEndCI = ThenBBI.next<CallInst>();
3750   EXPECT_NE(SingleEndCI, nullptr);
3751   EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single");
3752   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3753   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3754   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3755   // br ExitBB
3756   auto *ExitBBBI = ThenBBI.next<BranchInst>();
3757   EXPECT_NE(ExitBBBI, nullptr);
3758   EXPECT_TRUE(ExitBBBI->isUnconditional());
3759   EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB);
3760   EXPECT_FALSE(ThenBBI.hasNext());
3761 
3762   // check ExitBB
3763   BBInstIter ExitBBI(ExitBB);
3764   // call __kmpc_global_thread_num
3765   auto *ThreadNumCI = ExitBBI.next<CallInst>();
3766   EXPECT_NE(ThreadNumCI, nullptr);
3767   EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(),
3768             "__kmpc_global_thread_num");
3769   // load DidIt
3770   auto *DidItLI = ExitBBI.next<LoadInst>();
3771   EXPECT_NE(DidItLI, nullptr);
3772   EXPECT_EQ(DidItLI->getPointerOperand(), DidIt);
3773   // call __kmpc_copyprivate
3774   auto *CopyPrivateCI = ExitBBI.next<CallInst>();
3775   EXPECT_NE(CopyPrivateCI, nullptr);
3776   EXPECT_EQ(CopyPrivateCI->arg_size(), 6U);
3777   EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3)));
3778   EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar);
3779   EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4)));
3780   EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc);
3781   EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5)));
3782   DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5));
3783   EXPECT_EQ(DidItLI->getOperand(0), DidIt);
3784   EXPECT_FALSE(ExitBBI.hasNext());
3785 }
3786 
3787 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
3788   OpenMPIRBuilder OMPBuilder(*M);
3789   OMPBuilder.initialize();
3790   F->setName("func");
3791   IRBuilder<> Builder(BB);
3792 
3793   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3794 
3795   Type *Float32 = Type::getFloatTy(M->getContext());
3796   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3797   XVal->setName("AtomicVar");
3798   AllocaInst *VVal = Builder.CreateAlloca(Float32);
3799   VVal->setName("AtomicRead");
3800   AtomicOrdering AO = AtomicOrdering::Monotonic;
3801   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3802   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
3803 
3804   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3805 
3806   IntegerType *IntCastTy =
3807       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3808 
3809   LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
3810   EXPECT_TRUE(AtomicLoad->isAtomic());
3811   EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
3812 
3813   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
3814   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
3815   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
3816   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
3817 
3818   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
3819   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
3820   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
3821 
3822   Builder.CreateRetVoid();
3823   OMPBuilder.finalize();
3824   EXPECT_FALSE(verifyModule(*M, &errs()));
3825 }
3826 
3827 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
3828   OpenMPIRBuilder OMPBuilder(*M);
3829   OMPBuilder.initialize();
3830   F->setName("func");
3831   IRBuilder<> Builder(BB);
3832 
3833   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3834 
3835   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3836   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3837   XVal->setName("AtomicVar");
3838   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3839   VVal->setName("AtomicRead");
3840   AtomicOrdering AO = AtomicOrdering::Monotonic;
3841   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3842   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3843 
3844   BasicBlock *EntryBB = BB;
3845 
3846   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3847   LoadInst *AtomicLoad = nullptr;
3848   StoreInst *StoreofAtomic = nullptr;
3849 
3850   for (Instruction &Cur : *EntryBB) {
3851     if (isa<LoadInst>(Cur)) {
3852       AtomicLoad = cast<LoadInst>(&Cur);
3853       if (AtomicLoad->getPointerOperand() == XVal)
3854         continue;
3855       AtomicLoad = nullptr;
3856     } else if (isa<StoreInst>(Cur)) {
3857       StoreofAtomic = cast<StoreInst>(&Cur);
3858       if (StoreofAtomic->getPointerOperand() == VVal)
3859         continue;
3860       StoreofAtomic = nullptr;
3861     }
3862   }
3863 
3864   EXPECT_NE(AtomicLoad, nullptr);
3865   EXPECT_TRUE(AtomicLoad->isAtomic());
3866 
3867   EXPECT_NE(StoreofAtomic, nullptr);
3868   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
3869 
3870   Builder.CreateRetVoid();
3871   OMPBuilder.finalize();
3872 
3873   EXPECT_FALSE(verifyModule(*M, &errs()));
3874 }
3875 
3876 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
3877   OpenMPIRBuilder OMPBuilder(*M);
3878   OMPBuilder.initialize();
3879   F->setName("func");
3880   IRBuilder<> Builder(BB);
3881 
3882   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3883 
3884   LLVMContext &Ctx = M->getContext();
3885   Type *Float32 = Type::getFloatTy(Ctx);
3886   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3887   XVal->setName("AtomicVar");
3888   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3889   AtomicOrdering AO = AtomicOrdering::Monotonic;
3890   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
3891 
3892   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3893 
3894   IntegerType *IntCastTy =
3895       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3896 
3897   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
3898 
3899   StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode());
3900   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
3901   EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
3902   EXPECT_TRUE(StoreofAtomic->isAtomic());
3903 
3904   Builder.CreateRetVoid();
3905   OMPBuilder.finalize();
3906   EXPECT_FALSE(verifyModule(*M, &errs()));
3907 }
3908 
3909 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
3910   OpenMPIRBuilder OMPBuilder(*M);
3911   OMPBuilder.initialize();
3912   F->setName("func");
3913   IRBuilder<> Builder(BB);
3914 
3915   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3916 
3917   LLVMContext &Ctx = M->getContext();
3918   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3919   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3920   XVal->setName("AtomicVar");
3921   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3922   AtomicOrdering AO = AtomicOrdering::Monotonic;
3923   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3924 
3925   BasicBlock *EntryBB = BB;
3926 
3927   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3928 
3929   StoreInst *StoreofAtomic = nullptr;
3930 
3931   for (Instruction &Cur : *EntryBB) {
3932     if (isa<StoreInst>(Cur)) {
3933       StoreofAtomic = cast<StoreInst>(&Cur);
3934       if (StoreofAtomic->getPointerOperand() == XVal)
3935         continue;
3936       StoreofAtomic = nullptr;
3937     }
3938   }
3939 
3940   EXPECT_NE(StoreofAtomic, nullptr);
3941   EXPECT_TRUE(StoreofAtomic->isAtomic());
3942   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3943 
3944   Builder.CreateRetVoid();
3945   OMPBuilder.finalize();
3946   EXPECT_FALSE(verifyModule(*M, &errs()));
3947 }
3948 
3949 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3950   OpenMPIRBuilder OMPBuilder(*M);
3951   OMPBuilder.initialize();
3952   F->setName("func");
3953   IRBuilder<> Builder(BB);
3954 
3955   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3956 
3957   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3958   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3959   XVal->setName("AtomicVar");
3960   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3961   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3962   AtomicOrdering AO = AtomicOrdering::Monotonic;
3963   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3964   Value *Expr = nullptr;
3965   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3966   bool IsXLHSInRHSPart = false;
3967 
3968   BasicBlock *EntryBB = BB;
3969   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3970                                           EntryBB->getFirstInsertionPt());
3971   Value *Sub = nullptr;
3972 
3973   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3974     Sub = IRB.CreateSub(ConstVal, Atomic);
3975     return Sub;
3976   };
3977   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
3978                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
3979                                                      AO, RMWOp, UpdateOp,
3980                                                      IsXLHSInRHSPart));
3981   Builder.restoreIP(AfterIP);
3982   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3983   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3984   EXPECT_NE(ContTI, nullptr);
3985   BasicBlock *EndBB = ContTI->getSuccessor(0);
3986   EXPECT_TRUE(ContTI->isConditional());
3987   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3988   EXPECT_NE(EndBB, nullptr);
3989 
3990   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3991   EXPECT_NE(Phi, nullptr);
3992   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3993   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3994   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3995 
3996   EXPECT_EQ(Sub->getNumUses(), 1U);
3997   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3998   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3999 
4000   ExtractValueInst *ExVI1 =
4001       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4002   EXPECT_NE(ExVI1, nullptr);
4003   AtomicCmpXchgInst *CmpExchg =
4004       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4005   EXPECT_NE(CmpExchg, nullptr);
4006   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4007   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4008   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4009 
4010   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4011   EXPECT_NE(Ld, nullptr);
4012   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4013 
4014   Builder.CreateRetVoid();
4015   OMPBuilder.finalize();
4016   EXPECT_FALSE(verifyModule(*M, &errs()));
4017 }
4018 
4019 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
4020   OpenMPIRBuilder OMPBuilder(*M);
4021   OMPBuilder.initialize();
4022   F->setName("func");
4023   IRBuilder<> Builder(BB);
4024 
4025   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4026 
4027   Type *FloatTy = Type::getFloatTy(M->getContext());
4028   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
4029   XVal->setName("AtomicVar");
4030   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
4031   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
4032   AtomicOrdering AO = AtomicOrdering::Monotonic;
4033   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
4034   Value *Expr = nullptr;
4035   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
4036   bool IsXLHSInRHSPart = false;
4037 
4038   BasicBlock *EntryBB = BB;
4039   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4040                                           EntryBB->getFirstInsertionPt());
4041   Value *Sub = nullptr;
4042 
4043   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
4044     Sub = IRB.CreateFSub(ConstVal, Atomic);
4045     return Sub;
4046   };
4047   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4048                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
4049                                                      AO, RMWOp, UpdateOp,
4050                                                      IsXLHSInRHSPart));
4051   Builder.restoreIP(AfterIP);
4052   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
4053   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
4054   EXPECT_NE(ContTI, nullptr);
4055   BasicBlock *EndBB = ContTI->getSuccessor(0);
4056   EXPECT_TRUE(ContTI->isConditional());
4057   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
4058   EXPECT_NE(EndBB, nullptr);
4059 
4060   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
4061   EXPECT_NE(Phi, nullptr);
4062   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
4063   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
4064   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
4065 
4066   EXPECT_EQ(Sub->getNumUses(), 1U);
4067   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
4068   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
4069 
4070   ExtractValueInst *ExVI1 =
4071       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4072   EXPECT_NE(ExVI1, nullptr);
4073   AtomicCmpXchgInst *CmpExchg =
4074       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4075   EXPECT_NE(CmpExchg, nullptr);
4076   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4077   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4078   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4079 
4080   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4081   EXPECT_NE(Ld, nullptr);
4082   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4083   Builder.CreateRetVoid();
4084   OMPBuilder.finalize();
4085   EXPECT_FALSE(verifyModule(*M, &errs()));
4086 }
4087 
4088 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
4089   OpenMPIRBuilder OMPBuilder(*M);
4090   OMPBuilder.initialize();
4091   F->setName("func");
4092   IRBuilder<> Builder(BB);
4093 
4094   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4095 
4096   Type *IntTy = Type::getInt32Ty(M->getContext());
4097   AllocaInst *XVal = Builder.CreateAlloca(IntTy);
4098   XVal->setName("AtomicVar");
4099   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
4100   OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
4101   AtomicOrdering AO = AtomicOrdering::Monotonic;
4102   Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
4103   Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
4104   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
4105   bool IsXLHSInRHSPart = false;
4106 
4107   BasicBlock *EntryBB = BB;
4108   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4109                                           EntryBB->getFirstInsertionPt());
4110   Value *Sub = nullptr;
4111 
4112   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
4113     Sub = IRB.CreateSub(ConstVal, Atomic);
4114     return Sub;
4115   };
4116   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4117                        OMPBuilder.createAtomicUpdate(Builder, AllocaIP, X, Expr,
4118                                                      AO, RMWOp, UpdateOp,
4119                                                      IsXLHSInRHSPart));
4120   Builder.restoreIP(AfterIP);
4121   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
4122   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
4123   EXPECT_NE(ContTI, nullptr);
4124   BasicBlock *EndBB = ContTI->getSuccessor(0);
4125   EXPECT_TRUE(ContTI->isConditional());
4126   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
4127   EXPECT_NE(EndBB, nullptr);
4128 
4129   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
4130   EXPECT_NE(Phi, nullptr);
4131   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
4132   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
4133   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
4134 
4135   EXPECT_EQ(Sub->getNumUses(), 1U);
4136   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
4137   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
4138 
4139   ExtractValueInst *ExVI1 =
4140       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
4141   EXPECT_NE(ExVI1, nullptr);
4142   AtomicCmpXchgInst *CmpExchg =
4143       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
4144   EXPECT_NE(CmpExchg, nullptr);
4145   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
4146   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
4147   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
4148 
4149   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
4150   EXPECT_NE(Ld, nullptr);
4151   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
4152 
4153   Builder.CreateRetVoid();
4154   OMPBuilder.finalize();
4155   EXPECT_FALSE(verifyModule(*M, &errs()));
4156 }
4157 
4158 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
4159   OpenMPIRBuilder OMPBuilder(*M);
4160   OMPBuilder.initialize();
4161   F->setName("func");
4162   IRBuilder<> Builder(BB);
4163 
4164   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4165 
4166   LLVMContext &Ctx = M->getContext();
4167   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4168   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4169   XVal->setName("AtomicVar");
4170   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4171   VVal->setName("AtomicCapTar");
4172   StoreInst *Init =
4173       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4174 
4175   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
4176   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4177   AtomicOrdering AO = AtomicOrdering::Monotonic;
4178   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4179   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
4180   bool IsXLHSInRHSPart = true;
4181   bool IsPostfixUpdate = true;
4182   bool UpdateExpr = true;
4183 
4184   BasicBlock *EntryBB = BB;
4185   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4186                                           EntryBB->getFirstInsertionPt());
4187 
4188   // integer update - not used
4189   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
4190 
4191   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4192                        OMPBuilder.createAtomicCapture(
4193                            Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp,
4194                            UpdateExpr, IsPostfixUpdate, IsXLHSInRHSPart));
4195   Builder.restoreIP(AfterIP);
4196   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4197   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4198   EXPECT_NE(ARWM, nullptr);
4199   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
4200   EXPECT_EQ(ARWM->getOperation(), RMWOp);
4201   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
4202   EXPECT_NE(St, nullptr);
4203   EXPECT_EQ(St->getPointerOperand(), VVal);
4204 
4205   Builder.CreateRetVoid();
4206   OMPBuilder.finalize();
4207   EXPECT_FALSE(verifyModule(*M, &errs()));
4208 }
4209 
4210 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
4211   OpenMPIRBuilder OMPBuilder(*M);
4212   OMPBuilder.initialize();
4213   F->setName("func");
4214   IRBuilder<> Builder(BB);
4215 
4216   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4217 
4218   LLVMContext &Ctx = M->getContext();
4219   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4220   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4221   XVal->setName("x");
4222   StoreInst *Init =
4223       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4224 
4225   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
4226   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
4227   // V and R are not used in atomic compare
4228   OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
4229   OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
4230   AtomicOrdering AO = AtomicOrdering::Monotonic;
4231   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4232   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4233   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4234   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4235 
4236   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4237       Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
4238   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4239       Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
4240   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4241       Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));
4242 
4243   BasicBlock *EntryBB = BB;
4244   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4245   EXPECT_EQ(EntryBB->size(), 5U);
4246 
4247   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4248   EXPECT_NE(ARWM1, nullptr);
4249   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4250   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4251   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4252 
4253   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
4254   EXPECT_NE(ARWM2, nullptr);
4255   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4256   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4257   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
4258 
4259   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
4260   EXPECT_NE(AXCHG, nullptr);
4261   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
4262   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
4263   EXPECT_EQ(AXCHG->getNewValOperand(), D);
4264 
4265   Builder.CreateRetVoid();
4266   OMPBuilder.finalize();
4267   EXPECT_FALSE(verifyModule(*M, &errs()));
4268 }
4269 
4270 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
4271   OpenMPIRBuilder OMPBuilder(*M);
4272   OMPBuilder.initialize();
4273   F->setName("func");
4274   IRBuilder<> Builder(BB);
4275 
4276   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4277 
4278   LLVMContext &Ctx = M->getContext();
4279   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4280   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4281   XVal->setName("x");
4282   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4283   VVal->setName("v");
4284   AllocaInst *RVal = Builder.CreateAlloca(Int32);
4285   RVal->setName("r");
4286 
4287   StoreInst *Init =
4288       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4289 
4290   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
4291   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4292   OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
4293   OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
4294   OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};
4295 
4296   AtomicOrdering AO = AtomicOrdering::Monotonic;
4297   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4298   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4299   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4300   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4301 
4302   // { cond-update-stmt v = x; }
4303   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4304       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4305       /* IsPostfixUpdate */ false,
4306       /* IsFailOnly */ false));
4307   // { v = x; cond-update-stmt }
4308   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4309       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4310       /* IsPostfixUpdate */ true,
4311       /* IsFailOnly */ false));
4312   // if(x == e) { x = d; } else { v = x; }
4313   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4314       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4315       /* IsPostfixUpdate */ false,
4316       /* IsFailOnly */ true));
4317   // { r = x == e; if(r) { x = d; } }
4318   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4319       Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4320       /* IsPostfixUpdate */ false,
4321       /* IsFailOnly */ false));
4322   // { r = x == e; if(r) { x = d; } else { v = x; } }
4323   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4324       Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4325       /* IsPostfixUpdate */ false,
4326       /* IsFailOnly */ true));
4327 
4328   // { v = x; cond-update-stmt }
4329   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4330       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true,
4331       /* IsPostfixUpdate */ true,
4332       /* IsFailOnly */ false));
4333   // { cond-update-stmt v = x; }
4334   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4335       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false,
4336       /* IsPostfixUpdate */ false,
4337       /* IsFailOnly */ false));
4338 
4339   BasicBlock *EntryBB = BB;
4340   EXPECT_EQ(EntryBB->getParent()->size(), 5U);
4341   BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode());
4342   EXPECT_NE(Cont1, nullptr);
4343   BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode());
4344   EXPECT_NE(Exit1, nullptr);
4345   BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode());
4346   EXPECT_NE(Cont2, nullptr);
4347   BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode());
4348   EXPECT_NE(Exit2, nullptr);
4349 
4350   AtomicCmpXchgInst *CmpXchg1 =
4351       dyn_cast<AtomicCmpXchgInst>(Init->getNextNode());
4352   EXPECT_NE(CmpXchg1, nullptr);
4353   EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal);
4354   EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr);
4355   EXPECT_EQ(CmpXchg1->getNewValOperand(), D);
4356   ExtractValueInst *ExtVal1 =
4357       dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode());
4358   EXPECT_NE(ExtVal1, nullptr);
4359   EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1);
4360   EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U));
4361   ExtractValueInst *ExtVal2 =
4362       dyn_cast<ExtractValueInst>(ExtVal1->getNextNode());
4363   EXPECT_NE(ExtVal2, nullptr);
4364   EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1);
4365   EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U));
4366   SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode());
4367   EXPECT_NE(Sel1, nullptr);
4368   EXPECT_EQ(Sel1->getCondition(), ExtVal2);
4369   EXPECT_EQ(Sel1->getTrueValue(), Expr);
4370   EXPECT_EQ(Sel1->getFalseValue(), ExtVal1);
4371   StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode());
4372   EXPECT_NE(Store1, nullptr);
4373   EXPECT_EQ(Store1->getPointerOperand(), VVal);
4374   EXPECT_EQ(Store1->getValueOperand(), Sel1);
4375 
4376   AtomicCmpXchgInst *CmpXchg2 =
4377       dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode());
4378   EXPECT_NE(CmpXchg2, nullptr);
4379   EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal);
4380   EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr);
4381   EXPECT_EQ(CmpXchg2->getNewValOperand(), D);
4382   ExtractValueInst *ExtVal3 =
4383       dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode());
4384   EXPECT_NE(ExtVal3, nullptr);
4385   EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2);
4386   EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U));
4387   StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode());
4388   EXPECT_NE(Store2, nullptr);
4389   EXPECT_EQ(Store2->getPointerOperand(), VVal);
4390   EXPECT_EQ(Store2->getValueOperand(), ExtVal3);
4391 
4392   AtomicCmpXchgInst *CmpXchg3 =
4393       dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode());
4394   EXPECT_NE(CmpXchg3, nullptr);
4395   EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal);
4396   EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr);
4397   EXPECT_EQ(CmpXchg3->getNewValOperand(), D);
4398   ExtractValueInst *ExtVal4 =
4399       dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode());
4400   EXPECT_NE(ExtVal4, nullptr);
4401   EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3);
4402   EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U));
4403   ExtractValueInst *ExtVal5 =
4404       dyn_cast<ExtractValueInst>(ExtVal4->getNextNode());
4405   EXPECT_NE(ExtVal5, nullptr);
4406   EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3);
4407   EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U));
4408   BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode());
4409   EXPECT_NE(Br1, nullptr);
4410   EXPECT_EQ(Br1->isConditional(), true);
4411   EXPECT_EQ(Br1->getCondition(), ExtVal5);
4412   EXPECT_EQ(Br1->getSuccessor(0), Exit1);
4413   EXPECT_EQ(Br1->getSuccessor(1), Cont1);
4414 
4415   StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front());
4416   EXPECT_NE(Store3, nullptr);
4417   EXPECT_EQ(Store3->getPointerOperand(), VVal);
4418   EXPECT_EQ(Store3->getValueOperand(), ExtVal4);
4419   BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode());
4420   EXPECT_NE(Br2, nullptr);
4421   EXPECT_EQ(Br2->isUnconditional(), true);
4422   EXPECT_EQ(Br2->getSuccessor(0), Exit1);
4423 
4424   AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front());
4425   EXPECT_NE(CmpXchg4, nullptr);
4426   EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal);
4427   EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr);
4428   EXPECT_EQ(CmpXchg4->getNewValOperand(), D);
4429   ExtractValueInst *ExtVal6 =
4430       dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode());
4431   EXPECT_NE(ExtVal6, nullptr);
4432   EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4);
4433   EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U));
4434   ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode());
4435   EXPECT_NE(ZExt1, nullptr);
4436   EXPECT_EQ(ZExt1->getDestTy(), Int32);
4437   StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode());
4438   EXPECT_NE(Store4, nullptr);
4439   EXPECT_EQ(Store4->getPointerOperand(), RVal);
4440   EXPECT_EQ(Store4->getValueOperand(), ZExt1);
4441 
4442   AtomicCmpXchgInst *CmpXchg5 =
4443       dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode());
4444   EXPECT_NE(CmpXchg5, nullptr);
4445   EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal);
4446   EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr);
4447   EXPECT_EQ(CmpXchg5->getNewValOperand(), D);
4448   ExtractValueInst *ExtVal7 =
4449       dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode());
4450   EXPECT_NE(ExtVal7, nullptr);
4451   EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5);
4452   EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U));
4453   ExtractValueInst *ExtVal8 =
4454       dyn_cast<ExtractValueInst>(ExtVal7->getNextNode());
4455   EXPECT_NE(ExtVal8, nullptr);
4456   EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5);
4457   EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U));
4458   BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode());
4459   EXPECT_NE(Br3, nullptr);
4460   EXPECT_EQ(Br3->isConditional(), true);
4461   EXPECT_EQ(Br3->getCondition(), ExtVal8);
4462   EXPECT_EQ(Br3->getSuccessor(0), Exit2);
4463   EXPECT_EQ(Br3->getSuccessor(1), Cont2);
4464 
4465   StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front());
4466   EXPECT_NE(Store5, nullptr);
4467   EXPECT_EQ(Store5->getPointerOperand(), VVal);
4468   EXPECT_EQ(Store5->getValueOperand(), ExtVal7);
4469   BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode());
4470   EXPECT_NE(Br4, nullptr);
4471   EXPECT_EQ(Br4->isUnconditional(), true);
4472   EXPECT_EQ(Br4->getSuccessor(0), Exit2);
4473 
4474   ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front());
4475   EXPECT_NE(ExtVal9, nullptr);
4476   EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5);
4477   EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U));
4478   ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode());
4479   EXPECT_NE(ZExt2, nullptr);
4480   EXPECT_EQ(ZExt2->getDestTy(), Int32);
4481   StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode());
4482   EXPECT_NE(Store6, nullptr);
4483   EXPECT_EQ(Store6->getPointerOperand(), RVal);
4484   EXPECT_EQ(Store6->getValueOperand(), ZExt2);
4485 
4486   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode());
4487   EXPECT_NE(ARWM1, nullptr);
4488   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4489   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4490   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4491   StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode());
4492   EXPECT_NE(Store7, nullptr);
4493   EXPECT_EQ(Store7->getPointerOperand(), VVal);
4494   EXPECT_EQ(Store7->getValueOperand(), ARWM1);
4495 
4496   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode());
4497   EXPECT_NE(ARWM2, nullptr);
4498   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4499   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4500   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max);
4501   CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode());
4502   EXPECT_NE(Cmp1, nullptr);
4503   EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT);
4504   EXPECT_EQ(Cmp1->getOperand(0), ARWM2);
4505   EXPECT_EQ(Cmp1->getOperand(1), Expr);
4506   SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode());
4507   EXPECT_NE(Sel2, nullptr);
4508   EXPECT_EQ(Sel2->getCondition(), Cmp1);
4509   EXPECT_EQ(Sel2->getTrueValue(), Expr);
4510   EXPECT_EQ(Sel2->getFalseValue(), ARWM2);
4511   StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode());
4512   EXPECT_NE(Store8, nullptr);
4513   EXPECT_EQ(Store8->getPointerOperand(), VVal);
4514   EXPECT_EQ(Store8->getValueOperand(), Sel2);
4515 
4516   Builder.CreateRetVoid();
4517   OMPBuilder.finalize();
4518   EXPECT_FALSE(verifyModule(*M, &errs()));
4519 }
4520 
4521 TEST_F(OpenMPIRBuilderTest, CreateTeams) {
4522   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4523   OpenMPIRBuilder OMPBuilder(*M);
4524   OMPBuilder.Config.IsTargetDevice = false;
4525   OMPBuilder.initialize();
4526   F->setName("func");
4527   IRBuilder<> Builder(BB);
4528 
4529   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
4530   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
4531   Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
4532 
4533   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4534     Builder.restoreIP(AllocaIP);
4535     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
4536                                                 "bodygen.alloca128");
4537 
4538     Builder.restoreIP(CodeGenIP);
4539     // Loading and storing captured pointer and values
4540     Builder.CreateStore(Val128, Local128);
4541     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
4542                                       "bodygen.load32");
4543 
4544     LoadInst *PrivLoad128 = Builder.CreateLoad(
4545         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
4546     Value *Cmp = Builder.CreateICmpNE(
4547         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
4548     Instruction *ThenTerm, *ElseTerm;
4549     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
4550                                   &ThenTerm, &ElseTerm);
4551     return Error::success();
4552   };
4553 
4554   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4555   ASSERT_EXPECTED_INIT(
4556       OpenMPIRBuilder::InsertPointTy, AfterIP,
4557       OMPBuilder.createTeams(Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
4558                              /*NumTeamsUpper=*/nullptr,
4559                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4560   Builder.restoreIP(AfterIP);
4561 
4562   OMPBuilder.finalize();
4563   Builder.CreateRetVoid();
4564 
4565   EXPECT_FALSE(verifyModule(*M, &errs()));
4566 
4567   CallInst *TeamsForkCall = dyn_cast<CallInst>(
4568       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)
4569           ->user_back());
4570 
4571   // Verify the Ident argument
4572   GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0));
4573   ASSERT_NE(Ident, nullptr);
4574   EXPECT_TRUE(Ident->hasInitializer());
4575   Constant *Initializer = Ident->getInitializer();
4576   GlobalVariable *SrcStrGlob =
4577       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
4578   ASSERT_NE(SrcStrGlob, nullptr);
4579   ConstantDataArray *SrcSrc =
4580       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
4581   ASSERT_NE(SrcSrc, nullptr);
4582 
4583   // Verify the outlined function signature.
4584   Function *OutlinedFn =
4585       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
4586   ASSERT_NE(OutlinedFn, nullptr);
4587   EXPECT_FALSE(OutlinedFn->isDeclaration());
4588   EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
4589   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
4590   EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
4591   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
4592             Builder.getPtrTy()); // captured args
4593 
4594   // Check for TruncInst and ICmpInst in the outlined function.
4595   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4596                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
4597   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4598                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
4599 }
4600 
4601 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
4602   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4603   OpenMPIRBuilder OMPBuilder(*M);
4604   OMPBuilder.Config.IsTargetDevice = false;
4605   OMPBuilder.initialize();
4606   F->setName("func");
4607   IRBuilder<> &Builder = OMPBuilder.Builder;
4608   Builder.SetInsertPoint(BB);
4609 
4610   Function *FakeFunction =
4611       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4612                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4613 
4614   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4615     Builder.restoreIP(CodeGenIP);
4616     Builder.CreateCall(FakeFunction, {});
4617     return Error::success();
4618   };
4619 
4620   // `F` has an argument - an integer, so we use that as the thread limit.
4621   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4622                        OMPBuilder.createTeams(
4623                            /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
4624                            /*NumTeamsUpper=*/nullptr,
4625                            /*ThreadLimit=*/F->arg_begin(),
4626                            /*IfExpr=*/nullptr));
4627   Builder.restoreIP(AfterIP);
4628 
4629   Builder.CreateRetVoid();
4630   OMPBuilder.finalize();
4631 
4632   ASSERT_FALSE(verifyModule(*M));
4633 
4634   CallInst *PushNumTeamsCallInst =
4635       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4636   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4637 
4638   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0));
4639   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0));
4640   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin());
4641 
4642   // Verifying that the next instruction to execute is kmpc_fork_teams
4643   BranchInst *BrInst =
4644       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4645   ASSERT_NE(BrInst, nullptr);
4646   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4647   Instruction *NextInstruction =
4648       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4649   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4650   ASSERT_NE(ForkTeamsCI, nullptr);
4651   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4652             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4653 }
4654 
4655 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
4656   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4657   OpenMPIRBuilder OMPBuilder(*M);
4658   OMPBuilder.Config.IsTargetDevice = false;
4659   OMPBuilder.initialize();
4660   F->setName("func");
4661   IRBuilder<> &Builder = OMPBuilder.Builder;
4662   Builder.SetInsertPoint(BB);
4663 
4664   Function *FakeFunction =
4665       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4666                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4667 
4668   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4669     Builder.restoreIP(CodeGenIP);
4670     Builder.CreateCall(FakeFunction, {});
4671     return Error::success();
4672   };
4673 
4674   // `F` already has an integer argument, so we use that as upper bound to
4675   // `num_teams`
4676   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4677                        OMPBuilder.createTeams(Builder, BodyGenCB,
4678                                               /*NumTeamsLower=*/nullptr,
4679                                               /*NumTeamsUpper=*/F->arg_begin(),
4680                                               /*ThreadLimit=*/nullptr,
4681                                               /*IfExpr=*/nullptr));
4682   Builder.restoreIP(AfterIP);
4683 
4684   Builder.CreateRetVoid();
4685   OMPBuilder.finalize();
4686 
4687   ASSERT_FALSE(verifyModule(*M));
4688 
4689   CallInst *PushNumTeamsCallInst =
4690       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4691   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4692 
4693   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin());
4694   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin());
4695   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4696 
4697   // Verifying that the next instruction to execute is kmpc_fork_teams
4698   BranchInst *BrInst =
4699       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4700   ASSERT_NE(BrInst, nullptr);
4701   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4702   Instruction *NextInstruction =
4703       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4704   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4705   ASSERT_NE(ForkTeamsCI, nullptr);
4706   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4707             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4708 }
4709 
4710 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
4711   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4712   OpenMPIRBuilder OMPBuilder(*M);
4713   OMPBuilder.Config.IsTargetDevice = false;
4714   OMPBuilder.initialize();
4715   F->setName("func");
4716   IRBuilder<> &Builder = OMPBuilder.Builder;
4717   Builder.SetInsertPoint(BB);
4718 
4719   Function *FakeFunction =
4720       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4721                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4722 
4723   Value *NumTeamsLower =
4724       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4725   Value *NumTeamsUpper =
4726       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4727 
4728   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4729     Builder.restoreIP(CodeGenIP);
4730     Builder.CreateCall(FakeFunction, {});
4731     return Error::success();
4732   };
4733 
4734   // `F` already has an integer argument, so we use that as upper bound to
4735   // `num_teams`
4736   ASSERT_EXPECTED_INIT(
4737       OpenMPIRBuilder::InsertPointTy, AfterIP,
4738       OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
4739                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4740   Builder.restoreIP(AfterIP);
4741 
4742   Builder.CreateRetVoid();
4743   OMPBuilder.finalize();
4744 
4745   ASSERT_FALSE(verifyModule(*M));
4746 
4747   CallInst *PushNumTeamsCallInst =
4748       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4749   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4750 
4751   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4752   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4753   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4754 
4755   // Verifying that the next instruction to execute is kmpc_fork_teams
4756   BranchInst *BrInst =
4757       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4758   ASSERT_NE(BrInst, nullptr);
4759   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4760   Instruction *NextInstruction =
4761       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4762   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4763   ASSERT_NE(ForkTeamsCI, nullptr);
4764   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4765             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4766 }
4767 
4768 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
4769   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4770   OpenMPIRBuilder OMPBuilder(*M);
4771   OMPBuilder.Config.IsTargetDevice = false;
4772   OMPBuilder.initialize();
4773   F->setName("func");
4774   IRBuilder<> &Builder = OMPBuilder.Builder;
4775   Builder.SetInsertPoint(BB);
4776 
4777   BasicBlock *CodegenBB = splitBB(Builder, true);
4778   Builder.SetInsertPoint(CodegenBB);
4779 
4780   // Generate values for `num_teams` and `thread_limit` using the first argument
4781   // of the testing function.
4782   Value *NumTeamsLower =
4783       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4784   Value *NumTeamsUpper =
4785       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4786   Value *ThreadLimit =
4787       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit");
4788 
4789   Function *FakeFunction =
4790       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4791                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4792 
4793   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4794     Builder.restoreIP(CodeGenIP);
4795     Builder.CreateCall(FakeFunction, {});
4796     return Error::success();
4797   };
4798 
4799   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4800   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4801                        OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4802                                               NumTeamsUpper, ThreadLimit,
4803                                               nullptr));
4804   Builder.restoreIP(AfterIP);
4805 
4806   Builder.CreateRetVoid();
4807   OMPBuilder.finalize();
4808 
4809   ASSERT_FALSE(verifyModule(*M));
4810 
4811   CallInst *PushNumTeamsCallInst =
4812       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4813   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4814 
4815   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4816   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4817   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit);
4818 
4819   // Verifying that the next instruction to execute is kmpc_fork_teams
4820   BranchInst *BrInst =
4821       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4822   ASSERT_NE(BrInst, nullptr);
4823   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4824   Instruction *NextInstruction =
4825       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4826   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4827   ASSERT_NE(ForkTeamsCI, nullptr);
4828   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4829             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4830 }
4831 
4832 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
4833   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4834   OpenMPIRBuilder OMPBuilder(*M);
4835   OMPBuilder.Config.IsTargetDevice = false;
4836   OMPBuilder.initialize();
4837   F->setName("func");
4838   IRBuilder<> &Builder = OMPBuilder.Builder;
4839   Builder.SetInsertPoint(BB);
4840 
4841   Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(),
4842                                      Builder.CreateAlloca(Builder.getInt1Ty()));
4843 
4844   Function *FakeFunction =
4845       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4846                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4847 
4848   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4849     Builder.restoreIP(CodeGenIP);
4850     Builder.CreateCall(FakeFunction, {});
4851     return Error::success();
4852   };
4853 
4854   // `F` already has an integer argument, so we use that as upper bound to
4855   // `num_teams`
4856   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4857                        OMPBuilder.createTeams(Builder, BodyGenCB,
4858                                               /*NumTeamsLower=*/nullptr,
4859                                               /*NumTeamsUpper=*/nullptr,
4860                                               /*ThreadLimit=*/nullptr, IfExpr));
4861   Builder.restoreIP(AfterIP);
4862 
4863   Builder.CreateRetVoid();
4864   OMPBuilder.finalize();
4865 
4866   ASSERT_FALSE(verifyModule(*M));
4867 
4868   CallInst *PushNumTeamsCallInst =
4869       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4870   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4871   Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2);
4872   Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3);
4873   Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4);
4874 
4875   // Check the lower_bound
4876   ASSERT_NE(NumTeamsLower, nullptr);
4877   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower);
4878   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4879   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr);
4880   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0));
4881   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4882 
4883   // Check the upper_bound
4884   ASSERT_NE(NumTeamsUpper, nullptr);
4885   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper);
4886   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4887   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr);
4888   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0));
4889   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4890 
4891   // Check thread_limit
4892   EXPECT_EQ(ThreadLimit, Builder.getInt32(0));
4893 }
4894 
4895 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
4896   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4897   OpenMPIRBuilder OMPBuilder(*M);
4898   OMPBuilder.Config.IsTargetDevice = false;
4899   OMPBuilder.initialize();
4900   F->setName("func");
4901   IRBuilder<> &Builder = OMPBuilder.Builder;
4902   Builder.SetInsertPoint(BB);
4903 
4904   Value *IfExpr = Builder.CreateLoad(
4905       Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty()));
4906   Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5));
4907   Value *NumTeamsUpper =
4908       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10));
4909   Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20));
4910 
4911   Function *FakeFunction =
4912       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4913                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4914 
4915   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4916     Builder.restoreIP(CodeGenIP);
4917     Builder.CreateCall(FakeFunction, {});
4918     return Error::success();
4919   };
4920 
4921   // `F` already has an integer argument, so we use that as upper bound to
4922   // `num_teams`
4923   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
4924                        OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4925                                               NumTeamsUpper, ThreadLimit,
4926                                               IfExpr));
4927   Builder.restoreIP(AfterIP);
4928 
4929   Builder.CreateRetVoid();
4930   OMPBuilder.finalize();
4931 
4932   ASSERT_FALSE(verifyModule(*M));
4933 
4934   CallInst *PushNumTeamsCallInst =
4935       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4936   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4937   Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2);
4938   Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3);
4939   Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4);
4940 
4941   // Get the boolean conversion of if expression
4942   ASSERT_EQ(IfExpr->getNumUses(), 1U);
4943   User *IfExprInst = IfExpr->user_back();
4944   ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst);
4945   ASSERT_NE(IfExprCmpInst, nullptr);
4946   EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE);
4947   EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr);
4948   EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0));
4949 
4950   // Check the lower_bound
4951   ASSERT_NE(NumTeamsLowerArg, nullptr);
4952   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg);
4953   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4954   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst);
4955   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower);
4956   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4957 
4958   // Check the upper_bound
4959   ASSERT_NE(NumTeamsUpperArg, nullptr);
4960   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg);
4961   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4962   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst);
4963   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper);
4964   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4965 
4966   // Check thread_limit
4967   EXPECT_EQ(ThreadLimitArg, ThreadLimit);
4968 }
4969 
4970 /// Returns the single instruction of InstTy type in BB that uses the value V.
4971 /// If there is more than one such instruction, returns null.
4972 template <typename InstTy>
4973 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
4974   InstTy *Result = nullptr;
4975   for (User *U : V->users()) {
4976     auto *Inst = dyn_cast<InstTy>(U);
4977     if (!Inst || Inst->getParent() != BB)
4978       continue;
4979     if (Result) {
4980       if (auto *SI = dyn_cast<StoreInst>(Inst)) {
4981         if (V == SI->getValueOperand())
4982           continue;
4983       } else {
4984         return nullptr;
4985       }
4986     }
4987     Result = Inst;
4988   }
4989   return Result;
4990 }
4991 
4992 /// Returns true if BB contains a simple binary reduction that loads a value
4993 /// from Accum, performs some binary operation with it, and stores it back to
4994 /// Accum.
4995 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
4996                                     Instruction::BinaryOps *OpCode = nullptr) {
4997   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
4998   if (!Store)
4999     return false;
5000   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
5001   if (!Stored)
5002     return false;
5003   if (OpCode && *OpCode != Stored->getOpcode())
5004     return false;
5005   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
5006   return Load && Load->getOperand(0) == Accum;
5007 }
5008 
5009 /// Returns true if BB contains a binary reduction that reduces V using a binary
5010 /// operator into an accumulator that is a function argument.
5011 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
5012   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
5013   if (!ReductionOp)
5014     return false;
5015 
5016   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
5017   if (!GlobalLoad)
5018     return false;
5019 
5020   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
5021   if (!Store)
5022     return false;
5023 
5024   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
5025          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
5026 }
5027 
5028 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
5029 /// [0, 1], respectively, and assigns results of these instructions to Zero and
5030 /// One. Returns true on success, false on failure or if such instructions are
5031 /// not unique among the users of Ptr.
5032 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
5033   Zero = nullptr;
5034   One = nullptr;
5035   for (User *U : Ptr->users()) {
5036     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
5037       if (GEP->getNumIndices() != 2)
5038         continue;
5039       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5040       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
5041       EXPECT_NE(FirstIdx, nullptr);
5042       EXPECT_NE(SecondIdx, nullptr);
5043 
5044       EXPECT_TRUE(FirstIdx->isZero());
5045       if (SecondIdx->isZero()) {
5046         if (Zero)
5047           return false;
5048         Zero = GEP;
5049       } else if (SecondIdx->isOne()) {
5050         if (One)
5051           return false;
5052         One = GEP;
5053       } else {
5054         return false;
5055       }
5056     }
5057   }
5058   return Zero != nullptr && One != nullptr;
5059 }
5060 
5061 static OpenMPIRBuilder::InsertPointTy
5062 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
5063              Value *&Result) {
5064   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5065   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
5066   return Builder.saveIP();
5067 }
5068 
5069 static OpenMPIRBuilder::InsertPointTy
5070 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
5071                    Value *RHS) {
5072   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5073   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
5074   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt,
5075                           AtomicOrdering::Monotonic);
5076   return Builder.saveIP();
5077 }
5078 
5079 static OpenMPIRBuilder::InsertPointTy
5080 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
5081              Value *&Result) {
5082   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5083   Result = Builder.CreateXor(LHS, RHS, "red.xor");
5084   return Builder.saveIP();
5085 }
5086 
5087 static OpenMPIRBuilder::InsertPointTy
5088 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
5089                    Value *RHS) {
5090   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
5091   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
5092   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt,
5093                           AtomicOrdering::Monotonic);
5094   return Builder.saveIP();
5095 }
5096 
5097 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
5098   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5099   OpenMPIRBuilder OMPBuilder(*M);
5100   OMPBuilder.Config.IsTargetDevice = false;
5101   OMPBuilder.initialize();
5102   F->setName("func");
5103   IRBuilder<> Builder(BB);
5104 
5105   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5106   Builder.CreateBr(EnterBB);
5107   Builder.SetInsertPoint(EnterBB);
5108   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5109 
5110   // Create variables to be reduced.
5111   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5112                               F->getEntryBlock().getFirstInsertionPt());
5113   Type *SumType = Builder.getFloatTy();
5114   Type *XorType = Builder.getInt32Ty();
5115   Value *SumReduced;
5116   Value *XorReduced;
5117   {
5118     IRBuilderBase::InsertPointGuard Guard(Builder);
5119     Builder.restoreIP(OuterAllocaIP);
5120     SumReduced = Builder.CreateAlloca(SumType);
5121     XorReduced = Builder.CreateAlloca(XorType);
5122   }
5123 
5124   // Store initial values of reductions into global variables.
5125   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5126   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5127 
5128   // The loop body computes two reductions:
5129   //   sum of (float) thread-id;
5130   //   xor of thread-id;
5131   // and store the result in global variables.
5132   InsertPointTy BodyIP, BodyAllocaIP;
5133   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
5134     IRBuilderBase::InsertPointGuard Guard(Builder);
5135     Builder.restoreIP(CodeGenIP);
5136 
5137     uint32_t StrSize;
5138     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5139     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5140     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5141     Value *SumLocal =
5142         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5143     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5144     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5145     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5146     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5147     Builder.CreateStore(Sum, SumReduced);
5148     Builder.CreateStore(Xor, XorReduced);
5149 
5150     BodyIP = Builder.saveIP();
5151     BodyAllocaIP = InnerAllocaIP;
5152     return Error::success();
5153   };
5154 
5155   // Privatization for reduction creates local copies of reduction variables and
5156   // initializes them to reduction-neutral values.
5157   Value *SumPrivatized;
5158   Value *XorPrivatized;
5159   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5160                     Value &Original, Value &Inner, Value *&ReplVal) {
5161     IRBuilderBase::InsertPointGuard Guard(Builder);
5162     Builder.restoreIP(InnerAllocaIP);
5163     if (&Original == SumReduced) {
5164       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5165       ReplVal = SumPrivatized;
5166     } else if (&Original == XorReduced) {
5167       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5168       ReplVal = XorPrivatized;
5169     } else {
5170       ReplVal = &Inner;
5171       return CodeGenIP;
5172     }
5173 
5174     Builder.restoreIP(CodeGenIP);
5175     if (&Original == SumReduced)
5176       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5177                           SumPrivatized);
5178     else if (&Original == XorReduced)
5179       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5180 
5181     return Builder.saveIP();
5182   };
5183 
5184   // Do nothing in finalization.
5185   auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
5186 
5187   ASSERT_EXPECTED_INIT(
5188       OpenMPIRBuilder::InsertPointTy, AfterIP,
5189       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
5190                                 /* IfCondition */ nullptr,
5191                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5192                                 /* IsCancellable */ false));
5193   Builder.restoreIP(AfterIP);
5194 
5195   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
5196       {SumType, SumReduced, SumPrivatized,
5197        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5198        /*ReductionGenClang=*/nullptr, sumAtomicReduction},
5199       {XorType, XorReduced, XorPrivatized,
5200        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5201        /*ReductionGenClang=*/nullptr, xorAtomicReduction}};
5202   OMPBuilder.Config.setIsGPU(false);
5203 
5204   bool ReduceVariableByRef[] = {false, false};
5205   ASSERT_THAT_EXPECTED(OMPBuilder.createReductions(BodyIP, BodyAllocaIP,
5206                                                    ReductionInfos,
5207                                                    ReduceVariableByRef),
5208                        Succeeded());
5209 
5210   Builder.restoreIP(AfterIP);
5211   Builder.CreateRetVoid();
5212 
5213   OMPBuilder.finalize(F);
5214 
5215   // The IR must be valid.
5216   EXPECT_FALSE(verifyModule(*M));
5217 
5218   // Outlining must have happened.
5219   SmallVector<CallInst *> ForkCalls;
5220   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5221             ForkCalls);
5222   ASSERT_EQ(ForkCalls.size(), 1u);
5223   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5224   Function *Outlined = dyn_cast<Function>(CalleeVal);
5225   EXPECT_NE(Outlined, nullptr);
5226 
5227   // Check that the lock variable was created with the expected name.
5228   GlobalVariable *LockVar =
5229       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
5230   EXPECT_NE(LockVar, nullptr);
5231 
5232   // Find the allocation of a local array that will be used to call the runtime
5233   // reduction function.
5234   BasicBlock &AllocBlock = Outlined->getEntryBlock();
5235   Value *LocalArray = nullptr;
5236   for (Instruction &I : AllocBlock) {
5237     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
5238       if (!Alloc->getAllocatedType()->isArrayTy() ||
5239           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
5240         continue;
5241       LocalArray = Alloc;
5242       break;
5243     }
5244   }
5245   ASSERT_NE(LocalArray, nullptr);
5246 
5247   // Find the call to the runtime reduction function.
5248   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
5249   Value *LocalArrayPtr = nullptr;
5250   Value *ReductionFnVal = nullptr;
5251   Value *SwitchArg = nullptr;
5252   for (Instruction &I : *BB) {
5253     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
5254       if (Call->getCalledFunction() !=
5255           OMPBuilder.getOrCreateRuntimeFunctionPtr(
5256               RuntimeFunction::OMPRTL___kmpc_reduce))
5257         continue;
5258       LocalArrayPtr = Call->getOperand(4);
5259       ReductionFnVal = Call->getOperand(5);
5260       SwitchArg = Call;
5261       break;
5262     }
5263   }
5264 
5265   // Check that the local array is passed to the function.
5266   ASSERT_NE(LocalArrayPtr, nullptr);
5267   EXPECT_EQ(LocalArrayPtr, LocalArray);
5268 
5269   // Find the GEP instructions preceding stores to the local array.
5270   Value *FirstArrayElemPtr = nullptr;
5271   Value *SecondArrayElemPtr = nullptr;
5272   EXPECT_EQ(LocalArray->getNumUses(), 3u);
5273   ASSERT_TRUE(
5274       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
5275 
5276   // Check that the values stored into the local array are privatized reduction
5277   // variables.
5278   auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>(
5279       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
5280   auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>(
5281       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
5282   ASSERT_NE(FirstPrivatized, nullptr);
5283   ASSERT_NE(SecondPrivatized, nullptr);
5284   ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr));
5285   EXPECT_TRUE(isSimpleBinaryReduction(
5286       FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5287   EXPECT_TRUE(isSimpleBinaryReduction(
5288       SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5289 
5290   // Check that the result of the runtime reduction call is used for further
5291   // dispatch.
5292   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
5293   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
5294   ASSERT_NE(Switch, nullptr);
5295   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
5296   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
5297   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
5298 
5299   // Non-atomic block contains reductions to the global reduction variable,
5300   // which is passed into the outlined function as an argument.
5301   Value *FirstLoad =
5302       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
5303   Value *SecondLoad =
5304       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
5305   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
5306   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
5307 
5308   // Atomic block also contains reductions to the global reduction variable.
5309   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
5310   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
5311   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
5312   auto *SecondAtomic =
5313       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
5314   ASSERT_NE(FirstAtomic, nullptr);
5315   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
5316   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5317   ASSERT_NE(SecondAtomic, nullptr);
5318   AtomicStorePointer = SecondAtomic->getPointerOperand();
5319   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5320 
5321   // Check that the separate reduction function also performs (non-atomic)
5322   // reductions after extracting reduction variables from its arguments.
5323   Function *ReductionFn = cast<Function>(ReductionFnVal);
5324   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
5325   Value *FirstLHSPtr;
5326   Value *SecondLHSPtr;
5327   ASSERT_TRUE(
5328       findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr));
5329   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5330   ASSERT_NE(Opaque, nullptr);
5331   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5332   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5333   ASSERT_NE(Opaque, nullptr);
5334   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5335 
5336   Value *FirstRHS;
5337   Value *SecondRHS;
5338   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
5339 }
5340 
5341 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
5342   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5343   OpenMPIRBuilder OMPBuilder(*M);
5344   OMPBuilder.Config.IsTargetDevice = false;
5345   OMPBuilder.initialize();
5346   F->setName("func");
5347   IRBuilder<> Builder(BB);
5348 
5349   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5350   Builder.CreateBr(EnterBB);
5351   Builder.SetInsertPoint(EnterBB);
5352   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5353 
5354   // Create variables to be reduced.
5355   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5356                               F->getEntryBlock().getFirstInsertionPt());
5357   Type *SumType = Builder.getFloatTy();
5358   Type *XorType = Builder.getInt32Ty();
5359   Value *SumReduced;
5360   Value *XorReduced;
5361   {
5362     IRBuilderBase::InsertPointGuard Guard(Builder);
5363     Builder.restoreIP(OuterAllocaIP);
5364     SumReduced = Builder.CreateAlloca(SumType);
5365     XorReduced = Builder.CreateAlloca(XorType);
5366   }
5367 
5368   // Store initial values of reductions into global variables.
5369   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5370   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5371 
5372   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
5373   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5374                             InsertPointTy CodeGenIP) {
5375     IRBuilderBase::InsertPointGuard Guard(Builder);
5376     Builder.restoreIP(CodeGenIP);
5377 
5378     uint32_t StrSize;
5379     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5380     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5381     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5382     Value *SumLocal =
5383         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5384     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5385     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5386     Builder.CreateStore(Sum, SumReduced);
5387 
5388     FirstBodyIP = Builder.saveIP();
5389     FirstBodyAllocaIP = InnerAllocaIP;
5390     return Error::success();
5391   };
5392 
5393   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
5394   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5395                              InsertPointTy CodeGenIP) {
5396     IRBuilderBase::InsertPointGuard Guard(Builder);
5397     Builder.restoreIP(CodeGenIP);
5398 
5399     uint32_t StrSize;
5400     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5401     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5402     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5403     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5404     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5405     Builder.CreateStore(Xor, XorReduced);
5406 
5407     SecondBodyIP = Builder.saveIP();
5408     SecondBodyAllocaIP = InnerAllocaIP;
5409     return Error::success();
5410   };
5411 
5412   // Privatization for reduction creates local copies of reduction variables and
5413   // initializes them to reduction-neutral values. The same privatization
5414   // callback is used for both loops, with dispatch based on the value being
5415   // privatized.
5416   Value *SumPrivatized;
5417   Value *XorPrivatized;
5418   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5419                     Value &Original, Value &Inner, Value *&ReplVal) {
5420     IRBuilderBase::InsertPointGuard Guard(Builder);
5421     Builder.restoreIP(InnerAllocaIP);
5422     if (&Original == SumReduced) {
5423       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5424       ReplVal = SumPrivatized;
5425     } else if (&Original == XorReduced) {
5426       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5427       ReplVal = XorPrivatized;
5428     } else {
5429       ReplVal = &Inner;
5430       return CodeGenIP;
5431     }
5432 
5433     Builder.restoreIP(CodeGenIP);
5434     if (&Original == SumReduced)
5435       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5436                           SumPrivatized);
5437     else if (&Original == XorReduced)
5438       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5439 
5440     return Builder.saveIP();
5441   };
5442 
5443   // Do nothing in finalization.
5444   auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
5445 
5446   ASSERT_EXPECTED_INIT(
5447       OpenMPIRBuilder::InsertPointTy, AfterIP1,
5448       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
5449                                 FiniCB, /* IfCondition */ nullptr,
5450                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5451                                 /* IsCancellable */ false));
5452   Builder.restoreIP(AfterIP1);
5453   ASSERT_EXPECTED_INIT(
5454       OpenMPIRBuilder::InsertPointTy, AfterIP2,
5455       OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP,
5456                                 SecondBodyGenCB, PrivCB, FiniCB,
5457                                 /* IfCondition */ nullptr,
5458                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5459                                 /* IsCancellable */ false));
5460   Builder.restoreIP(AfterIP2);
5461 
5462   OMPBuilder.Config.setIsGPU(false);
5463   bool ReduceVariableByRef[] = {false};
5464 
5465   ASSERT_THAT_EXPECTED(
5466       OMPBuilder.createReductions(
5467           FirstBodyIP, FirstBodyAllocaIP,
5468           {{SumType, SumReduced, SumPrivatized,
5469             /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5470             /*ReductionGenClang=*/nullptr, sumAtomicReduction}},
5471           ReduceVariableByRef),
5472       Succeeded());
5473   ASSERT_THAT_EXPECTED(
5474       OMPBuilder.createReductions(
5475           SecondBodyIP, SecondBodyAllocaIP,
5476           {{XorType, XorReduced, XorPrivatized,
5477             /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5478             /*ReductionGenClang=*/nullptr, xorAtomicReduction}},
5479           ReduceVariableByRef),
5480       Succeeded());
5481 
5482   Builder.restoreIP(AfterIP2);
5483   Builder.CreateRetVoid();
5484 
5485   OMPBuilder.finalize(F);
5486 
5487   // The IR must be valid.
5488   EXPECT_FALSE(verifyModule(*M));
5489 
5490   // Two different outlined functions must have been created.
5491   SmallVector<CallInst *> ForkCalls;
5492   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5493             ForkCalls);
5494   ASSERT_EQ(ForkCalls.size(), 2u);
5495   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5496   Function *FirstCallee = cast<Function>(CalleeVal);
5497   CalleeVal = ForkCalls[1]->getOperand(2);
5498   Function *SecondCallee = cast<Function>(CalleeVal);
5499   EXPECT_NE(FirstCallee, SecondCallee);
5500 
5501   // Two different reduction functions must have been created.
5502   SmallVector<CallInst *> ReduceCalls;
5503   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
5504             ReduceCalls);
5505   ASSERT_EQ(ReduceCalls.size(), 1u);
5506   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5507   ReduceCalls.clear();
5508   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
5509             OMPBuilder, ReduceCalls);
5510   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5511   EXPECT_NE(AddReduction, XorReduction);
5512 
5513   // Each reduction function does its own kind of reduction.
5514   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
5515   Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5516       AddReduction->getArg(0), FnReductionBB);
5517   ASSERT_NE(FirstLHSPtr, nullptr);
5518   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5519   ASSERT_NE(Opaque, nullptr);
5520   Instruction::BinaryOps Opcode = Instruction::FAdd;
5521   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5522 
5523   FnReductionBB = &XorReduction->getEntryBlock();
5524   Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5525       XorReduction->getArg(0), FnReductionBB);
5526   ASSERT_NE(FirstLHSPtr, nullptr);
5527   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5528   ASSERT_NE(Opaque, nullptr);
5529   Opcode = Instruction::Xor;
5530   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5531 }
5532 
5533 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
5534   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5535   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5536   OpenMPIRBuilder OMPBuilder(*M);
5537   OMPBuilder.initialize();
5538   F->setName("func");
5539   IRBuilder<> Builder(BB);
5540 
5541   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5542   Builder.CreateBr(EnterBB);
5543   Builder.SetInsertPoint(EnterBB);
5544   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5545 
5546   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5547   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5548 
5549   auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
5550   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5551     return Error::success();
5552   };
5553   SectionCBVector.push_back(SectionCB);
5554 
5555   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5556                    llvm::Value &, llvm::Value &Val,
5557                    llvm::Value *&ReplVal) { return CodeGenIP; };
5558   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5559                                     F->getEntryBlock().getFirstInsertionPt());
5560   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5561                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5562                                                  PrivCB, FiniCB, false, false));
5563   Builder.restoreIP(AfterIP);
5564   Builder.CreateRetVoid(); // Required at the end of the function
5565   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
5566   EXPECT_FALSE(verifyModule(*M, &errs()));
5567 }
5568 
5569 TEST_F(OpenMPIRBuilderTest, CreateSections) {
5570   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5571   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5572   OpenMPIRBuilder OMPBuilder(*M);
5573   OMPBuilder.initialize();
5574   F->setName("func");
5575   IRBuilder<> Builder(BB);
5576 
5577   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5578   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5579   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5580 
5581   BasicBlock *SwitchBB = nullptr;
5582   AllocaInst *PrivAI = nullptr;
5583   SwitchInst *Switch = nullptr;
5584 
5585   unsigned NumBodiesGenerated = 0;
5586   unsigned NumFiniCBCalls = 0;
5587   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
5588 
5589   auto FiniCB = [&](InsertPointTy IP) {
5590     ++NumFiniCBCalls;
5591     BasicBlock *IPBB = IP.getBlock();
5592     EXPECT_NE(IPBB->end(), IP.getPoint());
5593   };
5594 
5595   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5596     ++NumBodiesGenerated;
5597     CaseBBs.push_back(CodeGenIP.getBlock());
5598     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
5599     Builder.restoreIP(CodeGenIP);
5600     Builder.CreateStore(F->arg_begin(), PrivAI);
5601     Value *PrivLoad =
5602         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
5603     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
5604     return Error::success();
5605   };
5606   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5607                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
5608     // TODO: Privatization not implemented yet
5609     return CodeGenIP;
5610   };
5611 
5612   SectionCBVector.push_back(SectionCB);
5613   SectionCBVector.push_back(SectionCB);
5614 
5615   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5616                                     F->getEntryBlock().getFirstInsertionPt());
5617   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5618                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5619                                                  PrivCB, FINICB_WRAPPER(FiniCB),
5620                                                  false, false));
5621   Builder.restoreIP(AfterIP);
5622   Builder.CreateRetVoid(); // Required at the end of the function
5623 
5624   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
5625   // loop's exit BB
5626   BasicBlock *ForExitBB =
5627       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
5628   EXPECT_NE(ForExitBB, nullptr);
5629 
5630   EXPECT_NE(PrivAI, nullptr);
5631   Function *OutlinedFn = PrivAI->getFunction();
5632   EXPECT_EQ(F, OutlinedFn);
5633   EXPECT_FALSE(verifyModule(*M, &errs()));
5634   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
5635 
5636   BasicBlock *LoopPreheaderBB =
5637       OutlinedFn->getEntryBlock().getSingleSuccessor();
5638   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
5639   // iterator/counter
5640   bool FoundForInit = false;
5641   for (Instruction &Inst : *LoopPreheaderBB) {
5642     if (isa<CallInst>(Inst)) {
5643       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5644           "__kmpc_for_static_init_4u") {
5645         FoundForInit = true;
5646       }
5647     }
5648   }
5649   EXPECT_EQ(FoundForInit, true);
5650 
5651   bool FoundForExit = false;
5652   bool FoundBarrier = false;
5653   for (Instruction &Inst : *ForExitBB) {
5654     if (isa<CallInst>(Inst)) {
5655       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5656           "__kmpc_for_static_fini") {
5657         FoundForExit = true;
5658       }
5659       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5660           "__kmpc_barrier") {
5661         FoundBarrier = true;
5662       }
5663       if (FoundForExit && FoundBarrier)
5664         break;
5665     }
5666   }
5667   EXPECT_EQ(FoundForExit, true);
5668   EXPECT_EQ(FoundBarrier, true);
5669 
5670   EXPECT_NE(SwitchBB, nullptr);
5671   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
5672   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
5673   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
5674   EXPECT_EQ(Switch->getNumCases(), 2U);
5675 
5676   EXPECT_EQ(CaseBBs.size(), 2U);
5677   for (auto *&CaseBB : CaseBBs) {
5678     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
5679   }
5680 
5681   ASSERT_EQ(NumBodiesGenerated, 2U);
5682   ASSERT_EQ(NumFiniCBCalls, 1U);
5683   EXPECT_FALSE(verifyModule(*M, &errs()));
5684 }
5685 
5686 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
5687   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5688   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5689   OpenMPIRBuilder OMPBuilder(*M);
5690   OMPBuilder.initialize();
5691   F->setName("func");
5692   IRBuilder<> Builder(BB);
5693 
5694   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5695   Builder.CreateBr(EnterBB);
5696   Builder.SetInsertPoint(EnterBB);
5697   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5698 
5699   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5700                                     F->getEntryBlock().getFirstInsertionPt());
5701   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5702   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5703                    llvm::Value &, llvm::Value &Val,
5704                    llvm::Value *&ReplVal) { return CodeGenIP; };
5705   auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
5706 
5707   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
5708                        OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5709                                                  PrivCB, FiniCB, false, true));
5710   Builder.restoreIP(AfterIP);
5711   Builder.CreateRetVoid(); // Required at the end of the function
5712   for (auto &Inst : instructions(*F)) {
5713     EXPECT_FALSE(isa<CallInst>(Inst) &&
5714                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5715                      "__kmpc_barrier" &&
5716                  "call to function __kmpc_barrier found with nowait");
5717   }
5718 }
5719 
5720 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
5721   OpenMPIRBuilder OMPBuilder(*M);
5722   OMPBuilder.initialize();
5723 
5724   IRBuilder<> Builder(BB);
5725 
5726   SmallVector<uint64_t> Mappings = {0, 1};
5727   GlobalVariable *OffloadMaptypesGlobal =
5728       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
5729   EXPECT_FALSE(M->global_empty());
5730   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
5731   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5732   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5733   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5734   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5735   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5736   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
5737   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
5738   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
5739   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
5740   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
5741   EXPECT_EQ(MappingInit, CA);
5742 }
5743 
5744 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
5745   OpenMPIRBuilder OMPBuilder(*M);
5746   OMPBuilder.initialize();
5747 
5748   IRBuilder<> Builder(BB);
5749 
5750   uint32_t StrSize;
5751   Constant *Cst1 =
5752       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5753   Constant *Cst2 =
5754       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5755   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5756 
5757   GlobalVariable *OffloadMaptypesGlobal =
5758       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
5759   EXPECT_FALSE(M->global_empty());
5760   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
5761   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5762   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5763   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5764   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5765   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5766   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
5767   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
5768 
5769   GlobalVariable *Name1Gbl =
5770       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
5771   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
5772   ConstantDataArray *Name1GblCA =
5773       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
5774   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
5775 
5776   GlobalVariable *Name2Gbl =
5777       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
5778   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
5779   ConstantDataArray *Name2GblCA =
5780       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
5781   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
5782 
5783   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
5784   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
5785 }
5786 
5787 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
5788   OpenMPIRBuilder OMPBuilder(*M);
5789   OMPBuilder.initialize();
5790   F->setName("func");
5791   IRBuilder<> Builder(BB);
5792 
5793   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5794 
5795   unsigned TotalNbOperand = 2;
5796 
5797   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5798   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5799                                     F->getEntryBlock().getFirstInsertionPt());
5800   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5801   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
5802   EXPECT_NE(MapperAllocas.Args, nullptr);
5803   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
5804   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
5805   ArrayType *ArrType =
5806       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
5807   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5808   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
5809                   ->getArrayElementType()
5810                   ->isPointerTy());
5811 
5812   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
5813   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
5814   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5815   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
5816                   ->getArrayElementType()
5817                   ->isPointerTy());
5818 
5819   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
5820   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
5821   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5822   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
5823                   ->getArrayElementType()
5824                   ->isIntegerTy(64));
5825 }
5826 
5827 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
5828   OpenMPIRBuilder OMPBuilder(*M);
5829   OMPBuilder.initialize();
5830   F->setName("func");
5831   IRBuilder<> Builder(BB);
5832   LLVMContext &Ctx = M->getContext();
5833 
5834   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5835 
5836   unsigned TotalNbOperand = 2;
5837 
5838   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5839   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5840                                     F->getEntryBlock().getFirstInsertionPt());
5841   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5842 
5843   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
5844       omp::OMPRTL___tgt_target_data_begin_mapper);
5845 
5846   SmallVector<uint64_t> Flags = {0, 2};
5847 
5848   uint32_t StrSize;
5849   Constant *SrcLocCst =
5850       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
5851   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
5852 
5853   Constant *Cst1 =
5854       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5855   Constant *Cst2 =
5856       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5857   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5858 
5859   GlobalVariable *Maptypes =
5860       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
5861   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
5862       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
5863       /*Idx0=*/0, /*Idx1=*/0);
5864 
5865   GlobalVariable *Mapnames =
5866       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
5867   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
5868       ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames,
5869       /*Idx0=*/0, /*Idx1=*/0);
5870 
5871   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
5872                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
5873                             TotalNbOperand);
5874 
5875   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
5876   EXPECT_NE(MapperCall, nullptr);
5877   EXPECT_EQ(MapperCall->arg_size(), 9U);
5878   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
5879             "__tgt_target_data_begin_mapper");
5880   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
5881   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
5882   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
5883 
5884   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
5885   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
5886   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
5887 }
5888 
5889 TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
5890   OpenMPIRBuilder OMPBuilder(*M);
5891   OMPBuilder.initialize();
5892   F->setName("func");
5893   IRBuilder<> Builder(BB);
5894   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5895 
5896   int64_t DeviceID = 2;
5897 
5898   AllocaInst *Val1 =
5899       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5900   ASSERT_NE(Val1, nullptr);
5901 
5902   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5903                                     F->getEntryBlock().getFirstInsertionPt());
5904 
5905   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5906   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5907   auto GenMapInfoCB =
5908       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5909     // Get map clause information.
5910     Builder.restoreIP(codeGenIP);
5911 
5912     CombinedInfo.BasePointers.emplace_back(Val1);
5913     CombinedInfo.Pointers.emplace_back(Val1);
5914     CombinedInfo.DevicePointers.emplace_back(
5915         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5916     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5917     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1));
5918     uint32_t temp;
5919     CombinedInfo.Names.emplace_back(
5920         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5921     return CombinedInfo;
5922   };
5923 
5924   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5925       /*RequiresDevicePointerInfo=*/false,
5926       /*SeparateBeginEndCalls=*/true);
5927 
5928   OMPBuilder.Config.setIsGPU(true);
5929 
5930   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper;
5931   ASSERT_EXPECTED_INIT(
5932       OpenMPIRBuilder::InsertPointTy, AfterIP,
5933       OMPBuilder.createTargetData(
5934           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5935           /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5936   Builder.restoreIP(AfterIP);
5937 
5938   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5939   EXPECT_NE(TargetDataCall, nullptr);
5940   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5941   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5942             "__tgt_target_data_begin_mapper");
5943   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5944   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5945   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5946 
5947   Builder.CreateRetVoid();
5948   EXPECT_FALSE(verifyModule(*M, &errs()));
5949 }
5950 
5951 TEST_F(OpenMPIRBuilderTest, TargetExitData) {
5952   OpenMPIRBuilder OMPBuilder(*M);
5953   OMPBuilder.initialize();
5954   F->setName("func");
5955   IRBuilder<> Builder(BB);
5956   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5957 
5958   int64_t DeviceID = 2;
5959 
5960   AllocaInst *Val1 =
5961       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5962   ASSERT_NE(Val1, nullptr);
5963 
5964   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5965                                     F->getEntryBlock().getFirstInsertionPt());
5966 
5967   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5968   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5969   auto GenMapInfoCB =
5970       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5971     // Get map clause information.
5972     Builder.restoreIP(codeGenIP);
5973 
5974     CombinedInfo.BasePointers.emplace_back(Val1);
5975     CombinedInfo.Pointers.emplace_back(Val1);
5976     CombinedInfo.DevicePointers.emplace_back(
5977         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5978     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5979     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2));
5980     uint32_t temp;
5981     CombinedInfo.Names.emplace_back(
5982         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5983     return CombinedInfo;
5984   };
5985 
5986   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5987       /*RequiresDevicePointerInfo=*/false,
5988       /*SeparateBeginEndCalls=*/true);
5989 
5990   OMPBuilder.Config.setIsGPU(true);
5991 
5992   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper;
5993   ASSERT_EXPECTED_INIT(
5994       OpenMPIRBuilder::InsertPointTy, AfterIP,
5995       OMPBuilder.createTargetData(
5996           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5997           /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5998   Builder.restoreIP(AfterIP);
5999 
6000   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
6001   EXPECT_NE(TargetDataCall, nullptr);
6002   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6003   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6004             "__tgt_target_data_end_mapper");
6005   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6006   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6007   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6008 
6009   Builder.CreateRetVoid();
6010   EXPECT_FALSE(verifyModule(*M, &errs()));
6011 }
6012 
6013 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
6014   OpenMPIRBuilder OMPBuilder(*M);
6015   OMPBuilder.initialize();
6016   F->setName("func");
6017   IRBuilder<> Builder(BB);
6018   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6019 
6020   int64_t DeviceID = 2;
6021 
6022   AllocaInst *Val1 =
6023       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
6024   ASSERT_NE(Val1, nullptr);
6025 
6026   AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy());
6027   ASSERT_NE(Val2, nullptr);
6028 
6029   AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy());
6030   ASSERT_NE(Val3, nullptr);
6031 
6032   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
6033                                     F->getEntryBlock().getFirstInsertionPt());
6034 
6035   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6036   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
6037   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6038   auto GenMapInfoCB =
6039       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
6040     // Get map clause information.
6041     Builder.restoreIP(codeGenIP);
6042     uint32_t temp;
6043 
6044     CombinedInfo.BasePointers.emplace_back(Val1);
6045     CombinedInfo.Pointers.emplace_back(Val1);
6046     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None);
6047     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
6048     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3));
6049     CombinedInfo.Names.emplace_back(
6050         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6051 
6052     CombinedInfo.BasePointers.emplace_back(Val2);
6053     CombinedInfo.Pointers.emplace_back(Val2);
6054     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
6055     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
6056     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
6057     CombinedInfo.Names.emplace_back(
6058         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6059 
6060     CombinedInfo.BasePointers.emplace_back(Val3);
6061     CombinedInfo.Pointers.emplace_back(Val3);
6062     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address);
6063     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
6064     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
6065     CombinedInfo.Names.emplace_back(
6066         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
6067     return CombinedInfo;
6068   };
6069 
6070   llvm::OpenMPIRBuilder::TargetDataInfo Info(
6071       /*RequiresDevicePointerInfo=*/true,
6072       /*SeparateBeginEndCalls=*/true);
6073 
6074   OMPBuilder.Config.setIsGPU(true);
6075 
6076   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
6077   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
6078     if (BodyGenType == BodyGenTy::Priv) {
6079       EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u);
6080       Builder.restoreIP(CodeGenIP);
6081       CallInst *TargetDataCall =
6082           dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
6083       EXPECT_NE(TargetDataCall, nullptr);
6084       EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6085       EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6086                 "__tgt_target_data_begin_mapper");
6087       EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6088       EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6089       EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6090 
6091       LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode());
6092       EXPECT_NE(LI, nullptr);
6093       StoreInst *SI = dyn_cast<StoreInst>(&BB->back());
6094       EXPECT_NE(SI, nullptr);
6095       EXPECT_EQ(SI->getValueOperand(), LI);
6096       EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second);
6097       EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second));
6098       EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second));
6099       Builder.CreateStore(Builder.getInt32(99), Val1);
6100     }
6101     return Builder.saveIP();
6102   };
6103 
6104   ASSERT_EXPECTED_INIT(
6105       OpenMPIRBuilder::InsertPointTy, TargetDataIP1,
6106       OMPBuilder.createTargetData(
6107           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6108           /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB));
6109   Builder.restoreIP(TargetDataIP1);
6110 
6111   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
6112   EXPECT_NE(TargetDataCall, nullptr);
6113   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
6114   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
6115             "__tgt_target_data_end_mapper");
6116   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
6117   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
6118   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
6119 
6120   // Check that BodyGenCB is still made when IsTargetDevice is set to true.
6121   OMPBuilder.Config.setIsTargetDevice(true);
6122   bool CheckDevicePassBodyGen = false;
6123   auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
6124     CheckDevicePassBodyGen = true;
6125     Builder.restoreIP(CodeGenIP);
6126     CallInst *TargetDataCall =
6127         dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
6128     // Make sure no begin_mapper call is present for device pass.
6129     EXPECT_EQ(TargetDataCall, nullptr);
6130     return Builder.saveIP();
6131   };
6132   ASSERT_EXPECTED_INIT(
6133       OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
6134       OMPBuilder.createTargetData(
6135           Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
6136           /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB));
6137   Builder.restoreIP(TargetDataIP2);
6138   EXPECT_TRUE(CheckDevicePassBodyGen);
6139 
6140   Builder.CreateRetVoid();
6141   EXPECT_FALSE(verifyModule(*M, &errs()));
6142 }
6143 
6144 namespace {
6145 // Some basic handling of argument mapping for the moment
6146 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder,
6147                            llvm::SmallVectorImpl<llvm::Value *> &Args,
6148                            llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) {
6149   for (auto Arg : Args) {
6150     CombinedInfo.BasePointers.emplace_back(Arg);
6151     CombinedInfo.Pointers.emplace_back(Arg);
6152     uint32_t SrcLocStrSize;
6153     CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr(
6154         "Unknown loc - stub implementation", SrcLocStrSize));
6155     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(
6156         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
6157         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
6158         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM));
6159     CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64(
6160         OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType())));
6161   }
6162 }
6163 } // namespace
6164 
6165 TEST_F(OpenMPIRBuilderTest, TargetRegion) {
6166   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6167   OpenMPIRBuilder OMPBuilder(*M);
6168   OMPBuilder.initialize();
6169   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
6170   OMPBuilder.setConfig(Config);
6171   F->setName("func");
6172   IRBuilder<> Builder(BB);
6173   auto Int32Ty = Builder.getInt32Ty();
6174 
6175   AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr");
6176   AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr");
6177   AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr");
6178 
6179   Builder.CreateStore(Builder.getInt32(10), APtr);
6180   Builder.CreateStore(Builder.getInt32(20), BPtr);
6181   auto BodyGenCB = [&](InsertPointTy AllocaIP,
6182                        InsertPointTy CodeGenIP) -> InsertPointTy {
6183     Builder.restoreIP(CodeGenIP);
6184     LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr);
6185     LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr);
6186     Value *Sum = Builder.CreateAdd(AVal, BVal);
6187     Builder.CreateStore(Sum, CPtr);
6188     return Builder.saveIP();
6189   };
6190 
6191   llvm::SmallVector<llvm::Value *> Inputs;
6192   Inputs.push_back(APtr);
6193   Inputs.push_back(BPtr);
6194   Inputs.push_back(CPtr);
6195 
6196   auto SimpleArgAccessorCB =
6197       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6198           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6199           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6200         if (!OMPBuilder.Config.isTargetDevice()) {
6201           RetVal = cast<llvm::Value>(&Arg);
6202           return CodeGenIP;
6203         }
6204 
6205         Builder.restoreIP(AllocaIP);
6206 
6207         llvm::Value *Addr = Builder.CreateAlloca(
6208             Arg.getType()->isPointerTy()
6209                 ? Arg.getType()
6210                 : Type::getInt64Ty(Builder.getContext()),
6211             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6212         llvm::Value *AddrAscast =
6213             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6214         Builder.CreateStore(&Arg, AddrAscast);
6215 
6216         Builder.restoreIP(CodeGenIP);
6217 
6218         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6219 
6220         return Builder.saveIP();
6221       };
6222 
6223   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6224   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6225       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6226     CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos);
6227     return CombinedInfos;
6228   };
6229 
6230   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
6231   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
6232   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6233       /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
6234       /*MinThreads=*/0};
6235 
6236   ASSERT_EXPECTED_INIT(
6237       OpenMPIRBuilder::InsertPointTy, AfterIP,
6238       OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
6239                               Builder.saveIP(), EntryInfo, DefaultAttrs, Inputs,
6240                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6241   Builder.restoreIP(AfterIP);
6242   OMPBuilder.finalize();
6243   Builder.CreateRetVoid();
6244 
6245   // Check the kernel launch sequence
6246   auto Iter = F->getEntryBlock().rbegin();
6247   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
6248   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
6249   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
6250   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
6251   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
6252 
6253   // Check that the kernel launch function is called
6254   Function *KernelLaunchFunc = Call->getCalledFunction();
6255   EXPECT_NE(KernelLaunchFunc, nullptr);
6256   StringRef FunctionName = KernelLaunchFunc->getName();
6257   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
6258 
6259   // Check the fallback call
6260   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
6261   Iter = FallbackBlock->rbegin();
6262   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
6263   // 'F' has a dummy DISubprogram which causes OutlinedFunc to also
6264   // have a DISubprogram. In this case, the call to OutlinedFunc needs
6265   // to have a debug loc, otherwise verifier will complain.
6266   FCall->setDebugLoc(DL);
6267   EXPECT_NE(FCall, nullptr);
6268 
6269   // Check that the correct aguments are passed in
6270   for (auto ArgInput : zip(FCall->args(), Inputs)) {
6271     EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput));
6272   }
6273 
6274   // Check that the outlined function exists with the expected prefix
6275   Function *OutlinedFunc = FCall->getCalledFunction();
6276   EXPECT_NE(OutlinedFunc, nullptr);
6277   StringRef FunctionName2 = OutlinedFunc->getName();
6278   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
6279 
6280   EXPECT_FALSE(verifyModule(*M, &errs()));
6281 }
6282 
6283 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
6284   OpenMPIRBuilder OMPBuilder(*M);
6285   OMPBuilder.setConfig(
6286       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6287   OMPBuilder.initialize();
6288 
6289   F->setName("func");
6290   IRBuilder<> Builder(BB);
6291   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6292 
6293   LoadInst *Value = nullptr;
6294   StoreInst *TargetStore = nullptr;
6295   llvm::SmallVector<llvm::Value *, 2> CapturedArgs = {
6296       Constant::getNullValue(PointerType::get(Ctx, 0)),
6297       Constant::getNullValue(PointerType::get(Ctx, 0))};
6298 
6299   auto SimpleArgAccessorCB =
6300       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6301           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6302           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6303         if (!OMPBuilder.Config.isTargetDevice()) {
6304           RetVal = cast<llvm::Value>(&Arg);
6305           return CodeGenIP;
6306         }
6307 
6308         Builder.restoreIP(AllocaIP);
6309 
6310         llvm::Value *Addr = Builder.CreateAlloca(
6311             Arg.getType()->isPointerTy()
6312                 ? Arg.getType()
6313                 : Type::getInt64Ty(Builder.getContext()),
6314             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6315         llvm::Value *AddrAscast =
6316             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6317         Builder.CreateStore(&Arg, AddrAscast);
6318 
6319         Builder.restoreIP(CodeGenIP);
6320 
6321         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6322 
6323         return Builder.saveIP();
6324       };
6325 
6326   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6327   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6328       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6329     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6330     return CombinedInfos;
6331   };
6332 
6333   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6334                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6335       -> OpenMPIRBuilder::InsertPointTy {
6336     Builder.restoreIP(CodeGenIP);
6337     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6338     TargetStore = Builder.CreateStore(Value, CapturedArgs[1]);
6339     return Builder.saveIP();
6340   };
6341 
6342   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6343                                    F->getEntryBlock().getFirstInsertionPt());
6344   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6345                                   /*Line=*/3, /*Count=*/0);
6346   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6347       /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
6348       /*MinThreads=*/0};
6349 
6350   ASSERT_EXPECTED_INIT(
6351       OpenMPIRBuilder::InsertPointTy, AfterIP,
6352       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6353                               EntryInfo, DefaultAttrs, CapturedArgs,
6354                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6355   Builder.restoreIP(AfterIP);
6356 
6357   Builder.CreateRetVoid();
6358   OMPBuilder.finalize();
6359 
6360   // Check outlined function
6361   EXPECT_FALSE(verifyModule(*M, &errs()));
6362   EXPECT_NE(TargetStore, nullptr);
6363   Function *OutlinedFn = TargetStore->getFunction();
6364   EXPECT_NE(F, OutlinedFn);
6365 
6366   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6367   // Account for the "implicit" first argument.
6368   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6369   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
6370   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6371   EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy());
6372 
6373   // Check entry block
6374   auto &EntryBlock = OutlinedFn->getEntryBlock();
6375   Instruction *Alloca1 = EntryBlock.getFirstNonPHI();
6376   EXPECT_NE(Alloca1, nullptr);
6377 
6378   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6379   auto *Store1 = Alloca1->getNextNode();
6380   EXPECT_TRUE(isa<StoreInst>(Store1));
6381   auto *Alloca2 = Store1->getNextNode();
6382   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6383   auto *Store2 = Alloca2->getNextNode();
6384   EXPECT_TRUE(isa<StoreInst>(Store2));
6385 
6386   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6387   EXPECT_NE(InitCall, nullptr);
6388   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6389   EXPECT_EQ(InitCall->arg_size(), 2U);
6390   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6391   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6392   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6393   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6394   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6395   auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6396   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6397             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6398   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6399             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6400   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6401             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6402 
6403   auto *EntryBlockBranch = EntryBlock.getTerminator();
6404   EXPECT_NE(EntryBlockBranch, nullptr);
6405   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6406 
6407   // Check user code block
6408   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6409   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6410   auto *Load1 = UserCodeBlock->getFirstNonPHI();
6411   EXPECT_TRUE(isa<LoadInst>(Load1));
6412   auto *Load2 = Load1->getNextNode();
6413   EXPECT_TRUE(isa<LoadInst>(Load2));
6414 
6415   auto *OutlinedBlockBr = Load2->getNextNode();
6416   EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));
6417 
6418   auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
6419   EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");
6420 
6421   auto *Value1 = OutlinedBlock->getFirstNonPHI();
6422   EXPECT_EQ(Value1, Value);
6423   EXPECT_EQ(Value1->getNextNode(), TargetStore);
6424   auto *Deinit = TargetStore->getNextNode();
6425   EXPECT_NE(Deinit, nullptr);
6426 
6427   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6428   EXPECT_NE(DeinitCall, nullptr);
6429   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6430   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6431 
6432   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6433 
6434   // Check exit block
6435   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6436   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6437   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI()));
6438 }
6439 
6440 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
6441   OpenMPIRBuilder OMPBuilder(*M);
6442   OMPBuilder.setConfig(
6443       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6444   OMPBuilder.initialize();
6445 
6446   F->setName("func");
6447   IRBuilder<> Builder(BB);
6448   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6449 
6450   LoadInst *Value = nullptr;
6451   StoreInst *TargetStore = nullptr;
6452   llvm::SmallVector<llvm::Value *, 1> CapturedArgs = {
6453       Constant::getNullValue(PointerType::get(Ctx, 0))};
6454 
6455   auto SimpleArgAccessorCB =
6456       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6457           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6458           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6459         if (!OMPBuilder.Config.isTargetDevice()) {
6460           RetVal = cast<llvm::Value>(&Arg);
6461           return CodeGenIP;
6462         }
6463 
6464         Builder.restoreIP(AllocaIP);
6465 
6466         llvm::Value *Addr = Builder.CreateAlloca(
6467             Arg.getType()->isPointerTy()
6468                 ? Arg.getType()
6469                 : Type::getInt64Ty(Builder.getContext()),
6470             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6471         llvm::Value *AddrAscast =
6472             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6473         Builder.CreateStore(&Arg, AddrAscast);
6474 
6475         Builder.restoreIP(CodeGenIP);
6476 
6477         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6478 
6479         return Builder.saveIP();
6480       };
6481 
6482   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6483   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6484       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6485     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6486     return CombinedInfos;
6487   };
6488 
6489   llvm::Value *RaiseAlloca = nullptr;
6490 
6491   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6492                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6493       -> OpenMPIRBuilder::InsertPointTy {
6494     Builder.restoreIP(CodeGenIP);
6495     RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty());
6496     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6497     TargetStore = Builder.CreateStore(Value, RaiseAlloca);
6498     return Builder.saveIP();
6499   };
6500 
6501   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6502                                    F->getEntryBlock().getFirstInsertionPt());
6503   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6504                                   /*Line=*/3, /*Count=*/0);
6505   OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
6506       /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
6507       /*MinThreads=*/0};
6508 
6509   ASSERT_EXPECTED_INIT(
6510       OpenMPIRBuilder::InsertPointTy, AfterIP,
6511       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6512                               EntryInfo, DefaultAttrs, CapturedArgs,
6513                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
6514   Builder.restoreIP(AfterIP);
6515 
6516   Builder.CreateRetVoid();
6517   OMPBuilder.finalize();
6518 
6519   // Check outlined function
6520   EXPECT_FALSE(verifyModule(*M, &errs()));
6521   EXPECT_NE(TargetStore, nullptr);
6522   Function *OutlinedFn = TargetStore->getFunction();
6523   EXPECT_NE(F, OutlinedFn);
6524 
6525   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6526   // Account for the "implicit" first argument.
6527   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6528   EXPECT_EQ(OutlinedFn->arg_size(), 2U);
6529   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6530 
6531   // Check entry block, to see if we have raised our alloca
6532   // from the body to the entry block.
6533   auto &EntryBlock = OutlinedFn->getEntryBlock();
6534 
6535   // Check that we have moved our alloca created in the
6536   // BodyGenCB function, to the top of the function.
6537   Instruction *Alloca1 = EntryBlock.getFirstNonPHI();
6538   EXPECT_NE(Alloca1, nullptr);
6539   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6540   EXPECT_EQ(Alloca1, RaiseAlloca);
6541 
6542   // Verify we have not altered the rest of the function
6543   // inappropriately with our alloca movement.
6544   auto *Alloca2 = Alloca1->getNextNode();
6545   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6546   auto *Store2 = Alloca2->getNextNode();
6547   EXPECT_TRUE(isa<StoreInst>(Store2));
6548 
6549   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6550   EXPECT_NE(InitCall, nullptr);
6551   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6552   EXPECT_EQ(InitCall->arg_size(), 2U);
6553   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6554   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6555   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6556   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6557   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6558   auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6559   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6560             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6561   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6562             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6563   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6564             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6565 
6566   auto *EntryBlockBranch = EntryBlock.getTerminator();
6567   EXPECT_NE(EntryBlockBranch, nullptr);
6568   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6569 
6570   // Check user code block
6571   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6572   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6573   auto *Load1 = UserCodeBlock->getFirstNonPHI();
6574   EXPECT_TRUE(isa<LoadInst>(Load1));
6575 
6576   auto *OutlinedBlockBr = Load1->getNextNode();
6577   EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));
6578 
6579   auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
6580   EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");
6581 
6582   auto *Load2 = OutlinedBlock->getFirstNonPHI();
6583   EXPECT_TRUE(isa<LoadInst>(Load2));
6584   EXPECT_EQ(Load2, Value);
6585   EXPECT_EQ(Load2->getNextNode(), TargetStore);
6586   auto *Deinit = TargetStore->getNextNode();
6587   EXPECT_NE(Deinit, nullptr);
6588 
6589   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6590   EXPECT_NE(DeinitCall, nullptr);
6591   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6592   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6593 
6594   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6595 
6596   // Check exit block
6597   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6598   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6599   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI()));
6600 }
6601 
6602 TEST_F(OpenMPIRBuilderTest, CreateTask) {
6603   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6604   OpenMPIRBuilder OMPBuilder(*M);
6605   OMPBuilder.Config.IsTargetDevice = false;
6606   OMPBuilder.initialize();
6607   F->setName("func");
6608   IRBuilder<> Builder(BB);
6609 
6610   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6611   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6612   Value *Val128 =
6613       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6614 
6615   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6616     Builder.restoreIP(AllocaIP);
6617     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6618                                                 "bodygen.alloca128");
6619 
6620     Builder.restoreIP(CodeGenIP);
6621     // Loading and storing captured pointer and values
6622     Builder.CreateStore(Val128, Local128);
6623     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6624                                       "bodygen.load32");
6625 
6626     LoadInst *PrivLoad128 = Builder.CreateLoad(
6627         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
6628     Value *Cmp = Builder.CreateICmpNE(
6629         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
6630     Instruction *ThenTerm, *ElseTerm;
6631     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
6632                                   &ThenTerm, &ElseTerm);
6633     return Error::success();
6634   };
6635 
6636   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6637   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6638   OpenMPIRBuilder::LocationDescription Loc(
6639       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6640   ASSERT_EXPECTED_INIT(
6641       OpenMPIRBuilder::InsertPointTy, AfterIP,
6642       OMPBuilder.createTask(
6643           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6644           BodyGenCB));
6645   Builder.restoreIP(AfterIP);
6646   OMPBuilder.finalize();
6647   Builder.CreateRetVoid();
6648 
6649   EXPECT_FALSE(verifyModule(*M, &errs()));
6650 
6651   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6652       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6653           ->user_back());
6654 
6655   // Verify the Ident argument
6656   GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
6657   ASSERT_NE(Ident, nullptr);
6658   EXPECT_TRUE(Ident->hasInitializer());
6659   Constant *Initializer = Ident->getInitializer();
6660   GlobalVariable *SrcStrGlob =
6661       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6662   ASSERT_NE(SrcStrGlob, nullptr);
6663   ConstantDataArray *SrcSrc =
6664       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6665   ASSERT_NE(SrcSrc, nullptr);
6666 
6667   // Verify the num_threads argument.
6668   CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
6669   ASSERT_NE(GTID, nullptr);
6670   EXPECT_EQ(GTID->arg_size(), 1U);
6671   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
6672 
6673   // Verify the flags
6674   // TODO: Check for others flags. Currently testing only for tiedness.
6675   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6676   ASSERT_NE(Flags, nullptr);
6677   EXPECT_EQ(Flags->getSExtValue(), 1);
6678 
6679   // Verify the data size
6680   ConstantInt *DataSize =
6681       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6682   ASSERT_NE(DataSize, nullptr);
6683   EXPECT_EQ(DataSize->getSExtValue(), 40);
6684 
6685   ConstantInt *SharedsSize =
6686       dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4));
6687   EXPECT_EQ(SharedsSize->getSExtValue(),
6688             24); // 64-bit pointer + 128-bit integer
6689 
6690   // Verify Wrapper function
6691   Function *OutlinedFn =
6692       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6693   ASSERT_NE(OutlinedFn, nullptr);
6694 
6695   LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin());
6696   ASSERT_NE(SharedsLoad, nullptr);
6697   EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1));
6698 
6699   EXPECT_FALSE(OutlinedFn->isDeclaration());
6700   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty());
6701 
6702   // Verify that the data argument is used only once, and that too in the load
6703   // instruction that is then used for accessing shared data.
6704   Value *DataPtr = OutlinedFn->getArg(1);
6705   EXPECT_EQ(DataPtr->getNumUses(), 1U);
6706   EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser()));
6707   Value *Data = DataPtr->uses().begin()->getUser();
6708   EXPECT_TRUE(all_of(Data->uses(), [](Use &U) {
6709     return isa<GetElementPtrInst>(U.getUser());
6710   }));
6711 
6712   // Verify the presence of `trunc` and `icmp` instructions in Outlined function
6713   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6714                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
6715   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6716                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
6717 
6718   // Verify the execution of the task
6719   CallInst *TaskCall = dyn_cast<CallInst>(
6720       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6721           ->user_back());
6722   ASSERT_NE(TaskCall, nullptr);
6723   EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
6724   EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
6725   EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);
6726 
6727   // Verify that the argument data has been copied
6728   for (User *in : TaskAllocCall->users()) {
6729     if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
6730       EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
6731     }
6732   }
6733 }
6734 
6735 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
6736   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6737   OpenMPIRBuilder OMPBuilder(*M);
6738   OMPBuilder.Config.IsTargetDevice = false;
6739   OMPBuilder.initialize();
6740   F->setName("func");
6741   IRBuilder<> Builder(BB);
6742 
6743   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6744     return Error::success();
6745   };
6746 
6747   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6748   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6749   OpenMPIRBuilder::LocationDescription Loc(
6750       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6751   ASSERT_EXPECTED_INIT(
6752       OpenMPIRBuilder::InsertPointTy, AfterIP,
6753       OMPBuilder.createTask(
6754           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6755           BodyGenCB));
6756   Builder.restoreIP(AfterIP);
6757   OMPBuilder.finalize();
6758   Builder.CreateRetVoid();
6759 
6760   EXPECT_FALSE(verifyModule(*M, &errs()));
6761 
6762   // Check that the outlined function has only one argument.
6763   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6764       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6765           ->user_back());
6766   Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5));
6767   ASSERT_NE(OutlinedFn, nullptr);
6768   ASSERT_EQ(OutlinedFn->arg_size(), 1U);
6769 }
6770 
6771 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
6772   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6773   OpenMPIRBuilder OMPBuilder(*M);
6774   OMPBuilder.Config.IsTargetDevice = false;
6775   OMPBuilder.initialize();
6776   F->setName("func");
6777   IRBuilder<> Builder(BB);
6778   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6779     return Error::success();
6780   };
6781   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6782   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6783   OpenMPIRBuilder::LocationDescription Loc(
6784       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6785   ASSERT_EXPECTED_INIT(
6786       OpenMPIRBuilder::InsertPointTy, AfterIP,
6787       OMPBuilder.createTask(
6788           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6789           BodyGenCB,
6790           /*Tied=*/false));
6791   Builder.restoreIP(AfterIP);
6792   OMPBuilder.finalize();
6793   Builder.CreateRetVoid();
6794 
6795   // Check for the `Tied` argument
6796   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6797       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6798           ->user_back());
6799   ASSERT_NE(TaskAllocCall, nullptr);
6800   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6801   ASSERT_NE(Flags, nullptr);
6802   EXPECT_EQ(Flags->getZExtValue() & 1U, 0U);
6803 
6804   EXPECT_FALSE(verifyModule(*M, &errs()));
6805 }
6806 
6807 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
6808   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6809   OpenMPIRBuilder OMPBuilder(*M);
6810   OMPBuilder.Config.IsTargetDevice = false;
6811   OMPBuilder.initialize();
6812   F->setName("func");
6813   IRBuilder<> Builder(BB);
6814   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6815     return Error::success();
6816   };
6817   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6818   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6819   OpenMPIRBuilder::LocationDescription Loc(
6820       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6821   AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext()));
6822   SmallVector<OpenMPIRBuilder::DependData> DDS;
6823   {
6824     OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn,
6825                                      Type::getInt32Ty(M->getContext()), InDep);
6826     DDS.push_back(DDIn);
6827   }
6828   ASSERT_EXPECTED_INIT(
6829       OpenMPIRBuilder::InsertPointTy, AfterIP,
6830       OMPBuilder.createTask(
6831           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6832           BodyGenCB,
6833           /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
6834   Builder.restoreIP(AfterIP);
6835   OMPBuilder.finalize();
6836   Builder.CreateRetVoid();
6837 
6838   // Check for the `NumDeps` argument
6839   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6840       OMPBuilder
6841           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps)
6842           ->user_back());
6843   ASSERT_NE(TaskAllocCall, nullptr);
6844   ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6845   ASSERT_NE(NumDeps, nullptr);
6846   EXPECT_EQ(NumDeps->getZExtValue(), 1U);
6847 
6848   // Check for the `DepInfo` array argument
6849   AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4));
6850   ASSERT_NE(DepArray, nullptr);
6851   Value::user_iterator DepArrayI = DepArray->user_begin();
6852   ++DepArrayI;
6853   Value::user_iterator DepInfoI = DepArrayI->user_begin();
6854   // Check for the `DependKind` flag in the `DepInfo` array
6855   Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI);
6856   ASSERT_NE(Flag, nullptr);
6857   ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag);
6858   ASSERT_NE(FlagInt, nullptr);
6859   EXPECT_EQ(FlagInt->getZExtValue(),
6860             static_cast<unsigned int>(RTLDependenceKindTy::DepIn));
6861   ++DepInfoI;
6862   // Check for the size in the `DepInfo` array
6863   Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI);
6864   ASSERT_NE(Size, nullptr);
6865   ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size);
6866   ASSERT_NE(SizeInt, nullptr);
6867   EXPECT_EQ(SizeInt->getZExtValue(), 4U);
6868   ++DepInfoI;
6869   // Check for the variable address in the `DepInfo` array
6870   Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI);
6871   ASSERT_NE(AddrStored, nullptr);
6872   PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored);
6873   ASSERT_NE(AddrInt, nullptr);
6874   Value *Addr = AddrInt->getPointerOperand();
6875   EXPECT_EQ(Addr, InDep);
6876 
6877   ConstantInt *NumDepsNoAlias =
6878       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5));
6879   ASSERT_NE(NumDepsNoAlias, nullptr);
6880   EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U);
6881   EXPECT_EQ(TaskAllocCall->getOperand(6),
6882             ConstantPointerNull::get(PointerType::getUnqual(M->getContext())));
6883 
6884   EXPECT_FALSE(verifyModule(*M, &errs()));
6885 }
6886 
6887 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
6888   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6889   OpenMPIRBuilder OMPBuilder(*M);
6890   OMPBuilder.Config.IsTargetDevice = false;
6891   OMPBuilder.initialize();
6892   F->setName("func");
6893   IRBuilder<> Builder(BB);
6894   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6895     return Error::success();
6896   };
6897   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6898   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
6899   Builder.SetInsertPoint(BodyBB);
6900   Value *Final = Builder.CreateICmp(
6901       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
6902       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
6903   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6904   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
6905                        OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
6906                                              /*Tied=*/false, Final));
6907   Builder.restoreIP(AfterIP);
6908   OMPBuilder.finalize();
6909   Builder.CreateRetVoid();
6910 
6911   // Check for the `Tied` argument
6912   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6913       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6914           ->user_back());
6915   ASSERT_NE(TaskAllocCall, nullptr);
6916   BinaryOperator *OrInst =
6917       dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
6918   ASSERT_NE(OrInst, nullptr);
6919   EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);
6920 
6921   // One of the arguments to `or` instruction is the tied flag, which is equal
6922   // to zero.
6923   EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
6924     if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
6925       return TiedValue->getSExtValue() == 0;
6926     return false;
6927   }));
6928 
6929   // One of the arguments to `or` instruction is the final condition.
6930   EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
6931     if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
6932       ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
6933       ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
6934       if (!TrueValue || !FalseValue)
6935         return false;
6936       return Select->getCondition() == Final &&
6937              TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
6938     }
6939     return false;
6940   }));
6941 
6942   EXPECT_FALSE(verifyModule(*M, &errs()));
6943 }
6944 
6945 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
6946   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6947   OpenMPIRBuilder OMPBuilder(*M);
6948   OMPBuilder.Config.IsTargetDevice = false;
6949   OMPBuilder.initialize();
6950   F->setName("func");
6951   IRBuilder<> Builder(BB);
6952   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6953     return Error::success();
6954   };
6955   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6956   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
6957   Builder.SetInsertPoint(BodyBB);
6958   Value *IfCondition = Builder.CreateICmp(
6959       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
6960       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
6961   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6962   ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
6963                        OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
6964                                              /*Tied=*/false, /*Final=*/nullptr,
6965                                              IfCondition));
6966   Builder.restoreIP(AfterIP);
6967   OMPBuilder.finalize();
6968   Builder.CreateRetVoid();
6969 
6970   EXPECT_FALSE(verifyModule(*M, &errs()));
6971 
6972   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6973       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6974           ->user_back());
6975   ASSERT_NE(TaskAllocCall, nullptr);
6976 
6977   // Check the branching is based on the if condition argument.
6978   BranchInst *IfConditionBranchInst =
6979       dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator());
6980   ASSERT_NE(IfConditionBranchInst, nullptr);
6981   ASSERT_TRUE(IfConditionBranchInst->isConditional());
6982   EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition);
6983 
6984   // Check that the `__kmpc_omp_task` executes only in the then branch.
6985   CallInst *TaskCall = dyn_cast<CallInst>(
6986       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6987           ->user_back());
6988   ASSERT_NE(TaskCall, nullptr);
6989   EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0));
6990 
6991   // Check that the OpenMP Runtime Functions specific to `if` clause execute
6992   // only in the else branch. Also check that the function call is between the
6993   // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls.
6994   CallInst *TaskBeginIfCall = dyn_cast<CallInst>(
6995       OMPBuilder
6996           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0)
6997           ->user_back());
6998   CallInst *TaskCompleteCall = dyn_cast<CallInst>(
6999       OMPBuilder
7000           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0)
7001           ->user_back());
7002   ASSERT_NE(TaskBeginIfCall, nullptr);
7003   ASSERT_NE(TaskCompleteCall, nullptr);
7004   Function *OulinedFn =
7005       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
7006   ASSERT_NE(OulinedFn, nullptr);
7007   CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back());
7008   ASSERT_NE(OulinedFnCall, nullptr);
7009   EXPECT_EQ(TaskBeginIfCall->getParent(),
7010             IfConditionBranchInst->getSuccessor(1));
7011 
7012   EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall);
7013   EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall);
7014 }
7015 
7016 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
7017   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7018   OpenMPIRBuilder OMPBuilder(*M);
7019   OMPBuilder.initialize();
7020   F->setName("func");
7021   IRBuilder<> Builder(BB);
7022 
7023   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
7024   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
7025   Value *Val128 =
7026       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
7027   Instruction *ThenTerm, *ElseTerm;
7028 
7029   Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
7030 
7031   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7032     Builder.restoreIP(AllocaIP);
7033     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
7034                                                 "bodygen.alloca128");
7035 
7036     Builder.restoreIP(CodeGenIP);
7037     // Loading and storing captured pointer and values
7038     InternalStoreInst = Builder.CreateStore(Val128, Local128);
7039     InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
7040                                         "bodygen.load32");
7041 
7042     InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128,
7043                                          "bodygen.local.load128");
7044     InternalIfCmp = Builder.CreateICmpNE(
7045         InternalLoad32,
7046         Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType()));
7047     SplitBlockAndInsertIfThenElse(InternalIfCmp,
7048                                   CodeGenIP.getBlock()->getTerminator(),
7049                                   &ThenTerm, &ElseTerm);
7050     return Error::success();
7051   };
7052 
7053   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7054   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7055   OpenMPIRBuilder::LocationDescription Loc(
7056       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7057   ASSERT_EXPECTED_INIT(
7058       OpenMPIRBuilder::InsertPointTy, AfterIP,
7059       OMPBuilder.createTaskgroup(
7060           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7061           BodyGenCB));
7062   Builder.restoreIP(AfterIP);
7063   OMPBuilder.finalize();
7064   Builder.CreateRetVoid();
7065 
7066   EXPECT_FALSE(verifyModule(*M, &errs()));
7067 
7068   CallInst *TaskgroupCall = dyn_cast<CallInst>(
7069       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
7070           ->user_back());
7071   ASSERT_NE(TaskgroupCall, nullptr);
7072   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
7073       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
7074           ->user_back());
7075   ASSERT_NE(EndTaskgroupCall, nullptr);
7076 
7077   // Verify the Ident argument
7078   GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0));
7079   ASSERT_NE(Ident, nullptr);
7080   EXPECT_TRUE(Ident->hasInitializer());
7081   Constant *Initializer = Ident->getInitializer();
7082   GlobalVariable *SrcStrGlob =
7083       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
7084   ASSERT_NE(SrcStrGlob, nullptr);
7085   ConstantDataArray *SrcSrc =
7086       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
7087   ASSERT_NE(SrcSrc, nullptr);
7088 
7089   // Verify the num_threads argument.
7090   CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1));
7091   ASSERT_NE(GTID, nullptr);
7092   EXPECT_EQ(GTID->arg_size(), 1U);
7093   EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr(
7094                                            OMPRTL___kmpc_global_thread_num));
7095 
7096   // Checking the general structure of the IR generated is same as expected.
7097   Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction();
7098   EXPECT_EQ(GeneratedStoreInst, InternalStoreInst);
7099   Instruction *GeneratedLoad32 =
7100       GeneratedStoreInst->getNextNonDebugInstruction();
7101   EXPECT_EQ(GeneratedLoad32, InternalLoad32);
7102   Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction();
7103   EXPECT_EQ(GeneratedLoad128, InternalLoad128);
7104 
7105   // Checking the ordering because of the if statements and that
7106   // `__kmp_end_taskgroup` call is after the if branching.
7107   BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(),
7108                             ThenTerm->getSuccessor(0),
7109                             EndTaskgroupCall->getParent(),
7110                             ElseTerm->getParent()};
7111   verifyDFSOrder(F, RefOrder);
7112 }
7113 
7114 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
7115   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
7116   OpenMPIRBuilder OMPBuilder(*M);
7117   OMPBuilder.Config.IsTargetDevice = false;
7118   OMPBuilder.initialize();
7119   F->setName("func");
7120   IRBuilder<> Builder(BB);
7121 
7122   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7123     Builder.restoreIP(AllocaIP);
7124     AllocaInst *Alloca32 =
7125         Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
7126     AllocaInst *Alloca64 =
7127         Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
7128     Builder.restoreIP(CodeGenIP);
7129     auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7130       Builder.restoreIP(CodeGenIP);
7131       LoadInst *LoadValue =
7132           Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
7133       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64));
7134       Builder.CreateStore(AddInst, Alloca64);
7135       return Error::success();
7136     };
7137     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
7138     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1,
7139                          OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
7140     Builder.restoreIP(TaskIP1);
7141 
7142     auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7143       Builder.restoreIP(CodeGenIP);
7144       LoadInst *LoadValue =
7145           Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
7146       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32));
7147       Builder.CreateStore(AddInst, Alloca32);
7148       return Error::success();
7149     };
7150     OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
7151     ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2,
7152                          OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
7153     Builder.restoreIP(TaskIP2);
7154   };
7155 
7156   BasicBlock *AllocaBB = Builder.GetInsertBlock();
7157   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
7158   OpenMPIRBuilder::LocationDescription Loc(
7159       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
7160   ASSERT_EXPECTED_INIT(
7161       OpenMPIRBuilder::InsertPointTy, AfterIP,
7162       OMPBuilder.createTaskgroup(
7163           Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
7164           BODYGENCB_WRAPPER(BodyGenCB)));
7165   Builder.restoreIP(AfterIP);
7166   OMPBuilder.finalize();
7167   Builder.CreateRetVoid();
7168 
7169   EXPECT_FALSE(verifyModule(*M, &errs()));
7170 
7171   CallInst *TaskgroupCall = dyn_cast<CallInst>(
7172       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
7173           ->user_back());
7174   ASSERT_NE(TaskgroupCall, nullptr);
7175   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
7176       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
7177           ->user_back());
7178   ASSERT_NE(EndTaskgroupCall, nullptr);
7179 
7180   Function *TaskAllocFn =
7181       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
7182   ASSERT_EQ(TaskAllocFn->getNumUses(), 2u);
7183 
7184   CallInst *FirstTaskAllocCall =
7185       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin());
7186   CallInst *SecondTaskAllocCall =
7187       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++);
7188   ASSERT_NE(FirstTaskAllocCall, nullptr);
7189   ASSERT_NE(SecondTaskAllocCall, nullptr);
7190 
7191   // Verify that the tasks have been generated in order and inside taskgroup
7192   // construct.
7193   BasicBlock *RefOrder[] = {
7194       TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(),
7195       SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()};
7196   verifyDFSOrder(F, RefOrder);
7197 }
7198 
7199 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
7200   OpenMPIRBuilder OMPBuilder(*M);
7201   OMPBuilder.initialize();
7202 
7203   IRBuilder<> Builder(BB);
7204 
7205   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
7206   OpenMPIRBuilder::TargetDataInfo Info(true, false);
7207 
7208   auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext());
7209   auto Int64PtrTy = PointerType::getUnqual(Builder.getContext());
7210 
7211   Info.RTArgs.BasePointersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7212   Info.RTArgs.PointersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7213   Info.RTArgs.SizesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7214   Info.RTArgs.MapTypesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7215   Info.RTArgs.MapNamesArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7216   Info.RTArgs.MappersArray = ConstantPointerNull::get(Builder.getPtrTy(0));
7217   Info.NumberOfPtrs = 4;
7218   Info.EmitDebug = false;
7219   OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
7220 
7221   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
7222   EXPECT_NE(RTArgs.PointersArray, nullptr);
7223   EXPECT_NE(RTArgs.SizesArray, nullptr);
7224   EXPECT_NE(RTArgs.MapTypesArray, nullptr);
7225   EXPECT_NE(RTArgs.MappersArray, nullptr);
7226   EXPECT_NE(RTArgs.MapNamesArray, nullptr);
7227   EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr);
7228 
7229   EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy);
7230   EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy);
7231   EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy);
7232   EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy);
7233   EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy);
7234   EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy);
7235 }
7236 
7237 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
7238   OpenMPIRBuilder OMPBuilder(*M);
7239   OMPBuilder.setConfig(
7240       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
7241   OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager;
7242   TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0);
7243   InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0);
7244   EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo));
7245   InfoManager.initializeDeviceGlobalVarEntryInfo(
7246       "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0);
7247   InfoManager.registerTargetRegionEntryInfo(
7248       EntryInfo, nullptr, nullptr,
7249       OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7250   InfoManager.registerDeviceGlobalVarEntryInfo(
7251       "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
7252       GlobalValue::WeakAnyLinkage);
7253   EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar"));
7254 }
7255 
// Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar, as
// they call each other (recursively in some cases). The test case exercises
// these functions by using them for host code generation of declare target
// global variables.
7260 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) {
7261   OpenMPIRBuilder OMPBuilder(*M);
7262   OMPBuilder.initialize();
7263   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
7264   OMPBuilder.setConfig(Config);
7265 
7266   std::vector<llvm::Triple> TargetTriple;
7267   TargetTriple.emplace_back("amdgcn-amd-amdhsa");
7268 
7269   TargetRegionEntryInfo EntryInfo("", 42, 4711, 17);
7270   std::vector<GlobalVariable *> RefsGathered;
7271 
7272   std::vector<Constant *> Globals;
7273   auto *IntTy = Type::getInt32Ty(Ctx);
7274   for (int I = 0; I < 2; ++I) {
7275     Globals.push_back(M->getOrInsertGlobal(
7276         "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * {
7277           return new GlobalVariable(
7278               *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage,
7279               ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I));
7280         }));
7281   }
7282 
7283   OMPBuilder.registerTargetGlobalVariable(
7284       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
7285       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
7286       EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple,
7287       nullptr, nullptr, Globals[0]->getType(), Globals[0]);
7288 
7289   OMPBuilder.registerTargetGlobalVariable(
7290       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink,
7291       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
7292       EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple,
7293       nullptr, nullptr, Globals[1]->getType(), Globals[1]);
7294 
7295   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn =
7296       [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
7297          const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
7298     // If this is invoked, then we want to emit an error, even if it is not
7299     // neccesarily the most readable, as something has went wrong. The
7300     // test-suite unfortunately eats up all error output
7301     ASSERT_EQ(Kind, Kind);
7302   };
7303 
7304   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn);
7305 
7306   // Clauses for data_int_0 with To + Any clauses for the host
7307   std::vector<GlobalVariable *> OffloadEntries;
7308   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name"));
7309   OffloadEntries.push_back(
7310       M->getNamedGlobal(".offloading.entry.test_data_int_0"));
7311 
7312   // Clauses for data_int_1 with Link + Any clauses for the host
7313   OffloadEntries.push_back(
7314       M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr"));
7315   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1"));
7316   OffloadEntries.push_back(
7317       M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr"));
7318 
7319   for (unsigned I = 0; I < OffloadEntries.size(); ++I)
7320     EXPECT_NE(OffloadEntries[I], nullptr);
7321 
7322   // Metadata generated for the host offload module
7323   NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info");
7324   ASSERT_THAT(OffloadMetadata, testing::NotNull());
7325   StringRef Nodes[2] = {
7326       cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1))
7327           ->getString(),
7328       cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1))
7329           ->getString()};
7330   EXPECT_THAT(
7331       Nodes, testing::UnorderedElementsAre("test_data_int_0",
7332                                            "test_data_int_1_decl_tgt_ref_ptr"));
7333 }
7334 
7335 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) {
7336   OpenMPIRBuilder OMPBuilder(*M);
7337   OMPBuilder.initialize();
7338   OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true,
7339                                /* IsGPU = */ true,
7340                                /* OpenMPOffloadMandatory = */ false,
7341                                /* HasRequiresReverseOffload = */ false,
7342                                /* HasRequiresUnifiedAddress = */ false,
7343                                /* HasRequiresUnifiedSharedMemory = */ false,
7344                                /* HasRequiresDynamicAllocators = */ false);
7345   OMPBuilder.setConfig(Config);
7346 
7347   FunctionCallee FnTypeAndCallee =
7348       M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx));
7349 
7350   auto *Fn = cast<Function>(FnTypeAndCallee.getCallee());
7351   OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn,
7352                                 /* Size = */ 0,
7353                                 /* Flags = */ 0, GlobalValue::WeakAnyLinkage);
7354 
7355   // Check nvvm.annotations only created for GPU kernels
7356   NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations");
7357   EXPECT_NE(MD, nullptr);
7358   EXPECT_EQ(MD->getNumOperands(), 1u);
7359 
7360   MDNode *Annotations = MD->getOperand(0);
7361   EXPECT_EQ(Annotations->getNumOperands(), 3u);
7362 
7363   Constant *ConstVal =
7364       dyn_cast<ConstantAsMetadata>(Annotations->getOperand(0))->getValue();
7365   EXPECT_TRUE(isa<Function>(Fn));
7366   EXPECT_EQ(ConstVal, cast<Function>(Fn));
7367 
7368   EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel"));
7369 
7370   EXPECT_TRUE(mdconst::hasa<ConstantInt>(Annotations->getOperand(2)));
7371   APInt IntVal =
7372       mdconst::extract<ConstantInt>(Annotations->getOperand(2))->getValue();
7373   EXPECT_EQ(IntVal, 1);
7374 
7375   // Check kernel attributes
7376   EXPECT_TRUE(Fn->hasFnAttribute("kernel"));
7377   EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress));
7378 }
7379 
7380 } // namespace
7381