xref: /llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (revision 64422cf826354ee1d586c2484ec72d66db898e75)
1 //===- llvm/unittest/IR/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
12 #include "llvm/IR/BasicBlock.h"
13 #include "llvm/IR/DIBuilder.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/InstIterator.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/Verifier.h"
20 #include "llvm/Passes/PassBuilder.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23 #include "gmock/gmock.h"
24 #include "gtest/gtest.h"
25 #include <optional>
26 
27 using namespace llvm;
28 using namespace omp;
29 
30 namespace {
31 
32 /// Create an instruction that uses the values in \p Values. We use "printf"
33 /// just because it is often used for this purpose in test code, but it is never
34 /// executed here.
35 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
36                                   ArrayRef<Value *> Values) {
37   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
38 
39   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
40   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
41   Constant *Indices[] = {Zero, Zero};
42   Constant *FormatStrConst =
43       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
44 
45   Function *PrintfDecl = M->getFunction("printf");
46   if (!PrintfDecl) {
47     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
48     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
49     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
50   }
51 
52   SmallVector<Value *, 4> Args;
53   Args.push_back(FormatStrConst);
54   Args.append(Values.begin(), Values.end());
55   return Builder.CreateCall(PrintfDecl, Args);
56 }
57 
58 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
59 /// order the control flow of \p F.
60 ///
61 /// This is an easy way to verify the branching structure of the CFG without
62 /// checking every branch instruction individually. For the CFG of a
63 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
64 /// the body, i.e. the DFS order corresponds to the execution order with one
65 /// loop iteration.
66 static testing::AssertionResult
67 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
68   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
69   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
70 
71   df_iterator_default_set<BasicBlock *, 16> Visited;
72   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
73 
74   BasicBlock *Prev = nullptr;
75   for (BasicBlock *BB : DFS) {
76     if (It != E && BB == *It) {
77       Prev = *It;
78       ++It;
79     }
80   }
81 
82   if (It == E)
83     return testing::AssertionSuccess();
84   if (!Prev)
85     return testing::AssertionFailure()
86            << "Did not find " << (*It)->getName() << " in control flow";
87   return testing::AssertionFailure()
88          << "Expected " << Prev->getName() << " before " << (*It)->getName()
89          << " in control flow";
90 }
91 
92 /// Verify that blocks in \p RefOrder are in the same relative order in the
93 /// linked lists of blocks in \p F. The linked list may contain additional
94 /// blocks in-between.
95 ///
96 /// While the order in the linked list is not relevant for semantics, keeping
97 /// the order roughly in execution order makes its printout easier to read.
98 static testing::AssertionResult
99 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
100   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
101   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
102 
103   BasicBlock *Prev = nullptr;
104   for (BasicBlock &BB : *F) {
105     if (It != E && &BB == *It) {
106       Prev = *It;
107       ++It;
108     }
109   }
110 
111   if (It == E)
112     return testing::AssertionSuccess();
113   if (!Prev)
114     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
115                                        << " in function " << F->getName();
116   return testing::AssertionFailure()
117          << "Expected " << Prev->getName() << " before " << (*It)->getName()
118          << " in function " << F->getName();
119 }
120 
121 /// Populate Calls with call instructions calling the function with the given
122 /// FnID from the given function F.
123 static void findCalls(Function *F, omp::RuntimeFunction FnID,
124                       OpenMPIRBuilder &OMPBuilder,
125                       SmallVectorImpl<CallInst *> &Calls) {
126   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
127   for (BasicBlock &BB : *F) {
128     for (Instruction &I : BB) {
129       auto *Call = dyn_cast<CallInst>(&I);
130       if (Call && Call->getCalledFunction() == Fn)
131         Calls.push_back(Call);
132     }
133   }
134 }
135 
136 /// Assuming \p F contains only one call to the function with the given \p FnID,
137 /// return that call.
138 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
139                                 OpenMPIRBuilder &OMPBuilder) {
140   SmallVector<CallInst *, 1> Calls;
141   findCalls(F, FnID, OMPBuilder, Calls);
142   EXPECT_EQ(1u, Calls.size());
143   if (Calls.size() != 1)
144     return nullptr;
145   return Calls.front();
146 }
147 
148 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
149   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
150   case omp::OMPScheduleType::BaseDynamicChunked:
151     return omp::OMP_SCHEDULE_Dynamic;
152   case omp::OMPScheduleType::BaseGuidedChunked:
153     return omp::OMP_SCHEDULE_Guided;
154   case omp::OMPScheduleType::BaseAuto:
155     return omp::OMP_SCHEDULE_Auto;
156   case omp::OMPScheduleType::BaseRuntime:
157     return omp::OMP_SCHEDULE_Runtime;
158   default:
159     llvm_unreachable("unknown type for this test");
160   }
161 }
162 
163 class OpenMPIRBuilderTest : public testing::Test {
164 protected:
165   void SetUp() override {
166     M.reset(new Module("MyModule", Ctx));
167     FunctionType *FTy =
168         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
169                           /*isVarArg=*/false);
170     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
171     BB = BasicBlock::Create(Ctx, "", F);
172 
173     DIBuilder DIB(*M);
174     auto File = DIB.createFile("test.dbg", "/src", std::nullopt,
175                                std::optional<StringRef>("/src/test.dbg"));
176     auto CU =
177         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
178     auto Type =
179         DIB.createSubroutineType(DIB.getOrCreateTypeArray(std::nullopt));
180     auto SP = DIB.createFunction(
181         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
182         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
183     F->setSubprogram(SP);
184     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
185     DIB.finalize();
186     DL = DILocation::get(Ctx, 3, 7, Scope);
187   }
188 
189   void TearDown() override {
190     BB = nullptr;
191     M.reset();
192   }
193 
194   /// Create a function with a simple loop that calls printf using the logical
195   /// loop counter for use with tests that need a CanonicalLoopInfo object.
196   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
197                                              OpenMPIRBuilder &OMPBuilder,
198                                              int UseIVBits,
199                                              CallInst **Call = nullptr,
200                                              BasicBlock **BodyCode = nullptr) {
201     OMPBuilder.initialize();
202     F->setName("func");
203 
204     IRBuilder<> Builder(BB);
205     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
206     Value *TripCount = F->getArg(0);
207 
208     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
209     Value *CastedTripCount =
210         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
211 
212     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
213                              llvm::Value *LC) {
214       Builder.restoreIP(CodeGenIP);
215       if (BodyCode)
216         *BodyCode = Builder.GetInsertBlock();
217 
218       // Add something that consumes the induction variable to the body.
219       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
220       if (Call)
221         *Call = CallInst;
222     };
223     CanonicalLoopInfo *Loop =
224         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount);
225 
226     // Finalize the function.
227     Builder.restoreIP(Loop->getAfterIP());
228     Builder.CreateRetVoid();
229 
230     return Loop;
231   }
232 
233   LLVMContext Ctx;
234   std::unique_ptr<Module> M;
235   Function *F;
236   BasicBlock *BB;
237   DebugLoc DL;
238 };
239 
240 class OpenMPIRBuilderTestWithParams
241     : public OpenMPIRBuilderTest,
242       public ::testing::WithParamInterface<omp::OMPScheduleType> {};
243 
244 class OpenMPIRBuilderTestWithIVBits
245     : public OpenMPIRBuilderTest,
246       public ::testing::WithParamInterface<int> {};
247 
248 // Returns the value stored in the given allocation. Returns null if the given
249 // value is not a result of an InstTy instruction, if no value is stored or if
250 // there is more than one store.
251 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
252   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
253   if (!Inst)
254     return nullptr;
255   StoreInst *Store = nullptr;
256   for (Use &U : Inst->uses()) {
257     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
258       EXPECT_EQ(Store, nullptr);
259       Store = CandidateStore;
260     }
261   }
262   if (!Store)
263     return nullptr;
264   return Store->getValueOperand();
265 }
266 
267 // Returns the value stored in the aggregate argument of an outlined function,
268 // or nullptr if it is not found.
269 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
270                                            unsigned Idx) {
271   GetElementPtrInst *GEPAtIdx = nullptr;
272   // Find GEP instruction at that index.
273   for (User *Usr : Aggregate->users()) {
274     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
275     if (!GEP)
276       continue;
277 
278     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
279       continue;
280 
281     EXPECT_EQ(GEPAtIdx, nullptr);
282     GEPAtIdx = GEP;
283   }
284 
285   EXPECT_NE(GEPAtIdx, nullptr);
286   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
287 
288   // Find the value stored to the aggregate.
289   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
290   Value *StoredAggValue = StoreToAgg->getValueOperand();
291 
292   Value *StoredValue = nullptr;
293 
294   // Find the value stored to the value stored in the aggregate.
295   for (User *Usr : StoredAggValue->users()) {
296     StoreInst *Store = dyn_cast<StoreInst>(Usr);
297     if (!Store)
298       continue;
299 
300     if (Store->getPointerOperand() != StoredAggValue)
301       continue;
302 
303     EXPECT_EQ(StoredValue, nullptr);
304     StoredValue = Store->getValueOperand();
305   }
306 
307   return StoredValue;
308 }
309 
310 // Returns the aggregate that the value is originating from.
311 static Value *findAggregateFromValue(Value *V) {
312   // Expects a load instruction that loads from the aggregate.
313   LoadInst *Load = dyn_cast<LoadInst>(V);
314   EXPECT_NE(Load, nullptr);
315   // Find the GEP instruction used in the load instruction.
316   GetElementPtrInst *GEP =
317       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
318   EXPECT_NE(GEP, nullptr);
319   // Find the aggregate used in the GEP instruction.
320   Value *Aggregate = GEP->getPointerOperand();
321 
322   return Aggregate;
323 }
324 
325 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
326   OpenMPIRBuilder OMPBuilder(*M);
327   OMPBuilder.initialize();
328 
329   IRBuilder<> Builder(BB);
330 
331   OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for);
332   EXPECT_TRUE(M->global_empty());
333   EXPECT_EQ(M->size(), 1U);
334   EXPECT_EQ(F->size(), 1U);
335   EXPECT_EQ(BB->size(), 0U);
336 
337   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
338   OMPBuilder.createBarrier(Loc, OMPD_for);
339   EXPECT_FALSE(M->global_empty());
340   EXPECT_EQ(M->size(), 3U);
341   EXPECT_EQ(F->size(), 1U);
342   EXPECT_EQ(BB->size(), 2U);
343 
344   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
345   EXPECT_NE(GTID, nullptr);
346   EXPECT_EQ(GTID->arg_size(), 1U);
347   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
348   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
349   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
350 
351   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
352   EXPECT_NE(Barrier, nullptr);
353   EXPECT_EQ(Barrier->arg_size(), 2U);
354   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
355   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
356   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
357 
358   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
359 
360   Builder.CreateUnreachable();
361   EXPECT_FALSE(verifyModule(*M, &errs()));
362 }
363 
364 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
365   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
366   OpenMPIRBuilder OMPBuilder(*M);
367   OMPBuilder.initialize();
368 
369   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
370   new UnreachableInst(Ctx, CBB);
371   auto FiniCB = [&](InsertPointTy IP) {
372     ASSERT_NE(IP.getBlock(), nullptr);
373     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
374     BranchInst::Create(CBB, IP.getBlock());
375   };
376   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
377 
378   IRBuilder<> Builder(BB);
379 
380   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
381   auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
382   Builder.restoreIP(NewIP);
383   EXPECT_FALSE(M->global_empty());
384   EXPECT_EQ(M->size(), 4U);
385   EXPECT_EQ(F->size(), 4U);
386   EXPECT_EQ(BB->size(), 4U);
387 
388   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
389   EXPECT_NE(GTID, nullptr);
390   EXPECT_EQ(GTID->arg_size(), 1U);
391   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
392   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
393   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
394 
395   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
396   EXPECT_NE(Cancel, nullptr);
397   EXPECT_EQ(Cancel->arg_size(), 3U);
398   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
399   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
400   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
401   EXPECT_EQ(Cancel->getNumUses(), 1U);
402   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
403   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
404   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
405   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
406   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
407   EXPECT_NE(GTID1, nullptr);
408   EXPECT_EQ(GTID1->arg_size(), 1U);
409   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
410   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
411   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
412   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
413   EXPECT_NE(Barrier, nullptr);
414   EXPECT_EQ(Barrier->arg_size(), 2U);
415   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
416   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
417   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
418   EXPECT_EQ(Barrier->getNumUses(), 0U);
419   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
420             1U);
421   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
422 
423   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
424 
425   OMPBuilder.popFinalizationCB();
426 
427   Builder.CreateUnreachable();
428   EXPECT_FALSE(verifyModule(*M, &errs()));
429 }
430 
431 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
432   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
433   OpenMPIRBuilder OMPBuilder(*M);
434   OMPBuilder.initialize();
435 
436   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
437   new UnreachableInst(Ctx, CBB);
438   auto FiniCB = [&](InsertPointTy IP) {
439     ASSERT_NE(IP.getBlock(), nullptr);
440     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
441     BranchInst::Create(CBB, IP.getBlock());
442   };
443   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
444 
445   IRBuilder<> Builder(BB);
446 
447   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
448   auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
449   Builder.restoreIP(NewIP);
450   EXPECT_FALSE(M->global_empty());
451   EXPECT_EQ(M->size(), 4U);
452   EXPECT_EQ(F->size(), 7U);
453   EXPECT_EQ(BB->size(), 1U);
454   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
455   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
456   BB = BB->getTerminator()->getSuccessor(0);
457   EXPECT_EQ(BB->size(), 4U);
458 
459   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
460   EXPECT_NE(GTID, nullptr);
461   EXPECT_EQ(GTID->arg_size(), 1U);
462   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
463   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
464   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
465 
466   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
467   EXPECT_NE(Cancel, nullptr);
468   EXPECT_EQ(Cancel->arg_size(), 3U);
469   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
470   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
471   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
472   EXPECT_EQ(Cancel->getNumUses(), 1U);
473   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
474   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
475   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
476   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
477             NewIP.getBlock());
478   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
479   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
480   EXPECT_NE(GTID1, nullptr);
481   EXPECT_EQ(GTID1->arg_size(), 1U);
482   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
483   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
484   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
485   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
486   EXPECT_NE(Barrier, nullptr);
487   EXPECT_EQ(Barrier->arg_size(), 2U);
488   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
489   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
490   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
491   EXPECT_EQ(Barrier->getNumUses(), 0U);
492   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
493             1U);
494   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
495 
496   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
497 
498   OMPBuilder.popFinalizationCB();
499 
500   Builder.CreateUnreachable();
501   EXPECT_FALSE(verifyModule(*M, &errs()));
502 }
503 
504 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
505   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
506   OpenMPIRBuilder OMPBuilder(*M);
507   OMPBuilder.initialize();
508 
509   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
510   new UnreachableInst(Ctx, CBB);
511   auto FiniCB = [&](InsertPointTy IP) {
512     ASSERT_NE(IP.getBlock(), nullptr);
513     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
514     BranchInst::Create(CBB, IP.getBlock());
515   };
516   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
517 
518   IRBuilder<> Builder(BB);
519 
520   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
521   auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for);
522   Builder.restoreIP(NewIP);
523   EXPECT_FALSE(M->global_empty());
524   EXPECT_EQ(M->size(), 3U);
525   EXPECT_EQ(F->size(), 4U);
526   EXPECT_EQ(BB->size(), 4U);
527 
528   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
529   EXPECT_NE(GTID, nullptr);
530   EXPECT_EQ(GTID->arg_size(), 1U);
531   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
532   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
533   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
534 
535   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
536   EXPECT_NE(Barrier, nullptr);
537   EXPECT_EQ(Barrier->arg_size(), 2U);
538   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
539   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
540   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
541   EXPECT_EQ(Barrier->getNumUses(), 1U);
542   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
543   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
544   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
545   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
546   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
547             1U);
548   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
549             CBB);
550 
551   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
552 
553   OMPBuilder.popFinalizationCB();
554 
555   Builder.CreateUnreachable();
556   EXPECT_FALSE(verifyModule(*M, &errs()));
557 }
558 
559 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
560   OpenMPIRBuilder OMPBuilder(*M);
561   OMPBuilder.initialize();
562   F->setName("func");
563 
564   IRBuilder<> Builder(BB);
565 
566   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
567   OMPBuilder.createBarrier(Loc, OMPD_for);
568   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
569   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
570   EXPECT_EQ(GTID->getDebugLoc(), DL);
571   EXPECT_EQ(Barrier->getDebugLoc(), DL);
572   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
573   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
574     return;
575   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
576   EXPECT_TRUE(Ident->hasInitializer());
577   if (!Ident->hasInitializer())
578     return;
579   Constant *Initializer = Ident->getInitializer();
580   EXPECT_TRUE(
581       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
582   GlobalVariable *SrcStrGlob =
583       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
584   if (!SrcStrGlob)
585     return;
586   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
587   ConstantDataArray *SrcSrc =
588       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
589   if (!SrcSrc)
590     return;
591   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
592 }
593 
594 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
595   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
596   std::string oldDLStr = M->getDataLayoutStr();
597   M->setDataLayout(
598       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
599       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
600       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
601   OpenMPIRBuilder OMPBuilder(*M);
602   OMPBuilder.Config.IsTargetDevice = true;
603   OMPBuilder.initialize();
604   F->setName("func");
605   IRBuilder<> Builder(BB);
606   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
607   Builder.CreateBr(EnterBB);
608   Builder.SetInsertPoint(EnterBB);
609   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
610 
611   AllocaInst *PrivAI = nullptr;
612 
613   unsigned NumBodiesGenerated = 0;
614   unsigned NumPrivatizedVars = 0;
615   unsigned NumFinalizationPoints = 0;
616 
617   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
618     ++NumBodiesGenerated;
619 
620     Builder.restoreIP(AllocaIP);
621     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
622     Builder.CreateStore(F->arg_begin(), PrivAI);
623 
624     Builder.restoreIP(CodeGenIP);
625     Value *PrivLoad =
626         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
627     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
628     Instruction *ThenTerm, *ElseTerm;
629     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
630                                   &ThenTerm, &ElseTerm);
631   };
632 
633   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
634                     Value &Orig, Value &Inner,
635                     Value *&ReplacementValue) -> InsertPointTy {
636     ++NumPrivatizedVars;
637 
638     if (!isa<AllocaInst>(Orig)) {
639       EXPECT_EQ(&Orig, F->arg_begin());
640       ReplacementValue = &Inner;
641       return CodeGenIP;
642     }
643 
644     // Since the original value is an allocation, it has a pointer type and
645     // therefore no additional wrapping should happen.
646     EXPECT_EQ(&Orig, &Inner);
647 
648     // Trivial copy (=firstprivate).
649     Builder.restoreIP(AllocaIP);
650     Type *VTy = ReplacementValue->getType();
651     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
652     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
653     Builder.restoreIP(CodeGenIP);
654     Builder.CreateStore(V, ReplacementValue);
655     return CodeGenIP;
656   };
657 
658   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
659 
660   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
661                                     F->getEntryBlock().getFirstInsertionPt());
662   IRBuilder<>::InsertPoint AfterIP =
663       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
664                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
665 
666   EXPECT_EQ(NumBodiesGenerated, 1U);
667   EXPECT_EQ(NumPrivatizedVars, 1U);
668   EXPECT_EQ(NumFinalizationPoints, 1U);
669 
670   Builder.restoreIP(AfterIP);
671   Builder.CreateRetVoid();
672 
673   OMPBuilder.finalize();
674   Function *OutlinedFn = PrivAI->getFunction();
675   EXPECT_FALSE(verifyModule(*M, &errs()));
676   EXPECT_NE(OutlinedFn, F);
677   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
678   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
679   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
680 
681   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
682   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
683   // Make sure that arguments are pointers in 0 address address space
684   EXPECT_EQ(OutlinedFn->getArg(0)->getType(),
685             PointerType::get(M->getContext(), 0));
686   EXPECT_EQ(OutlinedFn->getArg(1)->getType(),
687             PointerType::get(M->getContext(), 0));
688   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
689             PointerType::get(M->getContext(), 0));
690   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
691   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
692   User *Usr = OutlinedFn->user_back();
693   ASSERT_TRUE(isa<CallInst>(Usr));
694   CallInst *Parallel51CI = dyn_cast<CallInst>(Usr);
695   ASSERT_NE(Parallel51CI, nullptr);
696 
697   EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51");
698   EXPECT_EQ(Parallel51CI->arg_size(), 9U);
699   EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn);
700   EXPECT_TRUE(
701       isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts()));
702   EXPECT_EQ(Parallel51CI, Usr);
703   M->setDataLayout(oldDLStr);
704 }
705 
706 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
707   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
708   OpenMPIRBuilder OMPBuilder(*M);
709   OMPBuilder.Config.IsTargetDevice = false;
710   OMPBuilder.initialize();
711   F->setName("func");
712   IRBuilder<> Builder(BB);
713 
714   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
715   Builder.CreateBr(EnterBB);
716   Builder.SetInsertPoint(EnterBB);
717   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
718 
719   AllocaInst *PrivAI = nullptr;
720 
721   unsigned NumBodiesGenerated = 0;
722   unsigned NumPrivatizedVars = 0;
723   unsigned NumFinalizationPoints = 0;
724 
725   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
726     ++NumBodiesGenerated;
727 
728     Builder.restoreIP(AllocaIP);
729     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
730     Builder.CreateStore(F->arg_begin(), PrivAI);
731 
732     Builder.restoreIP(CodeGenIP);
733     Value *PrivLoad =
734         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
735     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
736     Instruction *ThenTerm, *ElseTerm;
737     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
738                                   &ThenTerm, &ElseTerm);
739   };
740 
741   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
742                     Value &Orig, Value &Inner,
743                     Value *&ReplacementValue) -> InsertPointTy {
744     ++NumPrivatizedVars;
745 
746     if (!isa<AllocaInst>(Orig)) {
747       EXPECT_EQ(&Orig, F->arg_begin());
748       ReplacementValue = &Inner;
749       return CodeGenIP;
750     }
751 
752     // Since the original value is an allocation, it has a pointer type and
753     // therefore no additional wrapping should happen.
754     EXPECT_EQ(&Orig, &Inner);
755 
756     // Trivial copy (=firstprivate).
757     Builder.restoreIP(AllocaIP);
758     Type *VTy = ReplacementValue->getType();
759     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
760     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
761     Builder.restoreIP(CodeGenIP);
762     Builder.CreateStore(V, ReplacementValue);
763     return CodeGenIP;
764   };
765 
766   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
767 
768   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
769                                     F->getEntryBlock().getFirstInsertionPt());
770   IRBuilder<>::InsertPoint AfterIP =
771       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
772                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
773   EXPECT_EQ(NumBodiesGenerated, 1U);
774   EXPECT_EQ(NumPrivatizedVars, 1U);
775   EXPECT_EQ(NumFinalizationPoints, 1U);
776 
777   Builder.restoreIP(AfterIP);
778   Builder.CreateRetVoid();
779 
780   OMPBuilder.finalize();
781 
782   EXPECT_NE(PrivAI, nullptr);
783   Function *OutlinedFn = PrivAI->getFunction();
784   EXPECT_NE(F, OutlinedFn);
785   EXPECT_FALSE(verifyModule(*M, &errs()));
786   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
787   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
788   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
789 
790   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
791   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
792 
793   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
794   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
795   User *Usr = OutlinedFn->user_back();
796   ASSERT_TRUE(isa<CallInst>(Usr));
797   CallInst *ForkCI = dyn_cast<CallInst>(Usr);
798   ASSERT_NE(ForkCI, nullptr);
799 
800   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
801   EXPECT_EQ(ForkCI->arg_size(), 4U);
802   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
803   EXPECT_EQ(ForkCI->getArgOperand(1),
804             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
805   EXPECT_EQ(ForkCI, Usr);
806   Value *StoredValue =
807       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
808   EXPECT_EQ(StoredValue, F->arg_begin());
809 }
810 
811 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
812   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
813   OpenMPIRBuilder OMPBuilder(*M);
814   OMPBuilder.Config.IsTargetDevice = false;
815   OMPBuilder.initialize();
816   F->setName("func");
817   IRBuilder<> Builder(BB);
818 
819   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
820   Builder.CreateBr(EnterBB);
821   Builder.SetInsertPoint(EnterBB);
822   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
823 
824   unsigned NumInnerBodiesGenerated = 0;
825   unsigned NumOuterBodiesGenerated = 0;
826   unsigned NumFinalizationPoints = 0;
827 
828   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
829     ++NumInnerBodiesGenerated;
830   };
831 
832   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
833                     Value &Orig, Value &Inner,
834                     Value *&ReplacementValue) -> InsertPointTy {
835     // Trivial copy (=firstprivate).
836     Builder.restoreIP(AllocaIP);
837     Type *VTy = ReplacementValue->getType();
838     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
839     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
840     Builder.restoreIP(CodeGenIP);
841     Builder.CreateStore(V, ReplacementValue);
842     return CodeGenIP;
843   };
844 
845   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
846 
847   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
848     ++NumOuterBodiesGenerated;
849     Builder.restoreIP(CodeGenIP);
850     BasicBlock *CGBB = CodeGenIP.getBlock();
851     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
852     CGBB->getTerminator()->eraseFromParent();
853     ;
854 
855     IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel(
856         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
857         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
858 
859     Builder.restoreIP(AfterIP);
860     Builder.CreateBr(NewBB);
861   };
862 
863   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
864                                     F->getEntryBlock().getFirstInsertionPt());
865   IRBuilder<>::InsertPoint AfterIP =
866       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
867                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
868 
869   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
870   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
871   EXPECT_EQ(NumFinalizationPoints, 2U);
872 
873   Builder.restoreIP(AfterIP);
874   Builder.CreateRetVoid();
875 
876   OMPBuilder.finalize();
877 
878   EXPECT_EQ(M->size(), 5U);
879   for (Function &OutlinedFn : *M) {
880     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
881       continue;
882     EXPECT_FALSE(verifyModule(*M, &errs()));
883     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
884     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
885     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
886 
887     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
888     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
889 
890     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
891     User *Usr = OutlinedFn.user_back();
892     ASSERT_TRUE(isa<CallInst>(Usr));
893     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
894     ASSERT_NE(ForkCI, nullptr);
895 
896     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
897     EXPECT_EQ(ForkCI->arg_size(), 3U);
898     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
899     EXPECT_EQ(ForkCI->getArgOperand(1),
900               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
901     EXPECT_EQ(ForkCI, Usr);
902   }
903 }
904 
905 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
906   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
907   OpenMPIRBuilder OMPBuilder(*M);
908   OMPBuilder.Config.IsTargetDevice = false;
909   OMPBuilder.initialize();
910   F->setName("func");
911   IRBuilder<> Builder(BB);
912 
913   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
914   Builder.CreateBr(EnterBB);
915   Builder.SetInsertPoint(EnterBB);
916   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
917 
918   unsigned NumInnerBodiesGenerated = 0;
919   unsigned NumOuterBodiesGenerated = 0;
920   unsigned NumFinalizationPoints = 0;
921 
922   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
923     ++NumInnerBodiesGenerated;
924   };
925 
926   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
927                     Value &Orig, Value &Inner,
928                     Value *&ReplacementValue) -> InsertPointTy {
929     // Trivial copy (=firstprivate).
930     Builder.restoreIP(AllocaIP);
931     Type *VTy = ReplacementValue->getType();
932     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
933     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
934     Builder.restoreIP(CodeGenIP);
935     Builder.CreateStore(V, ReplacementValue);
936     return CodeGenIP;
937   };
938 
939   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
940 
941   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
942     ++NumOuterBodiesGenerated;
943     Builder.restoreIP(CodeGenIP);
944     BasicBlock *CGBB = CodeGenIP.getBlock();
945     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
946     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
947     CGBB->getTerminator()->eraseFromParent();
948     ;
949     NewBB1->getTerminator()->eraseFromParent();
950     ;
951 
952     IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel(
953         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
954         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
955 
956     Builder.restoreIP(AfterIP1);
957     Builder.CreateBr(NewBB1);
958 
959     IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel(
960         InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
961         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
962 
963     Builder.restoreIP(AfterIP2);
964     Builder.CreateBr(NewBB2);
965   };
966 
967   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
968                                     F->getEntryBlock().getFirstInsertionPt());
969   IRBuilder<>::InsertPoint AfterIP =
970       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
971                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
972 
973   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
974   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
975   EXPECT_EQ(NumFinalizationPoints, 3U);
976 
977   Builder.restoreIP(AfterIP);
978   Builder.CreateRetVoid();
979 
980   OMPBuilder.finalize();
981 
982   EXPECT_EQ(M->size(), 6U);
983   for (Function &OutlinedFn : *M) {
984     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
985       continue;
986     EXPECT_FALSE(verifyModule(*M, &errs()));
987     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
988     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
989     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
990 
991     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
992     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
993 
994     unsigned NumAllocas = 0;
995     for (Instruction &I : instructions(OutlinedFn))
996       NumAllocas += isa<AllocaInst>(I);
997     EXPECT_EQ(NumAllocas, 1U);
998 
999     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
1000     User *Usr = OutlinedFn.user_back();
1001     ASSERT_TRUE(isa<CallInst>(Usr));
1002     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
1003     ASSERT_NE(ForkCI, nullptr);
1004 
1005     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1006     EXPECT_EQ(ForkCI->arg_size(), 3U);
1007     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1008     EXPECT_EQ(ForkCI->getArgOperand(1),
1009               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
1010     EXPECT_EQ(ForkCI, Usr);
1011   }
1012 }
1013 
1014 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
1015   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1016   OpenMPIRBuilder OMPBuilder(*M);
1017   OMPBuilder.Config.IsTargetDevice = false;
1018   OMPBuilder.initialize();
1019   F->setName("func");
1020   IRBuilder<> Builder(BB);
1021 
1022   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1023   Builder.CreateBr(EnterBB);
1024   Builder.SetInsertPoint(EnterBB);
1025   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1026 
1027   AllocaInst *PrivAI = nullptr;
1028 
1029   unsigned NumBodiesGenerated = 0;
1030   unsigned NumPrivatizedVars = 0;
1031   unsigned NumFinalizationPoints = 0;
1032 
1033   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1034     ++NumBodiesGenerated;
1035 
1036     Builder.restoreIP(AllocaIP);
1037     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
1038     Builder.CreateStore(F->arg_begin(), PrivAI);
1039 
1040     Builder.restoreIP(CodeGenIP);
1041     Value *PrivLoad =
1042         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
1043     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
1044     Instruction *ThenTerm, *ElseTerm;
1045     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
1046                                   &ElseTerm);
1047   };
1048 
1049   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1050                     Value &Orig, Value &Inner,
1051                     Value *&ReplacementValue) -> InsertPointTy {
1052     ++NumPrivatizedVars;
1053 
1054     if (!isa<AllocaInst>(Orig)) {
1055       EXPECT_EQ(&Orig, F->arg_begin());
1056       ReplacementValue = &Inner;
1057       return CodeGenIP;
1058     }
1059 
1060     // Since the original value is an allocation, it has a pointer type and
1061     // therefore no additional wrapping should happen.
1062     EXPECT_EQ(&Orig, &Inner);
1063 
1064     // Trivial copy (=firstprivate).
1065     Builder.restoreIP(AllocaIP);
1066     Type *VTy = ReplacementValue->getType();
1067     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
1068     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1069     Builder.restoreIP(CodeGenIP);
1070     Builder.CreateStore(V, ReplacementValue);
1071     return CodeGenIP;
1072   };
1073 
1074   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1075     ++NumFinalizationPoints;
1076     // No destructors.
1077   };
1078 
1079   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1080                                     F->getEntryBlock().getFirstInsertionPt());
1081   IRBuilder<>::InsertPoint AfterIP =
1082       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1083                                 Builder.CreateIsNotNull(F->arg_begin()),
1084                                 nullptr, OMP_PROC_BIND_default, false);
1085 
1086   EXPECT_EQ(NumBodiesGenerated, 1U);
1087   EXPECT_EQ(NumPrivatizedVars, 1U);
1088   EXPECT_EQ(NumFinalizationPoints, 1U);
1089 
1090   Builder.restoreIP(AfterIP);
1091   Builder.CreateRetVoid();
1092   OMPBuilder.finalize();
1093 
1094   EXPECT_NE(PrivAI, nullptr);
1095   Function *OutlinedFn = PrivAI->getFunction();
1096   EXPECT_NE(F, OutlinedFn);
1097   EXPECT_FALSE(verifyModule(*M, &errs()));
1098 
1099   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
1100   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
1101 
1102   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
1103   ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
1104 
1105   CallInst *ForkCI = nullptr;
1106   for (User *Usr : OutlinedFn->users()) {
1107     ASSERT_TRUE(isa<CallInst>(Usr));
1108     ForkCI = cast<CallInst>(Usr);
1109   }
1110 
1111   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if");
1112   EXPECT_EQ(ForkCI->arg_size(), 5U);
1113   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1114   EXPECT_EQ(ForkCI->getArgOperand(1),
1115             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1116   EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx));
1117 }
1118 
1119 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1120   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1121   OpenMPIRBuilder OMPBuilder(*M);
1122   OMPBuilder.Config.IsTargetDevice = false;
1123   OMPBuilder.initialize();
1124   F->setName("func");
1125   IRBuilder<> Builder(BB);
1126 
1127   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1128   Builder.CreateBr(EnterBB);
1129   Builder.SetInsertPoint(EnterBB);
1130   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1131 
1132   unsigned NumBodiesGenerated = 0;
1133   unsigned NumPrivatizedVars = 0;
1134   unsigned NumFinalizationPoints = 0;
1135 
1136   CallInst *CheckedBarrier = nullptr;
1137   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1138     ++NumBodiesGenerated;
1139 
1140     Builder.restoreIP(CodeGenIP);
1141 
1142     // Create three barriers, two cancel barriers but only one checked.
1143     Function *CBFn, *BFn;
1144 
1145     Builder.restoreIP(
1146         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1147 
1148     CBFn = M->getFunction("__kmpc_cancel_barrier");
1149     BFn = M->getFunction("__kmpc_barrier");
1150     ASSERT_NE(CBFn, nullptr);
1151     ASSERT_EQ(BFn, nullptr);
1152     ASSERT_EQ(CBFn->getNumUses(), 1U);
1153     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1154     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1155     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1156 
1157     Builder.restoreIP(
1158         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1159     CBFn = M->getFunction("__kmpc_cancel_barrier");
1160     BFn = M->getFunction("__kmpc_barrier");
1161     ASSERT_NE(CBFn, nullptr);
1162     ASSERT_NE(BFn, nullptr);
1163     ASSERT_EQ(CBFn->getNumUses(), 1U);
1164     ASSERT_EQ(BFn->getNumUses(), 1U);
1165     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1166     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1167 
1168     Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel,
1169                                                false, false));
1170     ASSERT_EQ(CBFn->getNumUses(), 2U);
1171     ASSERT_EQ(BFn->getNumUses(), 1U);
1172     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1173     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1174     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1175   };
1176 
1177   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1178                     Value *&) -> InsertPointTy {
1179     ++NumPrivatizedVars;
1180     llvm_unreachable("No privatization callback call expected!");
1181   };
1182 
1183   FunctionType *FakeDestructorTy =
1184       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1185                         /*isVarArg=*/false);
1186   auto *FakeDestructor = Function::Create(
1187       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1188 
1189   auto FiniCB = [&](InsertPointTy IP) {
1190     ++NumFinalizationPoints;
1191     Builder.restoreIP(IP);
1192     Builder.CreateCall(FakeDestructor,
1193                        {Builder.getInt32(NumFinalizationPoints)});
1194   };
1195 
1196   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1197                                     F->getEntryBlock().getFirstInsertionPt());
1198   IRBuilder<>::InsertPoint AfterIP =
1199       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1200                                 Builder.CreateIsNotNull(F->arg_begin()),
1201                                 nullptr, OMP_PROC_BIND_default, true);
1202 
1203   EXPECT_EQ(NumBodiesGenerated, 1U);
1204   EXPECT_EQ(NumPrivatizedVars, 0U);
1205   EXPECT_EQ(NumFinalizationPoints, 2U);
1206   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1207 
1208   Builder.restoreIP(AfterIP);
1209   Builder.CreateRetVoid();
1210   OMPBuilder.finalize();
1211 
1212   EXPECT_FALSE(verifyModule(*M, &errs()));
1213 
1214   BasicBlock *ExitBB = nullptr;
1215   for (const User *Usr : FakeDestructor->users()) {
1216     const CallInst *CI = dyn_cast<CallInst>(Usr);
1217     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1218     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1219     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1220     if (ExitBB)
1221       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1222     else
1223       ExitBB = CI->getNextNode()->getSuccessor(0);
1224     ASSERT_EQ(ExitBB->size(), 1U);
1225     if (!isa<ReturnInst>(ExitBB->front())) {
1226       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1227       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1228       ASSERT_TRUE(isa<ReturnInst>(
1229           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1230     }
1231   }
1232 }
1233 
1234 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1235   OpenMPIRBuilder OMPBuilder(*M);
1236   OMPBuilder.Config.IsTargetDevice = false;
1237   OMPBuilder.initialize();
1238   F->setName("func");
1239   IRBuilder<> Builder(BB);
1240   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1241   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1242 
1243   Type *I32Ty = Type::getInt32Ty(M->getContext());
1244   Type *PtrTy = PointerType::get(M->getContext(), 0);
1245   Type *StructTy = StructType::get(I32Ty, PtrTy);
1246   Type *VoidTy = Type::getVoidTy(M->getContext());
1247   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1248   FunctionCallee TakeI32Func =
1249       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1250   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy);
1251   FunctionCallee TakeI32PtrFunc =
1252       M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy);
1253   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1254   FunctionCallee TakeStructFunc =
1255       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1256   FunctionCallee RetStructPtrFunc =
1257       M->getOrInsertFunction("ret_structptr", PtrTy);
1258   FunctionCallee TakeStructPtrFunc =
1259       M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy);
1260   Value *I32Val = Builder.CreateCall(RetI32Func);
1261   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1262   Value *StructVal = Builder.CreateCall(RetStructFunc);
1263   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1264 
1265   Instruction *Internal;
1266   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1267     IRBuilder<>::InsertPointGuard Guard(Builder);
1268     Builder.restoreIP(CodeGenIP);
1269     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1270     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1271     Builder.CreateCall(TakeStructFunc, StructVal);
1272     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1273   };
1274   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1275                     Value &Inner, Value *&ReplacementValue) {
1276     ReplacementValue = &Inner;
1277     return CodeGenIP;
1278   };
1279   auto FiniCB = [](InsertPointTy) {};
1280 
1281   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1282                                     F->getEntryBlock().getFirstInsertionPt());
1283   IRBuilder<>::InsertPoint AfterIP =
1284       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1285                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
1286   Builder.restoreIP(AfterIP);
1287   Builder.CreateRetVoid();
1288 
1289   OMPBuilder.finalize();
1290 
1291   EXPECT_FALSE(verifyModule(*M, &errs()));
1292   Function *OutlinedFn = Internal->getFunction();
1293 
1294   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1295   EXPECT_TRUE(Arg2Type->isPointerTy());
1296 }
1297 
1298 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1299   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1300   OpenMPIRBuilder OMPBuilder(*M);
1301   OMPBuilder.initialize();
1302   IRBuilder<> Builder(BB);
1303   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1304   Value *TripCount = F->getArg(0);
1305 
1306   unsigned NumBodiesGenerated = 0;
1307   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1308     NumBodiesGenerated += 1;
1309 
1310     Builder.restoreIP(CodeGenIP);
1311 
1312     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1313     Instruction *ThenTerm, *ElseTerm;
1314     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1315                                   &ThenTerm, &ElseTerm);
1316   };
1317 
1318   CanonicalLoopInfo *Loop =
1319       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
1320 
1321   Builder.restoreIP(Loop->getAfterIP());
1322   ReturnInst *RetInst = Builder.CreateRetVoid();
1323   OMPBuilder.finalize();
1324 
1325   Loop->assertOK();
1326   EXPECT_FALSE(verifyModule(*M, &errs()));
1327 
1328   EXPECT_EQ(NumBodiesGenerated, 1U);
1329 
1330   // Verify control flow structure (in addition to Loop->assertOK()).
1331   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1332   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1333 
1334   Instruction *IndVar = Loop->getIndVar();
1335   EXPECT_TRUE(isa<PHINode>(IndVar));
1336   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1337   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1338 
1339   EXPECT_EQ(Loop->getTripCount(), TripCount);
1340 
1341   BasicBlock *Body = Loop->getBody();
1342   Instruction *CmpInst = &Body->front();
1343   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1344   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1345 
1346   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1347   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1348     return SuccBB->getSingleSuccessor() == LatchPred;
1349   }));
1350 
1351   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1352 }
1353 
1354 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1355   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1356   OpenMPIRBuilder OMPBuilder(*M);
1357   OMPBuilder.initialize();
1358   IRBuilder<> Builder(BB);
1359 
1360   // Check the trip count is computed correctly. We generate the canonical loop
1361   // but rely on the IRBuilder's constant folder to compute the final result
1362   // since all inputs are constant. To verify overflow situations, limit the
1363   // trip count / loop counter widths to 16 bits.
1364   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1365                            bool IsSigned, bool InclusiveStop) -> int64_t {
1366     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1367     Type *LCTy = Type::getInt16Ty(Ctx);
1368     Value *StartVal = ConstantInt::get(LCTy, Start);
1369     Value *StopVal = ConstantInt::get(LCTy, Stop);
1370     Value *StepVal = ConstantInt::get(LCTy, Step);
1371     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1372     CanonicalLoopInfo *Loop =
1373         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1374                                        StepVal, IsSigned, InclusiveStop);
1375     Loop->assertOK();
1376     Builder.restoreIP(Loop->getAfterIP());
1377     Value *TripCount = Loop->getTripCount();
1378     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1379   };
1380 
1381   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1382   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1383   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1384   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1385   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1386   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1387   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1388   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1389   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1390   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1391   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1392   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1393   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1394 
1395   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1396   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1397   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1398   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1399   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1400   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1401 
1402   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1403   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1404   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1405   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1406   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1407   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1408   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1409 
1410   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1411   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1412   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1413   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1414   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1415   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1416   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1417   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1418   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1419   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1420   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1421 
1422   // Finalize the function and verify it.
1423   Builder.CreateRetVoid();
1424   OMPBuilder.finalize();
1425   EXPECT_FALSE(verifyModule(*M, &errs()));
1426 }
1427 
1428 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1429   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1430   OpenMPIRBuilder OMPBuilder(*M);
1431   OMPBuilder.initialize();
1432   F->setName("func");
1433 
1434   IRBuilder<> Builder(BB);
1435 
1436   Type *LCTy = F->getArg(0)->getType();
1437   Constant *One = ConstantInt::get(LCTy, 1);
1438   Constant *Two = ConstantInt::get(LCTy, 2);
1439   Value *OuterTripCount =
1440       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1441   Value *InnerTripCount =
1442       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1443 
1444   // Fix an insertion point for ComputeIP.
1445   BasicBlock *LoopNextEnter =
1446       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1447                          Builder.GetInsertBlock()->getNextNode());
1448   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1449   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1450 
1451   Builder.SetInsertPoint(LoopNextEnter);
1452   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1453 
1454   CanonicalLoopInfo *InnerLoop = nullptr;
1455   CallInst *InbetweenLead = nullptr;
1456   CallInst *InbetweenTrail = nullptr;
1457   CallInst *Call = nullptr;
1458   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1459     Builder.restoreIP(OuterCodeGenIP);
1460     InbetweenLead =
1461         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1462 
1463     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1464                                   Value *InnerLC) {
1465       Builder.restoreIP(InnerCodeGenIP);
1466       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1467     };
1468     InnerLoop = OMPBuilder.createCanonicalLoop(
1469         Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");
1470 
1471     Builder.restoreIP(InnerLoop->getAfterIP());
1472     InbetweenTrail =
1473         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1474   };
1475   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1476       OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");
1477 
1478   // Finish the function.
1479   Builder.restoreIP(OuterLoop->getAfterIP());
1480   Builder.CreateRetVoid();
1481 
1482   CanonicalLoopInfo *Collapsed =
1483       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1484 
1485   OMPBuilder.finalize();
1486   EXPECT_FALSE(verifyModule(*M, &errs()));
1487 
1488   // Verify control flow and BB order.
1489   BasicBlock *RefOrder[] = {
1490       Collapsed->getPreheader(),   Collapsed->getHeader(),
1491       Collapsed->getCond(),        Collapsed->getBody(),
1492       InbetweenLead->getParent(),  Call->getParent(),
1493       InbetweenTrail->getParent(), Collapsed->getLatch(),
1494       Collapsed->getExit(),        Collapsed->getAfter(),
1495   };
1496   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1497   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1498 
1499   // Verify the total trip count.
1500   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1501   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1502   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1503 
1504   // Verify the changed indvar.
1505   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1506   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1507   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1508   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1509   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1510 
1511   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1512   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1513   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1514   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1515   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1516 
1517   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1518   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1519 }
1520 
1521 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1522   OpenMPIRBuilder OMPBuilder(*M);
1523   CallInst *Call;
1524   BasicBlock *BodyCode;
1525   CanonicalLoopInfo *Loop =
1526       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1527 
1528   Instruction *OrigIndVar = Loop->getIndVar();
1529   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1530 
1531   // Tile the loop.
1532   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1533   std::vector<CanonicalLoopInfo *> GenLoops =
1534       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1535 
1536   OMPBuilder.finalize();
1537   EXPECT_FALSE(verifyModule(*M, &errs()));
1538 
1539   EXPECT_EQ(GenLoops.size(), 2u);
1540   CanonicalLoopInfo *Floor = GenLoops[0];
1541   CanonicalLoopInfo *Tile = GenLoops[1];
1542 
1543   BasicBlock *RefOrder[] = {
1544       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1545       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1546       Tile->getCond(),       Tile->getBody(),      BodyCode,
1547       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1548       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1549   };
1550   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1551   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1552 
1553   // Check the induction variable.
1554   EXPECT_EQ(Call->getParent(), BodyCode);
1555   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1556   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1557   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1558   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1559   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1560   EXPECT_EQ(Scale->getOperand(0), TileSize);
1561   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1562 }
1563 
1564 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1565   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1566   OpenMPIRBuilder OMPBuilder(*M);
1567   OMPBuilder.initialize();
1568   F->setName("func");
1569 
1570   IRBuilder<> Builder(BB);
1571   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1572   Value *TripCount = F->getArg(0);
1573   Type *LCTy = TripCount->getType();
1574 
1575   BasicBlock *BodyCode = nullptr;
1576   CanonicalLoopInfo *InnerLoop = nullptr;
1577   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1578                                 llvm::Value *OuterLC) {
1579     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1580                                   llvm::Value *InnerLC) {
1581       Builder.restoreIP(InnerCodeGenIP);
1582       BodyCode = Builder.GetInsertBlock();
1583 
1584       // Add something that consumes the induction variables to the body.
1585       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1586     };
1587     InnerLoop = OMPBuilder.createCanonicalLoop(
1588         OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
1589   };
1590   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1591       Loc, OuterLoopBodyGenCB, TripCount, "outer");
1592 
1593   // Finalize the function.
1594   Builder.restoreIP(OuterLoop->getAfterIP());
1595   Builder.CreateRetVoid();
1596 
1597   // Tile to loop nest.
1598   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1599   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1600   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1601       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1602 
1603   OMPBuilder.finalize();
1604   EXPECT_FALSE(verifyModule(*M, &errs()));
1605 
1606   EXPECT_EQ(GenLoops.size(), 4u);
1607   CanonicalLoopInfo *Floor1 = GenLoops[0];
1608   CanonicalLoopInfo *Floor2 = GenLoops[1];
1609   CanonicalLoopInfo *Tile1 = GenLoops[2];
1610   CanonicalLoopInfo *Tile2 = GenLoops[3];
1611 
1612   BasicBlock *RefOrder[] = {
1613       Floor1->getPreheader(),
1614       Floor1->getHeader(),
1615       Floor1->getCond(),
1616       Floor1->getBody(),
1617       Floor2->getPreheader(),
1618       Floor2->getHeader(),
1619       Floor2->getCond(),
1620       Floor2->getBody(),
1621       Tile1->getPreheader(),
1622       Tile1->getHeader(),
1623       Tile1->getCond(),
1624       Tile1->getBody(),
1625       Tile2->getPreheader(),
1626       Tile2->getHeader(),
1627       Tile2->getCond(),
1628       Tile2->getBody(),
1629       BodyCode,
1630       Tile2->getLatch(),
1631       Tile2->getExit(),
1632       Tile2->getAfter(),
1633       Tile1->getLatch(),
1634       Tile1->getExit(),
1635       Tile1->getAfter(),
1636       Floor2->getLatch(),
1637       Floor2->getExit(),
1638       Floor2->getAfter(),
1639       Floor1->getLatch(),
1640       Floor1->getExit(),
1641       Floor1->getAfter(),
1642   };
1643   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1644   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1645 }
1646 
1647 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1648   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1649   OpenMPIRBuilder OMPBuilder(*M);
1650   OMPBuilder.initialize();
1651   F->setName("func");
1652 
1653   IRBuilder<> Builder(BB);
1654   Value *TripCount = F->getArg(0);
1655   Type *LCTy = TripCount->getType();
1656 
1657   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1658   Value *OuterStopVal = TripCount;
1659   Value *OuterStep = ConstantInt::get(LCTy, 5);
1660   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1661   Value *InnerStopVal = TripCount;
1662   Value *InnerStep = ConstantInt::get(LCTy, 3);
1663 
1664   // Fix an insertion point for ComputeIP.
1665   BasicBlock *LoopNextEnter =
1666       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1667                          Builder.GetInsertBlock()->getNextNode());
1668   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1669   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1670 
1671   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1672   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1673 
1674   BasicBlock *BodyCode = nullptr;
1675   CanonicalLoopInfo *InnerLoop = nullptr;
1676   CallInst *Call = nullptr;
1677   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1678                                 llvm::Value *OuterLC) {
1679     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1680                                   llvm::Value *InnerLC) {
1681       Builder.restoreIP(InnerCodeGenIP);
1682       BodyCode = Builder.GetInsertBlock();
1683 
1684       // Add something that consumes the induction variable to the body.
1685       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1686     };
1687     InnerLoop = OMPBuilder.createCanonicalLoop(
1688         OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
1689         InnerStep, false, false, ComputeIP, "inner");
1690   };
1691   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1692       Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
1693       false, ComputeIP, "outer");
1694 
1695   // Finalize the function
1696   Builder.restoreIP(OuterLoop->getAfterIP());
1697   Builder.CreateRetVoid();
1698 
1699   // Tile the loop nest.
1700   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1701   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1702   std::vector<CanonicalLoopInfo *> GenLoops =
1703       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1704 
1705   OMPBuilder.finalize();
1706   EXPECT_FALSE(verifyModule(*M, &errs()));
1707 
1708   EXPECT_EQ(GenLoops.size(), 4u);
1709   CanonicalLoopInfo *Floor0 = GenLoops[0];
1710   CanonicalLoopInfo *Floor1 = GenLoops[1];
1711   CanonicalLoopInfo *Tile0 = GenLoops[2];
1712   CanonicalLoopInfo *Tile1 = GenLoops[3];
1713 
1714   BasicBlock *RefOrder[] = {
1715       Floor0->getPreheader(),
1716       Floor0->getHeader(),
1717       Floor0->getCond(),
1718       Floor0->getBody(),
1719       Floor1->getPreheader(),
1720       Floor1->getHeader(),
1721       Floor1->getCond(),
1722       Floor1->getBody(),
1723       Tile0->getPreheader(),
1724       Tile0->getHeader(),
1725       Tile0->getCond(),
1726       Tile0->getBody(),
1727       Tile1->getPreheader(),
1728       Tile1->getHeader(),
1729       Tile1->getCond(),
1730       Tile1->getBody(),
1731       BodyCode,
1732       Tile1->getLatch(),
1733       Tile1->getExit(),
1734       Tile1->getAfter(),
1735       Tile0->getLatch(),
1736       Tile0->getExit(),
1737       Tile0->getAfter(),
1738       Floor1->getLatch(),
1739       Floor1->getExit(),
1740       Floor1->getAfter(),
1741       Floor0->getLatch(),
1742       Floor0->getExit(),
1743       Floor0->getAfter(),
1744   };
1745   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1746   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1747 
1748   EXPECT_EQ(Call->getParent(), BodyCode);
1749 
1750   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1751   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1752   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1753   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1754   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1755   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1756   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1757   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1758   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1759   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1760   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1761 
1762   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1763   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1764   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1765   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1766   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1767   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1768   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1769   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1770   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1771   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1772   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1773   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1774   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1775 }
1776 
1777 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1778   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1779   OpenMPIRBuilder OMPBuilder(*M);
1780   OMPBuilder.initialize();
1781   IRBuilder<> Builder(BB);
1782 
1783   // Create a loop, tile it, and extract its trip count. All input values are
1784   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1785   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1786   // do the same for the tile loop.
1787   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1788                            bool IsSigned, bool InclusiveStop,
1789                            int64_t TileSize) -> uint64_t {
1790     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1791     Type *LCTy = Type::getInt16Ty(Ctx);
1792     Value *StartVal = ConstantInt::get(LCTy, Start);
1793     Value *StopVal = ConstantInt::get(LCTy, Stop);
1794     Value *StepVal = ConstantInt::get(LCTy, Step);
1795 
1796     // Generate a loop.
1797     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1798     CanonicalLoopInfo *Loop =
1799         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1800                                        StepVal, IsSigned, InclusiveStop);
1801     InsertPointTy AfterIP = Loop->getAfterIP();
1802 
1803     // Tile the loop.
1804     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1805     std::vector<CanonicalLoopInfo *> GenLoops =
1806         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1807 
1808     // Set the insertion pointer to after loop, where the next loop will be
1809     // emitted.
1810     Builder.restoreIP(AfterIP);
1811 
1812     // Extract the trip count.
1813     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1814     Value *FloorTripCount = FloorLoop->getTripCount();
1815     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1816   };
1817 
1818   // Empty iteration domain.
1819   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1820   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1821   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1822   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1823   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1824 
1825   // Only complete tiles.
1826   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1827   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1828   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1829   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1830   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1831   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1832 
1833   // Only a partial tile.
1834   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1835   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1836   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1837   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1838   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1839 
1840   // Complete and partial tiles.
1841   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1842   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1843   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1844   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1845   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1846 
1847   // Close to 16-bit integer range.
1848   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1849   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1850   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1851   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1852   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1853   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1854   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1855 
1856   // Finalize the function.
1857   Builder.CreateRetVoid();
1858   OMPBuilder.finalize();
1859 
1860   EXPECT_FALSE(verifyModule(*M, &errs()));
1861 }
1862 
1863 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1864   OpenMPIRBuilder OMPBuilder(*M);
1865   MapVector<Value *, Value *> AlignedVars;
1866   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1867 
1868   // Simd-ize the loop.
1869   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1870                        OrderKind::OMP_ORDER_unknown,
1871                        /* Simdlen */ nullptr,
1872                        /* Safelen */ nullptr);
1873 
1874   OMPBuilder.finalize();
1875   EXPECT_FALSE(verifyModule(*M, &errs()));
1876 
1877   PassBuilder PB;
1878   FunctionAnalysisManager FAM;
1879   PB.registerFunctionAnalyses(FAM);
1880   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1881 
1882   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1883   EXPECT_EQ(TopLvl.size(), 1u);
1884 
1885   Loop *L = TopLvl.front();
1886   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1887   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1888 
1889   // Check for llvm.access.group metadata attached to the printf
1890   // function in the loop body.
1891   BasicBlock *LoopBody = CLI->getBody();
1892   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1893     return I.getMetadata("llvm.access.group") != nullptr;
1894   }));
1895 }
1896 
1897 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
1898   OpenMPIRBuilder OMPBuilder(*M);
1899   IRBuilder<> Builder(BB);
1900   const int AlignmentValue = 32;
1901   AllocaInst *Alloc1 =
1902       Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1));
1903   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
1904   MapVector<Value *, Value *> AlignedVars;
1905   AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
1906 
1907   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1908 
1909   // Simd-ize the loop.
1910   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1911                        OrderKind::OMP_ORDER_unknown,
1912                        /* Simdlen */ nullptr,
1913                        /* Safelen */ nullptr);
1914 
1915   OMPBuilder.finalize();
1916   EXPECT_FALSE(verifyModule(*M, &errs()));
1917 
1918   PassBuilder PB;
1919   FunctionAnalysisManager FAM;
1920   PB.registerFunctionAnalyses(FAM);
1921   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1922 
1923   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1924   EXPECT_EQ(TopLvl.size(), 1u);
1925 
1926   Loop *L = TopLvl.front();
1927   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1928   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1929 
1930   // Check for llvm.access.group metadata attached to the printf
1931   // function in the loop body.
1932   BasicBlock *LoopBody = CLI->getBody();
1933   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1934     return I.getMetadata("llvm.access.group") != nullptr;
1935   }));
1936 
1937   // Check if number of assumption instructions is equal to number of aligned
1938   // variables
1939   BasicBlock *LoopPreheader = CLI->getPreheader();
1940   size_t NumAssummptionCallsInPreheader = count_if(
1941       *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); });
1942   EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
1943 
1944   // Check if variables are correctly aligned
1945   for (Instruction &Instr : *LoopPreheader) {
1946     if (!isa<AssumeInst>(Instr))
1947       continue;
1948     AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
1949     if (AssumeInstruction->getNumTotalBundleOperands()) {
1950       auto Bundle = AssumeInstruction->getOperandBundleAt(0);
1951       if (Bundle.getTagName() == "align") {
1952         EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
1953         auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
1954         EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
1955       }
1956     }
1957   }
1958 }
1959 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
1960   OpenMPIRBuilder OMPBuilder(*M);
1961   MapVector<Value *, Value *> AlignedVars;
1962   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1963 
1964   // Simd-ize the loop.
1965   OMPBuilder.applySimd(CLI, AlignedVars,
1966                        /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
1967                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
1968                        /* Safelen */ nullptr);
1969 
1970   OMPBuilder.finalize();
1971   EXPECT_FALSE(verifyModule(*M, &errs()));
1972 
1973   PassBuilder PB;
1974   FunctionAnalysisManager FAM;
1975   PB.registerFunctionAnalyses(FAM);
1976   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1977 
1978   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1979   EXPECT_EQ(TopLvl.size(), 1u);
1980 
1981   Loop *L = TopLvl.front();
1982   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1983   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1984   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
1985 
1986   // Check for llvm.access.group metadata attached to the printf
1987   // function in the loop body.
1988   BasicBlock *LoopBody = CLI->getBody();
1989   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1990     return I.getMetadata("llvm.access.group") != nullptr;
1991   }));
1992 }
1993 
1994 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
1995   OpenMPIRBuilder OMPBuilder(*M);
1996   MapVector<Value *, Value *> AlignedVars;
1997 
1998   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1999 
2000   // Simd-ize the loop.
2001   OMPBuilder.applySimd(
2002       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
2003       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2004 
2005   OMPBuilder.finalize();
2006   EXPECT_FALSE(verifyModule(*M, &errs()));
2007 
2008   PassBuilder PB;
2009   FunctionAnalysisManager FAM;
2010   PB.registerFunctionAnalyses(FAM);
2011   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2012 
2013   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2014   EXPECT_EQ(TopLvl.size(), 1u);
2015 
2016   Loop *L = TopLvl.front();
2017   // Parallel metadata shoudl be attached because of presence of
2018   // the order(concurrent) OpenMP clause
2019   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2020   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2021   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2022 
2023   // Check for llvm.access.group metadata attached to the printf
2024   // function in the loop body.
2025   BasicBlock *LoopBody = CLI->getBody();
2026   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2027     return I.getMetadata("llvm.access.group") != nullptr;
2028   }));
2029 }
2030 
2031 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
2032   OpenMPIRBuilder OMPBuilder(*M);
2033   MapVector<Value *, Value *> AlignedVars;
2034 
2035   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2036 
2037   OMPBuilder.applySimd(
2038       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2039       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2040 
2041   OMPBuilder.finalize();
2042   EXPECT_FALSE(verifyModule(*M, &errs()));
2043 
2044   PassBuilder PB;
2045   FunctionAnalysisManager FAM;
2046   PB.registerFunctionAnalyses(FAM);
2047   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2048 
2049   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2050   EXPECT_EQ(TopLvl.size(), 1u);
2051 
2052   Loop *L = TopLvl.front();
2053   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2054   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2055   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2056 
2057   // Check for llvm.access.group metadata attached to the printf
2058   // function in the loop body.
2059   BasicBlock *LoopBody = CLI->getBody();
2060   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2061     return I.getMetadata("llvm.access.group") != nullptr;
2062   }));
2063 }
2064 
2065 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
2066   OpenMPIRBuilder OMPBuilder(*M);
2067   MapVector<Value *, Value *> AlignedVars;
2068 
2069   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2070 
2071   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2072                        OrderKind::OMP_ORDER_unknown,
2073                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
2074                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2075 
2076   OMPBuilder.finalize();
2077   EXPECT_FALSE(verifyModule(*M, &errs()));
2078 
2079   PassBuilder PB;
2080   FunctionAnalysisManager FAM;
2081   PB.registerFunctionAnalyses(FAM);
2082   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2083 
2084   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2085   EXPECT_EQ(TopLvl.size(), 1u);
2086 
2087   Loop *L = TopLvl.front();
2088   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2089   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2090   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2);
2091 
2092   // Check for llvm.access.group metadata attached to the printf
2093   // function in the loop body.
2094   BasicBlock *LoopBody = CLI->getBody();
2095   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2096     return I.getMetadata("llvm.access.group") != nullptr;
2097   }));
2098 }
2099 
2100 TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) {
2101   OpenMPIRBuilder OMPBuilder(*M);
2102   IRBuilder<> Builder(BB);
2103   MapVector<Value *, Value *> AlignedVars;
2104   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
2105   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
2106 
2107   // Generation of if condition
2108   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1);
2109   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2);
2110   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2111   LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2);
2112 
2113   Value *IfCmp = Builder.CreateICmpNE(Load1, Load2);
2114 
2115   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2116 
2117   // Simd-ize the loop with if condition
2118   OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
2119                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2120                        /* Safelen */ nullptr);
2121 
2122   OMPBuilder.finalize();
2123   EXPECT_FALSE(verifyModule(*M, &errs()));
2124 
2125   PassBuilder PB;
2126   FunctionAnalysisManager FAM;
2127   PB.registerFunctionAnalyses(FAM);
2128   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2129 
2130   // Check if there are two loops (one with enabled vectorization)
2131   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2132   EXPECT_EQ(TopLvl.size(), 2u);
2133 
2134   Loop *L = TopLvl[0];
2135   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2136   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2137   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2138 
2139   // The second loop should have disabled vectorization
2140   L = TopLvl[1];
2141   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2142   EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2143   // Check for llvm.access.group metadata attached to the printf
2144   // function in the loop body.
2145   BasicBlock *LoopBody = CLI->getBody();
2146   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2147     return I.getMetadata("llvm.access.group") != nullptr;
2148   }));
2149 }
2150 
2151 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
2152   OpenMPIRBuilder OMPBuilder(*M);
2153 
2154   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2155 
2156   // Unroll the loop.
2157   OMPBuilder.unrollLoopFull(DL, CLI);
2158 
2159   OMPBuilder.finalize();
2160   EXPECT_FALSE(verifyModule(*M, &errs()));
2161 
2162   PassBuilder PB;
2163   FunctionAnalysisManager FAM;
2164   PB.registerFunctionAnalyses(FAM);
2165   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2166 
2167   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2168   EXPECT_EQ(TopLvl.size(), 1u);
2169 
2170   Loop *L = TopLvl.front();
2171   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2172   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
2173 }
2174 
2175 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
2176   OpenMPIRBuilder OMPBuilder(*M);
2177   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2178 
2179   // Unroll the loop.
2180   CanonicalLoopInfo *UnrolledLoop = nullptr;
2181   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
2182   ASSERT_NE(UnrolledLoop, nullptr);
2183 
2184   OMPBuilder.finalize();
2185   EXPECT_FALSE(verifyModule(*M, &errs()));
2186   UnrolledLoop->assertOK();
2187 
2188   PassBuilder PB;
2189   FunctionAnalysisManager FAM;
2190   PB.registerFunctionAnalyses(FAM);
2191   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2192 
2193   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2194   EXPECT_EQ(TopLvl.size(), 1u);
2195   Loop *Outer = TopLvl.front();
2196   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
2197   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
2198   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
2199   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
2200 
2201   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
2202   Loop *Inner = Outer->getSubLoops().front();
2203 
2204   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
2205   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
2206 }
2207 
2208 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
2209   OpenMPIRBuilder OMPBuilder(*M);
2210 
2211   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2212 
2213   // Unroll the loop.
2214   OMPBuilder.unrollLoopHeuristic(DL, CLI);
2215 
2216   OMPBuilder.finalize();
2217   EXPECT_FALSE(verifyModule(*M, &errs()));
2218 
2219   PassBuilder PB;
2220   FunctionAnalysisManager FAM;
2221   PB.registerFunctionAnalyses(FAM);
2222   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2223 
2224   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2225   EXPECT_EQ(TopLvl.size(), 1u);
2226 
2227   Loop *L = TopLvl.front();
2228   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2229 }
2230 
2231 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
2232   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2233   std::string oldDLStr = M->getDataLayoutStr();
2234   M->setDataLayout(
2235       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
2236       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
2237       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
2238   OpenMPIRBuilder OMPBuilder(*M);
2239   OMPBuilder.Config.IsTargetDevice = true;
2240   OMPBuilder.initialize();
2241   IRBuilder<> Builder(BB);
2242   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2243   InsertPointTy AllocaIP = Builder.saveIP();
2244 
2245   Type *LCTy = Type::getInt32Ty(Ctx);
2246   Value *StartVal = ConstantInt::get(LCTy, 10);
2247   Value *StopVal = ConstantInt::get(LCTy, 52);
2248   Value *StepVal = ConstantInt::get(LCTy, 2);
2249   auto LoopBodyGen = [&](InsertPointTy, Value *) {};
2250 
2251   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2252       Loc, LoopBodyGen, StartVal, StopVal, StepVal, false, false);
2253   BasicBlock *Preheader = CLI->getPreheader();
2254   Value *TripCount = CLI->getTripCount();
2255 
2256   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2257 
2258   IRBuilder<>::InsertPoint AfterIP = OMPBuilder.applyWorkshareLoop(
2259       DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, nullptr, false, false,
2260       false, false, WorksharingLoopType::ForStaticLoop);
2261   Builder.restoreIP(AfterIP);
2262   Builder.CreateRetVoid();
2263 
2264   OMPBuilder.finalize();
2265   EXPECT_FALSE(verifyModule(*M, &errs()));
2266 
2267   CallInst *WorkshareLoopRuntimeCall = nullptr;
2268   int WorkshareLoopRuntimeCallCnt = 0;
2269   for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) {
2270     CallInst *Call = dyn_cast<CallInst>(Inst);
2271     if (!Call)
2272       continue;
2273     if (!Call->getCalledFunction())
2274       continue;
2275 
2276     if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") {
2277       WorkshareLoopRuntimeCall = Call;
2278       WorkshareLoopRuntimeCallCnt++;
2279     }
2280   }
2281   EXPECT_NE(WorkshareLoopRuntimeCall, nullptr);
2282   // Verify that there is only one call to workshare loop function
2283   EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1);
2284   // Check that pointer to loop body function is passed as second argument
2285   Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1);
2286   EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType());
2287   Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg);
2288   EXPECT_NE(ArgFunction, nullptr);
2289   EXPECT_EQ(ArgFunction->arg_size(), 1u);
2290   EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType());
2291   // Check that no variables except for loop counter are used in loop body
2292   EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()),
2293             WorkshareLoopRuntimeCall->getArgOperand(2));
2294   // Check loop trip count argument
2295   EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3));
2296 }
2297 
2298 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
2299   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2300   OpenMPIRBuilder OMPBuilder(*M);
2301   OMPBuilder.Config.IsTargetDevice = false;
2302   OMPBuilder.initialize();
2303   IRBuilder<> Builder(BB);
2304   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2305 
2306   Type *LCTy = Type::getInt32Ty(Ctx);
2307   Value *StartVal = ConstantInt::get(LCTy, 10);
2308   Value *StopVal = ConstantInt::get(LCTy, 52);
2309   Value *StepVal = ConstantInt::get(LCTy, 2);
2310   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2311 
2312   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2313       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2314       /*IsSigned=*/false, /*InclusiveStop=*/false);
2315   BasicBlock *Preheader = CLI->getPreheader();
2316   BasicBlock *Body = CLI->getBody();
2317   Value *IV = CLI->getIndVar();
2318   BasicBlock *ExitBlock = CLI->getExit();
2319 
2320   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2321   InsertPointTy AllocaIP = Builder.saveIP();
2322 
2323   OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2324                                 OMP_SCHEDULE_Static);
2325 
2326   BasicBlock *Cond = Body->getSinglePredecessor();
2327   Instruction *Cmp = &*Cond->begin();
2328   Value *TripCount = Cmp->getOperand(1);
2329 
2330   auto AllocaIter = BB->begin();
2331   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2332   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2333   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2334   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2335   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2336   EXPECT_NE(PLastIter, nullptr);
2337   EXPECT_NE(PLowerBound, nullptr);
2338   EXPECT_NE(PUpperBound, nullptr);
2339   EXPECT_NE(PStride, nullptr);
2340 
2341   auto PreheaderIter = Preheader->begin();
2342   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
2343   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2344   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2345   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2346   ASSERT_NE(LowerBoundStore, nullptr);
2347   ASSERT_NE(UpperBoundStore, nullptr);
2348   ASSERT_NE(StrideStore, nullptr);
2349 
2350   auto *OrigLowerBound =
2351       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2352   auto *OrigUpperBound =
2353       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2354   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2355   ASSERT_NE(OrigLowerBound, nullptr);
2356   ASSERT_NE(OrigUpperBound, nullptr);
2357   ASSERT_NE(OrigStride, nullptr);
2358   EXPECT_EQ(OrigLowerBound->getValue(), 0);
2359   EXPECT_EQ(OrigUpperBound->getValue(), 20);
2360   EXPECT_EQ(OrigStride->getValue(), 1);
2361 
2362   // Check that the loop IV is updated to account for the lower bound returned
2363   // by the OpenMP runtime call.
2364   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
2365   EXPECT_EQ(Add->getOperand(0), IV);
2366   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
2367   ASSERT_NE(LoadedLowerBound, nullptr);
2368   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
2369 
2370   // Check that the trip count is updated to account for the lower and upper
2371   // bounds return by the OpenMP runtime call.
2372   auto *AddOne = dyn_cast<Instruction>(TripCount);
2373   ASSERT_NE(AddOne, nullptr);
2374   ASSERT_TRUE(AddOne->isBinaryOp());
2375   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
2376   ASSERT_NE(One, nullptr);
2377   EXPECT_EQ(One->getValue(), 1);
2378   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
2379   ASSERT_NE(Difference, nullptr);
2380   ASSERT_TRUE(Difference->isBinaryOp());
2381   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
2382   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
2383   ASSERT_NE(LoadedUpperBound, nullptr);
2384   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
2385 
2386   // The original loop iterator should only be used in the condition, in the
2387   // increment and in the statement that adds the lower bound to it.
2388   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2389 
2390   // The exit block should contain the "fini" call and the barrier call,
2391   // plus the call to obtain the thread ID.
2392   size_t NumCallsInExitBlock =
2393       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2394   EXPECT_EQ(NumCallsInExitBlock, 3u);
2395 }
2396 
2397 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
2398   unsigned IVBits = GetParam();
2399 
2400   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2401   OpenMPIRBuilder OMPBuilder(*M);
2402   OMPBuilder.Config.IsTargetDevice = false;
2403 
2404   BasicBlock *Body;
2405   CallInst *Call;
2406   CanonicalLoopInfo *CLI =
2407       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
2408 
2409   Instruction *OrigIndVar = CLI->getIndVar();
2410   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
2411 
2412   Type *LCTy = Type::getInt32Ty(Ctx);
2413   Value *ChunkSize = ConstantInt::get(LCTy, 5);
2414   InsertPointTy AllocaIP{&F->getEntryBlock(),
2415                          F->getEntryBlock().getFirstInsertionPt()};
2416   OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2417                                 OMP_SCHEDULE_Static, ChunkSize);
2418 
2419   OMPBuilder.finalize();
2420   EXPECT_FALSE(verifyModule(*M, &errs()));
2421 
2422   BasicBlock *Entry = &F->getEntryBlock();
2423   BasicBlock *Preheader = Entry->getSingleSuccessor();
2424 
2425   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2426   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2427   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2428   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2429   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2430   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2431   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2432 
2433   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2434   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2435   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2436   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2437   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2438   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2439   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2440 
2441   BasicBlock *DispatchInc = ChunkAfter;
2442 
2443   EXPECT_EQ(ChunkBody, Body);
2444   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2445   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2446 
2447   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2448 
2449   Value *NewIV = Call->getOperand(1);
2450   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2451 
2452   CallInst *InitCall = findSingleCall(
2453       F,
2454       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2455                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2456       OMPBuilder);
2457   EXPECT_EQ(InitCall->getParent(), Preheader);
2458   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2459   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2460   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2461 
2462   CallInst *FiniCall = findSingleCall(
2463       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2464   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2465 
2466   CallInst *BarrierCall = findSingleCall(
2467       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2468   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2469 }
2470 
2471 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
2472                          ::testing::Values(8, 16, 32, 64));
2473 
2474 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2475   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2476   OpenMPIRBuilder OMPBuilder(*M);
2477   OMPBuilder.Config.IsTargetDevice = false;
2478   OMPBuilder.initialize();
2479   IRBuilder<> Builder(BB);
2480   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2481 
2482   omp::OMPScheduleType SchedType = GetParam();
2483   uint32_t ChunkSize = 1;
2484   switch (SchedType & ~OMPScheduleType::ModifierMask) {
2485   case omp::OMPScheduleType::BaseDynamicChunked:
2486   case omp::OMPScheduleType::BaseGuidedChunked:
2487     ChunkSize = 7;
2488     break;
2489   case omp::OMPScheduleType::BaseAuto:
2490   case omp::OMPScheduleType::BaseRuntime:
2491     ChunkSize = 1;
2492     break;
2493   default:
2494     assert(0 && "unknown type for this test");
2495     break;
2496   }
2497 
2498   Type *LCTy = Type::getInt32Ty(Ctx);
2499   Value *StartVal = ConstantInt::get(LCTy, 10);
2500   Value *StopVal = ConstantInt::get(LCTy, 52);
2501   Value *StepVal = ConstantInt::get(LCTy, 2);
2502   Value *ChunkVal =
2503       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
2504   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2505 
2506   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2507       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2508       /*IsSigned=*/false, /*InclusiveStop=*/false);
2509 
2510   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2511   InsertPointTy AllocaIP = Builder.saveIP();
2512 
2513   // Collect all the info from CLI, as it isn't usable after the call to
2514   // createDynamicWorkshareLoop.
2515   InsertPointTy AfterIP = CLI->getAfterIP();
2516   BasicBlock *Preheader = CLI->getPreheader();
2517   BasicBlock *ExitBlock = CLI->getExit();
2518   BasicBlock *LatchBlock = CLI->getLatch();
2519   Value *IV = CLI->getIndVar();
2520 
2521   InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
2522       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
2523       ChunkVal, /*Simd=*/false,
2524       (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
2525           omp::OMPScheduleType::ModifierMonotonic,
2526       (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
2527           omp::OMPScheduleType::ModifierNonmonotonic,
2528       /*Ordered=*/false);
2529 
2530   // The returned value should be the "after" point.
2531   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2532   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2533 
2534   auto AllocaIter = BB->begin();
2535   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2536   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2537   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2538   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2539   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2540   EXPECT_NE(PLastIter, nullptr);
2541   EXPECT_NE(PLowerBound, nullptr);
2542   EXPECT_NE(PUpperBound, nullptr);
2543   EXPECT_NE(PStride, nullptr);
2544 
2545   auto PreheaderIter = Preheader->begin();
2546   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2547   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2548   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2549   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2550   ASSERT_NE(LowerBoundStore, nullptr);
2551   ASSERT_NE(UpperBoundStore, nullptr);
2552   ASSERT_NE(StrideStore, nullptr);
2553 
2554   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2555   ASSERT_NE(ThreadIdCall, nullptr);
2556   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2557             "__kmpc_global_thread_num");
2558 
2559   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2560 
2561   ASSERT_NE(InitCall, nullptr);
2562   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2563             "__kmpc_dispatch_init_4u");
2564   EXPECT_EQ(InitCall->arg_size(), 7U);
2565   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2566   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2567   if ((SchedType & OMPScheduleType::MonotonicityMask) ==
2568       OMPScheduleType::None) {
2569     // Implementation is allowed to add default nonmonotonicity flag
2570     EXPECT_EQ(
2571         static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
2572             OMPScheduleType::ModifierNonmonotonic,
2573         SchedType | OMPScheduleType::ModifierNonmonotonic);
2574   } else {
2575     EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
2576               SchedType);
2577   }
2578 
2579   ConstantInt *OrigLowerBound =
2580       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2581   ConstantInt *OrigUpperBound =
2582       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2583   ConstantInt *OrigStride =
2584       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2585   ASSERT_NE(OrigLowerBound, nullptr);
2586   ASSERT_NE(OrigUpperBound, nullptr);
2587   ASSERT_NE(OrigStride, nullptr);
2588   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2589   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2590   EXPECT_EQ(OrigStride->getValue(), 1);
2591 
2592   CallInst *FiniCall = dyn_cast<CallInst>(
2593       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2594   EXPECT_EQ(FiniCall, nullptr);
2595 
2596   // The original loop iterator should only be used in the condition, in the
2597   // increment and in the statement that adds the lower bound to it.
2598   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2599 
2600   // The exit block should contain the barrier call, plus the call to obtain
2601   // the thread ID.
2602   size_t NumCallsInExitBlock =
2603       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2604   EXPECT_EQ(NumCallsInExitBlock, 2u);
2605 
2606   // Add a termination to our block and check that it is internally consistent.
2607   Builder.restoreIP(EndIP);
2608   Builder.CreateRetVoid();
2609   OMPBuilder.finalize();
2610   EXPECT_FALSE(verifyModule(*M, &errs()));
2611 }
2612 
2613 INSTANTIATE_TEST_SUITE_P(
2614     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
2615     ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
2616                       omp::OMPScheduleType::UnorderedGuidedChunked,
2617                       omp::OMPScheduleType::UnorderedAuto,
2618                       omp::OMPScheduleType::UnorderedRuntime,
2619                       omp::OMPScheduleType::UnorderedDynamicChunked |
2620                           omp::OMPScheduleType::ModifierMonotonic,
2621                       omp::OMPScheduleType::UnorderedDynamicChunked |
2622                           omp::OMPScheduleType::ModifierNonmonotonic,
2623                       omp::OMPScheduleType::UnorderedGuidedChunked |
2624                           omp::OMPScheduleType::ModifierMonotonic,
2625                       omp::OMPScheduleType::UnorderedGuidedChunked |
2626                           omp::OMPScheduleType::ModifierNonmonotonic,
2627                       omp::OMPScheduleType::UnorderedAuto |
2628                           omp::OMPScheduleType::ModifierMonotonic,
2629                       omp::OMPScheduleType::UnorderedRuntime |
2630                           omp::OMPScheduleType::ModifierMonotonic));
2631 
2632 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2633   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2634   OpenMPIRBuilder OMPBuilder(*M);
2635   OMPBuilder.Config.IsTargetDevice = false;
2636   OMPBuilder.initialize();
2637   IRBuilder<> Builder(BB);
2638   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2639 
2640   uint32_t ChunkSize = 1;
2641   Type *LCTy = Type::getInt32Ty(Ctx);
2642   Value *StartVal = ConstantInt::get(LCTy, 10);
2643   Value *StopVal = ConstantInt::get(LCTy, 52);
2644   Value *StepVal = ConstantInt::get(LCTy, 2);
2645   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2646   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2647 
2648   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2649       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2650       /*IsSigned=*/false, /*InclusiveStop=*/false);
2651 
2652   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2653   InsertPointTy AllocaIP = Builder.saveIP();
2654 
2655   // Collect all the info from CLI, as it isn't usable after the call to
2656   // createDynamicWorkshareLoop.
2657   BasicBlock *Preheader = CLI->getPreheader();
2658   BasicBlock *ExitBlock = CLI->getExit();
2659   BasicBlock *LatchBlock = CLI->getLatch();
2660   Value *IV = CLI->getIndVar();
2661 
2662   InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
2663       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal,
2664       /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false,
2665       /*HasNonmonotonicModifier=*/false,
2666       /*HasOrderedClause=*/true);
2667 
2668   // Add a termination to our block and check that it is internally consistent.
2669   Builder.restoreIP(EndIP);
2670   Builder.CreateRetVoid();
2671   OMPBuilder.finalize();
2672   EXPECT_FALSE(verifyModule(*M, &errs()));
2673 
2674   CallInst *InitCall = nullptr;
2675   for (Instruction &EI : *Preheader) {
2676     Instruction *Cur = &EI;
2677     if (isa<CallInst>(Cur)) {
2678       InitCall = cast<CallInst>(Cur);
2679       if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2680         break;
2681       InitCall = nullptr;
2682     }
2683   }
2684   EXPECT_NE(InitCall, nullptr);
2685   EXPECT_EQ(InitCall->arg_size(), 7U);
2686   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2687   EXPECT_EQ(SchedVal->getValue(),
2688             static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));
2689 
2690   CallInst *FiniCall = dyn_cast<CallInst>(
2691       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2692   ASSERT_NE(FiniCall, nullptr);
2693   EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2694             "__kmpc_dispatch_fini_4u");
2695   EXPECT_EQ(FiniCall->arg_size(), 2U);
2696   EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2697   EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2698 
2699   // The original loop iterator should only be used in the condition, in the
2700   // increment and in the statement that adds the lower bound to it.
2701   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2702 
2703   // The exit block should contain the barrier call, plus the call to obtain
2704   // the thread ID.
2705   size_t NumCallsInExitBlock =
2706       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2707   EXPECT_EQ(NumCallsInExitBlock, 2u);
2708 }
2709 
2710 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2711   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2712   OpenMPIRBuilder OMPBuilder(*M);
2713   OMPBuilder.initialize();
2714   F->setName("func");
2715   IRBuilder<> Builder(BB);
2716 
2717   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2718 
2719   AllocaInst *PrivAI = nullptr;
2720 
2721   BasicBlock *EntryBB = nullptr;
2722   BasicBlock *ThenBB = nullptr;
2723 
2724   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2725     if (AllocaIP.isSet())
2726       Builder.restoreIP(AllocaIP);
2727     else
2728       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2729     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2730     Builder.CreateStore(F->arg_begin(), PrivAI);
2731 
2732     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2733     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2734     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2735 
2736     Builder.restoreIP(CodeGenIP);
2737 
2738     // collect some info for checks later
2739     ThenBB = Builder.GetInsertBlock();
2740     EntryBB = ThenBB->getUniquePredecessor();
2741 
2742     // simple instructions for body
2743     Value *PrivLoad =
2744         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2745     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2746   };
2747 
2748   auto FiniCB = [&](InsertPointTy IP) {
2749     BasicBlock *IPBB = IP.getBlock();
2750     EXPECT_NE(IPBB->end(), IP.getPoint());
2751   };
2752 
2753   Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
2754   Value *EntryBBTI = EntryBB->getTerminator();
2755   EXPECT_NE(EntryBBTI, nullptr);
2756   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2757   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2758   EXPECT_TRUE(EntryBr->isConditional());
2759   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2760   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2761   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2762 
2763   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2764   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2765 
2766   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2767   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2768   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2769   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2770 
2771   CallInst *MasterEndCI = nullptr;
2772   for (auto &FI : *ThenBB) {
2773     Instruction *cur = &FI;
2774     if (isa<CallInst>(cur)) {
2775       MasterEndCI = cast<CallInst>(cur);
2776       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2777         break;
2778       MasterEndCI = nullptr;
2779     }
2780   }
2781   EXPECT_NE(MasterEndCI, nullptr);
2782   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2783   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2784   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2785 }
2786 
2787 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2788   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2789   OpenMPIRBuilder OMPBuilder(*M);
2790   OMPBuilder.initialize();
2791   F->setName("func");
2792   IRBuilder<> Builder(BB);
2793 
2794   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2795 
2796   AllocaInst *PrivAI = nullptr;
2797 
2798   BasicBlock *EntryBB = nullptr;
2799   BasicBlock *ThenBB = nullptr;
2800 
2801   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2802     if (AllocaIP.isSet())
2803       Builder.restoreIP(AllocaIP);
2804     else
2805       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2806     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2807     Builder.CreateStore(F->arg_begin(), PrivAI);
2808 
2809     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2810     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2811     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2812 
2813     Builder.restoreIP(CodeGenIP);
2814 
2815     // collect some info for checks later
2816     ThenBB = Builder.GetInsertBlock();
2817     EntryBB = ThenBB->getUniquePredecessor();
2818 
2819     // simple instructions for body
2820     Value *PrivLoad =
2821         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2822     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2823   };
2824 
2825   auto FiniCB = [&](InsertPointTy IP) {
2826     BasicBlock *IPBB = IP.getBlock();
2827     EXPECT_NE(IPBB->end(), IP.getPoint());
2828   };
2829 
2830   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2831   Builder.restoreIP(
2832       OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter));
2833   Value *EntryBBTI = EntryBB->getTerminator();
2834   EXPECT_NE(EntryBBTI, nullptr);
2835   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2836   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2837   EXPECT_TRUE(EntryBr->isConditional());
2838   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2839   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2840   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2841 
2842   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2843   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2844 
2845   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
2846   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
2847   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
2848   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
2849 
2850   CallInst *MaskedEndCI = nullptr;
2851   for (auto &FI : *ThenBB) {
2852     Instruction *cur = &FI;
2853     if (isa<CallInst>(cur)) {
2854       MaskedEndCI = cast<CallInst>(cur);
2855       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
2856         break;
2857       MaskedEndCI = nullptr;
2858     }
2859   }
2860   EXPECT_NE(MaskedEndCI, nullptr);
2861   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
2862   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
2863   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
2864 }
2865 
2866 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
2867   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2868   OpenMPIRBuilder OMPBuilder(*M);
2869   OMPBuilder.initialize();
2870   F->setName("func");
2871   IRBuilder<> Builder(BB);
2872 
2873   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2874 
2875   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2876 
2877   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2878     // actual start for bodyCB
2879     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2880     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2881     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2882 
2883     // body begin
2884     Builder.restoreIP(CodeGenIP);
2885     Builder.CreateStore(F->arg_begin(), PrivAI);
2886     Value *PrivLoad =
2887         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2888     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2889   };
2890 
2891   auto FiniCB = [&](InsertPointTy IP) {
2892     BasicBlock *IPBB = IP.getBlock();
2893     EXPECT_NE(IPBB->end(), IP.getPoint());
2894   };
2895   BasicBlock *EntryBB = Builder.GetInsertBlock();
2896 
2897   Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
2898                                               "testCRT", nullptr));
2899 
2900   CallInst *CriticalEntryCI = nullptr;
2901   for (auto &EI : *EntryBB) {
2902     Instruction *cur = &EI;
2903     if (isa<CallInst>(cur)) {
2904       CriticalEntryCI = cast<CallInst>(cur);
2905       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
2906         break;
2907       CriticalEntryCI = nullptr;
2908     }
2909   }
2910   EXPECT_NE(CriticalEntryCI, nullptr);
2911   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
2912   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
2913   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
2914 
2915   CallInst *CriticalEndCI = nullptr;
2916   for (auto &FI : *EntryBB) {
2917     Instruction *cur = &FI;
2918     if (isa<CallInst>(cur)) {
2919       CriticalEndCI = cast<CallInst>(cur);
2920       if (CriticalEndCI->getCalledFunction()->getName() ==
2921           "__kmpc_end_critical")
2922         break;
2923       CriticalEndCI = nullptr;
2924     }
2925   }
2926   EXPECT_NE(CriticalEndCI, nullptr);
2927   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
2928   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
2929   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
2930   PointerType *CriticalNamePtrTy =
2931       PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8));
2932   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
2933   GlobalVariable *GV =
2934       dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2));
2935   ASSERT_NE(GV, nullptr);
2936   EXPECT_EQ(GV->getType(), CriticalNamePtrTy);
2937   const DataLayout &DL = M->getDataLayout();
2938   const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy);
2939   const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace());
2940   if (const llvm::MaybeAlign Alignment = GV->getAlign())
2941     EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign));
2942 }
2943 
2944 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
2945   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2946   OpenMPIRBuilder OMPBuilder(*M);
2947   OMPBuilder.initialize();
2948   F->setName("func");
2949   IRBuilder<> Builder(BB);
2950   LLVMContext &Ctx = M->getContext();
2951 
2952   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2953 
2954   InsertPointTy AllocaIP(&F->getEntryBlock(),
2955                          F->getEntryBlock().getFirstInsertionPt());
2956 
2957   unsigned NumLoops = 2;
2958   SmallVector<Value *, 2> StoreValues;
2959   Type *LCTy = Type::getInt64Ty(Ctx);
2960   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
2961   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
2962 
2963   // Test for "#omp ordered depend(source)"
2964   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
2965                                                    StoreValues, ".cnt.addr",
2966                                                    /*IsDependSource=*/true));
2967 
2968   Builder.CreateRetVoid();
2969   OMPBuilder.finalize();
2970   EXPECT_FALSE(verifyModule(*M, &errs()));
2971 
2972   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
2973   ASSERT_NE(AllocInst, nullptr);
2974   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
2975   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
2976   EXPECT_TRUE(
2977       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
2978 
2979   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
2980   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
2981     GetElementPtrInst *DependAddrGEPIter =
2982         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2983     ASSERT_NE(DependAddrGEPIter, nullptr);
2984     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
2985     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
2986     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
2987     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
2988     ASSERT_NE(FirstIdx, nullptr);
2989     ASSERT_NE(SecondIdx, nullptr);
2990     EXPECT_EQ(FirstIdx->getValue(), 0);
2991     EXPECT_EQ(SecondIdx->getValue(), Iter);
2992     StoreInst *StoreValue =
2993         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
2994     ASSERT_NE(StoreValue, nullptr);
2995     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
2996     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
2997     EXPECT_EQ(StoreValue->getAlign(), Align(8));
2998     IterInst = dyn_cast<Instruction>(StoreValue);
2999   }
3000 
3001   GetElementPtrInst *DependBaseAddrGEP =
3002       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3003   ASSERT_NE(DependBaseAddrGEP, nullptr);
3004   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3005   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3006   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3007   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3008   ASSERT_NE(FirstIdx, nullptr);
3009   ASSERT_NE(SecondIdx, nullptr);
3010   EXPECT_EQ(FirstIdx->getValue(), 0);
3011   EXPECT_EQ(SecondIdx->getValue(), 0);
3012 
3013   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3014   ASSERT_NE(GTID, nullptr);
3015   EXPECT_EQ(GTID->arg_size(), 1U);
3016   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3017   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3018   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3019 
3020   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3021   ASSERT_NE(Depend, nullptr);
3022   EXPECT_EQ(Depend->arg_size(), 3U);
3023   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
3024   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3025   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3026   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3027 }
3028 
3029 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
3030   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3031   OpenMPIRBuilder OMPBuilder(*M);
3032   OMPBuilder.initialize();
3033   F->setName("func");
3034   IRBuilder<> Builder(BB);
3035   LLVMContext &Ctx = M->getContext();
3036 
3037   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3038 
3039   InsertPointTy AllocaIP(&F->getEntryBlock(),
3040                          F->getEntryBlock().getFirstInsertionPt());
3041 
3042   unsigned NumLoops = 2;
3043   SmallVector<Value *, 2> StoreValues;
3044   Type *LCTy = Type::getInt64Ty(Ctx);
3045   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3046   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3047 
3048   // Test for "#omp ordered depend(sink: vec)"
3049   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3050                                                    StoreValues, ".cnt.addr",
3051                                                    /*IsDependSource=*/false));
3052 
3053   Builder.CreateRetVoid();
3054   OMPBuilder.finalize();
3055   EXPECT_FALSE(verifyModule(*M, &errs()));
3056 
3057   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3058   ASSERT_NE(AllocInst, nullptr);
3059   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3060   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3061   EXPECT_TRUE(
3062       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3063 
3064   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3065   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3066     GetElementPtrInst *DependAddrGEPIter =
3067         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3068     ASSERT_NE(DependAddrGEPIter, nullptr);
3069     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3070     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3071     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3072     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3073     ASSERT_NE(FirstIdx, nullptr);
3074     ASSERT_NE(SecondIdx, nullptr);
3075     EXPECT_EQ(FirstIdx->getValue(), 0);
3076     EXPECT_EQ(SecondIdx->getValue(), Iter);
3077     StoreInst *StoreValue =
3078         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3079     ASSERT_NE(StoreValue, nullptr);
3080     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3081     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3082     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3083     IterInst = dyn_cast<Instruction>(StoreValue);
3084   }
3085 
3086   GetElementPtrInst *DependBaseAddrGEP =
3087       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3088   ASSERT_NE(DependBaseAddrGEP, nullptr);
3089   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3090   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3091   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3092   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3093   ASSERT_NE(FirstIdx, nullptr);
3094   ASSERT_NE(SecondIdx, nullptr);
3095   EXPECT_EQ(FirstIdx->getValue(), 0);
3096   EXPECT_EQ(SecondIdx->getValue(), 0);
3097 
3098   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3099   ASSERT_NE(GTID, nullptr);
3100   EXPECT_EQ(GTID->arg_size(), 1U);
3101   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3102   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3103   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3104 
3105   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3106   ASSERT_NE(Depend, nullptr);
3107   EXPECT_EQ(Depend->arg_size(), 3U);
3108   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
3109   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3110   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3111   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3112 }
3113 
3114 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
3115   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3116   OpenMPIRBuilder OMPBuilder(*M);
3117   OMPBuilder.initialize();
3118   F->setName("func");
3119   IRBuilder<> Builder(BB);
3120 
3121   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3122 
3123   AllocaInst *PrivAI =
3124       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3125 
3126   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3127     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3128     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3129     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3130 
3131     Builder.restoreIP(CodeGenIP);
3132     Builder.CreateStore(F->arg_begin(), PrivAI);
3133     Value *PrivLoad =
3134         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3135     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3136   };
3137 
3138   auto FiniCB = [&](InsertPointTy IP) {
3139     BasicBlock *IPBB = IP.getBlock();
3140     EXPECT_NE(IPBB->end(), IP.getPoint());
3141   };
3142 
3143   // Test for "#omp ordered [threads]"
3144   BasicBlock *EntryBB = Builder.GetInsertBlock();
3145   Builder.restoreIP(
3146       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));
3147 
3148   Builder.CreateRetVoid();
3149   OMPBuilder.finalize();
3150   EXPECT_FALSE(verifyModule(*M, &errs()));
3151 
3152   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3153 
3154   CallInst *OrderedEntryCI = nullptr;
3155   for (auto &EI : *EntryBB) {
3156     Instruction *Cur = &EI;
3157     if (isa<CallInst>(Cur)) {
3158       OrderedEntryCI = cast<CallInst>(Cur);
3159       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3160         break;
3161       OrderedEntryCI = nullptr;
3162     }
3163   }
3164   EXPECT_NE(OrderedEntryCI, nullptr);
3165   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
3166   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
3167   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
3168 
3169   CallInst *OrderedEndCI = nullptr;
3170   for (auto &FI : *EntryBB) {
3171     Instruction *Cur = &FI;
3172     if (isa<CallInst>(Cur)) {
3173       OrderedEndCI = cast<CallInst>(Cur);
3174       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3175         break;
3176       OrderedEndCI = nullptr;
3177     }
3178   }
3179   EXPECT_NE(OrderedEndCI, nullptr);
3180   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
3181   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
3182   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
3183 }
3184 
3185 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
3186   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3187   OpenMPIRBuilder OMPBuilder(*M);
3188   OMPBuilder.initialize();
3189   F->setName("func");
3190   IRBuilder<> Builder(BB);
3191 
3192   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3193 
3194   AllocaInst *PrivAI =
3195       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3196 
3197   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3198     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3199     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3200     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3201 
3202     Builder.restoreIP(CodeGenIP);
3203     Builder.CreateStore(F->arg_begin(), PrivAI);
3204     Value *PrivLoad =
3205         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3206     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3207   };
3208 
3209   auto FiniCB = [&](InsertPointTy IP) {
3210     BasicBlock *IPBB = IP.getBlock();
3211     EXPECT_NE(IPBB->end(), IP.getPoint());
3212   };
3213 
3214   // Test for "#omp ordered simd"
3215   BasicBlock *EntryBB = Builder.GetInsertBlock();
3216   Builder.restoreIP(
3217       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));
3218 
3219   Builder.CreateRetVoid();
3220   OMPBuilder.finalize();
3221   EXPECT_FALSE(verifyModule(*M, &errs()));
3222 
3223   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3224 
3225   CallInst *OrderedEntryCI = nullptr;
3226   for (auto &EI : *EntryBB) {
3227     Instruction *Cur = &EI;
3228     if (isa<CallInst>(Cur)) {
3229       OrderedEntryCI = cast<CallInst>(Cur);
3230       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3231         break;
3232       OrderedEntryCI = nullptr;
3233     }
3234   }
3235   EXPECT_EQ(OrderedEntryCI, nullptr);
3236 
3237   CallInst *OrderedEndCI = nullptr;
3238   for (auto &FI : *EntryBB) {
3239     Instruction *Cur = &FI;
3240     if (isa<CallInst>(Cur)) {
3241       OrderedEndCI = cast<CallInst>(Cur);
3242       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3243         break;
3244       OrderedEndCI = nullptr;
3245     }
3246   }
3247   EXPECT_EQ(OrderedEndCI, nullptr);
3248 }
3249 
3250 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
3251   OpenMPIRBuilder OMPBuilder(*M);
3252   OMPBuilder.initialize();
3253   F->setName("func");
3254   IRBuilder<> Builder(BB);
3255 
3256   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3257 
3258   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3259   AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy());
3260   AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy());
3261 
3262   BasicBlock *EntryBB = BB;
3263 
3264   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
3265                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
3266 
3267   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
3268 
3269   EXPECT_NE(EntryBr, nullptr);
3270   EXPECT_TRUE(EntryBr->isConditional());
3271 
3272   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
3273   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
3274   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
3275 
3276   EXPECT_NE(CMP, nullptr);
3277   EXPECT_NE(NotMasterBB, nullptr);
3278   EXPECT_NE(CopyinEnd, nullptr);
3279 
3280   BranchInst *NotMasterBr =
3281       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
3282   EXPECT_NE(NotMasterBr, nullptr);
3283   EXPECT_FALSE(NotMasterBr->isConditional());
3284   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
3285 }
3286 
3287 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
3288   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3289   OpenMPIRBuilder OMPBuilder(*M);
3290   OMPBuilder.initialize();
3291   F->setName("func");
3292   IRBuilder<> Builder(BB);
3293 
3294   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3295 
3296   AllocaInst *PrivAI = nullptr;
3297 
3298   BasicBlock *EntryBB = nullptr;
3299   BasicBlock *ThenBB = nullptr;
3300 
3301   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3302     if (AllocaIP.isSet())
3303       Builder.restoreIP(AllocaIP);
3304     else
3305       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3306     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3307     Builder.CreateStore(F->arg_begin(), PrivAI);
3308 
3309     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3310     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3311     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3312 
3313     Builder.restoreIP(CodeGenIP);
3314 
3315     // collect some info for checks later
3316     ThenBB = Builder.GetInsertBlock();
3317     EntryBB = ThenBB->getUniquePredecessor();
3318 
3319     // simple instructions for body
3320     Value *PrivLoad =
3321         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3322     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3323   };
3324 
3325   auto FiniCB = [&](InsertPointTy IP) {
3326     BasicBlock *IPBB = IP.getBlock();
3327     EXPECT_NE(IPBB->end(), IP.getPoint());
3328   };
3329 
3330   Builder.restoreIP(
3331       OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ false));
3332   Value *EntryBBTI = EntryBB->getTerminator();
3333   EXPECT_NE(EntryBBTI, nullptr);
3334   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3335   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3336   EXPECT_TRUE(EntryBr->isConditional());
3337   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3338   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3339   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3340 
3341   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3342   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3343 
3344   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3345   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3346   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3347   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3348 
3349   CallInst *SingleEndCI = nullptr;
3350   for (auto &FI : *ThenBB) {
3351     Instruction *cur = &FI;
3352     if (isa<CallInst>(cur)) {
3353       SingleEndCI = cast<CallInst>(cur);
3354       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3355         break;
3356       SingleEndCI = nullptr;
3357     }
3358   }
3359   EXPECT_NE(SingleEndCI, nullptr);
3360   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3361   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3362   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3363 
3364   bool FoundBarrier = false;
3365   for (auto &FI : *ExitBB) {
3366     Instruction *cur = &FI;
3367     if (auto CI = dyn_cast<CallInst>(cur)) {
3368       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3369         FoundBarrier = true;
3370         break;
3371       }
3372     }
3373   }
3374   EXPECT_TRUE(FoundBarrier);
3375 }
3376 
3377 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
3378   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3379   OpenMPIRBuilder OMPBuilder(*M);
3380   OMPBuilder.initialize();
3381   F->setName("func");
3382   IRBuilder<> Builder(BB);
3383 
3384   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3385 
3386   AllocaInst *PrivAI = nullptr;
3387 
3388   BasicBlock *EntryBB = nullptr;
3389   BasicBlock *ThenBB = nullptr;
3390 
3391   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3392     if (AllocaIP.isSet())
3393       Builder.restoreIP(AllocaIP);
3394     else
3395       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3396     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3397     Builder.CreateStore(F->arg_begin(), PrivAI);
3398 
3399     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3400     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3401     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3402 
3403     Builder.restoreIP(CodeGenIP);
3404 
3405     // collect some info for checks later
3406     ThenBB = Builder.GetInsertBlock();
3407     EntryBB = ThenBB->getUniquePredecessor();
3408 
3409     // simple instructions for body
3410     Value *PrivLoad =
3411         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3412     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3413   };
3414 
3415   auto FiniCB = [&](InsertPointTy IP) {
3416     BasicBlock *IPBB = IP.getBlock();
3417     EXPECT_NE(IPBB->end(), IP.getPoint());
3418   };
3419 
3420   Builder.restoreIP(
3421       OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ true));
3422   Value *EntryBBTI = EntryBB->getTerminator();
3423   EXPECT_NE(EntryBBTI, nullptr);
3424   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3425   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3426   EXPECT_TRUE(EntryBr->isConditional());
3427   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3428   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3429   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3430 
3431   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3432   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3433 
3434   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3435   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3436   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3437   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3438 
3439   CallInst *SingleEndCI = nullptr;
3440   for (auto &FI : *ThenBB) {
3441     Instruction *cur = &FI;
3442     if (isa<CallInst>(cur)) {
3443       SingleEndCI = cast<CallInst>(cur);
3444       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3445         break;
3446       SingleEndCI = nullptr;
3447     }
3448   }
3449   EXPECT_NE(SingleEndCI, nullptr);
3450   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3451   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3452   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3453 
3454   CallInst *ExitBarrier = nullptr;
3455   for (auto &FI : *ExitBB) {
3456     Instruction *cur = &FI;
3457     if (auto CI = dyn_cast<CallInst>(cur)) {
3458       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3459         ExitBarrier = CI;
3460         break;
3461       }
3462     }
3463   }
3464   EXPECT_EQ(ExitBarrier, nullptr);
3465 }
3466 
3467 // Helper class to check each instruction of a BB.
3468 class BBInstIter {
3469   BasicBlock *BB;
3470   BasicBlock::iterator BBI;
3471 
3472 public:
3473   BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {}
3474 
3475   bool hasNext() const { return BBI != BB->end(); }
3476 
3477   template <typename InstTy> InstTy *next() {
3478     if (!hasNext())
3479       return nullptr;
3480     Instruction *Cur = &*BBI++;
3481     if (!isa<InstTy>(Cur))
3482       return nullptr;
3483     return cast<InstTy>(Cur);
3484   }
3485 };
3486 
3487 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
3488   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3489   OpenMPIRBuilder OMPBuilder(*M);
3490   OMPBuilder.initialize();
3491   F->setName("func");
3492   IRBuilder<> Builder(BB);
3493 
3494   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3495 
3496   AllocaInst *PrivAI = nullptr;
3497 
3498   BasicBlock *EntryBB = nullptr;
3499   BasicBlock *ThenBB = nullptr;
3500 
3501   Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType());
3502   Builder.CreateStore(F->arg_begin(), CPVar);
3503 
3504   FunctionType *CopyFuncTy = FunctionType::get(
3505       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false);
3506   Function *CopyFunc =
3507       Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
3508 
3509   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3510     if (AllocaIP.isSet())
3511       Builder.restoreIP(AllocaIP);
3512     else
3513       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3514     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3515     Builder.CreateStore(F->arg_begin(), PrivAI);
3516 
3517     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3518     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3519     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3520 
3521     Builder.restoreIP(CodeGenIP);
3522 
3523     // collect some info for checks later
3524     ThenBB = Builder.GetInsertBlock();
3525     EntryBB = ThenBB->getUniquePredecessor();
3526 
3527     // simple instructions for body
3528     Value *PrivLoad =
3529         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3530     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3531   };
3532 
3533   auto FiniCB = [&](InsertPointTy IP) {
3534     BasicBlock *IPBB = IP.getBlock();
3535     // IP must be before the unconditional branch to ExitBB
3536     EXPECT_NE(IPBB->end(), IP.getPoint());
3537   };
3538 
3539   Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB,
3540                                             /*IsNowait*/ false, {CPVar},
3541                                             {CopyFunc}));
3542   Value *EntryBBTI = EntryBB->getTerminator();
3543   EXPECT_NE(EntryBBTI, nullptr);
3544   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3545   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3546   EXPECT_TRUE(EntryBr->isConditional());
3547   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3548   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3549   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3550 
3551   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3552   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3553 
3554   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3555   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3556   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3557   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3558 
3559   // check ThenBB
3560   BBInstIter ThenBBI(ThenBB);
3561   // load PrivAI
3562   auto *PrivLI = ThenBBI.next<LoadInst>();
3563   EXPECT_NE(PrivLI, nullptr);
3564   EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI);
3565   // icmp
3566   EXPECT_TRUE(ThenBBI.next<ICmpInst>());
3567   // store 1, DidIt
3568   auto *DidItSI = ThenBBI.next<StoreInst>();
3569   EXPECT_NE(DidItSI, nullptr);
3570   EXPECT_EQ(DidItSI->getValueOperand(),
3571             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
3572   Value *DidIt = DidItSI->getPointerOperand();
3573   // call __kmpc_end_single
3574   auto *SingleEndCI = ThenBBI.next<CallInst>();
3575   EXPECT_NE(SingleEndCI, nullptr);
3576   EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single");
3577   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3578   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3579   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3580   // br ExitBB
3581   auto *ExitBBBI = ThenBBI.next<BranchInst>();
3582   EXPECT_NE(ExitBBBI, nullptr);
3583   EXPECT_TRUE(ExitBBBI->isUnconditional());
3584   EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB);
3585   EXPECT_FALSE(ThenBBI.hasNext());
3586 
3587   // check ExitBB
3588   BBInstIter ExitBBI(ExitBB);
3589   // call __kmpc_global_thread_num
3590   auto *ThreadNumCI = ExitBBI.next<CallInst>();
3591   EXPECT_NE(ThreadNumCI, nullptr);
3592   EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(),
3593             "__kmpc_global_thread_num");
3594   // load DidIt
3595   auto *DidItLI = ExitBBI.next<LoadInst>();
3596   EXPECT_NE(DidItLI, nullptr);
3597   EXPECT_EQ(DidItLI->getPointerOperand(), DidIt);
3598   // call __kmpc_copyprivate
3599   auto *CopyPrivateCI = ExitBBI.next<CallInst>();
3600   EXPECT_NE(CopyPrivateCI, nullptr);
3601   EXPECT_EQ(CopyPrivateCI->arg_size(), 6U);
3602   EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3)));
3603   EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar);
3604   EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4)));
3605   EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc);
3606   EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5)));
3607   DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5));
3608   EXPECT_EQ(DidItLI->getOperand(0), DidIt);
3609   EXPECT_FALSE(ExitBBI.hasNext());
3610 }
3611 
3612 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
3613   OpenMPIRBuilder OMPBuilder(*M);
3614   OMPBuilder.initialize();
3615   F->setName("func");
3616   IRBuilder<> Builder(BB);
3617 
3618   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3619 
3620   Type *Float32 = Type::getFloatTy(M->getContext());
3621   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3622   XVal->setName("AtomicVar");
3623   AllocaInst *VVal = Builder.CreateAlloca(Float32);
3624   VVal->setName("AtomicRead");
3625   AtomicOrdering AO = AtomicOrdering::Monotonic;
3626   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3627   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
3628 
3629   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3630 
3631   IntegerType *IntCastTy =
3632       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3633 
3634   LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
3635   EXPECT_TRUE(AtomicLoad->isAtomic());
3636   EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
3637 
3638   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
3639   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
3640   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
3641   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
3642 
3643   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
3644   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
3645   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
3646 
3647   Builder.CreateRetVoid();
3648   OMPBuilder.finalize();
3649   EXPECT_FALSE(verifyModule(*M, &errs()));
3650 }
3651 
3652 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
3653   OpenMPIRBuilder OMPBuilder(*M);
3654   OMPBuilder.initialize();
3655   F->setName("func");
3656   IRBuilder<> Builder(BB);
3657 
3658   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3659 
3660   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3661   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3662   XVal->setName("AtomicVar");
3663   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3664   VVal->setName("AtomicRead");
3665   AtomicOrdering AO = AtomicOrdering::Monotonic;
3666   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3667   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3668 
3669   BasicBlock *EntryBB = BB;
3670 
3671   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3672   LoadInst *AtomicLoad = nullptr;
3673   StoreInst *StoreofAtomic = nullptr;
3674 
3675   for (Instruction &Cur : *EntryBB) {
3676     if (isa<LoadInst>(Cur)) {
3677       AtomicLoad = cast<LoadInst>(&Cur);
3678       if (AtomicLoad->getPointerOperand() == XVal)
3679         continue;
3680       AtomicLoad = nullptr;
3681     } else if (isa<StoreInst>(Cur)) {
3682       StoreofAtomic = cast<StoreInst>(&Cur);
3683       if (StoreofAtomic->getPointerOperand() == VVal)
3684         continue;
3685       StoreofAtomic = nullptr;
3686     }
3687   }
3688 
3689   EXPECT_NE(AtomicLoad, nullptr);
3690   EXPECT_TRUE(AtomicLoad->isAtomic());
3691 
3692   EXPECT_NE(StoreofAtomic, nullptr);
3693   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
3694 
3695   Builder.CreateRetVoid();
3696   OMPBuilder.finalize();
3697 
3698   EXPECT_FALSE(verifyModule(*M, &errs()));
3699 }
3700 
3701 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
3702   OpenMPIRBuilder OMPBuilder(*M);
3703   OMPBuilder.initialize();
3704   F->setName("func");
3705   IRBuilder<> Builder(BB);
3706 
3707   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3708 
3709   LLVMContext &Ctx = M->getContext();
3710   Type *Float32 = Type::getFloatTy(Ctx);
3711   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3712   XVal->setName("AtomicVar");
3713   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3714   AtomicOrdering AO = AtomicOrdering::Monotonic;
3715   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
3716 
3717   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3718 
3719   IntegerType *IntCastTy =
3720       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3721 
3722   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
3723 
3724   StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode());
3725   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
3726   EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
3727   EXPECT_TRUE(StoreofAtomic->isAtomic());
3728 
3729   Builder.CreateRetVoid();
3730   OMPBuilder.finalize();
3731   EXPECT_FALSE(verifyModule(*M, &errs()));
3732 }
3733 
3734 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
3735   OpenMPIRBuilder OMPBuilder(*M);
3736   OMPBuilder.initialize();
3737   F->setName("func");
3738   IRBuilder<> Builder(BB);
3739 
3740   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3741 
3742   LLVMContext &Ctx = M->getContext();
3743   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3744   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3745   XVal->setName("AtomicVar");
3746   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3747   AtomicOrdering AO = AtomicOrdering::Monotonic;
3748   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3749 
3750   BasicBlock *EntryBB = BB;
3751 
3752   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3753 
3754   StoreInst *StoreofAtomic = nullptr;
3755 
3756   for (Instruction &Cur : *EntryBB) {
3757     if (isa<StoreInst>(Cur)) {
3758       StoreofAtomic = cast<StoreInst>(&Cur);
3759       if (StoreofAtomic->getPointerOperand() == XVal)
3760         continue;
3761       StoreofAtomic = nullptr;
3762     }
3763   }
3764 
3765   EXPECT_NE(StoreofAtomic, nullptr);
3766   EXPECT_TRUE(StoreofAtomic->isAtomic());
3767   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3768 
3769   Builder.CreateRetVoid();
3770   OMPBuilder.finalize();
3771   EXPECT_FALSE(verifyModule(*M, &errs()));
3772 }
3773 
3774 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3775   OpenMPIRBuilder OMPBuilder(*M);
3776   OMPBuilder.initialize();
3777   F->setName("func");
3778   IRBuilder<> Builder(BB);
3779 
3780   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3781 
3782   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3783   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3784   XVal->setName("AtomicVar");
3785   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3786   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3787   AtomicOrdering AO = AtomicOrdering::Monotonic;
3788   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3789   Value *Expr = nullptr;
3790   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3791   bool IsXLHSInRHSPart = false;
3792 
3793   BasicBlock *EntryBB = BB;
3794   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3795                                           EntryBB->getFirstInsertionPt());
3796   Value *Sub = nullptr;
3797 
3798   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3799     Sub = IRB.CreateSub(ConstVal, Atomic);
3800     return Sub;
3801   };
3802   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3803       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3804   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3805   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3806   EXPECT_NE(ContTI, nullptr);
3807   BasicBlock *EndBB = ContTI->getSuccessor(0);
3808   EXPECT_TRUE(ContTI->isConditional());
3809   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3810   EXPECT_NE(EndBB, nullptr);
3811 
3812   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3813   EXPECT_NE(Phi, nullptr);
3814   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3815   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3816   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3817 
3818   EXPECT_EQ(Sub->getNumUses(), 1U);
3819   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3820   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3821 
3822   ExtractValueInst *ExVI1 =
3823       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3824   EXPECT_NE(ExVI1, nullptr);
3825   AtomicCmpXchgInst *CmpExchg =
3826       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3827   EXPECT_NE(CmpExchg, nullptr);
3828   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3829   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3830   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3831 
3832   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3833   EXPECT_NE(Ld, nullptr);
3834   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3835 
3836   Builder.CreateRetVoid();
3837   OMPBuilder.finalize();
3838   EXPECT_FALSE(verifyModule(*M, &errs()));
3839 }
3840 
3841 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
3842   OpenMPIRBuilder OMPBuilder(*M);
3843   OMPBuilder.initialize();
3844   F->setName("func");
3845   IRBuilder<> Builder(BB);
3846 
3847   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3848 
3849   Type *FloatTy = Type::getFloatTy(M->getContext());
3850   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
3851   XVal->setName("AtomicVar");
3852   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
3853   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
3854   AtomicOrdering AO = AtomicOrdering::Monotonic;
3855   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
3856   Value *Expr = nullptr;
3857   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
3858   bool IsXLHSInRHSPart = false;
3859 
3860   BasicBlock *EntryBB = BB;
3861   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3862                                           EntryBB->getFirstInsertionPt());
3863   Value *Sub = nullptr;
3864 
3865   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3866     Sub = IRB.CreateFSub(ConstVal, Atomic);
3867     return Sub;
3868   };
3869   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3870       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3871   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3872   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3873   EXPECT_NE(ContTI, nullptr);
3874   BasicBlock *EndBB = ContTI->getSuccessor(0);
3875   EXPECT_TRUE(ContTI->isConditional());
3876   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3877   EXPECT_NE(EndBB, nullptr);
3878 
3879   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3880   EXPECT_NE(Phi, nullptr);
3881   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3882   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3883   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3884 
3885   EXPECT_EQ(Sub->getNumUses(), 1U);
3886   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3887   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3888 
3889   ExtractValueInst *ExVI1 =
3890       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3891   EXPECT_NE(ExVI1, nullptr);
3892   AtomicCmpXchgInst *CmpExchg =
3893       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3894   EXPECT_NE(CmpExchg, nullptr);
3895   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3896   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3897   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3898 
3899   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3900   EXPECT_NE(Ld, nullptr);
3901   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3902   Builder.CreateRetVoid();
3903   OMPBuilder.finalize();
3904   EXPECT_FALSE(verifyModule(*M, &errs()));
3905 }
3906 
3907 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
3908   OpenMPIRBuilder OMPBuilder(*M);
3909   OMPBuilder.initialize();
3910   F->setName("func");
3911   IRBuilder<> Builder(BB);
3912 
3913   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3914 
3915   Type *IntTy = Type::getInt32Ty(M->getContext());
3916   AllocaInst *XVal = Builder.CreateAlloca(IntTy);
3917   XVal->setName("AtomicVar");
3918   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
3919   OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
3920   AtomicOrdering AO = AtomicOrdering::Monotonic;
3921   Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
3922   Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
3923   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
3924   bool IsXLHSInRHSPart = false;
3925 
3926   BasicBlock *EntryBB = BB;
3927   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3928                                           EntryBB->getFirstInsertionPt());
3929   Value *Sub = nullptr;
3930 
3931   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3932     Sub = IRB.CreateSub(ConstVal, Atomic);
3933     return Sub;
3934   };
3935   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3936       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3937   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3938   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3939   EXPECT_NE(ContTI, nullptr);
3940   BasicBlock *EndBB = ContTI->getSuccessor(0);
3941   EXPECT_TRUE(ContTI->isConditional());
3942   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3943   EXPECT_NE(EndBB, nullptr);
3944 
3945   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3946   EXPECT_NE(Phi, nullptr);
3947   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3948   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3949   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3950 
3951   EXPECT_EQ(Sub->getNumUses(), 1U);
3952   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3953   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3954 
3955   ExtractValueInst *ExVI1 =
3956       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3957   EXPECT_NE(ExVI1, nullptr);
3958   AtomicCmpXchgInst *CmpExchg =
3959       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3960   EXPECT_NE(CmpExchg, nullptr);
3961   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3962   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3963   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3964 
3965   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3966   EXPECT_NE(Ld, nullptr);
3967   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3968 
3969   Builder.CreateRetVoid();
3970   OMPBuilder.finalize();
3971   EXPECT_FALSE(verifyModule(*M, &errs()));
3972 }
3973 
3974 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
3975   OpenMPIRBuilder OMPBuilder(*M);
3976   OMPBuilder.initialize();
3977   F->setName("func");
3978   IRBuilder<> Builder(BB);
3979 
3980   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3981 
3982   LLVMContext &Ctx = M->getContext();
3983   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3984   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3985   XVal->setName("AtomicVar");
3986   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3987   VVal->setName("AtomicCapTar");
3988   StoreInst *Init =
3989       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3990 
3991   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3992   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3993   AtomicOrdering AO = AtomicOrdering::Monotonic;
3994   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3995   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
3996   bool IsXLHSInRHSPart = true;
3997   bool IsPostfixUpdate = true;
3998   bool UpdateExpr = true;
3999 
4000   BasicBlock *EntryBB = BB;
4001   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4002                                           EntryBB->getFirstInsertionPt());
4003 
4004   // integer update - not used
4005   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
4006 
4007   Builder.restoreIP(OMPBuilder.createAtomicCapture(
4008       Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
4009       IsPostfixUpdate, IsXLHSInRHSPart));
4010   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4011   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4012   EXPECT_NE(ARWM, nullptr);
4013   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
4014   EXPECT_EQ(ARWM->getOperation(), RMWOp);
4015   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
4016   EXPECT_NE(St, nullptr);
4017   EXPECT_EQ(St->getPointerOperand(), VVal);
4018 
4019   Builder.CreateRetVoid();
4020   OMPBuilder.finalize();
4021   EXPECT_FALSE(verifyModule(*M, &errs()));
4022 }
4023 
4024 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
4025   OpenMPIRBuilder OMPBuilder(*M);
4026   OMPBuilder.initialize();
4027   F->setName("func");
4028   IRBuilder<> Builder(BB);
4029 
4030   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4031 
4032   LLVMContext &Ctx = M->getContext();
4033   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4034   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4035   XVal->setName("x");
4036   StoreInst *Init =
4037       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4038 
4039   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
4040   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
4041   // V and R are not used in atomic compare
4042   OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
4043   OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
4044   AtomicOrdering AO = AtomicOrdering::Monotonic;
4045   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4046   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4047   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4048   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4049 
4050   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4051       Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
4052   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4053       Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
4054   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4055       Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));
4056 
4057   BasicBlock *EntryBB = BB;
4058   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4059   EXPECT_EQ(EntryBB->size(), 5U);
4060 
4061   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4062   EXPECT_NE(ARWM1, nullptr);
4063   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4064   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4065   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4066 
4067   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
4068   EXPECT_NE(ARWM2, nullptr);
4069   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4070   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4071   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
4072 
4073   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
4074   EXPECT_NE(AXCHG, nullptr);
4075   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
4076   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
4077   EXPECT_EQ(AXCHG->getNewValOperand(), D);
4078 
4079   Builder.CreateRetVoid();
4080   OMPBuilder.finalize();
4081   EXPECT_FALSE(verifyModule(*M, &errs()));
4082 }
4083 
4084 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
4085   OpenMPIRBuilder OMPBuilder(*M);
4086   OMPBuilder.initialize();
4087   F->setName("func");
4088   IRBuilder<> Builder(BB);
4089 
4090   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4091 
4092   LLVMContext &Ctx = M->getContext();
4093   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4094   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4095   XVal->setName("x");
4096   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4097   VVal->setName("v");
4098   AllocaInst *RVal = Builder.CreateAlloca(Int32);
4099   RVal->setName("r");
4100 
4101   StoreInst *Init =
4102       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4103 
4104   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
4105   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4106   OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
4107   OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
4108   OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};
4109 
4110   AtomicOrdering AO = AtomicOrdering::Monotonic;
4111   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4112   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4113   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4114   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4115 
4116   // { cond-update-stmt v = x; }
4117   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4118       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4119       /* IsPostfixUpdate */ false,
4120       /* IsFailOnly */ false));
4121   // { v = x; cond-update-stmt }
4122   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4123       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4124       /* IsPostfixUpdate */ true,
4125       /* IsFailOnly */ false));
4126   // if(x == e) { x = d; } else { v = x; }
4127   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4128       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4129       /* IsPostfixUpdate */ false,
4130       /* IsFailOnly */ true));
4131   // { r = x == e; if(r) { x = d; } }
4132   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4133       Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4134       /* IsPostfixUpdate */ false,
4135       /* IsFailOnly */ false));
4136   // { r = x == e; if(r) { x = d; } else { v = x; } }
4137   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4138       Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4139       /* IsPostfixUpdate */ false,
4140       /* IsFailOnly */ true));
4141 
4142   // { v = x; cond-update-stmt }
4143   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4144       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true,
4145       /* IsPostfixUpdate */ true,
4146       /* IsFailOnly */ false));
4147   // { cond-update-stmt v = x; }
4148   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4149       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false,
4150       /* IsPostfixUpdate */ false,
4151       /* IsFailOnly */ false));
4152 
4153   BasicBlock *EntryBB = BB;
4154   EXPECT_EQ(EntryBB->getParent()->size(), 5U);
4155   BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode());
4156   EXPECT_NE(Cont1, nullptr);
4157   BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode());
4158   EXPECT_NE(Exit1, nullptr);
4159   BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode());
4160   EXPECT_NE(Cont2, nullptr);
4161   BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode());
4162   EXPECT_NE(Exit2, nullptr);
4163 
4164   AtomicCmpXchgInst *CmpXchg1 =
4165       dyn_cast<AtomicCmpXchgInst>(Init->getNextNode());
4166   EXPECT_NE(CmpXchg1, nullptr);
4167   EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal);
4168   EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr);
4169   EXPECT_EQ(CmpXchg1->getNewValOperand(), D);
4170   ExtractValueInst *ExtVal1 =
4171       dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode());
4172   EXPECT_NE(ExtVal1, nullptr);
4173   EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1);
4174   EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U));
4175   ExtractValueInst *ExtVal2 =
4176       dyn_cast<ExtractValueInst>(ExtVal1->getNextNode());
4177   EXPECT_NE(ExtVal2, nullptr);
4178   EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1);
4179   EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U));
4180   SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode());
4181   EXPECT_NE(Sel1, nullptr);
4182   EXPECT_EQ(Sel1->getCondition(), ExtVal2);
4183   EXPECT_EQ(Sel1->getTrueValue(), Expr);
4184   EXPECT_EQ(Sel1->getFalseValue(), ExtVal1);
4185   StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode());
4186   EXPECT_NE(Store1, nullptr);
4187   EXPECT_EQ(Store1->getPointerOperand(), VVal);
4188   EXPECT_EQ(Store1->getValueOperand(), Sel1);
4189 
4190   AtomicCmpXchgInst *CmpXchg2 =
4191       dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode());
4192   EXPECT_NE(CmpXchg2, nullptr);
4193   EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal);
4194   EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr);
4195   EXPECT_EQ(CmpXchg2->getNewValOperand(), D);
4196   ExtractValueInst *ExtVal3 =
4197       dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode());
4198   EXPECT_NE(ExtVal3, nullptr);
4199   EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2);
4200   EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U));
4201   StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode());
4202   EXPECT_NE(Store2, nullptr);
4203   EXPECT_EQ(Store2->getPointerOperand(), VVal);
4204   EXPECT_EQ(Store2->getValueOperand(), ExtVal3);
4205 
4206   AtomicCmpXchgInst *CmpXchg3 =
4207       dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode());
4208   EXPECT_NE(CmpXchg3, nullptr);
4209   EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal);
4210   EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr);
4211   EXPECT_EQ(CmpXchg3->getNewValOperand(), D);
4212   ExtractValueInst *ExtVal4 =
4213       dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode());
4214   EXPECT_NE(ExtVal4, nullptr);
4215   EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3);
4216   EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U));
4217   ExtractValueInst *ExtVal5 =
4218       dyn_cast<ExtractValueInst>(ExtVal4->getNextNode());
4219   EXPECT_NE(ExtVal5, nullptr);
4220   EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3);
4221   EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U));
4222   BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode());
4223   EXPECT_NE(Br1, nullptr);
4224   EXPECT_EQ(Br1->isConditional(), true);
4225   EXPECT_EQ(Br1->getCondition(), ExtVal5);
4226   EXPECT_EQ(Br1->getSuccessor(0), Exit1);
4227   EXPECT_EQ(Br1->getSuccessor(1), Cont1);
4228 
4229   StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front());
4230   EXPECT_NE(Store3, nullptr);
4231   EXPECT_EQ(Store3->getPointerOperand(), VVal);
4232   EXPECT_EQ(Store3->getValueOperand(), ExtVal4);
4233   BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode());
4234   EXPECT_NE(Br2, nullptr);
4235   EXPECT_EQ(Br2->isUnconditional(), true);
4236   EXPECT_EQ(Br2->getSuccessor(0), Exit1);
4237 
4238   AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front());
4239   EXPECT_NE(CmpXchg4, nullptr);
4240   EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal);
4241   EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr);
4242   EXPECT_EQ(CmpXchg4->getNewValOperand(), D);
4243   ExtractValueInst *ExtVal6 =
4244       dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode());
4245   EXPECT_NE(ExtVal6, nullptr);
4246   EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4);
4247   EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U));
4248   ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode());
4249   EXPECT_NE(ZExt1, nullptr);
4250   EXPECT_EQ(ZExt1->getDestTy(), Int32);
4251   StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode());
4252   EXPECT_NE(Store4, nullptr);
4253   EXPECT_EQ(Store4->getPointerOperand(), RVal);
4254   EXPECT_EQ(Store4->getValueOperand(), ZExt1);
4255 
4256   AtomicCmpXchgInst *CmpXchg5 =
4257       dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode());
4258   EXPECT_NE(CmpXchg5, nullptr);
4259   EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal);
4260   EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr);
4261   EXPECT_EQ(CmpXchg5->getNewValOperand(), D);
4262   ExtractValueInst *ExtVal7 =
4263       dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode());
4264   EXPECT_NE(ExtVal7, nullptr);
4265   EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5);
4266   EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U));
4267   ExtractValueInst *ExtVal8 =
4268       dyn_cast<ExtractValueInst>(ExtVal7->getNextNode());
4269   EXPECT_NE(ExtVal8, nullptr);
4270   EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5);
4271   EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U));
4272   BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode());
4273   EXPECT_NE(Br3, nullptr);
4274   EXPECT_EQ(Br3->isConditional(), true);
4275   EXPECT_EQ(Br3->getCondition(), ExtVal8);
4276   EXPECT_EQ(Br3->getSuccessor(0), Exit2);
4277   EXPECT_EQ(Br3->getSuccessor(1), Cont2);
4278 
4279   StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front());
4280   EXPECT_NE(Store5, nullptr);
4281   EXPECT_EQ(Store5->getPointerOperand(), VVal);
4282   EXPECT_EQ(Store5->getValueOperand(), ExtVal7);
4283   BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode());
4284   EXPECT_NE(Br4, nullptr);
4285   EXPECT_EQ(Br4->isUnconditional(), true);
4286   EXPECT_EQ(Br4->getSuccessor(0), Exit2);
4287 
4288   ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front());
4289   EXPECT_NE(ExtVal9, nullptr);
4290   EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5);
4291   EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U));
4292   ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode());
4293   EXPECT_NE(ZExt2, nullptr);
4294   EXPECT_EQ(ZExt2->getDestTy(), Int32);
4295   StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode());
4296   EXPECT_NE(Store6, nullptr);
4297   EXPECT_EQ(Store6->getPointerOperand(), RVal);
4298   EXPECT_EQ(Store6->getValueOperand(), ZExt2);
4299 
4300   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode());
4301   EXPECT_NE(ARWM1, nullptr);
4302   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4303   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4304   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4305   StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode());
4306   EXPECT_NE(Store7, nullptr);
4307   EXPECT_EQ(Store7->getPointerOperand(), VVal);
4308   EXPECT_EQ(Store7->getValueOperand(), ARWM1);
4309 
4310   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode());
4311   EXPECT_NE(ARWM2, nullptr);
4312   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4313   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4314   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max);
4315   CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode());
4316   EXPECT_NE(Cmp1, nullptr);
4317   EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT);
4318   EXPECT_EQ(Cmp1->getOperand(0), ARWM2);
4319   EXPECT_EQ(Cmp1->getOperand(1), Expr);
4320   SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode());
4321   EXPECT_NE(Sel2, nullptr);
4322   EXPECT_EQ(Sel2->getCondition(), Cmp1);
4323   EXPECT_EQ(Sel2->getTrueValue(), Expr);
4324   EXPECT_EQ(Sel2->getFalseValue(), ARWM2);
4325   StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode());
4326   EXPECT_NE(Store8, nullptr);
4327   EXPECT_EQ(Store8->getPointerOperand(), VVal);
4328   EXPECT_EQ(Store8->getValueOperand(), Sel2);
4329 
4330   Builder.CreateRetVoid();
4331   OMPBuilder.finalize();
4332   EXPECT_FALSE(verifyModule(*M, &errs()));
4333 }
4334 
4335 TEST_F(OpenMPIRBuilderTest, CreateTeams) {
4336   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4337   OpenMPIRBuilder OMPBuilder(*M);
4338   OMPBuilder.Config.IsTargetDevice = false;
4339   OMPBuilder.initialize();
4340   F->setName("func");
4341   IRBuilder<> Builder(BB);
4342 
4343   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
4344   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
4345   Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
4346 
4347   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4348     Builder.restoreIP(AllocaIP);
4349     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
4350                                                 "bodygen.alloca128");
4351 
4352     Builder.restoreIP(CodeGenIP);
4353     // Loading and storing captured pointer and values
4354     Builder.CreateStore(Val128, Local128);
4355     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
4356                                       "bodygen.load32");
4357 
4358     LoadInst *PrivLoad128 = Builder.CreateLoad(
4359         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
4360     Value *Cmp = Builder.CreateICmpNE(
4361         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
4362     Instruction *ThenTerm, *ElseTerm;
4363     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
4364                                   &ThenTerm, &ElseTerm);
4365   };
4366 
4367   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4368   Builder.restoreIP(OMPBuilder.createTeams(
4369       Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
4370       /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4371 
4372   OMPBuilder.finalize();
4373   Builder.CreateRetVoid();
4374 
4375   EXPECT_FALSE(verifyModule(*M, &errs()));
4376 
4377   CallInst *TeamsForkCall = dyn_cast<CallInst>(
4378       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)
4379           ->user_back());
4380 
4381   // Verify the Ident argument
4382   GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0));
4383   ASSERT_NE(Ident, nullptr);
4384   EXPECT_TRUE(Ident->hasInitializer());
4385   Constant *Initializer = Ident->getInitializer();
4386   GlobalVariable *SrcStrGlob =
4387       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
4388   ASSERT_NE(SrcStrGlob, nullptr);
4389   ConstantDataArray *SrcSrc =
4390       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
4391   ASSERT_NE(SrcSrc, nullptr);
4392 
4393   // Verify the outlined function signature.
4394   Function *OutlinedFn =
4395       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
4396   ASSERT_NE(OutlinedFn, nullptr);
4397   EXPECT_FALSE(OutlinedFn->isDeclaration());
4398   EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
4399   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
4400   EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
4401   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
4402             Builder.getPtrTy()); // captured args
4403 
4404   // Check for TruncInst and ICmpInst in the outlined function.
4405   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4406                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
4407   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4408                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
4409 }
4410 
4411 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
4412   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4413   OpenMPIRBuilder OMPBuilder(*M);
4414   OMPBuilder.Config.IsTargetDevice = false;
4415   OMPBuilder.initialize();
4416   F->setName("func");
4417   IRBuilder<> &Builder = OMPBuilder.Builder;
4418   Builder.SetInsertPoint(BB);
4419 
4420   Function *FakeFunction =
4421       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4422                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4423 
4424   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4425     Builder.restoreIP(CodeGenIP);
4426     Builder.CreateCall(FakeFunction, {});
4427   };
4428 
4429   // `F` has an argument - an integer, so we use that as the thread limit.
4430   Builder.restoreIP(OMPBuilder.createTeams(/*=*/Builder, BodyGenCB,
4431                                            /*NumTeamsLower=*/nullptr,
4432                                            /*NumTeamsUpper=*/nullptr,
4433                                            /*ThreadLimit=*/F->arg_begin(),
4434                                            /*IfExpr=*/nullptr));
4435 
4436   Builder.CreateRetVoid();
4437   OMPBuilder.finalize();
4438 
4439   ASSERT_FALSE(verifyModule(*M));
4440 
4441   CallInst *PushNumTeamsCallInst =
4442       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4443   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4444 
4445   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0));
4446   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0));
4447   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin());
4448 
4449   // Verifying that the next instruction to execute is kmpc_fork_teams
4450   BranchInst *BrInst =
4451       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4452   ASSERT_NE(BrInst, nullptr);
4453   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4454   Instruction *NextInstruction =
4455       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4456   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4457   ASSERT_NE(ForkTeamsCI, nullptr);
4458   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4459             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4460 }
4461 
4462 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
4463   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4464   OpenMPIRBuilder OMPBuilder(*M);
4465   OMPBuilder.Config.IsTargetDevice = false;
4466   OMPBuilder.initialize();
4467   F->setName("func");
4468   IRBuilder<> &Builder = OMPBuilder.Builder;
4469   Builder.SetInsertPoint(BB);
4470 
4471   Function *FakeFunction =
4472       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4473                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4474 
4475   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4476     Builder.restoreIP(CodeGenIP);
4477     Builder.CreateCall(FakeFunction, {});
4478   };
4479 
4480   // `F` already has an integer argument, so we use that as upper bound to
4481   // `num_teams`
4482   Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB,
4483                                            /*NumTeamsLower=*/nullptr,
4484                                            /*NumTeamsUpper=*/F->arg_begin(),
4485                                            /*ThreadLimit=*/nullptr,
4486                                            /*IfExpr=*/nullptr));
4487 
4488   Builder.CreateRetVoid();
4489   OMPBuilder.finalize();
4490 
4491   ASSERT_FALSE(verifyModule(*M));
4492 
4493   CallInst *PushNumTeamsCallInst =
4494       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4495   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4496 
4497   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin());
4498   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin());
4499   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4500 
4501   // Verifying that the next instruction to execute is kmpc_fork_teams
4502   BranchInst *BrInst =
4503       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4504   ASSERT_NE(BrInst, nullptr);
4505   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4506   Instruction *NextInstruction =
4507       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4508   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4509   ASSERT_NE(ForkTeamsCI, nullptr);
4510   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4511             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4512 }
4513 
4514 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
4515   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4516   OpenMPIRBuilder OMPBuilder(*M);
4517   OMPBuilder.Config.IsTargetDevice = false;
4518   OMPBuilder.initialize();
4519   F->setName("func");
4520   IRBuilder<> &Builder = OMPBuilder.Builder;
4521   Builder.SetInsertPoint(BB);
4522 
4523   Function *FakeFunction =
4524       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4525                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4526 
4527   Value *NumTeamsLower =
4528       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4529   Value *NumTeamsUpper =
4530       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4531 
4532   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4533     Builder.restoreIP(CodeGenIP);
4534     Builder.CreateCall(FakeFunction, {});
4535   };
4536 
4537   // `F` already has an integer argument, so we use that as upper bound to
4538   // `num_teams`
4539   Builder.restoreIP(
4540       OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
4541                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4542 
4543   Builder.CreateRetVoid();
4544   OMPBuilder.finalize();
4545 
4546   ASSERT_FALSE(verifyModule(*M));
4547 
4548   CallInst *PushNumTeamsCallInst =
4549       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4550   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4551 
4552   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4553   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4554   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4555 
4556   // Verifying that the next instruction to execute is kmpc_fork_teams
4557   BranchInst *BrInst =
4558       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4559   ASSERT_NE(BrInst, nullptr);
4560   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4561   Instruction *NextInstruction =
4562       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4563   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4564   ASSERT_NE(ForkTeamsCI, nullptr);
4565   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4566             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4567 }
4568 
4569 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
4570   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4571   OpenMPIRBuilder OMPBuilder(*M);
4572   OMPBuilder.Config.IsTargetDevice = false;
4573   OMPBuilder.initialize();
4574   F->setName("func");
4575   IRBuilder<> &Builder = OMPBuilder.Builder;
4576   Builder.SetInsertPoint(BB);
4577 
4578   BasicBlock *CodegenBB = splitBB(Builder, true);
4579   Builder.SetInsertPoint(CodegenBB);
4580 
4581   // Generate values for `num_teams` and `thread_limit` using the first argument
4582   // of the testing function.
4583   Value *NumTeamsLower =
4584       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4585   Value *NumTeamsUpper =
4586       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4587   Value *ThreadLimit =
4588       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit");
4589 
4590   Function *FakeFunction =
4591       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4592                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4593 
4594   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4595     Builder.restoreIP(CodeGenIP);
4596     Builder.CreateCall(FakeFunction, {});
4597   };
4598 
4599   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4600   Builder.restoreIP(OMPBuilder.createTeams(
4601       Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr));
4602 
4603   Builder.CreateRetVoid();
4604   OMPBuilder.finalize();
4605 
4606   ASSERT_FALSE(verifyModule(*M));
4607 
4608   CallInst *PushNumTeamsCallInst =
4609       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4610   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4611 
4612   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4613   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4614   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit);
4615 
4616   // Verifying that the next instruction to execute is kmpc_fork_teams
4617   BranchInst *BrInst =
4618       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4619   ASSERT_NE(BrInst, nullptr);
4620   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4621   Instruction *NextInstruction =
4622       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4623   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4624   ASSERT_NE(ForkTeamsCI, nullptr);
4625   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4626             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4627 }
4628 
4629 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
4630   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4631   OpenMPIRBuilder OMPBuilder(*M);
4632   OMPBuilder.Config.IsTargetDevice = false;
4633   OMPBuilder.initialize();
4634   F->setName("func");
4635   IRBuilder<> &Builder = OMPBuilder.Builder;
4636   Builder.SetInsertPoint(BB);
4637 
4638   Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(),
4639                                      Builder.CreateAlloca(Builder.getInt1Ty()));
4640 
4641   Function *FakeFunction =
4642       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4643                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4644 
4645   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4646     Builder.restoreIP(CodeGenIP);
4647     Builder.CreateCall(FakeFunction, {});
4648   };
4649 
4650   // `F` already has an integer argument, so we use that as upper bound to
4651   // `num_teams`
4652   Builder.restoreIP(OMPBuilder.createTeams(
4653       Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
4654       /*ThreadLimit=*/nullptr, IfExpr));
4655 
4656   Builder.CreateRetVoid();
4657   OMPBuilder.finalize();
4658 
4659   ASSERT_FALSE(verifyModule(*M));
4660 
4661   CallInst *PushNumTeamsCallInst =
4662       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4663   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4664   Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2);
4665   Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3);
4666   Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4);
4667 
4668   // Check the lower_bound
4669   ASSERT_NE(NumTeamsLower, nullptr);
4670   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower);
4671   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4672   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr);
4673   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0));
4674   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4675 
4676   // Check the upper_bound
4677   ASSERT_NE(NumTeamsUpper, nullptr);
4678   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper);
4679   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4680   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr);
4681   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0));
4682   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4683 
4684   // Check thread_limit
4685   EXPECT_EQ(ThreadLimit, Builder.getInt32(0));
4686 }
4687 
4688 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
4689   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4690   OpenMPIRBuilder OMPBuilder(*M);
4691   OMPBuilder.Config.IsTargetDevice = false;
4692   OMPBuilder.initialize();
4693   F->setName("func");
4694   IRBuilder<> &Builder = OMPBuilder.Builder;
4695   Builder.SetInsertPoint(BB);
4696 
4697   Value *IfExpr = Builder.CreateLoad(
4698       Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty()));
4699   Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5));
4700   Value *NumTeamsUpper =
4701       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10));
4702   Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20));
4703 
4704   Function *FakeFunction =
4705       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4706                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4707 
4708   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4709     Builder.restoreIP(CodeGenIP);
4710     Builder.CreateCall(FakeFunction, {});
4711   };
4712 
4713   // `F` already has an integer argument, so we use that as upper bound to
4714   // `num_teams`
4715   Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4716                                            NumTeamsUpper, ThreadLimit, IfExpr));
4717 
4718   Builder.CreateRetVoid();
4719   OMPBuilder.finalize();
4720 
4721   ASSERT_FALSE(verifyModule(*M));
4722 
4723   CallInst *PushNumTeamsCallInst =
4724       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4725   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4726   Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2);
4727   Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3);
4728   Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4);
4729 
4730   // Get the boolean conversion of if expression
4731   ASSERT_EQ(IfExpr->getNumUses(), 1U);
4732   User *IfExprInst = IfExpr->user_back();
4733   ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst);
4734   ASSERT_NE(IfExprCmpInst, nullptr);
4735   EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE);
4736   EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr);
4737   EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0));
4738 
4739   // Check the lower_bound
4740   ASSERT_NE(NumTeamsLowerArg, nullptr);
4741   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg);
4742   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4743   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst);
4744   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower);
4745   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4746 
4747   // Check the upper_bound
4748   ASSERT_NE(NumTeamsUpperArg, nullptr);
4749   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg);
4750   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4751   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst);
4752   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper);
4753   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4754 
4755   // Check thread_limit
4756   EXPECT_EQ(ThreadLimitArg, ThreadLimit);
4757 }
4758 
4759 /// Returns the single instruction of InstTy type in BB that uses the value V.
4760 /// If there is more than one such instruction, returns null.
4761 template <typename InstTy>
4762 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
4763   InstTy *Result = nullptr;
4764   for (User *U : V->users()) {
4765     auto *Inst = dyn_cast<InstTy>(U);
4766     if (!Inst || Inst->getParent() != BB)
4767       continue;
4768     if (Result) {
4769       if (auto *SI = dyn_cast<StoreInst>(Inst)) {
4770         if (V == SI->getValueOperand())
4771           continue;
4772       } else {
4773         return nullptr;
4774       }
4775     }
4776     Result = Inst;
4777   }
4778   return Result;
4779 }
4780 
4781 /// Returns true if BB contains a simple binary reduction that loads a value
4782 /// from Accum, performs some binary operation with it, and stores it back to
4783 /// Accum.
4784 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
4785                                     Instruction::BinaryOps *OpCode = nullptr) {
4786   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
4787   if (!Store)
4788     return false;
4789   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
4790   if (!Stored)
4791     return false;
4792   if (OpCode && *OpCode != Stored->getOpcode())
4793     return false;
4794   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
4795   return Load && Load->getOperand(0) == Accum;
4796 }
4797 
4798 /// Returns true if BB contains a binary reduction that reduces V using a binary
4799 /// operator into an accumulator that is a function argument.
4800 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
4801   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
4802   if (!ReductionOp)
4803     return false;
4804 
4805   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
4806   if (!GlobalLoad)
4807     return false;
4808 
4809   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
4810   if (!Store)
4811     return false;
4812 
4813   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
4814          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
4815 }
4816 
4817 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
4818 /// [0, 1], respectively, and assigns results of these instructions to Zero and
4819 /// One. Returns true on success, false on failure or if such instructions are
4820 /// not unique among the users of Ptr.
4821 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
4822   Zero = nullptr;
4823   One = nullptr;
4824   for (User *U : Ptr->users()) {
4825     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
4826       if (GEP->getNumIndices() != 2)
4827         continue;
4828       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
4829       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
4830       EXPECT_NE(FirstIdx, nullptr);
4831       EXPECT_NE(SecondIdx, nullptr);
4832 
4833       EXPECT_TRUE(FirstIdx->isZero());
4834       if (SecondIdx->isZero()) {
4835         if (Zero)
4836           return false;
4837         Zero = GEP;
4838       } else if (SecondIdx->isOne()) {
4839         if (One)
4840           return false;
4841         One = GEP;
4842       } else {
4843         return false;
4844       }
4845     }
4846   }
4847   return Zero != nullptr && One != nullptr;
4848 }
4849 
4850 static OpenMPIRBuilder::InsertPointTy
4851 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
4852              Value *&Result) {
4853   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4854   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
4855   return Builder.saveIP();
4856 }
4857 
4858 static OpenMPIRBuilder::InsertPointTy
4859 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
4860                    Value *RHS) {
4861   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4862   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
4863   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt,
4864                           AtomicOrdering::Monotonic);
4865   return Builder.saveIP();
4866 }
4867 
4868 static OpenMPIRBuilder::InsertPointTy
4869 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
4870              Value *&Result) {
4871   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4872   Result = Builder.CreateXor(LHS, RHS, "red.xor");
4873   return Builder.saveIP();
4874 }
4875 
4876 static OpenMPIRBuilder::InsertPointTy
4877 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
4878                    Value *RHS) {
4879   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4880   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
4881   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt,
4882                           AtomicOrdering::Monotonic);
4883   return Builder.saveIP();
4884 }
4885 
4886 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
4887   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4888   OpenMPIRBuilder OMPBuilder(*M);
4889   OMPBuilder.Config.IsTargetDevice = false;
4890   OMPBuilder.initialize();
4891   F->setName("func");
4892   IRBuilder<> Builder(BB);
4893 
4894   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
4895   Builder.CreateBr(EnterBB);
4896   Builder.SetInsertPoint(EnterBB);
4897   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4898 
4899   // Create variables to be reduced.
4900   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
4901                               F->getEntryBlock().getFirstInsertionPt());
4902   Type *SumType = Builder.getFloatTy();
4903   Type *XorType = Builder.getInt32Ty();
4904   Value *SumReduced;
4905   Value *XorReduced;
4906   {
4907     IRBuilderBase::InsertPointGuard Guard(Builder);
4908     Builder.restoreIP(OuterAllocaIP);
4909     SumReduced = Builder.CreateAlloca(SumType);
4910     XorReduced = Builder.CreateAlloca(XorType);
4911   }
4912 
4913   // Store initial values of reductions into global variables.
4914   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
4915   Builder.CreateStore(Builder.getInt32(1), XorReduced);
4916 
4917   // The loop body computes two reductions:
4918   //   sum of (float) thread-id;
4919   //   xor of thread-id;
4920   // and store the result in global variables.
4921   InsertPointTy BodyIP, BodyAllocaIP;
4922   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
4923     IRBuilderBase::InsertPointGuard Guard(Builder);
4924     Builder.restoreIP(CodeGenIP);
4925 
4926     uint32_t StrSize;
4927     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
4928     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
4929     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
4930     Value *SumLocal =
4931         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
4932     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
4933     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
4934     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
4935     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
4936     Builder.CreateStore(Sum, SumReduced);
4937     Builder.CreateStore(Xor, XorReduced);
4938 
4939     BodyIP = Builder.saveIP();
4940     BodyAllocaIP = InnerAllocaIP;
4941   };
4942 
4943   // Privatization for reduction creates local copies of reduction variables and
4944   // initializes them to reduction-neutral values.
4945   Value *SumPrivatized;
4946   Value *XorPrivatized;
4947   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
4948                     Value &Original, Value &Inner, Value *&ReplVal) {
4949     IRBuilderBase::InsertPointGuard Guard(Builder);
4950     Builder.restoreIP(InnerAllocaIP);
4951     if (&Original == SumReduced) {
4952       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
4953       ReplVal = SumPrivatized;
4954     } else if (&Original == XorReduced) {
4955       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
4956       ReplVal = XorPrivatized;
4957     } else {
4958       ReplVal = &Inner;
4959       return CodeGenIP;
4960     }
4961 
4962     Builder.restoreIP(CodeGenIP);
4963     if (&Original == SumReduced)
4964       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
4965                           SumPrivatized);
4966     else if (&Original == XorReduced)
4967       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
4968 
4969     return Builder.saveIP();
4970   };
4971 
4972   // Do nothing in finalization.
4973   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
4974 
4975   InsertPointTy AfterIP =
4976       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
4977                                 /* IfCondition */ nullptr,
4978                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
4979                                 /* IsCancellable */ false);
4980   Builder.restoreIP(AfterIP);
4981 
4982   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
4983       {SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction},
4984       {XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}};
4985 
4986   OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos);
4987 
4988   Builder.restoreIP(AfterIP);
4989   Builder.CreateRetVoid();
4990 
4991   OMPBuilder.finalize(F);
4992 
4993   // The IR must be valid.
4994   EXPECT_FALSE(verifyModule(*M));
4995 
4996   // Outlining must have happened.
4997   SmallVector<CallInst *> ForkCalls;
4998   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
4999             ForkCalls);
5000   ASSERT_EQ(ForkCalls.size(), 1u);
5001   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5002   Function *Outlined = dyn_cast<Function>(CalleeVal);
5003   EXPECT_NE(Outlined, nullptr);
5004 
5005   // Check that the lock variable was created with the expected name.
5006   GlobalVariable *LockVar =
5007       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
5008   EXPECT_NE(LockVar, nullptr);
5009 
5010   // Find the allocation of a local array that will be used to call the runtime
5011   // reduciton function.
5012   BasicBlock &AllocBlock = Outlined->getEntryBlock();
5013   Value *LocalArray = nullptr;
5014   for (Instruction &I : AllocBlock) {
5015     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
5016       if (!Alloc->getAllocatedType()->isArrayTy() ||
5017           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
5018         continue;
5019       LocalArray = Alloc;
5020       break;
5021     }
5022   }
5023   ASSERT_NE(LocalArray, nullptr);
5024 
5025   // Find the call to the runtime reduction function.
5026   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
5027   Value *LocalArrayPtr = nullptr;
5028   Value *ReductionFnVal = nullptr;
5029   Value *SwitchArg = nullptr;
5030   for (Instruction &I : *BB) {
5031     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
5032       if (Call->getCalledFunction() !=
5033           OMPBuilder.getOrCreateRuntimeFunctionPtr(
5034               RuntimeFunction::OMPRTL___kmpc_reduce))
5035         continue;
5036       LocalArrayPtr = Call->getOperand(4);
5037       ReductionFnVal = Call->getOperand(5);
5038       SwitchArg = Call;
5039       break;
5040     }
5041   }
5042 
5043   // Check that the local array is passed to the function.
5044   ASSERT_NE(LocalArrayPtr, nullptr);
5045   EXPECT_EQ(LocalArrayPtr, LocalArray);
5046 
5047   // Find the GEP instructions preceding stores to the local array.
5048   Value *FirstArrayElemPtr = nullptr;
5049   Value *SecondArrayElemPtr = nullptr;
5050   EXPECT_EQ(LocalArray->getNumUses(), 3u);
5051   ASSERT_TRUE(
5052       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
5053 
5054   // Check that the values stored into the local array are privatized reduction
5055   // variables.
5056   auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>(
5057       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
5058   auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>(
5059       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
5060   ASSERT_NE(FirstPrivatized, nullptr);
5061   ASSERT_NE(SecondPrivatized, nullptr);
5062   ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr));
5063   EXPECT_TRUE(isSimpleBinaryReduction(
5064       FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5065   EXPECT_TRUE(isSimpleBinaryReduction(
5066       SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5067 
5068   // Check that the result of the runtime reduction call is used for further
5069   // dispatch.
5070   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
5071   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
5072   ASSERT_NE(Switch, nullptr);
5073   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
5074   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
5075   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
5076 
5077   // Non-atomic block contains reductions to the global reduction variable,
5078   // which is passed into the outlined function as an argument.
5079   Value *FirstLoad =
5080       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
5081   Value *SecondLoad =
5082       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
5083   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
5084   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
5085 
5086   // Atomic block also constains reductions to the global reduction variable.
5087   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
5088   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
5089   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
5090   auto *SecondAtomic =
5091       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
5092   ASSERT_NE(FirstAtomic, nullptr);
5093   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
5094   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5095   ASSERT_NE(SecondAtomic, nullptr);
5096   AtomicStorePointer = SecondAtomic->getPointerOperand();
5097   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5098 
5099   // Check that the separate reduction function also performs (non-atomic)
5100   // reductions after extracting reduction variables from its arguments.
5101   Function *ReductionFn = cast<Function>(ReductionFnVal);
5102   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
5103   Value *FirstLHSPtr;
5104   Value *SecondLHSPtr;
5105   ASSERT_TRUE(
5106       findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr));
5107   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5108   ASSERT_NE(Opaque, nullptr);
5109   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5110   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5111   ASSERT_NE(Opaque, nullptr);
5112   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5113 
5114   Value *FirstRHS;
5115   Value *SecondRHS;
5116   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
5117 }
5118 
5119 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
5120   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5121   OpenMPIRBuilder OMPBuilder(*M);
5122   OMPBuilder.Config.IsTargetDevice = false;
5123   OMPBuilder.initialize();
5124   F->setName("func");
5125   IRBuilder<> Builder(BB);
5126 
5127   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5128   Builder.CreateBr(EnterBB);
5129   Builder.SetInsertPoint(EnterBB);
5130   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5131 
5132   // Create variables to be reduced.
5133   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5134                               F->getEntryBlock().getFirstInsertionPt());
5135   Type *SumType = Builder.getFloatTy();
5136   Type *XorType = Builder.getInt32Ty();
5137   Value *SumReduced;
5138   Value *XorReduced;
5139   {
5140     IRBuilderBase::InsertPointGuard Guard(Builder);
5141     Builder.restoreIP(OuterAllocaIP);
5142     SumReduced = Builder.CreateAlloca(SumType);
5143     XorReduced = Builder.CreateAlloca(XorType);
5144   }
5145 
5146   // Store initial values of reductions into global variables.
5147   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5148   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5149 
5150   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
5151   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5152                             InsertPointTy CodeGenIP) {
5153     IRBuilderBase::InsertPointGuard Guard(Builder);
5154     Builder.restoreIP(CodeGenIP);
5155 
5156     uint32_t StrSize;
5157     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5158     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5159     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5160     Value *SumLocal =
5161         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5162     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5163     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5164     Builder.CreateStore(Sum, SumReduced);
5165 
5166     FirstBodyIP = Builder.saveIP();
5167     FirstBodyAllocaIP = InnerAllocaIP;
5168   };
5169 
5170   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
5171   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5172                              InsertPointTy CodeGenIP) {
5173     IRBuilderBase::InsertPointGuard Guard(Builder);
5174     Builder.restoreIP(CodeGenIP);
5175 
5176     uint32_t StrSize;
5177     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5178     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5179     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5180     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5181     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5182     Builder.CreateStore(Xor, XorReduced);
5183 
5184     SecondBodyIP = Builder.saveIP();
5185     SecondBodyAllocaIP = InnerAllocaIP;
5186   };
5187 
5188   // Privatization for reduction creates local copies of reduction variables and
5189   // initializes them to reduction-neutral values. The same privatization
5190   // callback is used for both loops, with dispatch based on the value being
5191   // privatized.
5192   Value *SumPrivatized;
5193   Value *XorPrivatized;
5194   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5195                     Value &Original, Value &Inner, Value *&ReplVal) {
5196     IRBuilderBase::InsertPointGuard Guard(Builder);
5197     Builder.restoreIP(InnerAllocaIP);
5198     if (&Original == SumReduced) {
5199       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5200       ReplVal = SumPrivatized;
5201     } else if (&Original == XorReduced) {
5202       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5203       ReplVal = XorPrivatized;
5204     } else {
5205       ReplVal = &Inner;
5206       return CodeGenIP;
5207     }
5208 
5209     Builder.restoreIP(CodeGenIP);
5210     if (&Original == SumReduced)
5211       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5212                           SumPrivatized);
5213     else if (&Original == XorReduced)
5214       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5215 
5216     return Builder.saveIP();
5217   };
5218 
5219   // Do nothing in finalization.
5220   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
5221 
5222   Builder.restoreIP(
5223       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
5224                                 FiniCB, /* IfCondition */ nullptr,
5225                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5226                                 /* IsCancellable */ false));
5227   InsertPointTy AfterIP = OMPBuilder.createParallel(
5228       {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB,
5229       /* IfCondition */ nullptr,
5230       /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5231       /* IsCancellable */ false);
5232 
5233   OMPBuilder.createReductions(
5234       FirstBodyIP, FirstBodyAllocaIP,
5235       {{SumType, SumReduced, SumPrivatized, sumReduction, sumAtomicReduction}});
5236   OMPBuilder.createReductions(
5237       SecondBodyIP, SecondBodyAllocaIP,
5238       {{XorType, XorReduced, XorPrivatized, xorReduction, xorAtomicReduction}});
5239 
5240   Builder.restoreIP(AfterIP);
5241   Builder.CreateRetVoid();
5242 
5243   OMPBuilder.finalize(F);
5244 
5245   // The IR must be valid.
5246   EXPECT_FALSE(verifyModule(*M));
5247 
5248   // Two different outlined functions must have been created.
5249   SmallVector<CallInst *> ForkCalls;
5250   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5251             ForkCalls);
5252   ASSERT_EQ(ForkCalls.size(), 2u);
5253   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5254   Function *FirstCallee = cast<Function>(CalleeVal);
5255   CalleeVal = ForkCalls[1]->getOperand(2);
5256   Function *SecondCallee = cast<Function>(CalleeVal);
5257   EXPECT_NE(FirstCallee, SecondCallee);
5258 
5259   // Two different reduction functions must have been created.
5260   SmallVector<CallInst *> ReduceCalls;
5261   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
5262             ReduceCalls);
5263   ASSERT_EQ(ReduceCalls.size(), 1u);
5264   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5265   ReduceCalls.clear();
5266   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
5267             OMPBuilder, ReduceCalls);
5268   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5269   EXPECT_NE(AddReduction, XorReduction);
5270 
5271   // Each reduction function does its own kind of reduction.
5272   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
5273   Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5274       AddReduction->getArg(0), FnReductionBB);
5275   ASSERT_NE(FirstLHSPtr, nullptr);
5276   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5277   ASSERT_NE(Opaque, nullptr);
5278   Instruction::BinaryOps Opcode = Instruction::FAdd;
5279   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5280 
5281   FnReductionBB = &XorReduction->getEntryBlock();
5282   Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5283       XorReduction->getArg(0), FnReductionBB);
5284   ASSERT_NE(FirstLHSPtr, nullptr);
5285   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5286   ASSERT_NE(Opaque, nullptr);
5287   Opcode = Instruction::Xor;
5288   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5289 }
5290 
5291 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
5292   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5293   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5294   OpenMPIRBuilder OMPBuilder(*M);
5295   OMPBuilder.initialize();
5296   F->setName("func");
5297   IRBuilder<> Builder(BB);
5298 
5299   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5300   Builder.CreateBr(EnterBB);
5301   Builder.SetInsertPoint(EnterBB);
5302   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5303 
5304   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5305   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5306 
5307   auto FiniCB = [&](InsertPointTy IP) {};
5308   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
5309   SectionCBVector.push_back(SectionCB);
5310 
5311   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5312                    llvm::Value &, llvm::Value &Val,
5313                    llvm::Value *&ReplVal) { return CodeGenIP; };
5314   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5315                                     F->getEntryBlock().getFirstInsertionPt());
5316   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5317                                               PrivCB, FiniCB, false, false));
5318   Builder.CreateRetVoid(); // Required at the end of the function
5319   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
5320   EXPECT_FALSE(verifyModule(*M, &errs()));
5321 }
5322 
5323 TEST_F(OpenMPIRBuilderTest, CreateSections) {
5324   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5325   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5326   OpenMPIRBuilder OMPBuilder(*M);
5327   OMPBuilder.initialize();
5328   F->setName("func");
5329   IRBuilder<> Builder(BB);
5330 
5331   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5332   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5333   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5334 
5335   BasicBlock *SwitchBB = nullptr;
5336   AllocaInst *PrivAI = nullptr;
5337   SwitchInst *Switch = nullptr;
5338 
5339   unsigned NumBodiesGenerated = 0;
5340   unsigned NumFiniCBCalls = 0;
5341   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
5342 
5343   auto FiniCB = [&](InsertPointTy IP) {
5344     ++NumFiniCBCalls;
5345     BasicBlock *IPBB = IP.getBlock();
5346     EXPECT_NE(IPBB->end(), IP.getPoint());
5347   };
5348 
5349   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5350     ++NumBodiesGenerated;
5351     CaseBBs.push_back(CodeGenIP.getBlock());
5352     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
5353     Builder.restoreIP(CodeGenIP);
5354     Builder.CreateStore(F->arg_begin(), PrivAI);
5355     Value *PrivLoad =
5356         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
5357     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
5358   };
5359   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5360                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
5361     // TODO: Privatization not implemented yet
5362     return CodeGenIP;
5363   };
5364 
5365   SectionCBVector.push_back(SectionCB);
5366   SectionCBVector.push_back(SectionCB);
5367 
5368   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5369                                     F->getEntryBlock().getFirstInsertionPt());
5370   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5371                                               PrivCB, FiniCB, false, false));
5372   Builder.CreateRetVoid(); // Required at the end of the function
5373 
5374   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
5375   // loop's exit BB
5376   BasicBlock *ForExitBB =
5377       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
5378   EXPECT_NE(ForExitBB, nullptr);
5379 
5380   EXPECT_NE(PrivAI, nullptr);
5381   Function *OutlinedFn = PrivAI->getFunction();
5382   EXPECT_EQ(F, OutlinedFn);
5383   EXPECT_FALSE(verifyModule(*M, &errs()));
5384   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
5385 
5386   BasicBlock *LoopPreheaderBB =
5387       OutlinedFn->getEntryBlock().getSingleSuccessor();
5388   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
5389   // iterator/counter
5390   bool FoundForInit = false;
5391   for (Instruction &Inst : *LoopPreheaderBB) {
5392     if (isa<CallInst>(Inst)) {
5393       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5394           "__kmpc_for_static_init_4u") {
5395         FoundForInit = true;
5396       }
5397     }
5398   }
5399   EXPECT_EQ(FoundForInit, true);
5400 
5401   bool FoundForExit = false;
5402   bool FoundBarrier = false;
5403   for (Instruction &Inst : *ForExitBB) {
5404     if (isa<CallInst>(Inst)) {
5405       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5406           "__kmpc_for_static_fini") {
5407         FoundForExit = true;
5408       }
5409       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5410           "__kmpc_barrier") {
5411         FoundBarrier = true;
5412       }
5413       if (FoundForExit && FoundBarrier)
5414         break;
5415     }
5416   }
5417   EXPECT_EQ(FoundForExit, true);
5418   EXPECT_EQ(FoundBarrier, true);
5419 
5420   EXPECT_NE(SwitchBB, nullptr);
5421   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
5422   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
5423   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
5424   EXPECT_EQ(Switch->getNumCases(), 2U);
5425 
5426   EXPECT_EQ(CaseBBs.size(), 2U);
5427   for (auto *&CaseBB : CaseBBs) {
5428     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
5429   }
5430 
5431   ASSERT_EQ(NumBodiesGenerated, 2U);
5432   ASSERT_EQ(NumFiniCBCalls, 1U);
5433   EXPECT_FALSE(verifyModule(*M, &errs()));
5434 }
5435 
5436 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
5437   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5438   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5439   OpenMPIRBuilder OMPBuilder(*M);
5440   OMPBuilder.initialize();
5441   F->setName("func");
5442   IRBuilder<> Builder(BB);
5443 
5444   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5445   Builder.CreateBr(EnterBB);
5446   Builder.SetInsertPoint(EnterBB);
5447   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5448 
5449   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5450                                     F->getEntryBlock().getFirstInsertionPt());
5451   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5452   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5453                    llvm::Value &, llvm::Value &Val,
5454                    llvm::Value *&ReplVal) { return CodeGenIP; };
5455   auto FiniCB = [&](InsertPointTy IP) {};
5456 
5457   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5458                                               PrivCB, FiniCB, false, true));
5459   Builder.CreateRetVoid(); // Required at the end of the function
5460   for (auto &Inst : instructions(*F)) {
5461     EXPECT_FALSE(isa<CallInst>(Inst) &&
5462                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5463                      "__kmpc_barrier" &&
5464                  "call to function __kmpc_barrier found with nowait");
5465   }
5466 }
5467 
5468 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
5469   OpenMPIRBuilder OMPBuilder(*M);
5470   OMPBuilder.initialize();
5471 
5472   IRBuilder<> Builder(BB);
5473 
5474   SmallVector<uint64_t> Mappings = {0, 1};
5475   GlobalVariable *OffloadMaptypesGlobal =
5476       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
5477   EXPECT_FALSE(M->global_empty());
5478   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
5479   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5480   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5481   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5482   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5483   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5484   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
5485   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
5486   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
5487   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
5488   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
5489   EXPECT_EQ(MappingInit, CA);
5490 }
5491 
5492 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
5493   OpenMPIRBuilder OMPBuilder(*M);
5494   OMPBuilder.initialize();
5495 
5496   IRBuilder<> Builder(BB);
5497 
5498   uint32_t StrSize;
5499   Constant *Cst1 =
5500       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5501   Constant *Cst2 =
5502       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5503   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5504 
5505   GlobalVariable *OffloadMaptypesGlobal =
5506       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
5507   EXPECT_FALSE(M->global_empty());
5508   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
5509   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5510   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5511   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5512   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5513   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5514   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
5515   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
5516 
5517   GlobalVariable *Name1Gbl =
5518       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
5519   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
5520   ConstantDataArray *Name1GblCA =
5521       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
5522   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
5523 
5524   GlobalVariable *Name2Gbl =
5525       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
5526   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
5527   ConstantDataArray *Name2GblCA =
5528       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
5529   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
5530 
5531   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
5532   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
5533 }
5534 
5535 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
5536   OpenMPIRBuilder OMPBuilder(*M);
5537   OMPBuilder.initialize();
5538   F->setName("func");
5539   IRBuilder<> Builder(BB);
5540 
5541   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5542 
5543   unsigned TotalNbOperand = 2;
5544 
5545   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5546   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5547                                     F->getEntryBlock().getFirstInsertionPt());
5548   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5549   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
5550   EXPECT_NE(MapperAllocas.Args, nullptr);
5551   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
5552   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
5553   ArrayType *ArrType =
5554       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
5555   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5556   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
5557                   ->getArrayElementType()
5558                   ->isPointerTy());
5559 
5560   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
5561   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
5562   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5563   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
5564                   ->getArrayElementType()
5565                   ->isPointerTy());
5566 
5567   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
5568   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
5569   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5570   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
5571                   ->getArrayElementType()
5572                   ->isIntegerTy(64));
5573 }
5574 
5575 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
5576   OpenMPIRBuilder OMPBuilder(*M);
5577   OMPBuilder.initialize();
5578   F->setName("func");
5579   IRBuilder<> Builder(BB);
5580   LLVMContext &Ctx = M->getContext();
5581 
5582   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5583 
5584   unsigned TotalNbOperand = 2;
5585 
5586   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5587   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5588                                     F->getEntryBlock().getFirstInsertionPt());
5589   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5590 
5591   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
5592       omp::OMPRTL___tgt_target_data_begin_mapper);
5593 
5594   SmallVector<uint64_t> Flags = {0, 2};
5595 
5596   uint32_t StrSize;
5597   Constant *SrcLocCst =
5598       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
5599   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
5600 
5601   Constant *Cst1 =
5602       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5603   Constant *Cst2 =
5604       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5605   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5606 
5607   GlobalVariable *Maptypes =
5608       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
5609   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
5610       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
5611       /*Idx0=*/0, /*Idx1=*/0);
5612 
5613   GlobalVariable *Mapnames =
5614       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
5615   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
5616       ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames,
5617       /*Idx0=*/0, /*Idx1=*/0);
5618 
5619   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
5620                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
5621                             TotalNbOperand);
5622 
5623   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
5624   EXPECT_NE(MapperCall, nullptr);
5625   EXPECT_EQ(MapperCall->arg_size(), 9U);
5626   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
5627             "__tgt_target_data_begin_mapper");
5628   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
5629   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
5630   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
5631 
5632   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
5633   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
5634   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
5635 }
5636 
5637 TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
5638   OpenMPIRBuilder OMPBuilder(*M);
5639   OMPBuilder.initialize();
5640   F->setName("func");
5641   IRBuilder<> Builder(BB);
5642   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5643 
5644   int64_t DeviceID = 2;
5645 
5646   AllocaInst *Val1 =
5647       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5648   ASSERT_NE(Val1, nullptr);
5649 
5650   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5651                                     F->getEntryBlock().getFirstInsertionPt());
5652 
5653   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5654   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5655   auto GenMapInfoCB =
5656       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5657     // Get map clause information.
5658     Builder.restoreIP(codeGenIP);
5659 
5660     CombinedInfo.BasePointers.emplace_back(Val1);
5661     CombinedInfo.Pointers.emplace_back(Val1);
5662     CombinedInfo.DevicePointers.emplace_back(
5663         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5664     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5665     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1));
5666     uint32_t temp;
5667     CombinedInfo.Names.emplace_back(
5668         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5669     return CombinedInfo;
5670   };
5671 
5672   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5673       /*RequiresDevicePointerInfo=*/false,
5674       /*SeparateBeginEndCalls=*/true);
5675 
5676   OMPBuilder.Config.setIsGPU(true);
5677 
5678   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper;
5679   Builder.restoreIP(OMPBuilder.createTargetData(
5680       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5681       /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5682 
5683   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5684   EXPECT_NE(TargetDataCall, nullptr);
5685   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5686   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5687             "__tgt_target_data_begin_mapper");
5688   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5689   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5690   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5691 
5692   Builder.CreateRetVoid();
5693   EXPECT_FALSE(verifyModule(*M, &errs()));
5694 }
5695 
5696 TEST_F(OpenMPIRBuilderTest, TargetExitData) {
5697   OpenMPIRBuilder OMPBuilder(*M);
5698   OMPBuilder.initialize();
5699   F->setName("func");
5700   IRBuilder<> Builder(BB);
5701   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5702 
5703   int64_t DeviceID = 2;
5704 
5705   AllocaInst *Val1 =
5706       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5707   ASSERT_NE(Val1, nullptr);
5708 
5709   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5710                                     F->getEntryBlock().getFirstInsertionPt());
5711 
5712   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5713   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5714   auto GenMapInfoCB =
5715       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5716     // Get map clause information.
5717     Builder.restoreIP(codeGenIP);
5718 
5719     CombinedInfo.BasePointers.emplace_back(Val1);
5720     CombinedInfo.Pointers.emplace_back(Val1);
5721     CombinedInfo.DevicePointers.emplace_back(
5722         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5723     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5724     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2));
5725     uint32_t temp;
5726     CombinedInfo.Names.emplace_back(
5727         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5728     return CombinedInfo;
5729   };
5730 
5731   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5732       /*RequiresDevicePointerInfo=*/false,
5733       /*SeparateBeginEndCalls=*/true);
5734 
5735   OMPBuilder.Config.setIsGPU(true);
5736 
5737   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper;
5738   Builder.restoreIP(OMPBuilder.createTargetData(
5739       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5740       /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5741 
5742   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5743   EXPECT_NE(TargetDataCall, nullptr);
5744   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5745   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5746             "__tgt_target_data_end_mapper");
5747   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5748   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5749   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5750 
5751   Builder.CreateRetVoid();
5752   EXPECT_FALSE(verifyModule(*M, &errs()));
5753 }
5754 
5755 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
5756   OpenMPIRBuilder OMPBuilder(*M);
5757   OMPBuilder.initialize();
5758   F->setName("func");
5759   IRBuilder<> Builder(BB);
5760   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5761 
5762   int64_t DeviceID = 2;
5763 
5764   AllocaInst *Val1 =
5765       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5766   ASSERT_NE(Val1, nullptr);
5767 
5768   AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy());
5769   ASSERT_NE(Val2, nullptr);
5770 
5771   AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy());
5772   ASSERT_NE(Val3, nullptr);
5773 
5774   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5775                                     F->getEntryBlock().getFirstInsertionPt());
5776 
5777   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
5778   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5779   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5780   auto GenMapInfoCB =
5781       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5782     // Get map clause information.
5783     Builder.restoreIP(codeGenIP);
5784     uint32_t temp;
5785 
5786     CombinedInfo.BasePointers.emplace_back(Val1);
5787     CombinedInfo.Pointers.emplace_back(Val1);
5788     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None);
5789     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5790     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3));
5791     CombinedInfo.Names.emplace_back(
5792         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5793 
5794     CombinedInfo.BasePointers.emplace_back(Val2);
5795     CombinedInfo.Pointers.emplace_back(Val2);
5796     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
5797     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
5798     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
5799     CombinedInfo.Names.emplace_back(
5800         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5801 
5802     CombinedInfo.BasePointers.emplace_back(Val3);
5803     CombinedInfo.Pointers.emplace_back(Val3);
5804     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address);
5805     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
5806     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
5807     CombinedInfo.Names.emplace_back(
5808         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5809     return CombinedInfo;
5810   };
5811 
5812   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5813       /*RequiresDevicePointerInfo=*/true,
5814       /*SeparateBeginEndCalls=*/true);
5815 
5816   OMPBuilder.Config.setIsGPU(true);
5817 
5818   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
5819   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
5820     if (BodyGenType == BodyGenTy::Priv) {
5821       EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u);
5822       Builder.restoreIP(CodeGenIP);
5823       CallInst *TargetDataCall =
5824           dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
5825       EXPECT_NE(TargetDataCall, nullptr);
5826       EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5827       EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5828                 "__tgt_target_data_begin_mapper");
5829       EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5830       EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5831       EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5832 
5833       LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode());
5834       EXPECT_NE(LI, nullptr);
5835       StoreInst *SI = dyn_cast<StoreInst>(&BB->back());
5836       EXPECT_NE(SI, nullptr);
5837       EXPECT_EQ(SI->getValueOperand(), LI);
5838       EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second);
5839       EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second));
5840       EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second));
5841       Builder.CreateStore(Builder.getInt32(99), Val1);
5842     }
5843     return Builder.saveIP();
5844   };
5845 
5846   Builder.restoreIP(OMPBuilder.createTargetData(
5847       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5848       /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB));
5849 
5850   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5851   EXPECT_NE(TargetDataCall, nullptr);
5852   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5853   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5854             "__tgt_target_data_end_mapper");
5855   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5856   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5857   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5858 
5859   Builder.CreateRetVoid();
5860   EXPECT_FALSE(verifyModule(*M, &errs()));
5861 }
5862 
5863 namespace {
5864 // Some basic handling of argument mapping for the moment
5865 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder,
5866                            llvm::SmallVectorImpl<llvm::Value *> &Args,
5867                            llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) {
5868   for (auto Arg : Args) {
5869     CombinedInfo.BasePointers.emplace_back(Arg);
5870     CombinedInfo.Pointers.emplace_back(Arg);
5871     uint32_t SrcLocStrSize;
5872     CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr(
5873         "Unknown loc - stub implementation", SrcLocStrSize));
5874     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(
5875         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
5876         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
5877         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM));
5878     CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64(
5879         OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType())));
5880   }
5881 }
5882 } // namespace
5883 
5884 TEST_F(OpenMPIRBuilderTest, TargetRegion) {
5885   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5886   OpenMPIRBuilder OMPBuilder(*M);
5887   OMPBuilder.initialize();
5888   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
5889   OMPBuilder.setConfig(Config);
5890   F->setName("func");
5891   IRBuilder<> Builder(BB);
5892   auto Int32Ty = Builder.getInt32Ty();
5893 
5894   AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr");
5895   AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr");
5896   AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr");
5897 
5898   Builder.CreateStore(Builder.getInt32(10), APtr);
5899   Builder.CreateStore(Builder.getInt32(20), BPtr);
5900   auto BodyGenCB = [&](InsertPointTy AllocaIP,
5901                        InsertPointTy CodeGenIP) -> InsertPointTy {
5902     Builder.restoreIP(CodeGenIP);
5903     LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr);
5904     LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr);
5905     Value *Sum = Builder.CreateAdd(AVal, BVal);
5906     Builder.CreateStore(Sum, CPtr);
5907     return Builder.saveIP();
5908   };
5909 
5910   llvm::SmallVector<llvm::Value *> Inputs;
5911   Inputs.push_back(APtr);
5912   Inputs.push_back(BPtr);
5913   Inputs.push_back(CPtr);
5914 
5915   auto SimpleArgAccessorCB =
5916       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
5917           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5918           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
5919         if (!OMPBuilder.Config.isTargetDevice()) {
5920           RetVal = cast<llvm::Value>(&Arg);
5921           return CodeGenIP;
5922         }
5923 
5924         Builder.restoreIP(AllocaIP);
5925 
5926         llvm::Value *Addr = Builder.CreateAlloca(
5927             Arg.getType()->isPointerTy()
5928                 ? Arg.getType()
5929                 : Type::getInt64Ty(Builder.getContext()),
5930             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
5931         llvm::Value *AddrAscast =
5932             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
5933         Builder.CreateStore(&Arg, AddrAscast);
5934 
5935         Builder.restoreIP(CodeGenIP);
5936 
5937         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
5938 
5939         return Builder.saveIP();
5940       };
5941 
5942   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
5943   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
5944       -> llvm::OpenMPIRBuilder::MapInfosTy & {
5945     CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos);
5946     return CombinedInfos;
5947   };
5948 
5949   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
5950   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
5951   Builder.restoreIP(OMPBuilder.createTarget(
5952       OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs,
5953       GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
5954   OMPBuilder.finalize();
5955   Builder.CreateRetVoid();
5956 
5957   // Check the kernel launch sequence
5958   auto Iter = F->getEntryBlock().rbegin();
5959   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
5960   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
5961   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
5962   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
5963   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
5964 
5965   // Check that the kernel launch function is called
5966   Function *KernelLaunchFunc = Call->getCalledFunction();
5967   EXPECT_NE(KernelLaunchFunc, nullptr);
5968   StringRef FunctionName = KernelLaunchFunc->getName();
5969   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
5970 
5971   // Check the fallback call
5972   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
5973   Iter = FallbackBlock->rbegin();
5974   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
5975   EXPECT_NE(FCall, nullptr);
5976 
5977   // Check that the correct aguments are passed in
5978   for (auto ArgInput : zip(FCall->args(), Inputs)) {
5979     EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput));
5980   }
5981 
5982   // Check that the outlined function exists with the expected prefix
5983   Function *OutlinedFunc = FCall->getCalledFunction();
5984   EXPECT_NE(OutlinedFunc, nullptr);
5985   StringRef FunctionName2 = OutlinedFunc->getName();
5986   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
5987 
5988   EXPECT_FALSE(verifyModule(*M, &errs()));
5989 }
5990 
5991 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
5992   OpenMPIRBuilder OMPBuilder(*M);
5993   OMPBuilder.setConfig(
5994       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
5995   OMPBuilder.initialize();
5996 
5997   F->setName("func");
5998   IRBuilder<> Builder(BB);
5999   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6000 
6001   LoadInst *Value = nullptr;
6002   StoreInst *TargetStore = nullptr;
6003   llvm::SmallVector<llvm::Value *, 2> CapturedArgs = {
6004       Constant::getNullValue(PointerType::get(Ctx, 0)),
6005       Constant::getNullValue(PointerType::get(Ctx, 0))};
6006 
6007   auto SimpleArgAccessorCB =
6008       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6009           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6010           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6011         if (!OMPBuilder.Config.isTargetDevice()) {
6012           RetVal = cast<llvm::Value>(&Arg);
6013           return CodeGenIP;
6014         }
6015 
6016         Builder.restoreIP(AllocaIP);
6017 
6018         llvm::Value *Addr = Builder.CreateAlloca(
6019             Arg.getType()->isPointerTy()
6020                 ? Arg.getType()
6021                 : Type::getInt64Ty(Builder.getContext()),
6022             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6023         llvm::Value *AddrAscast =
6024             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6025         Builder.CreateStore(&Arg, AddrAscast);
6026 
6027         Builder.restoreIP(CodeGenIP);
6028 
6029         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6030 
6031         return Builder.saveIP();
6032       };
6033 
6034   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6035   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6036       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6037     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6038     return CombinedInfos;
6039   };
6040 
6041   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6042                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6043       -> OpenMPIRBuilder::InsertPointTy {
6044     Builder.restoreIP(CodeGenIP);
6045     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6046     TargetStore = Builder.CreateStore(Value, CapturedArgs[1]);
6047     return Builder.saveIP();
6048   };
6049 
6050   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6051                                    F->getEntryBlock().getFirstInsertionPt());
6052   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6053                                   /*Line=*/3, /*Count=*/0);
6054 
6055   Builder.restoreIP(
6056       OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
6057                               /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
6058                               BodyGenCB, SimpleArgAccessorCB));
6059 
6060   Builder.CreateRetVoid();
6061   OMPBuilder.finalize();
6062 
6063   // Check outlined function
6064   EXPECT_FALSE(verifyModule(*M, &errs()));
6065   EXPECT_NE(TargetStore, nullptr);
6066   Function *OutlinedFn = TargetStore->getFunction();
6067   EXPECT_NE(F, OutlinedFn);
6068 
6069   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6070   // Account for the "implicit" first argument.
6071   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6072   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
6073   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6074   EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy());
6075 
6076   // Check entry block
6077   auto &EntryBlock = OutlinedFn->getEntryBlock();
6078   Instruction *Alloca1 = EntryBlock.getFirstNonPHI();
6079   EXPECT_NE(Alloca1, nullptr);
6080 
6081   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6082   auto *Store1 = Alloca1->getNextNode();
6083   EXPECT_TRUE(isa<StoreInst>(Store1));
6084   auto *Alloca2 = Store1->getNextNode();
6085   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6086   auto *Store2 = Alloca2->getNextNode();
6087   EXPECT_TRUE(isa<StoreInst>(Store2));
6088 
6089   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6090   EXPECT_NE(InitCall, nullptr);
6091   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6092   EXPECT_EQ(InitCall->arg_size(), 2U);
6093   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6094   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6095   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6096   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6097   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6098   auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6099   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6100             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6101   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6102             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6103   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6104             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6105 
6106   auto *EntryBlockBranch = EntryBlock.getTerminator();
6107   EXPECT_NE(EntryBlockBranch, nullptr);
6108   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6109 
6110   // Check user code block
6111   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6112   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6113   auto *Load1 = UserCodeBlock->getFirstNonPHI();
6114   EXPECT_TRUE(isa<LoadInst>(Load1));
6115   auto *Load2 = Load1->getNextNode();
6116   EXPECT_TRUE(isa<LoadInst>(Load2));
6117 
6118   auto *Value1 = Load2->getNextNode();
6119   EXPECT_EQ(Value1, Value);
6120   EXPECT_EQ(Value1->getNextNode(), TargetStore);
6121   auto *Deinit = TargetStore->getNextNode();
6122   EXPECT_NE(Deinit, nullptr);
6123 
6124   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6125   EXPECT_NE(DeinitCall, nullptr);
6126   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6127   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6128 
6129   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6130 
6131   // Check exit block
6132   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6133   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6134   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI()));
6135 }
6136 
6137 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
6138   OpenMPIRBuilder OMPBuilder(*M);
6139   OMPBuilder.setConfig(
6140       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6141   OMPBuilder.initialize();
6142 
6143   F->setName("func");
6144   IRBuilder<> Builder(BB);
6145   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6146 
6147   LoadInst *Value = nullptr;
6148   StoreInst *TargetStore = nullptr;
6149   llvm::SmallVector<llvm::Value *, 1> CapturedArgs = {
6150       Constant::getNullValue(PointerType::get(Ctx, 0))};
6151 
6152   auto SimpleArgAccessorCB =
6153       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6154           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6155           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6156         if (!OMPBuilder.Config.isTargetDevice()) {
6157           RetVal = cast<llvm::Value>(&Arg);
6158           return CodeGenIP;
6159         }
6160 
6161         Builder.restoreIP(AllocaIP);
6162 
6163         llvm::Value *Addr = Builder.CreateAlloca(
6164             Arg.getType()->isPointerTy()
6165                 ? Arg.getType()
6166                 : Type::getInt64Ty(Builder.getContext()),
6167             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6168         llvm::Value *AddrAscast =
6169             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6170         Builder.CreateStore(&Arg, AddrAscast);
6171 
6172         Builder.restoreIP(CodeGenIP);
6173 
6174         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6175 
6176         return Builder.saveIP();
6177       };
6178 
6179   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6180   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6181       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6182     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6183     return CombinedInfos;
6184   };
6185 
6186   llvm::Value *RaiseAlloca = nullptr;
6187 
6188   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6189                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6190       -> OpenMPIRBuilder::InsertPointTy {
6191     Builder.restoreIP(CodeGenIP);
6192     RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty());
6193     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6194     TargetStore = Builder.CreateStore(Value, RaiseAlloca);
6195     return Builder.saveIP();
6196   };
6197 
6198   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6199                                    F->getEntryBlock().getFirstInsertionPt());
6200   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6201                                   /*Line=*/3, /*Count=*/0);
6202 
6203   Builder.restoreIP(
6204       OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
6205                               /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
6206                               BodyGenCB, SimpleArgAccessorCB));
6207 
6208   Builder.CreateRetVoid();
6209   OMPBuilder.finalize();
6210 
6211   // Check outlined function
6212   EXPECT_FALSE(verifyModule(*M, &errs()));
6213   EXPECT_NE(TargetStore, nullptr);
6214   Function *OutlinedFn = TargetStore->getFunction();
6215   EXPECT_NE(F, OutlinedFn);
6216 
6217   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6218   // Account for the "implicit" first argument.
6219   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6220   EXPECT_EQ(OutlinedFn->arg_size(), 2U);
6221   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6222 
6223   // Check entry block, to see if we have raised our alloca
6224   // from the body to the entry block.
6225   auto &EntryBlock = OutlinedFn->getEntryBlock();
6226 
6227   // Check that we have moved our alloca created in the
6228   // BodyGenCB function, to the top of the function.
6229   Instruction *Alloca1 = EntryBlock.getFirstNonPHI();
6230   EXPECT_NE(Alloca1, nullptr);
6231   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6232   EXPECT_EQ(Alloca1, RaiseAlloca);
6233 
6234   // Verify we have not altered the rest of the function
6235   // inappropriately with our alloca movement.
6236   auto *Alloca2 = Alloca1->getNextNode();
6237   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6238   auto *Store2 = Alloca2->getNextNode();
6239   EXPECT_TRUE(isa<StoreInst>(Store2));
6240 
6241   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6242   EXPECT_NE(InitCall, nullptr);
6243   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6244   EXPECT_EQ(InitCall->arg_size(), 2U);
6245   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6246   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6247   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6248   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6249   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6250   auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6251   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6252             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6253   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6254             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6255   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6256             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6257 
6258   auto *EntryBlockBranch = EntryBlock.getTerminator();
6259   EXPECT_NE(EntryBlockBranch, nullptr);
6260   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6261 
6262   // Check user code block
6263   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6264   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6265   auto *Load1 = UserCodeBlock->getFirstNonPHI();
6266   EXPECT_TRUE(isa<LoadInst>(Load1));
6267   auto *Load2 = Load1->getNextNode();
6268   EXPECT_TRUE(isa<LoadInst>(Load2));
6269   EXPECT_EQ(Load2, Value);
6270   EXPECT_EQ(Load2->getNextNode(), TargetStore);
6271   auto *Deinit = TargetStore->getNextNode();
6272   EXPECT_NE(Deinit, nullptr);
6273 
6274   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6275   EXPECT_NE(DeinitCall, nullptr);
6276   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6277   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6278 
6279   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6280 
6281   // Check exit block
6282   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6283   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6284   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI()));
6285 }
6286 
6287 TEST_F(OpenMPIRBuilderTest, CreateTask) {
6288   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6289   OpenMPIRBuilder OMPBuilder(*M);
6290   OMPBuilder.Config.IsTargetDevice = false;
6291   OMPBuilder.initialize();
6292   F->setName("func");
6293   IRBuilder<> Builder(BB);
6294 
6295   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6296   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6297   Value *Val128 =
6298       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6299 
6300   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6301     Builder.restoreIP(AllocaIP);
6302     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6303                                                 "bodygen.alloca128");
6304 
6305     Builder.restoreIP(CodeGenIP);
6306     // Loading and storing captured pointer and values
6307     Builder.CreateStore(Val128, Local128);
6308     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6309                                       "bodygen.load32");
6310 
6311     LoadInst *PrivLoad128 = Builder.CreateLoad(
6312         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
6313     Value *Cmp = Builder.CreateICmpNE(
6314         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
6315     Instruction *ThenTerm, *ElseTerm;
6316     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
6317                                   &ThenTerm, &ElseTerm);
6318   };
6319 
6320   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6321   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6322   OpenMPIRBuilder::LocationDescription Loc(
6323       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6324   Builder.restoreIP(OMPBuilder.createTask(
6325       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6326       BodyGenCB));
6327   OMPBuilder.finalize();
6328   Builder.CreateRetVoid();
6329 
6330   EXPECT_FALSE(verifyModule(*M, &errs()));
6331 
6332   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6333       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6334           ->user_back());
6335 
6336   // Verify the Ident argument
6337   GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
6338   ASSERT_NE(Ident, nullptr);
6339   EXPECT_TRUE(Ident->hasInitializer());
6340   Constant *Initializer = Ident->getInitializer();
6341   GlobalVariable *SrcStrGlob =
6342       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6343   ASSERT_NE(SrcStrGlob, nullptr);
6344   ConstantDataArray *SrcSrc =
6345       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6346   ASSERT_NE(SrcSrc, nullptr);
6347 
6348   // Verify the num_threads argument.
6349   CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
6350   ASSERT_NE(GTID, nullptr);
6351   EXPECT_EQ(GTID->arg_size(), 1U);
6352   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
6353 
6354   // Verify the flags
6355   // TODO: Check for others flags. Currently testing only for tiedness.
6356   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6357   ASSERT_NE(Flags, nullptr);
6358   EXPECT_EQ(Flags->getSExtValue(), 1);
6359 
6360   // Verify the data size
6361   ConstantInt *DataSize =
6362       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6363   ASSERT_NE(DataSize, nullptr);
6364   EXPECT_EQ(DataSize->getSExtValue(), 40);
6365 
6366   ConstantInt *SharedsSize =
6367       dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4));
6368   EXPECT_EQ(SharedsSize->getSExtValue(),
6369             24); // 64-bit pointer + 128-bit integer
6370 
6371   // Verify Wrapper function
6372   Function *OutlinedFn =
6373       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6374   ASSERT_NE(OutlinedFn, nullptr);
6375 
6376   LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin());
6377   ASSERT_NE(SharedsLoad, nullptr);
6378   EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1));
6379 
6380   EXPECT_FALSE(OutlinedFn->isDeclaration());
6381   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty());
6382 
6383   // Verify that the data argument is used only once, and that too in the load
6384   // instruction that is then used for accessing shared data.
6385   Value *DataPtr = OutlinedFn->getArg(1);
6386   EXPECT_EQ(DataPtr->getNumUses(), 1U);
6387   EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser()));
6388   Value *Data = DataPtr->uses().begin()->getUser();
6389   EXPECT_TRUE(all_of(Data->uses(), [](Use &U) {
6390     return isa<GetElementPtrInst>(U.getUser());
6391   }));
6392 
6393   // Verify the presence of `trunc` and `icmp` instructions in Outlined function
6394   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6395                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
6396   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6397                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
6398 
6399   // Verify the execution of the task
6400   CallInst *TaskCall = dyn_cast<CallInst>(
6401       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6402           ->user_back());
6403   ASSERT_NE(TaskCall, nullptr);
6404   EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
6405   EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
6406   EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);
6407 
6408   // Verify that the argument data has been copied
6409   for (User *in : TaskAllocCall->users()) {
6410     if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
6411       EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
6412     }
6413   }
6414 }
6415 
6416 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
6417   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6418   OpenMPIRBuilder OMPBuilder(*M);
6419   OMPBuilder.Config.IsTargetDevice = false;
6420   OMPBuilder.initialize();
6421   F->setName("func");
6422   IRBuilder<> Builder(BB);
6423 
6424   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6425 
6426   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6427   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6428   OpenMPIRBuilder::LocationDescription Loc(
6429       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6430   Builder.restoreIP(OMPBuilder.createTask(
6431       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6432       BodyGenCB));
6433   OMPBuilder.finalize();
6434   Builder.CreateRetVoid();
6435 
6436   EXPECT_FALSE(verifyModule(*M, &errs()));
6437 
6438   // Check that the outlined function has only one argument.
6439   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6440       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6441           ->user_back());
6442   Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5));
6443   ASSERT_NE(OutlinedFn, nullptr);
6444   ASSERT_EQ(OutlinedFn->arg_size(), 1U);
6445 }
6446 
6447 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
6448   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6449   OpenMPIRBuilder OMPBuilder(*M);
6450   OMPBuilder.Config.IsTargetDevice = false;
6451   OMPBuilder.initialize();
6452   F->setName("func");
6453   IRBuilder<> Builder(BB);
6454   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6455   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6456   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6457   OpenMPIRBuilder::LocationDescription Loc(
6458       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6459   Builder.restoreIP(OMPBuilder.createTask(
6460       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
6461       /*Tied=*/false));
6462   OMPBuilder.finalize();
6463   Builder.CreateRetVoid();
6464 
6465   // Check for the `Tied` argument
6466   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6467       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6468           ->user_back());
6469   ASSERT_NE(TaskAllocCall, nullptr);
6470   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6471   ASSERT_NE(Flags, nullptr);
6472   EXPECT_EQ(Flags->getZExtValue() & 1U, 0U);
6473 
6474   EXPECT_FALSE(verifyModule(*M, &errs()));
6475 }
6476 
6477 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
6478   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6479   OpenMPIRBuilder OMPBuilder(*M);
6480   OMPBuilder.Config.IsTargetDevice = false;
6481   OMPBuilder.initialize();
6482   F->setName("func");
6483   IRBuilder<> Builder(BB);
6484   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6485   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6486   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6487   OpenMPIRBuilder::LocationDescription Loc(
6488       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6489   AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext()));
6490   SmallVector<OpenMPIRBuilder::DependData> DDS;
6491   {
6492     OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn,
6493                                      Type::getInt32Ty(M->getContext()), InDep);
6494     DDS.push_back(DDIn);
6495   }
6496   Builder.restoreIP(OMPBuilder.createTask(
6497       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
6498       /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
6499   OMPBuilder.finalize();
6500   Builder.CreateRetVoid();
6501 
6502   // Check for the `NumDeps` argument
6503   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6504       OMPBuilder
6505           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps)
6506           ->user_back());
6507   ASSERT_NE(TaskAllocCall, nullptr);
6508   ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6509   ASSERT_NE(NumDeps, nullptr);
6510   EXPECT_EQ(NumDeps->getZExtValue(), 1U);
6511 
6512   // Check for the `DepInfo` array argument
6513   AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4));
6514   ASSERT_NE(DepArray, nullptr);
6515   Value::user_iterator DepArrayI = DepArray->user_begin();
6516   ++DepArrayI;
6517   Value::user_iterator DepInfoI = DepArrayI->user_begin();
6518   // Check for the `DependKind` flag in the `DepInfo` array
6519   Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI);
6520   ASSERT_NE(Flag, nullptr);
6521   ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag);
6522   ASSERT_NE(FlagInt, nullptr);
6523   EXPECT_EQ(FlagInt->getZExtValue(),
6524             static_cast<unsigned int>(RTLDependenceKindTy::DepIn));
6525   ++DepInfoI;
6526   // Check for the size in the `DepInfo` array
6527   Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI);
6528   ASSERT_NE(Size, nullptr);
6529   ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size);
6530   ASSERT_NE(SizeInt, nullptr);
6531   EXPECT_EQ(SizeInt->getZExtValue(), 4U);
6532   ++DepInfoI;
6533   // Check for the variable address in the `DepInfo` array
6534   Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI);
6535   ASSERT_NE(AddrStored, nullptr);
6536   PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored);
6537   ASSERT_NE(AddrInt, nullptr);
6538   Value *Addr = AddrInt->getPointerOperand();
6539   EXPECT_EQ(Addr, InDep);
6540 
6541   ConstantInt *NumDepsNoAlias =
6542       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5));
6543   ASSERT_NE(NumDepsNoAlias, nullptr);
6544   EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U);
6545   EXPECT_EQ(TaskAllocCall->getOperand(6),
6546             ConstantPointerNull::get(PointerType::getUnqual(M->getContext())));
6547 
6548   EXPECT_FALSE(verifyModule(*M, &errs()));
6549 }
6550 
6551 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
6552   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6553   OpenMPIRBuilder OMPBuilder(*M);
6554   OMPBuilder.Config.IsTargetDevice = false;
6555   OMPBuilder.initialize();
6556   F->setName("func");
6557   IRBuilder<> Builder(BB);
6558   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6559   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6560   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
6561   Builder.SetInsertPoint(BodyBB);
6562   Value *Final = Builder.CreateICmp(
6563       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
6564       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
6565   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6566   Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
6567                                           /*Tied=*/false, Final));
6568   OMPBuilder.finalize();
6569   Builder.CreateRetVoid();
6570 
6571   // Check for the `Tied` argument
6572   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6573       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6574           ->user_back());
6575   ASSERT_NE(TaskAllocCall, nullptr);
6576   BinaryOperator *OrInst =
6577       dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
6578   ASSERT_NE(OrInst, nullptr);
6579   EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);
6580 
6581   // One of the arguments to `or` instruction is the tied flag, which is equal
6582   // to zero.
6583   EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
6584     if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
6585       return TiedValue->getSExtValue() == 0;
6586     return false;
6587   }));
6588 
6589   // One of the arguments to `or` instruction is the final condition.
6590   EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
6591     if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
6592       ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
6593       ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
6594       if (!TrueValue || !FalseValue)
6595         return false;
6596       return Select->getCondition() == Final &&
6597              TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
6598     }
6599     return false;
6600   }));
6601 
6602   EXPECT_FALSE(verifyModule(*M, &errs()));
6603 }
6604 
6605 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
6606   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6607   OpenMPIRBuilder OMPBuilder(*M);
6608   OMPBuilder.Config.IsTargetDevice = false;
6609   OMPBuilder.initialize();
6610   F->setName("func");
6611   IRBuilder<> Builder(BB);
6612   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6613   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6614   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
6615   Builder.SetInsertPoint(BodyBB);
6616   Value *IfCondition = Builder.CreateICmp(
6617       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
6618       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
6619   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6620   Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
6621                                           /*Tied=*/false, /*Final=*/nullptr,
6622                                           IfCondition));
6623   OMPBuilder.finalize();
6624   Builder.CreateRetVoid();
6625 
6626   EXPECT_FALSE(verifyModule(*M, &errs()));
6627 
6628   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6629       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6630           ->user_back());
6631   ASSERT_NE(TaskAllocCall, nullptr);
6632 
6633   // Check the branching is based on the if condition argument.
6634   BranchInst *IfConditionBranchInst =
6635       dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator());
6636   ASSERT_NE(IfConditionBranchInst, nullptr);
6637   ASSERT_TRUE(IfConditionBranchInst->isConditional());
6638   EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition);
6639 
6640   // Check that the `__kmpc_omp_task` executes only in the then branch.
6641   CallInst *TaskCall = dyn_cast<CallInst>(
6642       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6643           ->user_back());
6644   ASSERT_NE(TaskCall, nullptr);
6645   EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0));
6646 
6647   // Check that the OpenMP Runtime Functions specific to `if` clause execute
6648   // only in the else branch. Also check that the function call is between the
6649   // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls.
6650   CallInst *TaskBeginIfCall = dyn_cast<CallInst>(
6651       OMPBuilder
6652           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0)
6653           ->user_back());
6654   CallInst *TaskCompleteCall = dyn_cast<CallInst>(
6655       OMPBuilder
6656           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0)
6657           ->user_back());
6658   ASSERT_NE(TaskBeginIfCall, nullptr);
6659   ASSERT_NE(TaskCompleteCall, nullptr);
6660   Function *OulinedFn =
6661       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6662   ASSERT_NE(OulinedFn, nullptr);
6663   CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back());
6664   ASSERT_NE(OulinedFnCall, nullptr);
6665   EXPECT_EQ(TaskBeginIfCall->getParent(),
6666             IfConditionBranchInst->getSuccessor(1));
6667 
6668   EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall);
6669   EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall);
6670 }
6671 
6672 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
6673   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6674   OpenMPIRBuilder OMPBuilder(*M);
6675   OMPBuilder.initialize();
6676   F->setName("func");
6677   IRBuilder<> Builder(BB);
6678 
6679   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6680   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6681   Value *Val128 =
6682       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6683   Instruction *ThenTerm, *ElseTerm;
6684 
6685   Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
6686 
6687   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6688     Builder.restoreIP(AllocaIP);
6689     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6690                                                 "bodygen.alloca128");
6691 
6692     Builder.restoreIP(CodeGenIP);
6693     // Loading and storing captured pointer and values
6694     InternalStoreInst = Builder.CreateStore(Val128, Local128);
6695     InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6696                                         "bodygen.load32");
6697 
6698     InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128,
6699                                          "bodygen.local.load128");
6700     InternalIfCmp = Builder.CreateICmpNE(
6701         InternalLoad32,
6702         Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType()));
6703     SplitBlockAndInsertIfThenElse(InternalIfCmp,
6704                                   CodeGenIP.getBlock()->getTerminator(),
6705                                   &ThenTerm, &ElseTerm);
6706   };
6707 
6708   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6709   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6710   OpenMPIRBuilder::LocationDescription Loc(
6711       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6712   Builder.restoreIP(OMPBuilder.createTaskgroup(
6713       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6714       BodyGenCB));
6715   OMPBuilder.finalize();
6716   Builder.CreateRetVoid();
6717 
6718   EXPECT_FALSE(verifyModule(*M, &errs()));
6719 
6720   CallInst *TaskgroupCall = dyn_cast<CallInst>(
6721       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
6722           ->user_back());
6723   ASSERT_NE(TaskgroupCall, nullptr);
6724   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
6725       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
6726           ->user_back());
6727   ASSERT_NE(EndTaskgroupCall, nullptr);
6728 
6729   // Verify the Ident argument
6730   GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0));
6731   ASSERT_NE(Ident, nullptr);
6732   EXPECT_TRUE(Ident->hasInitializer());
6733   Constant *Initializer = Ident->getInitializer();
6734   GlobalVariable *SrcStrGlob =
6735       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6736   ASSERT_NE(SrcStrGlob, nullptr);
6737   ConstantDataArray *SrcSrc =
6738       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6739   ASSERT_NE(SrcSrc, nullptr);
6740 
6741   // Verify the num_threads argument.
6742   CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1));
6743   ASSERT_NE(GTID, nullptr);
6744   EXPECT_EQ(GTID->arg_size(), 1U);
6745   EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr(
6746                                            OMPRTL___kmpc_global_thread_num));
6747 
6748   // Checking the general structure of the IR generated is same as expected.
6749   Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction();
6750   EXPECT_EQ(GeneratedStoreInst, InternalStoreInst);
6751   Instruction *GeneratedLoad32 =
6752       GeneratedStoreInst->getNextNonDebugInstruction();
6753   EXPECT_EQ(GeneratedLoad32, InternalLoad32);
6754   Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction();
6755   EXPECT_EQ(GeneratedLoad128, InternalLoad128);
6756 
6757   // Checking the ordering because of the if statements and that
6758   // `__kmp_end_taskgroup` call is after the if branching.
6759   BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(),
6760                             ThenTerm->getSuccessor(0),
6761                             EndTaskgroupCall->getParent(),
6762                             ElseTerm->getParent()};
6763   verifyDFSOrder(F, RefOrder);
6764 }
6765 
6766 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
6767   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6768   OpenMPIRBuilder OMPBuilder(*M);
6769   OMPBuilder.Config.IsTargetDevice = false;
6770   OMPBuilder.initialize();
6771   F->setName("func");
6772   IRBuilder<> Builder(BB);
6773 
6774   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6775     Builder.restoreIP(AllocaIP);
6776     AllocaInst *Alloca32 =
6777         Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
6778     AllocaInst *Alloca64 =
6779         Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
6780     Builder.restoreIP(CodeGenIP);
6781     auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6782       Builder.restoreIP(CodeGenIP);
6783       LoadInst *LoadValue =
6784           Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
6785       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64));
6786       Builder.CreateStore(AddInst, Alloca64);
6787     };
6788     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6789     Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
6790 
6791     auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6792       Builder.restoreIP(CodeGenIP);
6793       LoadInst *LoadValue =
6794           Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
6795       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32));
6796       Builder.CreateStore(AddInst, Alloca32);
6797     };
6798     OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
6799     Builder.restoreIP(OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
6800   };
6801 
6802   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6803   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6804   OpenMPIRBuilder::LocationDescription Loc(
6805       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6806   Builder.restoreIP(OMPBuilder.createTaskgroup(
6807       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6808       BodyGenCB));
6809   OMPBuilder.finalize();
6810   Builder.CreateRetVoid();
6811 
6812   EXPECT_FALSE(verifyModule(*M, &errs()));
6813 
6814   CallInst *TaskgroupCall = dyn_cast<CallInst>(
6815       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
6816           ->user_back());
6817   ASSERT_NE(TaskgroupCall, nullptr);
6818   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
6819       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
6820           ->user_back());
6821   ASSERT_NE(EndTaskgroupCall, nullptr);
6822 
6823   Function *TaskAllocFn =
6824       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
6825   ASSERT_EQ(TaskAllocFn->getNumUses(), 2u);
6826 
6827   CallInst *FirstTaskAllocCall =
6828       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin());
6829   CallInst *SecondTaskAllocCall =
6830       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++);
6831   ASSERT_NE(FirstTaskAllocCall, nullptr);
6832   ASSERT_NE(SecondTaskAllocCall, nullptr);
6833 
6834   // Verify that the tasks have been generated in order and inside taskgroup
6835   // construct.
6836   BasicBlock *RefOrder[] = {
6837       TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(),
6838       SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()};
6839   verifyDFSOrder(F, RefOrder);
6840 }
6841 
6842 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
6843   OpenMPIRBuilder OMPBuilder(*M);
6844   OMPBuilder.initialize();
6845 
6846   IRBuilder<> Builder(BB);
6847 
6848   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
6849   OpenMPIRBuilder::TargetDataInfo Info(true, false);
6850 
6851   auto VoidPtrTy = PointerType::getUnqual(Builder.getContext());
6852   auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext());
6853   auto Int64Ty = Type::getInt64Ty(Builder.getContext());
6854   auto Int64PtrTy = PointerType::getUnqual(Builder.getContext());
6855   auto Array4VoidPtrTy = ArrayType::get(VoidPtrTy, 4);
6856   auto Array4Int64PtrTy = ArrayType::get(Int64Ty, 4);
6857 
6858   Info.RTArgs.BasePointersArray =
6859       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo(0));
6860   Info.RTArgs.PointersArray =
6861       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
6862   Info.RTArgs.SizesArray =
6863       ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo());
6864   Info.RTArgs.MapTypesArray =
6865       ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo());
6866   Info.RTArgs.MapNamesArray =
6867       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
6868   Info.RTArgs.MappersArray =
6869       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
6870   Info.NumberOfPtrs = 4;
6871 
6872   OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false);
6873 
6874   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
6875   EXPECT_NE(RTArgs.PointersArray, nullptr);
6876   EXPECT_NE(RTArgs.SizesArray, nullptr);
6877   EXPECT_NE(RTArgs.MapTypesArray, nullptr);
6878   EXPECT_NE(RTArgs.MappersArray, nullptr);
6879   EXPECT_NE(RTArgs.MapNamesArray, nullptr);
6880   EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr);
6881 
6882   EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy);
6883   EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy);
6884   EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy);
6885   EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy);
6886   EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy);
6887   EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy);
6888 }
6889 
6890 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
6891   OpenMPIRBuilder OMPBuilder(*M);
6892   OMPBuilder.setConfig(
6893       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6894   OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager;
6895   TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0);
6896   InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0);
6897   EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo));
6898   InfoManager.initializeDeviceGlobalVarEntryInfo(
6899       "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0);
6900   InfoManager.registerTargetRegionEntryInfo(
6901       EntryInfo, nullptr, nullptr,
6902       OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
6903   InfoManager.registerDeviceGlobalVarEntryInfo(
6904       "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
6905       GlobalValue::WeakAnyLinkage);
6906   EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar"));
6907 }
6908 
6909 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they
6910 // call each other (recursively in some cases). The test case test these
6911 // functions by utilising them for host code generation for declare target
6912 // global variables
6913 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) {
6914   OpenMPIRBuilder OMPBuilder(*M);
6915   OMPBuilder.initialize();
6916   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
6917   OMPBuilder.setConfig(Config);
6918 
6919   std::vector<llvm::Triple> TargetTriple;
6920   TargetTriple.emplace_back("amdgcn-amd-amdhsa");
6921 
6922   TargetRegionEntryInfo EntryInfo("", 42, 4711, 17);
6923   std::vector<GlobalVariable *> RefsGathered;
6924 
6925   std::vector<Constant *> Globals;
6926   auto *IntTy = Type::getInt32Ty(Ctx);
6927   for (int I = 0; I < 2; ++I) {
6928     Globals.push_back(M->getOrInsertGlobal(
6929         "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * {
6930           return new GlobalVariable(
6931               *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage,
6932               ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I));
6933         }));
6934   }
6935 
6936   OMPBuilder.registerTargetGlobalVariable(
6937       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
6938       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
6939       EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple,
6940       nullptr, nullptr, Globals[0]->getType(), Globals[0]);
6941 
6942   OMPBuilder.registerTargetGlobalVariable(
6943       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink,
6944       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
6945       EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple,
6946       nullptr, nullptr, Globals[1]->getType(), Globals[1]);
6947 
6948   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn =
6949       [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
6950          const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
6951     // If this is invoked, then we want to emit an error, even if it is not
6952     // neccesarily the most readable, as something has went wrong. The
6953     // test-suite unfortunately eats up all error output
6954     ASSERT_EQ(Kind, Kind);
6955   };
6956 
6957   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn);
6958 
6959   // Clauses for data_int_0 with To + Any clauses for the host
6960   std::vector<GlobalVariable *> OffloadEntries;
6961   OffloadEntries.push_back(M->getNamedGlobal(".omp_offloading.entry_name"));
6962   OffloadEntries.push_back(
6963       M->getNamedGlobal(".omp_offloading.entry.test_data_int_0"));
6964 
6965   // Clauses for data_int_1 with Link + Any clauses for the host
6966   OffloadEntries.push_back(
6967       M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr"));
6968   OffloadEntries.push_back(M->getNamedGlobal(".omp_offloading.entry_name.1"));
6969   OffloadEntries.push_back(M->getNamedGlobal(
6970       ".omp_offloading.entry.test_data_int_1_decl_tgt_ref_ptr"));
6971 
6972   for (unsigned I = 0; I < OffloadEntries.size(); ++I)
6973     EXPECT_NE(OffloadEntries[I], nullptr);
6974 
6975   // Metadata generated for the host offload module
6976   NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info");
6977   ASSERT_THAT(OffloadMetadata, testing::NotNull());
6978   StringRef Nodes[2] = {
6979       cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1))
6980           ->getString(),
6981       cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1))
6982           ->getString()};
6983   EXPECT_THAT(
6984       Nodes, testing::UnorderedElementsAre("test_data_int_0",
6985                                            "test_data_int_1_decl_tgt_ref_ptr"));
6986 }
6987 
6988 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) {
6989   OpenMPIRBuilder OMPBuilder(*M);
6990   OMPBuilder.initialize();
6991   OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true,
6992                                /* IsGPU = */ true,
6993                                /* OpenMPOffloadMandatory = */ false,
6994                                /* HasRequiresReverseOffload = */ false,
6995                                /* HasRequiresUnifiedAddress = */ false,
6996                                /* HasRequiresUnifiedSharedMemory = */ false,
6997                                /* HasRequiresDynamicAllocators = */ false);
6998   OMPBuilder.setConfig(Config);
6999 
7000   FunctionCallee FnTypeAndCallee =
7001       M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx));
7002 
7003   auto *Fn = cast<Function>(FnTypeAndCallee.getCallee());
7004   OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn,
7005                                 /* Size = */ 0,
7006                                 /* Flags = */ 0, GlobalValue::WeakAnyLinkage);
7007 
7008   // Check nvvm.annotations only created for GPU kernels
7009   NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations");
7010   EXPECT_NE(MD, nullptr);
7011   EXPECT_EQ(MD->getNumOperands(), 1u);
7012 
7013   MDNode *Annotations = MD->getOperand(0);
7014   EXPECT_EQ(Annotations->getNumOperands(), 3u);
7015 
7016   Constant *ConstVal =
7017       dyn_cast<ConstantAsMetadata>(Annotations->getOperand(0))->getValue();
7018   EXPECT_TRUE(isa<Function>(Fn));
7019   EXPECT_EQ(ConstVal, cast<Function>(Fn));
7020 
7021   EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel"));
7022 
7023   EXPECT_TRUE(mdconst::hasa<ConstantInt>(Annotations->getOperand(2)));
7024   APInt IntVal =
7025       mdconst::extract<ConstantInt>(Annotations->getOperand(2))->getValue();
7026   EXPECT_EQ(IntVal, 1);
7027 
7028   // Check kernel attributes
7029   EXPECT_TRUE(Fn->hasFnAttribute("kernel"));
7030   EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress));
7031 }
7032 
7033 } // namespace
7034