xref: /llvm-project/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (revision eb6e7e8f89a3d5c1cbc9856774ca00208753fb12)
//===- unittests/Frontend/OpenMPIRBuilderTest.cpp - OpenMPIRBuilder tests -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/OpenMP/OMPConstants.h"
10 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
11 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
12 #include "llvm/IR/BasicBlock.h"
13 #include "llvm/IR/DIBuilder.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/InstIterator.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/Verifier.h"
20 #include "llvm/Passes/PassBuilder.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23 #include "gmock/gmock.h"
24 #include "gtest/gtest.h"
25 #include <optional>
26 
27 using namespace llvm;
28 using namespace omp;
29 
30 namespace {
31 
32 /// Create an instruction that uses the values in \p Values. We use "printf"
33 /// just because it is often used for this purpose in test code, but it is never
34 /// executed here.
35 static CallInst *createPrintfCall(IRBuilder<> &Builder, StringRef FormatStr,
36                                   ArrayRef<Value *> Values) {
37   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
38 
39   GlobalVariable *GV = Builder.CreateGlobalString(FormatStr, "", 0, M);
40   Constant *Zero = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
41   Constant *Indices[] = {Zero, Zero};
42   Constant *FormatStrConst =
43       ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
44 
45   Function *PrintfDecl = M->getFunction("printf");
46   if (!PrintfDecl) {
47     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
48     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), true);
49     PrintfDecl = Function::Create(Ty, Linkage, "printf", M);
50   }
51 
52   SmallVector<Value *, 4> Args;
53   Args.push_back(FormatStrConst);
54   Args.append(Values.begin(), Values.end());
55   return Builder.CreateCall(PrintfDecl, Args);
56 }
57 
58 /// Verify that blocks in \p RefOrder are corresponds to the depth-first visit
59 /// order the control flow of \p F.
60 ///
61 /// This is an easy way to verify the branching structure of the CFG without
62 /// checking every branch instruction individually. For the CFG of a
63 /// CanonicalLoopInfo, the Cond BB's terminating branch's first edge is entering
64 /// the body, i.e. the DFS order corresponds to the execution order with one
65 /// loop iteration.
66 static testing::AssertionResult
67 verifyDFSOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
68   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
69   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
70 
71   df_iterator_default_set<BasicBlock *, 16> Visited;
72   auto DFS = llvm::depth_first_ext(&F->getEntryBlock(), Visited);
73 
74   BasicBlock *Prev = nullptr;
75   for (BasicBlock *BB : DFS) {
76     if (It != E && BB == *It) {
77       Prev = *It;
78       ++It;
79     }
80   }
81 
82   if (It == E)
83     return testing::AssertionSuccess();
84   if (!Prev)
85     return testing::AssertionFailure()
86            << "Did not find " << (*It)->getName() << " in control flow";
87   return testing::AssertionFailure()
88          << "Expected " << Prev->getName() << " before " << (*It)->getName()
89          << " in control flow";
90 }
91 
92 /// Verify that blocks in \p RefOrder are in the same relative order in the
93 /// linked lists of blocks in \p F. The linked list may contain additional
94 /// blocks in-between.
95 ///
96 /// While the order in the linked list is not relevant for semantics, keeping
97 /// the order roughly in execution order makes its printout easier to read.
98 static testing::AssertionResult
99 verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) {
100   ArrayRef<BasicBlock *>::iterator It = RefOrder.begin();
101   ArrayRef<BasicBlock *>::iterator E = RefOrder.end();
102 
103   BasicBlock *Prev = nullptr;
104   for (BasicBlock &BB : *F) {
105     if (It != E && &BB == *It) {
106       Prev = *It;
107       ++It;
108     }
109   }
110 
111   if (It == E)
112     return testing::AssertionSuccess();
113   if (!Prev)
114     return testing::AssertionFailure() << "Did not find " << (*It)->getName()
115                                        << " in function " << F->getName();
116   return testing::AssertionFailure()
117          << "Expected " << Prev->getName() << " before " << (*It)->getName()
118          << " in function " << F->getName();
119 }
120 
121 /// Populate Calls with call instructions calling the function with the given
122 /// FnID from the given function F.
123 static void findCalls(Function *F, omp::RuntimeFunction FnID,
124                       OpenMPIRBuilder &OMPBuilder,
125                       SmallVectorImpl<CallInst *> &Calls) {
126   Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID);
127   for (BasicBlock &BB : *F) {
128     for (Instruction &I : BB) {
129       auto *Call = dyn_cast<CallInst>(&I);
130       if (Call && Call->getCalledFunction() == Fn)
131         Calls.push_back(Call);
132     }
133   }
134 }
135 
136 /// Assuming \p F contains only one call to the function with the given \p FnID,
137 /// return that call.
138 static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
139                                 OpenMPIRBuilder &OMPBuilder) {
140   SmallVector<CallInst *, 1> Calls;
141   findCalls(F, FnID, OMPBuilder, Calls);
142   EXPECT_EQ(1u, Calls.size());
143   if (Calls.size() != 1)
144     return nullptr;
145   return Calls.front();
146 }
147 
148 static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
149   switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
150   case omp::OMPScheduleType::BaseDynamicChunked:
151     return omp::OMP_SCHEDULE_Dynamic;
152   case omp::OMPScheduleType::BaseGuidedChunked:
153     return omp::OMP_SCHEDULE_Guided;
154   case omp::OMPScheduleType::BaseAuto:
155     return omp::OMP_SCHEDULE_Auto;
156   case omp::OMPScheduleType::BaseRuntime:
157     return omp::OMP_SCHEDULE_Runtime;
158   default:
159     llvm_unreachable("unknown type for this test");
160   }
161 }
162 
163 class OpenMPIRBuilderTest : public testing::Test {
164 protected:
165   void SetUp() override {
166     M.reset(new Module("MyModule", Ctx));
167     FunctionType *FTy =
168         FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
169                           /*isVarArg=*/false);
170     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
171     BB = BasicBlock::Create(Ctx, "", F);
172 
173     DIBuilder DIB(*M);
174     auto File = DIB.createFile("test.dbg", "/src", std::nullopt,
175                                std::optional<StringRef>("/src/test.dbg"));
176     auto CU =
177         DIB.createCompileUnit(dwarf::DW_LANG_C, File, "llvm-C", true, "", 0);
178     auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray({}));
179     auto SP = DIB.createFunction(
180         CU, "foo", "", File, 1, Type, 1, DINode::FlagZero,
181         DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
182     F->setSubprogram(SP);
183     auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
184     DIB.finalize();
185     DL = DILocation::get(Ctx, 3, 7, Scope);
186   }
187 
188   void TearDown() override {
189     BB = nullptr;
190     M.reset();
191   }
192 
193   /// Create a function with a simple loop that calls printf using the logical
194   /// loop counter for use with tests that need a CanonicalLoopInfo object.
195   CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL,
196                                              OpenMPIRBuilder &OMPBuilder,
197                                              int UseIVBits,
198                                              CallInst **Call = nullptr,
199                                              BasicBlock **BodyCode = nullptr) {
200     OMPBuilder.initialize();
201     F->setName("func");
202 
203     IRBuilder<> Builder(BB);
204     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
205     Value *TripCount = F->getArg(0);
206 
207     Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits);
208     Value *CastedTripCount =
209         Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount");
210 
211     auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
212                              llvm::Value *LC) {
213       Builder.restoreIP(CodeGenIP);
214       if (BodyCode)
215         *BodyCode = Builder.GetInsertBlock();
216 
217       // Add something that consumes the induction variable to the body.
218       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
219       if (Call)
220         *Call = CallInst;
221     };
222     CanonicalLoopInfo *Loop =
223         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount);
224 
225     // Finalize the function.
226     Builder.restoreIP(Loop->getAfterIP());
227     Builder.CreateRetVoid();
228 
229     return Loop;
230   }
231 
232   LLVMContext Ctx;
233   std::unique_ptr<Module> M;
234   Function *F;
235   BasicBlock *BB;
236   DebugLoc DL;
237 };
238 
/// Variant of the OpenMPIRBuilderTest fixture for value-parameterized tests
/// whose parameter is an omp::OMPScheduleType.
class OpenMPIRBuilderTestWithParams
    : public OpenMPIRBuilderTest,
      public ::testing::WithParamInterface<omp::OMPScheduleType> {};
242 
/// Variant of the OpenMPIRBuilderTest fixture for value-parameterized tests
/// whose parameter is an int.
// NOTE(review): judging by the name, the int is presumably an induction
// variable bit width — confirm against the instantiation.
class OpenMPIRBuilderTestWithIVBits
    : public OpenMPIRBuilderTest,
      public ::testing::WithParamInterface<int> {};
246 
247 // Returns the value stored in the given allocation. Returns null if the given
248 // value is not a result of an InstTy instruction, if no value is stored or if
249 // there is more than one store.
250 template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
251   Instruction *Inst = dyn_cast<InstTy>(AllocaValue);
252   if (!Inst)
253     return nullptr;
254   StoreInst *Store = nullptr;
255   for (Use &U : Inst->uses()) {
256     if (auto *CandidateStore = dyn_cast<StoreInst>(U.getUser())) {
257       EXPECT_EQ(Store, nullptr);
258       Store = CandidateStore;
259     }
260   }
261   if (!Store)
262     return nullptr;
263   return Store->getValueOperand();
264 }
265 
266 // Returns the value stored in the aggregate argument of an outlined function,
267 // or nullptr if it is not found.
268 static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
269                                            unsigned Idx) {
270   GetElementPtrInst *GEPAtIdx = nullptr;
271   // Find GEP instruction at that index.
272   for (User *Usr : Aggregate->users()) {
273     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
274     if (!GEP)
275       continue;
276 
277     if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
278       continue;
279 
280     EXPECT_EQ(GEPAtIdx, nullptr);
281     GEPAtIdx = GEP;
282   }
283 
284   EXPECT_NE(GEPAtIdx, nullptr);
285   EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
286 
287   // Find the value stored to the aggregate.
288   StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
289   Value *StoredAggValue = StoreToAgg->getValueOperand();
290 
291   Value *StoredValue = nullptr;
292 
293   // Find the value stored to the value stored in the aggregate.
294   for (User *Usr : StoredAggValue->users()) {
295     StoreInst *Store = dyn_cast<StoreInst>(Usr);
296     if (!Store)
297       continue;
298 
299     if (Store->getPointerOperand() != StoredAggValue)
300       continue;
301 
302     EXPECT_EQ(StoredValue, nullptr);
303     StoredValue = Store->getValueOperand();
304   }
305 
306   return StoredValue;
307 }
308 
309 // Returns the aggregate that the value is originating from.
310 static Value *findAggregateFromValue(Value *V) {
311   // Expects a load instruction that loads from the aggregate.
312   LoadInst *Load = dyn_cast<LoadInst>(V);
313   EXPECT_NE(Load, nullptr);
314   // Find the GEP instruction used in the load instruction.
315   GetElementPtrInst *GEP =
316       dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
317   EXPECT_NE(GEP, nullptr);
318   // Find the aggregate used in the GEP instruction.
319   Value *Aggregate = GEP->getPointerOperand();
320 
321   return Aggregate;
322 }
323 
324 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
325   OpenMPIRBuilder OMPBuilder(*M);
326   OMPBuilder.initialize();
327 
328   IRBuilder<> Builder(BB);
329 
330   OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for);
331   EXPECT_TRUE(M->global_empty());
332   EXPECT_EQ(M->size(), 1U);
333   EXPECT_EQ(F->size(), 1U);
334   EXPECT_EQ(BB->size(), 0U);
335 
336   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
337   OMPBuilder.createBarrier(Loc, OMPD_for);
338   EXPECT_FALSE(M->global_empty());
339   EXPECT_EQ(M->size(), 3U);
340   EXPECT_EQ(F->size(), 1U);
341   EXPECT_EQ(BB->size(), 2U);
342 
343   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
344   EXPECT_NE(GTID, nullptr);
345   EXPECT_EQ(GTID->arg_size(), 1U);
346   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
347   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
348   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
349 
350   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
351   EXPECT_NE(Barrier, nullptr);
352   EXPECT_EQ(Barrier->arg_size(), 2U);
353   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_barrier");
354   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
355   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
356 
357   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
358 
359   Builder.CreateUnreachable();
360   EXPECT_FALSE(verifyModule(*M, &errs()));
361 }
362 
363 TEST_F(OpenMPIRBuilderTest, CreateCancel) {
364   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
365   OpenMPIRBuilder OMPBuilder(*M);
366   OMPBuilder.initialize();
367 
368   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
369   new UnreachableInst(Ctx, CBB);
370   auto FiniCB = [&](InsertPointTy IP) {
371     ASSERT_NE(IP.getBlock(), nullptr);
372     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
373     BranchInst::Create(CBB, IP.getBlock());
374   };
375   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
376 
377   IRBuilder<> Builder(BB);
378 
379   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
380   auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
381   Builder.restoreIP(NewIP);
382   EXPECT_FALSE(M->global_empty());
383   EXPECT_EQ(M->size(), 4U);
384   EXPECT_EQ(F->size(), 4U);
385   EXPECT_EQ(BB->size(), 4U);
386 
387   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
388   EXPECT_NE(GTID, nullptr);
389   EXPECT_EQ(GTID->arg_size(), 1U);
390   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
391   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
392   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
393 
394   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
395   EXPECT_NE(Cancel, nullptr);
396   EXPECT_EQ(Cancel->arg_size(), 3U);
397   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
398   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
399   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
400   EXPECT_EQ(Cancel->getNumUses(), 1U);
401   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
402   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
403   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
404   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
405   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
406   EXPECT_NE(GTID1, nullptr);
407   EXPECT_EQ(GTID1->arg_size(), 1U);
408   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
409   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
410   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
411   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
412   EXPECT_NE(Barrier, nullptr);
413   EXPECT_EQ(Barrier->arg_size(), 2U);
414   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
415   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
416   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
417   EXPECT_EQ(Barrier->getNumUses(), 0U);
418   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
419             1U);
420   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
421 
422   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
423 
424   OMPBuilder.popFinalizationCB();
425 
426   Builder.CreateUnreachable();
427   EXPECT_FALSE(verifyModule(*M, &errs()));
428 }
429 
430 TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
431   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
432   OpenMPIRBuilder OMPBuilder(*M);
433   OMPBuilder.initialize();
434 
435   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
436   new UnreachableInst(Ctx, CBB);
437   auto FiniCB = [&](InsertPointTy IP) {
438     ASSERT_NE(IP.getBlock(), nullptr);
439     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
440     BranchInst::Create(CBB, IP.getBlock());
441   };
442   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
443 
444   IRBuilder<> Builder(BB);
445 
446   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
447   auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
448   Builder.restoreIP(NewIP);
449   EXPECT_FALSE(M->global_empty());
450   EXPECT_EQ(M->size(), 4U);
451   EXPECT_EQ(F->size(), 7U);
452   EXPECT_EQ(BB->size(), 1U);
453   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
454   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
455   BB = BB->getTerminator()->getSuccessor(0);
456   EXPECT_EQ(BB->size(), 4U);
457 
458   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
459   EXPECT_NE(GTID, nullptr);
460   EXPECT_EQ(GTID->arg_size(), 1U);
461   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
462   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
463   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
464 
465   CallInst *Cancel = dyn_cast<CallInst>(GTID->getNextNode());
466   EXPECT_NE(Cancel, nullptr);
467   EXPECT_EQ(Cancel->arg_size(), 3U);
468   EXPECT_EQ(Cancel->getCalledFunction()->getName(), "__kmpc_cancel");
469   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotAccessMemory());
470   EXPECT_FALSE(Cancel->getCalledFunction()->doesNotFreeMemory());
471   EXPECT_EQ(Cancel->getNumUses(), 1U);
472   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
473   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
474   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
475   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
476             NewIP.getBlock());
477   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
478   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
479   EXPECT_NE(GTID1, nullptr);
480   EXPECT_EQ(GTID1->arg_size(), 1U);
481   EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
482   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
483   EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
484   CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
485   EXPECT_NE(Barrier, nullptr);
486   EXPECT_EQ(Barrier->arg_size(), 2U);
487   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
488   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
489   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
490   EXPECT_EQ(Barrier->getNumUses(), 0U);
491   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
492             1U);
493   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), CBB);
494 
495   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
496 
497   OMPBuilder.popFinalizationCB();
498 
499   Builder.CreateUnreachable();
500   EXPECT_FALSE(verifyModule(*M, &errs()));
501 }
502 
503 TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
504   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
505   OpenMPIRBuilder OMPBuilder(*M);
506   OMPBuilder.initialize();
507 
508   BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
509   new UnreachableInst(Ctx, CBB);
510   auto FiniCB = [&](InsertPointTy IP) {
511     ASSERT_NE(IP.getBlock(), nullptr);
512     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
513     BranchInst::Create(CBB, IP.getBlock());
514   };
515   OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
516 
517   IRBuilder<> Builder(BB);
518 
519   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
520   auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for);
521   Builder.restoreIP(NewIP);
522   EXPECT_FALSE(M->global_empty());
523   EXPECT_EQ(M->size(), 3U);
524   EXPECT_EQ(F->size(), 4U);
525   EXPECT_EQ(BB->size(), 4U);
526 
527   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
528   EXPECT_NE(GTID, nullptr);
529   EXPECT_EQ(GTID->arg_size(), 1U);
530   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
531   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
532   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
533 
534   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
535   EXPECT_NE(Barrier, nullptr);
536   EXPECT_EQ(Barrier->arg_size(), 2U);
537   EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
538   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
539   EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
540   EXPECT_EQ(Barrier->getNumUses(), 1U);
541   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
542   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
543   EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
544   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
545   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
546             1U);
547   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
548             CBB);
549 
550   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
551 
552   OMPBuilder.popFinalizationCB();
553 
554   Builder.CreateUnreachable();
555   EXPECT_FALSE(verifyModule(*M, &errs()));
556 }
557 
558 TEST_F(OpenMPIRBuilderTest, DbgLoc) {
559   OpenMPIRBuilder OMPBuilder(*M);
560   OMPBuilder.initialize();
561   F->setName("func");
562 
563   IRBuilder<> Builder(BB);
564 
565   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
566   OMPBuilder.createBarrier(Loc, OMPD_for);
567   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
568   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
569   EXPECT_EQ(GTID->getDebugLoc(), DL);
570   EXPECT_EQ(Barrier->getDebugLoc(), DL);
571   EXPECT_TRUE(isa<GlobalVariable>(Barrier->getOperand(0)));
572   if (!isa<GlobalVariable>(Barrier->getOperand(0)))
573     return;
574   GlobalVariable *Ident = cast<GlobalVariable>(Barrier->getOperand(0));
575   EXPECT_TRUE(Ident->hasInitializer());
576   if (!Ident->hasInitializer())
577     return;
578   Constant *Initializer = Ident->getInitializer();
579   EXPECT_TRUE(
580       isa<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts()));
581   GlobalVariable *SrcStrGlob =
582       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
583   if (!SrcStrGlob)
584     return;
585   EXPECT_TRUE(isa<ConstantDataArray>(SrcStrGlob->getInitializer()));
586   ConstantDataArray *SrcSrc =
587       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
588   if (!SrcSrc)
589     return;
590   EXPECT_EQ(SrcSrc->getAsCString(), ";/src/test.dbg;foo;3;7;;");
591 }
592 
593 TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
594   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
595   std::string oldDLStr = M->getDataLayoutStr();
596   M->setDataLayout(
597       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
598       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
599       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
600   OpenMPIRBuilder OMPBuilder(*M);
601   OMPBuilder.Config.IsTargetDevice = true;
602   OMPBuilder.initialize();
603   F->setName("func");
604   IRBuilder<> Builder(BB);
605   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
606   Builder.CreateBr(EnterBB);
607   Builder.SetInsertPoint(EnterBB);
608   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
609 
610   AllocaInst *PrivAI = nullptr;
611 
612   unsigned NumBodiesGenerated = 0;
613   unsigned NumPrivatizedVars = 0;
614   unsigned NumFinalizationPoints = 0;
615 
616   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
617     ++NumBodiesGenerated;
618 
619     Builder.restoreIP(AllocaIP);
620     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
621     Builder.CreateStore(F->arg_begin(), PrivAI);
622 
623     Builder.restoreIP(CodeGenIP);
624     Value *PrivLoad =
625         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
626     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
627     Instruction *ThenTerm, *ElseTerm;
628     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
629                                   &ThenTerm, &ElseTerm);
630   };
631 
632   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
633                     Value &Orig, Value &Inner,
634                     Value *&ReplacementValue) -> InsertPointTy {
635     ++NumPrivatizedVars;
636 
637     if (!isa<AllocaInst>(Orig)) {
638       EXPECT_EQ(&Orig, F->arg_begin());
639       ReplacementValue = &Inner;
640       return CodeGenIP;
641     }
642 
643     // Since the original value is an allocation, it has a pointer type and
644     // therefore no additional wrapping should happen.
645     EXPECT_EQ(&Orig, &Inner);
646 
647     // Trivial copy (=firstprivate).
648     Builder.restoreIP(AllocaIP);
649     Type *VTy = ReplacementValue->getType();
650     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
651     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
652     Builder.restoreIP(CodeGenIP);
653     Builder.CreateStore(V, ReplacementValue);
654     return CodeGenIP;
655   };
656 
657   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
658 
659   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
660                                     F->getEntryBlock().getFirstInsertionPt());
661   IRBuilder<>::InsertPoint AfterIP =
662       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
663                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
664 
665   EXPECT_EQ(NumBodiesGenerated, 1U);
666   EXPECT_EQ(NumPrivatizedVars, 1U);
667   EXPECT_EQ(NumFinalizationPoints, 1U);
668 
669   Builder.restoreIP(AfterIP);
670   Builder.CreateRetVoid();
671 
672   OMPBuilder.finalize();
673   Function *OutlinedFn = PrivAI->getFunction();
674   EXPECT_FALSE(verifyModule(*M, &errs()));
675   EXPECT_NE(OutlinedFn, F);
676   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
677   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
678   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
679 
680   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
681   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
682   // Make sure that arguments are pointers in 0 address address space
683   EXPECT_EQ(OutlinedFn->getArg(0)->getType(),
684             PointerType::get(M->getContext(), 0));
685   EXPECT_EQ(OutlinedFn->getArg(1)->getType(),
686             PointerType::get(M->getContext(), 0));
687   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
688             PointerType::get(M->getContext(), 0));
689   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
690   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
691   User *Usr = OutlinedFn->user_back();
692   ASSERT_TRUE(isa<CallInst>(Usr));
693   CallInst *Parallel51CI = dyn_cast<CallInst>(Usr);
694   ASSERT_NE(Parallel51CI, nullptr);
695 
696   EXPECT_EQ(Parallel51CI->getCalledFunction()->getName(), "__kmpc_parallel_51");
697   EXPECT_EQ(Parallel51CI->arg_size(), 9U);
698   EXPECT_EQ(Parallel51CI->getArgOperand(5), OutlinedFn);
699   EXPECT_TRUE(
700       isa<GlobalVariable>(Parallel51CI->getArgOperand(0)->stripPointerCasts()));
701   EXPECT_EQ(Parallel51CI, Usr);
702   M->setDataLayout(oldDLStr);
703 }
704 
705 TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
706   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
707   OpenMPIRBuilder OMPBuilder(*M);
708   OMPBuilder.Config.IsTargetDevice = false;
709   OMPBuilder.initialize();
710   F->setName("func");
711   IRBuilder<> Builder(BB);
712 
713   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
714   Builder.CreateBr(EnterBB);
715   Builder.SetInsertPoint(EnterBB);
716   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
717 
718   AllocaInst *PrivAI = nullptr;
719 
720   unsigned NumBodiesGenerated = 0;
721   unsigned NumPrivatizedVars = 0;
722   unsigned NumFinalizationPoints = 0;
723 
724   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
725     ++NumBodiesGenerated;
726 
727     Builder.restoreIP(AllocaIP);
728     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
729     Builder.CreateStore(F->arg_begin(), PrivAI);
730 
731     Builder.restoreIP(CodeGenIP);
732     Value *PrivLoad =
733         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
734     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
735     Instruction *ThenTerm, *ElseTerm;
736     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
737                                   &ThenTerm, &ElseTerm);
738   };
739 
740   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
741                     Value &Orig, Value &Inner,
742                     Value *&ReplacementValue) -> InsertPointTy {
743     ++NumPrivatizedVars;
744 
745     if (!isa<AllocaInst>(Orig)) {
746       EXPECT_EQ(&Orig, F->arg_begin());
747       ReplacementValue = &Inner;
748       return CodeGenIP;
749     }
750 
751     // Since the original value is an allocation, it has a pointer type and
752     // therefore no additional wrapping should happen.
753     EXPECT_EQ(&Orig, &Inner);
754 
755     // Trivial copy (=firstprivate).
756     Builder.restoreIP(AllocaIP);
757     Type *VTy = ReplacementValue->getType();
758     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
759     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
760     Builder.restoreIP(CodeGenIP);
761     Builder.CreateStore(V, ReplacementValue);
762     return CodeGenIP;
763   };
764 
765   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
766 
767   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
768                                     F->getEntryBlock().getFirstInsertionPt());
769   IRBuilder<>::InsertPoint AfterIP =
770       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
771                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
772   EXPECT_EQ(NumBodiesGenerated, 1U);
773   EXPECT_EQ(NumPrivatizedVars, 1U);
774   EXPECT_EQ(NumFinalizationPoints, 1U);
775 
776   Builder.restoreIP(AfterIP);
777   Builder.CreateRetVoid();
778 
779   OMPBuilder.finalize();
780 
781   EXPECT_NE(PrivAI, nullptr);
782   Function *OutlinedFn = PrivAI->getFunction();
783   EXPECT_NE(F, OutlinedFn);
784   EXPECT_FALSE(verifyModule(*M, &errs()));
785   EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
786   EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
787   EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
788 
789   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
790   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
791 
792   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
793   EXPECT_EQ(OutlinedFn->getNumUses(), 1U);
794   User *Usr = OutlinedFn->user_back();
795   ASSERT_TRUE(isa<CallInst>(Usr));
796   CallInst *ForkCI = dyn_cast<CallInst>(Usr);
797   ASSERT_NE(ForkCI, nullptr);
798 
799   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
800   EXPECT_EQ(ForkCI->arg_size(), 4U);
801   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
802   EXPECT_EQ(ForkCI->getArgOperand(1),
803             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
804   EXPECT_EQ(ForkCI, Usr);
805   Value *StoredValue =
806       findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
807   EXPECT_EQ(StoredValue, F->arg_begin());
808 }
809 
810 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
811   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
812   OpenMPIRBuilder OMPBuilder(*M);
813   OMPBuilder.Config.IsTargetDevice = false;
814   OMPBuilder.initialize();
815   F->setName("func");
816   IRBuilder<> Builder(BB);
817 
818   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
819   Builder.CreateBr(EnterBB);
820   Builder.SetInsertPoint(EnterBB);
821   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
822 
823   unsigned NumInnerBodiesGenerated = 0;
824   unsigned NumOuterBodiesGenerated = 0;
825   unsigned NumFinalizationPoints = 0;
826 
827   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
828     ++NumInnerBodiesGenerated;
829   };
830 
831   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
832                     Value &Orig, Value &Inner,
833                     Value *&ReplacementValue) -> InsertPointTy {
834     // Trivial copy (=firstprivate).
835     Builder.restoreIP(AllocaIP);
836     Type *VTy = ReplacementValue->getType();
837     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
838     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
839     Builder.restoreIP(CodeGenIP);
840     Builder.CreateStore(V, ReplacementValue);
841     return CodeGenIP;
842   };
843 
844   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
845 
846   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
847     ++NumOuterBodiesGenerated;
848     Builder.restoreIP(CodeGenIP);
849     BasicBlock *CGBB = CodeGenIP.getBlock();
850     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
851     CGBB->getTerminator()->eraseFromParent();
852     ;
853 
854     IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel(
855         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
856         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
857 
858     Builder.restoreIP(AfterIP);
859     Builder.CreateBr(NewBB);
860   };
861 
862   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
863                                     F->getEntryBlock().getFirstInsertionPt());
864   IRBuilder<>::InsertPoint AfterIP =
865       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
866                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
867 
868   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
869   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
870   EXPECT_EQ(NumFinalizationPoints, 2U);
871 
872   Builder.restoreIP(AfterIP);
873   Builder.CreateRetVoid();
874 
875   OMPBuilder.finalize();
876 
877   EXPECT_EQ(M->size(), 5U);
878   for (Function &OutlinedFn : *M) {
879     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
880       continue;
881     EXPECT_FALSE(verifyModule(*M, &errs()));
882     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
883     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
884     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
885 
886     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
887     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
888 
889     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
890     User *Usr = OutlinedFn.user_back();
891     ASSERT_TRUE(isa<CallInst>(Usr));
892     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
893     ASSERT_NE(ForkCI, nullptr);
894 
895     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
896     EXPECT_EQ(ForkCI->arg_size(), 3U);
897     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
898     EXPECT_EQ(ForkCI->getArgOperand(1),
899               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
900     EXPECT_EQ(ForkCI, Usr);
901   }
902 }
903 
904 TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
905   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
906   OpenMPIRBuilder OMPBuilder(*M);
907   OMPBuilder.Config.IsTargetDevice = false;
908   OMPBuilder.initialize();
909   F->setName("func");
910   IRBuilder<> Builder(BB);
911 
912   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
913   Builder.CreateBr(EnterBB);
914   Builder.SetInsertPoint(EnterBB);
915   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
916 
917   unsigned NumInnerBodiesGenerated = 0;
918   unsigned NumOuterBodiesGenerated = 0;
919   unsigned NumFinalizationPoints = 0;
920 
921   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
922     ++NumInnerBodiesGenerated;
923   };
924 
925   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
926                     Value &Orig, Value &Inner,
927                     Value *&ReplacementValue) -> InsertPointTy {
928     // Trivial copy (=firstprivate).
929     Builder.restoreIP(AllocaIP);
930     Type *VTy = ReplacementValue->getType();
931     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
932     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
933     Builder.restoreIP(CodeGenIP);
934     Builder.CreateStore(V, ReplacementValue);
935     return CodeGenIP;
936   };
937 
938   auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
939 
940   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
941     ++NumOuterBodiesGenerated;
942     Builder.restoreIP(CodeGenIP);
943     BasicBlock *CGBB = CodeGenIP.getBlock();
944     BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
945     BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
946     CGBB->getTerminator()->eraseFromParent();
947     ;
948     NewBB1->getTerminator()->eraseFromParent();
949     ;
950 
951     IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel(
952         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
953         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
954 
955     Builder.restoreIP(AfterIP1);
956     Builder.CreateBr(NewBB1);
957 
958     IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel(
959         InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
960         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
961 
962     Builder.restoreIP(AfterIP2);
963     Builder.CreateBr(NewBB2);
964   };
965 
966   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
967                                     F->getEntryBlock().getFirstInsertionPt());
968   IRBuilder<>::InsertPoint AfterIP =
969       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
970                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
971 
972   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
973   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
974   EXPECT_EQ(NumFinalizationPoints, 3U);
975 
976   Builder.restoreIP(AfterIP);
977   Builder.CreateRetVoid();
978 
979   OMPBuilder.finalize();
980 
981   EXPECT_EQ(M->size(), 6U);
982   for (Function &OutlinedFn : *M) {
983     if (F == &OutlinedFn || OutlinedFn.isDeclaration())
984       continue;
985     EXPECT_FALSE(verifyModule(*M, &errs()));
986     EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
987     EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
988     EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));
989 
990     EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
991     EXPECT_EQ(OutlinedFn.arg_size(), 2U);
992 
993     unsigned NumAllocas = 0;
994     for (Instruction &I : instructions(OutlinedFn))
995       NumAllocas += isa<AllocaInst>(I);
996     EXPECT_EQ(NumAllocas, 1U);
997 
998     EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
999     User *Usr = OutlinedFn.user_back();
1000     ASSERT_TRUE(isa<CallInst>(Usr));
1001     CallInst *ForkCI = dyn_cast<CallInst>(Usr);
1002     ASSERT_NE(ForkCI, nullptr);
1003 
1004     EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
1005     EXPECT_EQ(ForkCI->arg_size(), 3U);
1006     EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1007     EXPECT_EQ(ForkCI->getArgOperand(1),
1008               ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
1009     EXPECT_EQ(ForkCI, Usr);
1010   }
1011 }
1012 
1013 TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
1014   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1015   OpenMPIRBuilder OMPBuilder(*M);
1016   OMPBuilder.Config.IsTargetDevice = false;
1017   OMPBuilder.initialize();
1018   F->setName("func");
1019   IRBuilder<> Builder(BB);
1020 
1021   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1022   Builder.CreateBr(EnterBB);
1023   Builder.SetInsertPoint(EnterBB);
1024   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1025 
1026   AllocaInst *PrivAI = nullptr;
1027 
1028   unsigned NumBodiesGenerated = 0;
1029   unsigned NumPrivatizedVars = 0;
1030   unsigned NumFinalizationPoints = 0;
1031 
1032   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1033     ++NumBodiesGenerated;
1034 
1035     Builder.restoreIP(AllocaIP);
1036     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
1037     Builder.CreateStore(F->arg_begin(), PrivAI);
1038 
1039     Builder.restoreIP(CodeGenIP);
1040     Value *PrivLoad =
1041         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
1042     Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
1043     Instruction *ThenTerm, *ElseTerm;
1044     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
1045                                   &ElseTerm);
1046   };
1047 
1048   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1049                     Value &Orig, Value &Inner,
1050                     Value *&ReplacementValue) -> InsertPointTy {
1051     ++NumPrivatizedVars;
1052 
1053     if (!isa<AllocaInst>(Orig)) {
1054       EXPECT_EQ(&Orig, F->arg_begin());
1055       ReplacementValue = &Inner;
1056       return CodeGenIP;
1057     }
1058 
1059     // Since the original value is an allocation, it has a pointer type and
1060     // therefore no additional wrapping should happen.
1061     EXPECT_EQ(&Orig, &Inner);
1062 
1063     // Trivial copy (=firstprivate).
1064     Builder.restoreIP(AllocaIP);
1065     Type *VTy = ReplacementValue->getType();
1066     Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload");
1067     ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy");
1068     Builder.restoreIP(CodeGenIP);
1069     Builder.CreateStore(V, ReplacementValue);
1070     return CodeGenIP;
1071   };
1072 
1073   auto FiniCB = [&](InsertPointTy CodeGenIP) {
1074     ++NumFinalizationPoints;
1075     // No destructors.
1076   };
1077 
1078   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1079                                     F->getEntryBlock().getFirstInsertionPt());
1080   IRBuilder<>::InsertPoint AfterIP =
1081       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1082                                 Builder.CreateIsNotNull(F->arg_begin()),
1083                                 nullptr, OMP_PROC_BIND_default, false);
1084 
1085   EXPECT_EQ(NumBodiesGenerated, 1U);
1086   EXPECT_EQ(NumPrivatizedVars, 1U);
1087   EXPECT_EQ(NumFinalizationPoints, 1U);
1088 
1089   Builder.restoreIP(AfterIP);
1090   Builder.CreateRetVoid();
1091   OMPBuilder.finalize();
1092 
1093   EXPECT_NE(PrivAI, nullptr);
1094   Function *OutlinedFn = PrivAI->getFunction();
1095   EXPECT_NE(F, OutlinedFn);
1096   EXPECT_FALSE(verifyModule(*M, &errs()));
1097 
1098   EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
1099   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
1100 
1101   EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
1102   ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
1103 
1104   CallInst *ForkCI = nullptr;
1105   for (User *Usr : OutlinedFn->users()) {
1106     ASSERT_TRUE(isa<CallInst>(Usr));
1107     ForkCI = cast<CallInst>(Usr);
1108   }
1109 
1110   EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call_if");
1111   EXPECT_EQ(ForkCI->arg_size(), 5U);
1112   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
1113   EXPECT_EQ(ForkCI->getArgOperand(1),
1114             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
1115   EXPECT_EQ(ForkCI->getArgOperand(3)->getType(), Type::getInt32Ty(Ctx));
1116 }
1117 
1118 TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
1119   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1120   OpenMPIRBuilder OMPBuilder(*M);
1121   OMPBuilder.Config.IsTargetDevice = false;
1122   OMPBuilder.initialize();
1123   F->setName("func");
1124   IRBuilder<> Builder(BB);
1125 
1126   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
1127   Builder.CreateBr(EnterBB);
1128   Builder.SetInsertPoint(EnterBB);
1129   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1130 
1131   unsigned NumBodiesGenerated = 0;
1132   unsigned NumPrivatizedVars = 0;
1133   unsigned NumFinalizationPoints = 0;
1134 
1135   CallInst *CheckedBarrier = nullptr;
1136   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1137     ++NumBodiesGenerated;
1138 
1139     Builder.restoreIP(CodeGenIP);
1140 
1141     // Create three barriers, two cancel barriers but only one checked.
1142     Function *CBFn, *BFn;
1143 
1144     Builder.restoreIP(
1145         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
1146 
1147     CBFn = M->getFunction("__kmpc_cancel_barrier");
1148     BFn = M->getFunction("__kmpc_barrier");
1149     ASSERT_NE(CBFn, nullptr);
1150     ASSERT_EQ(BFn, nullptr);
1151     ASSERT_EQ(CBFn->getNumUses(), 1U);
1152     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1153     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
1154     CheckedBarrier = cast<CallInst>(CBFn->user_back());
1155 
1156     Builder.restoreIP(
1157         OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
1158     CBFn = M->getFunction("__kmpc_cancel_barrier");
1159     BFn = M->getFunction("__kmpc_barrier");
1160     ASSERT_NE(CBFn, nullptr);
1161     ASSERT_NE(BFn, nullptr);
1162     ASSERT_EQ(CBFn->getNumUses(), 1U);
1163     ASSERT_EQ(BFn->getNumUses(), 1U);
1164     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
1165     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
1166 
1167     Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel,
1168                                                false, false));
1169     ASSERT_EQ(CBFn->getNumUses(), 2U);
1170     ASSERT_EQ(BFn->getNumUses(), 1U);
1171     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
1172     ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
1173     ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
1174   };
1175 
1176   auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &,
1177                     Value *&) -> InsertPointTy {
1178     ++NumPrivatizedVars;
1179     llvm_unreachable("No privatization callback call expected!");
1180   };
1181 
1182   FunctionType *FakeDestructorTy =
1183       FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
1184                         /*isVarArg=*/false);
1185   auto *FakeDestructor = Function::Create(
1186       FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());
1187 
1188   auto FiniCB = [&](InsertPointTy IP) {
1189     ++NumFinalizationPoints;
1190     Builder.restoreIP(IP);
1191     Builder.CreateCall(FakeDestructor,
1192                        {Builder.getInt32(NumFinalizationPoints)});
1193   };
1194 
1195   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1196                                     F->getEntryBlock().getFirstInsertionPt());
1197   IRBuilder<>::InsertPoint AfterIP =
1198       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1199                                 Builder.CreateIsNotNull(F->arg_begin()),
1200                                 nullptr, OMP_PROC_BIND_default, true);
1201 
1202   EXPECT_EQ(NumBodiesGenerated, 1U);
1203   EXPECT_EQ(NumPrivatizedVars, 0U);
1204   EXPECT_EQ(NumFinalizationPoints, 2U);
1205   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
1206 
1207   Builder.restoreIP(AfterIP);
1208   Builder.CreateRetVoid();
1209   OMPBuilder.finalize();
1210 
1211   EXPECT_FALSE(verifyModule(*M, &errs()));
1212 
1213   BasicBlock *ExitBB = nullptr;
1214   for (const User *Usr : FakeDestructor->users()) {
1215     const CallInst *CI = dyn_cast<CallInst>(Usr);
1216     ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
1217     ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
1218     ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
1219     if (ExitBB)
1220       ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
1221     else
1222       ExitBB = CI->getNextNode()->getSuccessor(0);
1223     ASSERT_EQ(ExitBB->size(), 1U);
1224     if (!isa<ReturnInst>(ExitBB->front())) {
1225       ASSERT_TRUE(isa<BranchInst>(ExitBB->front()));
1226       ASSERT_EQ(cast<BranchInst>(ExitBB->front()).getNumSuccessors(), 1U);
1227       ASSERT_TRUE(isa<ReturnInst>(
1228           cast<BranchInst>(ExitBB->front()).getSuccessor(0)->front()));
1229     }
1230   }
1231 }
1232 
1233 TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
1234   OpenMPIRBuilder OMPBuilder(*M);
1235   OMPBuilder.Config.IsTargetDevice = false;
1236   OMPBuilder.initialize();
1237   F->setName("func");
1238   IRBuilder<> Builder(BB);
1239   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1240   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1241 
1242   Type *I32Ty = Type::getInt32Ty(M->getContext());
1243   Type *PtrTy = PointerType::get(M->getContext(), 0);
1244   Type *StructTy = StructType::get(I32Ty, PtrTy);
1245   Type *VoidTy = Type::getVoidTy(M->getContext());
1246   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
1247   FunctionCallee TakeI32Func =
1248       M->getOrInsertFunction("take_i32", VoidTy, I32Ty);
1249   FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", PtrTy);
1250   FunctionCallee TakeI32PtrFunc =
1251       M->getOrInsertFunction("take_i32ptr", VoidTy, PtrTy);
1252   FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy);
1253   FunctionCallee TakeStructFunc =
1254       M->getOrInsertFunction("take_struct", VoidTy, StructTy);
1255   FunctionCallee RetStructPtrFunc =
1256       M->getOrInsertFunction("ret_structptr", PtrTy);
1257   FunctionCallee TakeStructPtrFunc =
1258       M->getOrInsertFunction("take_structPtr", VoidTy, PtrTy);
1259   Value *I32Val = Builder.CreateCall(RetI32Func);
1260   Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc);
1261   Value *StructVal = Builder.CreateCall(RetStructFunc);
1262   Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
1263 
1264   Instruction *Internal;
1265   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
1266     IRBuilder<>::InsertPointGuard Guard(Builder);
1267     Builder.restoreIP(CodeGenIP);
1268     Internal = Builder.CreateCall(TakeI32Func, I32Val);
1269     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
1270     Builder.CreateCall(TakeStructFunc, StructVal);
1271     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
1272   };
1273   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
1274                     Value &Inner, Value *&ReplacementValue) {
1275     ReplacementValue = &Inner;
1276     return CodeGenIP;
1277   };
1278   auto FiniCB = [](InsertPointTy) {};
1279 
1280   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
1281                                     F->getEntryBlock().getFirstInsertionPt());
1282   IRBuilder<>::InsertPoint AfterIP =
1283       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1284                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
1285   Builder.restoreIP(AfterIP);
1286   Builder.CreateRetVoid();
1287 
1288   OMPBuilder.finalize();
1289 
1290   EXPECT_FALSE(verifyModule(*M, &errs()));
1291   Function *OutlinedFn = Internal->getFunction();
1292 
1293   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
1294   EXPECT_TRUE(Arg2Type->isPointerTy());
1295 }
1296 
1297 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
1298   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1299   OpenMPIRBuilder OMPBuilder(*M);
1300   OMPBuilder.initialize();
1301   IRBuilder<> Builder(BB);
1302   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1303   Value *TripCount = F->getArg(0);
1304 
1305   unsigned NumBodiesGenerated = 0;
1306   auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
1307     NumBodiesGenerated += 1;
1308 
1309     Builder.restoreIP(CodeGenIP);
1310 
1311     Value *Cmp = Builder.CreateICmpEQ(LC, TripCount);
1312     Instruction *ThenTerm, *ElseTerm;
1313     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
1314                                   &ThenTerm, &ElseTerm);
1315   };
1316 
1317   CanonicalLoopInfo *Loop =
1318       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
1319 
1320   Builder.restoreIP(Loop->getAfterIP());
1321   ReturnInst *RetInst = Builder.CreateRetVoid();
1322   OMPBuilder.finalize();
1323 
1324   Loop->assertOK();
1325   EXPECT_FALSE(verifyModule(*M, &errs()));
1326 
1327   EXPECT_EQ(NumBodiesGenerated, 1U);
1328 
1329   // Verify control flow structure (in addition to Loop->assertOK()).
1330   EXPECT_EQ(Loop->getPreheader()->getSinglePredecessor(), &F->getEntryBlock());
1331   EXPECT_EQ(Loop->getAfter(), Builder.GetInsertBlock());
1332 
1333   Instruction *IndVar = Loop->getIndVar();
1334   EXPECT_TRUE(isa<PHINode>(IndVar));
1335   EXPECT_EQ(IndVar->getType(), TripCount->getType());
1336   EXPECT_EQ(IndVar->getParent(), Loop->getHeader());
1337 
1338   EXPECT_EQ(Loop->getTripCount(), TripCount);
1339 
1340   BasicBlock *Body = Loop->getBody();
1341   Instruction *CmpInst = &Body->front();
1342   EXPECT_TRUE(isa<ICmpInst>(CmpInst));
1343   EXPECT_EQ(CmpInst->getOperand(0), IndVar);
1344 
1345   BasicBlock *LatchPred = Loop->getLatch()->getSinglePredecessor();
1346   EXPECT_TRUE(llvm::all_of(successors(Body), [=](BasicBlock *SuccBB) {
1347     return SuccBB->getSingleSuccessor() == LatchPred;
1348   }));
1349 
1350   EXPECT_EQ(&Loop->getAfter()->front(), RetInst);
1351 }
1352 
1353 TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
1354   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1355   OpenMPIRBuilder OMPBuilder(*M);
1356   OMPBuilder.initialize();
1357   IRBuilder<> Builder(BB);
1358 
1359   // Check the trip count is computed correctly. We generate the canonical loop
1360   // but rely on the IRBuilder's constant folder to compute the final result
1361   // since all inputs are constant. To verify overflow situations, limit the
1362   // trip count / loop counter widths to 16 bits.
1363   auto EvalTripCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1364                            bool IsSigned, bool InclusiveStop) -> int64_t {
1365     OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1366     Type *LCTy = Type::getInt16Ty(Ctx);
1367     Value *StartVal = ConstantInt::get(LCTy, Start);
1368     Value *StopVal = ConstantInt::get(LCTy, Stop);
1369     Value *StepVal = ConstantInt::get(LCTy, Step);
1370     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1371     CanonicalLoopInfo *Loop =
1372         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1373                                        StepVal, IsSigned, InclusiveStop);
1374     Loop->assertOK();
1375     Builder.restoreIP(Loop->getAfterIP());
1376     Value *TripCount = Loop->getTripCount();
1377     return cast<ConstantInt>(TripCount)->getValue().getZExtValue();
1378   };
1379 
1380   EXPECT_EQ(EvalTripCount(0, 0, 1, false, false), 0);
1381   EXPECT_EQ(EvalTripCount(0, 1, 2, false, false), 1);
1382   EXPECT_EQ(EvalTripCount(0, 42, 1, false, false), 42);
1383   EXPECT_EQ(EvalTripCount(0, 42, 2, false, false), 21);
1384   EXPECT_EQ(EvalTripCount(21, 42, 1, false, false), 21);
1385   EXPECT_EQ(EvalTripCount(0, 5, 5, false, false), 1);
1386   EXPECT_EQ(EvalTripCount(0, 9, 5, false, false), 2);
1387   EXPECT_EQ(EvalTripCount(0, 11, 5, false, false), 3);
1388   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 1, false, false), 0xFFFF);
1389   EXPECT_EQ(EvalTripCount(0xFFFF, 0, 1, false, false), 0);
1390   EXPECT_EQ(EvalTripCount(0xFFFE, 0xFFFF, 1, false, false), 1);
1391   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0x100, false, false), 0x100);
1392   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFF, false, false), 1);
1393 
1394   EXPECT_EQ(EvalTripCount(0, 6, 5, false, false), 2);
1395   EXPECT_EQ(EvalTripCount(0, 0xFFFF, 0xFFFE, false, false), 2);
1396   EXPECT_EQ(EvalTripCount(0, 0, 1, false, true), 1);
1397   EXPECT_EQ(EvalTripCount(0, 0, 0xFFFF, false, true), 1);
1398   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 1, false, true), 0xFFFF);
1399   EXPECT_EQ(EvalTripCount(0, 0xFFFE, 2, false, true), 0x8000);
1400 
1401   EXPECT_EQ(EvalTripCount(0, 0, -1, true, false), 0);
1402   EXPECT_EQ(EvalTripCount(0, 1, -1, true, true), 0);
1403   EXPECT_EQ(EvalTripCount(20, 5, -5, true, false), 3);
1404   EXPECT_EQ(EvalTripCount(20, 5, -5, true, true), 4);
1405   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, false), 1);
1406   EXPECT_EQ(EvalTripCount(-4, -3, 2, true, false), 1);
1407   EXPECT_EQ(EvalTripCount(-4, -2, 2, true, true), 2);
1408 
1409   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, false), 0x8000);
1410   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 1, true, true), 0x8001);
1411   EXPECT_EQ(EvalTripCount(INT16_MIN, 0x7FFF, 1, true, false), 0xFFFF);
1412   EXPECT_EQ(EvalTripCount(INT16_MIN + 1, 0x7FFF, 1, true, true), 0xFFFF);
1413   EXPECT_EQ(EvalTripCount(INT16_MIN, 0, 0x7FFF, true, false), 2);
1414   EXPECT_EQ(EvalTripCount(0x7FFF, 0, -1, true, false), 0x7FFF);
1415   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -1, true, false), 0x8000);
1416   EXPECT_EQ(EvalTripCount(0, INT16_MIN, -16, true, false), 0x800);
1417   EXPECT_EQ(EvalTripCount(0x7FFF, INT16_MIN, -1, true, false), 0xFFFF);
1418   EXPECT_EQ(EvalTripCount(0x7FFF, 1, INT16_MIN, true, false), 1);
1419   EXPECT_EQ(EvalTripCount(0x7FFF, -1, INT16_MIN, true, true), 2);
1420 
1421   // Finalize the function and verify it.
1422   Builder.CreateRetVoid();
1423   OMPBuilder.finalize();
1424   EXPECT_FALSE(verifyModule(*M, &errs()));
1425 }
1426 
1427 TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
1428   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1429   OpenMPIRBuilder OMPBuilder(*M);
1430   OMPBuilder.initialize();
1431   F->setName("func");
1432 
1433   IRBuilder<> Builder(BB);
1434 
1435   Type *LCTy = F->getArg(0)->getType();
1436   Constant *One = ConstantInt::get(LCTy, 1);
1437   Constant *Two = ConstantInt::get(LCTy, 2);
1438   Value *OuterTripCount =
1439       Builder.CreateAdd(F->getArg(0), Two, "tripcount.outer");
1440   Value *InnerTripCount =
1441       Builder.CreateAdd(F->getArg(0), One, "tripcount.inner");
1442 
1443   // Fix an insertion point for ComputeIP.
1444   BasicBlock *LoopNextEnter =
1445       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1446                          Builder.GetInsertBlock()->getNextNode());
1447   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1448   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1449 
1450   Builder.SetInsertPoint(LoopNextEnter);
1451   OpenMPIRBuilder::LocationDescription OuterLoc(Builder.saveIP(), DL);
1452 
1453   CanonicalLoopInfo *InnerLoop = nullptr;
1454   CallInst *InbetweenLead = nullptr;
1455   CallInst *InbetweenTrail = nullptr;
1456   CallInst *Call = nullptr;
1457   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP, Value *OuterLC) {
1458     Builder.restoreIP(OuterCodeGenIP);
1459     InbetweenLead =
1460         createPrintfCall(Builder, "In-between lead i=%d\\n", {OuterLC});
1461 
1462     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1463                                   Value *InnerLC) {
1464       Builder.restoreIP(InnerCodeGenIP);
1465       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
1466     };
1467     InnerLoop = OMPBuilder.createCanonicalLoop(
1468         Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");
1469 
1470     Builder.restoreIP(InnerLoop->getAfterIP());
1471     InbetweenTrail =
1472         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
1473   };
1474   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1475       OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");
1476 
1477   // Finish the function.
1478   Builder.restoreIP(OuterLoop->getAfterIP());
1479   Builder.CreateRetVoid();
1480 
1481   CanonicalLoopInfo *Collapsed =
1482       OMPBuilder.collapseLoops(DL, {OuterLoop, InnerLoop}, ComputeIP);
1483 
1484   OMPBuilder.finalize();
1485   EXPECT_FALSE(verifyModule(*M, &errs()));
1486 
1487   // Verify control flow and BB order.
1488   BasicBlock *RefOrder[] = {
1489       Collapsed->getPreheader(),   Collapsed->getHeader(),
1490       Collapsed->getCond(),        Collapsed->getBody(),
1491       InbetweenLead->getParent(),  Call->getParent(),
1492       InbetweenTrail->getParent(), Collapsed->getLatch(),
1493       Collapsed->getExit(),        Collapsed->getAfter(),
1494   };
1495   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1496   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1497 
1498   // Verify the total trip count.
1499   auto *TripCount = cast<MulOperator>(Collapsed->getTripCount());
1500   EXPECT_EQ(TripCount->getOperand(0), OuterTripCount);
1501   EXPECT_EQ(TripCount->getOperand(1), InnerTripCount);
1502 
1503   // Verify the changed indvar.
1504   auto *OuterIV = cast<BinaryOperator>(Call->getOperand(1));
1505   EXPECT_EQ(OuterIV->getOpcode(), Instruction::UDiv);
1506   EXPECT_EQ(OuterIV->getParent(), Collapsed->getBody());
1507   EXPECT_EQ(OuterIV->getOperand(1), InnerTripCount);
1508   EXPECT_EQ(OuterIV->getOperand(0), Collapsed->getIndVar());
1509 
1510   auto *InnerIV = cast<BinaryOperator>(Call->getOperand(2));
1511   EXPECT_EQ(InnerIV->getOpcode(), Instruction::URem);
1512   EXPECT_EQ(InnerIV->getParent(), Collapsed->getBody());
1513   EXPECT_EQ(InnerIV->getOperand(0), Collapsed->getIndVar());
1514   EXPECT_EQ(InnerIV->getOperand(1), InnerTripCount);
1515 
1516   EXPECT_EQ(InbetweenLead->getOperand(1), OuterIV);
1517   EXPECT_EQ(InbetweenTrail->getOperand(1), OuterIV);
1518 }
1519 
1520 TEST_F(OpenMPIRBuilderTest, TileSingleLoop) {
1521   OpenMPIRBuilder OMPBuilder(*M);
1522   CallInst *Call;
1523   BasicBlock *BodyCode;
1524   CanonicalLoopInfo *Loop =
1525       buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode);
1526 
1527   Instruction *OrigIndVar = Loop->getIndVar();
1528   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
1529 
1530   // Tile the loop.
1531   Constant *TileSize = ConstantInt::get(Loop->getIndVarType(), APInt(32, 7));
1532   std::vector<CanonicalLoopInfo *> GenLoops =
1533       OMPBuilder.tileLoops(DL, {Loop}, {TileSize});
1534 
1535   OMPBuilder.finalize();
1536   EXPECT_FALSE(verifyModule(*M, &errs()));
1537 
1538   EXPECT_EQ(GenLoops.size(), 2u);
1539   CanonicalLoopInfo *Floor = GenLoops[0];
1540   CanonicalLoopInfo *Tile = GenLoops[1];
1541 
1542   BasicBlock *RefOrder[] = {
1543       Floor->getPreheader(), Floor->getHeader(),   Floor->getCond(),
1544       Floor->getBody(),      Tile->getPreheader(), Tile->getHeader(),
1545       Tile->getCond(),       Tile->getBody(),      BodyCode,
1546       Tile->getLatch(),      Tile->getExit(),      Tile->getAfter(),
1547       Floor->getLatch(),     Floor->getExit(),     Floor->getAfter(),
1548   };
1549   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1550   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1551 
1552   // Check the induction variable.
1553   EXPECT_EQ(Call->getParent(), BodyCode);
1554   auto *Shift = cast<AddOperator>(Call->getOperand(1));
1555   EXPECT_EQ(cast<Instruction>(Shift)->getParent(), Tile->getBody());
1556   EXPECT_EQ(Shift->getOperand(1), Tile->getIndVar());
1557   auto *Scale = cast<MulOperator>(Shift->getOperand(0));
1558   EXPECT_EQ(cast<Instruction>(Scale)->getParent(), Tile->getBody());
1559   EXPECT_EQ(Scale->getOperand(0), TileSize);
1560   EXPECT_EQ(Scale->getOperand(1), Floor->getIndVar());
1561 }
1562 
1563 TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
1564   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1565   OpenMPIRBuilder OMPBuilder(*M);
1566   OMPBuilder.initialize();
1567   F->setName("func");
1568 
1569   IRBuilder<> Builder(BB);
1570   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
1571   Value *TripCount = F->getArg(0);
1572   Type *LCTy = TripCount->getType();
1573 
1574   BasicBlock *BodyCode = nullptr;
1575   CanonicalLoopInfo *InnerLoop = nullptr;
1576   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1577                                 llvm::Value *OuterLC) {
1578     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1579                                   llvm::Value *InnerLC) {
1580       Builder.restoreIP(InnerCodeGenIP);
1581       BodyCode = Builder.GetInsertBlock();
1582 
1583       // Add something that consumes the induction variables to the body.
1584       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1585     };
1586     InnerLoop = OMPBuilder.createCanonicalLoop(
1587         OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
1588   };
1589   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1590       Loc, OuterLoopBodyGenCB, TripCount, "outer");
1591 
1592   // Finalize the function.
1593   Builder.restoreIP(OuterLoop->getAfterIP());
1594   Builder.CreateRetVoid();
1595 
1596   // Tile to loop nest.
1597   Constant *OuterTileSize = ConstantInt::get(LCTy, APInt(32, 11));
1598   Constant *InnerTileSize = ConstantInt::get(LCTy, APInt(32, 7));
1599   std::vector<CanonicalLoopInfo *> GenLoops = OMPBuilder.tileLoops(
1600       DL, {OuterLoop, InnerLoop}, {OuterTileSize, InnerTileSize});
1601 
1602   OMPBuilder.finalize();
1603   EXPECT_FALSE(verifyModule(*M, &errs()));
1604 
1605   EXPECT_EQ(GenLoops.size(), 4u);
1606   CanonicalLoopInfo *Floor1 = GenLoops[0];
1607   CanonicalLoopInfo *Floor2 = GenLoops[1];
1608   CanonicalLoopInfo *Tile1 = GenLoops[2];
1609   CanonicalLoopInfo *Tile2 = GenLoops[3];
1610 
1611   BasicBlock *RefOrder[] = {
1612       Floor1->getPreheader(),
1613       Floor1->getHeader(),
1614       Floor1->getCond(),
1615       Floor1->getBody(),
1616       Floor2->getPreheader(),
1617       Floor2->getHeader(),
1618       Floor2->getCond(),
1619       Floor2->getBody(),
1620       Tile1->getPreheader(),
1621       Tile1->getHeader(),
1622       Tile1->getCond(),
1623       Tile1->getBody(),
1624       Tile2->getPreheader(),
1625       Tile2->getHeader(),
1626       Tile2->getCond(),
1627       Tile2->getBody(),
1628       BodyCode,
1629       Tile2->getLatch(),
1630       Tile2->getExit(),
1631       Tile2->getAfter(),
1632       Tile1->getLatch(),
1633       Tile1->getExit(),
1634       Tile1->getAfter(),
1635       Floor2->getLatch(),
1636       Floor2->getExit(),
1637       Floor2->getAfter(),
1638       Floor1->getLatch(),
1639       Floor1->getExit(),
1640       Floor1->getAfter(),
1641   };
1642   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1643   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1644 }
1645 
1646 TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
1647   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1648   OpenMPIRBuilder OMPBuilder(*M);
1649   OMPBuilder.initialize();
1650   F->setName("func");
1651 
1652   IRBuilder<> Builder(BB);
1653   Value *TripCount = F->getArg(0);
1654   Type *LCTy = TripCount->getType();
1655 
1656   Value *OuterStartVal = ConstantInt::get(LCTy, 2);
1657   Value *OuterStopVal = TripCount;
1658   Value *OuterStep = ConstantInt::get(LCTy, 5);
1659   Value *InnerStartVal = ConstantInt::get(LCTy, 13);
1660   Value *InnerStopVal = TripCount;
1661   Value *InnerStep = ConstantInt::get(LCTy, 3);
1662 
1663   // Fix an insertion point for ComputeIP.
1664   BasicBlock *LoopNextEnter =
1665       BasicBlock::Create(M->getContext(), "loopnest.enter", F,
1666                          Builder.GetInsertBlock()->getNextNode());
1667   BranchInst *EnterBr = Builder.CreateBr(LoopNextEnter);
1668   InsertPointTy ComputeIP{EnterBr->getParent(), EnterBr->getIterator()};
1669 
1670   InsertPointTy LoopIP{LoopNextEnter, LoopNextEnter->begin()};
1671   OpenMPIRBuilder::LocationDescription Loc({LoopIP, DL});
1672 
1673   BasicBlock *BodyCode = nullptr;
1674   CanonicalLoopInfo *InnerLoop = nullptr;
1675   CallInst *Call = nullptr;
1676   auto OuterLoopBodyGenCB = [&](InsertPointTy OuterCodeGenIP,
1677                                 llvm::Value *OuterLC) {
1678     auto InnerLoopBodyGenCB = [&](InsertPointTy InnerCodeGenIP,
1679                                   llvm::Value *InnerLC) {
1680       Builder.restoreIP(InnerCodeGenIP);
1681       BodyCode = Builder.GetInsertBlock();
1682 
1683       // Add something that consumes the induction variable to the body.
1684       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
1685     };
1686     InnerLoop = OMPBuilder.createCanonicalLoop(
1687         OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
1688         InnerStep, false, false, ComputeIP, "inner");
1689   };
1690   CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
1691       Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
1692       false, ComputeIP, "outer");
1693 
1694   // Finalize the function
1695   Builder.restoreIP(OuterLoop->getAfterIP());
1696   Builder.CreateRetVoid();
1697 
1698   // Tile the loop nest.
1699   Constant *TileSize0 = ConstantInt::get(LCTy, APInt(32, 11));
1700   Constant *TileSize1 = ConstantInt::get(LCTy, APInt(32, 7));
1701   std::vector<CanonicalLoopInfo *> GenLoops =
1702       OMPBuilder.tileLoops(DL, {OuterLoop, InnerLoop}, {TileSize0, TileSize1});
1703 
1704   OMPBuilder.finalize();
1705   EXPECT_FALSE(verifyModule(*M, &errs()));
1706 
1707   EXPECT_EQ(GenLoops.size(), 4u);
1708   CanonicalLoopInfo *Floor0 = GenLoops[0];
1709   CanonicalLoopInfo *Floor1 = GenLoops[1];
1710   CanonicalLoopInfo *Tile0 = GenLoops[2];
1711   CanonicalLoopInfo *Tile1 = GenLoops[3];
1712 
1713   BasicBlock *RefOrder[] = {
1714       Floor0->getPreheader(),
1715       Floor0->getHeader(),
1716       Floor0->getCond(),
1717       Floor0->getBody(),
1718       Floor1->getPreheader(),
1719       Floor1->getHeader(),
1720       Floor1->getCond(),
1721       Floor1->getBody(),
1722       Tile0->getPreheader(),
1723       Tile0->getHeader(),
1724       Tile0->getCond(),
1725       Tile0->getBody(),
1726       Tile1->getPreheader(),
1727       Tile1->getHeader(),
1728       Tile1->getCond(),
1729       Tile1->getBody(),
1730       BodyCode,
1731       Tile1->getLatch(),
1732       Tile1->getExit(),
1733       Tile1->getAfter(),
1734       Tile0->getLatch(),
1735       Tile0->getExit(),
1736       Tile0->getAfter(),
1737       Floor1->getLatch(),
1738       Floor1->getExit(),
1739       Floor1->getAfter(),
1740       Floor0->getLatch(),
1741       Floor0->getExit(),
1742       Floor0->getAfter(),
1743   };
1744   EXPECT_TRUE(verifyDFSOrder(F, RefOrder));
1745   EXPECT_TRUE(verifyListOrder(F, RefOrder));
1746 
1747   EXPECT_EQ(Call->getParent(), BodyCode);
1748 
1749   auto *RangeShift0 = cast<AddOperator>(Call->getOperand(1));
1750   EXPECT_EQ(RangeShift0->getOperand(1), OuterStartVal);
1751   auto *RangeScale0 = cast<MulOperator>(RangeShift0->getOperand(0));
1752   EXPECT_EQ(RangeScale0->getOperand(1), OuterStep);
1753   auto *TileShift0 = cast<AddOperator>(RangeScale0->getOperand(0));
1754   EXPECT_EQ(cast<Instruction>(TileShift0)->getParent(), Tile1->getBody());
1755   EXPECT_EQ(TileShift0->getOperand(1), Tile0->getIndVar());
1756   auto *TileScale0 = cast<MulOperator>(TileShift0->getOperand(0));
1757   EXPECT_EQ(cast<Instruction>(TileScale0)->getParent(), Tile1->getBody());
1758   EXPECT_EQ(TileScale0->getOperand(0), TileSize0);
1759   EXPECT_EQ(TileScale0->getOperand(1), Floor0->getIndVar());
1760 
1761   auto *RangeShift1 = cast<AddOperator>(Call->getOperand(2));
1762   EXPECT_EQ(cast<Instruction>(RangeShift1)->getParent(), BodyCode);
1763   EXPECT_EQ(RangeShift1->getOperand(1), InnerStartVal);
1764   auto *RangeScale1 = cast<MulOperator>(RangeShift1->getOperand(0));
1765   EXPECT_EQ(cast<Instruction>(RangeScale1)->getParent(), BodyCode);
1766   EXPECT_EQ(RangeScale1->getOperand(1), InnerStep);
1767   auto *TileShift1 = cast<AddOperator>(RangeScale1->getOperand(0));
1768   EXPECT_EQ(cast<Instruction>(TileShift1)->getParent(), Tile1->getBody());
1769   EXPECT_EQ(TileShift1->getOperand(1), Tile1->getIndVar());
1770   auto *TileScale1 = cast<MulOperator>(TileShift1->getOperand(0));
1771   EXPECT_EQ(cast<Instruction>(TileScale1)->getParent(), Tile1->getBody());
1772   EXPECT_EQ(TileScale1->getOperand(0), TileSize1);
1773   EXPECT_EQ(TileScale1->getOperand(1), Floor1->getIndVar());
1774 }
1775 
1776 TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
1777   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
1778   OpenMPIRBuilder OMPBuilder(*M);
1779   OMPBuilder.initialize();
1780   IRBuilder<> Builder(BB);
1781 
1782   // Create a loop, tile it, and extract its trip count. All input values are
1783   // constant and IRBuilder evaluates all-constant arithmetic inplace, such that
1784   // the floor trip count itself will be a ConstantInt. Unfortunately we cannot
1785   // do the same for the tile loop.
1786   auto GetFloorCount = [&](int64_t Start, int64_t Stop, int64_t Step,
1787                            bool IsSigned, bool InclusiveStop,
1788                            int64_t TileSize) -> uint64_t {
1789     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
1790     Type *LCTy = Type::getInt16Ty(Ctx);
1791     Value *StartVal = ConstantInt::get(LCTy, Start);
1792     Value *StopVal = ConstantInt::get(LCTy, Stop);
1793     Value *StepVal = ConstantInt::get(LCTy, Step);
1794 
1795     // Generate a loop.
1796     auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
1797     CanonicalLoopInfo *Loop =
1798         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
1799                                        StepVal, IsSigned, InclusiveStop);
1800     InsertPointTy AfterIP = Loop->getAfterIP();
1801 
1802     // Tile the loop.
1803     Value *TileSizeVal = ConstantInt::get(LCTy, TileSize);
1804     std::vector<CanonicalLoopInfo *> GenLoops =
1805         OMPBuilder.tileLoops(Loc.DL, {Loop}, {TileSizeVal});
1806 
1807     // Set the insertion pointer to after loop, where the next loop will be
1808     // emitted.
1809     Builder.restoreIP(AfterIP);
1810 
1811     // Extract the trip count.
1812     CanonicalLoopInfo *FloorLoop = GenLoops[0];
1813     Value *FloorTripCount = FloorLoop->getTripCount();
1814     return cast<ConstantInt>(FloorTripCount)->getValue().getZExtValue();
1815   };
1816 
1817   // Empty iteration domain.
1818   EXPECT_EQ(GetFloorCount(0, 0, 1, false, false, 7), 0u);
1819   EXPECT_EQ(GetFloorCount(0, -1, 1, false, true, 7), 0u);
1820   EXPECT_EQ(GetFloorCount(-1, -1, -1, true, false, 7), 0u);
1821   EXPECT_EQ(GetFloorCount(-1, 0, -1, true, true, 7), 0u);
1822   EXPECT_EQ(GetFloorCount(-1, -1, 3, true, false, 7), 0u);
1823 
1824   // Only complete tiles.
1825   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1826   EXPECT_EQ(GetFloorCount(0, 14, 1, false, false, 7), 2u);
1827   EXPECT_EQ(GetFloorCount(1, 15, 1, false, false, 7), 2u);
1828   EXPECT_EQ(GetFloorCount(0, -14, -1, true, false, 7), 2u);
1829   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, true, 7), 2u);
1830   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 2, 3, false, false, 7), 2u);
1831 
1832   // Only a partial tile.
1833   EXPECT_EQ(GetFloorCount(0, 1, 1, false, false, 7), 1u);
1834   EXPECT_EQ(GetFloorCount(0, 6, 1, false, false, 7), 1u);
1835   EXPECT_EQ(GetFloorCount(-1, 1, 3, true, false, 7), 1u);
1836   EXPECT_EQ(GetFloorCount(-1, -2, -1, true, false, 7), 1u);
1837   EXPECT_EQ(GetFloorCount(0, 2, 3, false, false, 7), 1u);
1838 
1839   // Complete and partial tiles.
1840   EXPECT_EQ(GetFloorCount(0, 13, 1, false, false, 7), 2u);
1841   EXPECT_EQ(GetFloorCount(0, 15, 1, false, false, 7), 3u);
1842   EXPECT_EQ(GetFloorCount(-1, -14, -1, true, false, 7), 2u);
1843   EXPECT_EQ(GetFloorCount(0, 3 * 7 * 5 - 1, 3, false, false, 7), 5u);
1844   EXPECT_EQ(GetFloorCount(-1, -3 * 7 * 5, -3, true, false, 7), 5u);
1845 
1846   // Close to 16-bit integer range.
1847   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 1), 0xFFFFu);
1848   EXPECT_EQ(GetFloorCount(0, 0xFFFF, 1, false, false, 7), 0xFFFFu / 7 + 1);
1849   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, true, 7), 0xFFFFu / 7 + 1);
1850   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 7), 0xFFFFu / 7 + 1);
1851   EXPECT_EQ(GetFloorCount(-0x7FFF, 0x7FFF, 1, true, true, 7), 0xFFFFu / 7 + 1);
1852   EXPECT_EQ(GetFloorCount(0, 0xFFFE, 1, false, false, 0xFFFF), 1u);
1853   EXPECT_EQ(GetFloorCount(-0x8000, 0x7FFF, 1, true, false, 0xFFFF), 1u);
1854 
1855   // Finalize the function.
1856   Builder.CreateRetVoid();
1857   OMPBuilder.finalize();
1858 
1859   EXPECT_FALSE(verifyModule(*M, &errs()));
1860 }
1861 
1862 TEST_F(OpenMPIRBuilderTest, ApplySimd) {
1863   OpenMPIRBuilder OMPBuilder(*M);
1864   MapVector<Value *, Value *> AlignedVars;
1865   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1866 
1867   // Simd-ize the loop.
1868   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1869                        OrderKind::OMP_ORDER_unknown,
1870                        /* Simdlen */ nullptr,
1871                        /* Safelen */ nullptr);
1872 
1873   OMPBuilder.finalize();
1874   EXPECT_FALSE(verifyModule(*M, &errs()));
1875 
1876   PassBuilder PB;
1877   FunctionAnalysisManager FAM;
1878   PB.registerFunctionAnalyses(FAM);
1879   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1880 
1881   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1882   EXPECT_EQ(TopLvl.size(), 1u);
1883 
1884   Loop *L = TopLvl.front();
1885   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1886   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1887 
1888   // Check for llvm.access.group metadata attached to the printf
1889   // function in the loop body.
1890   BasicBlock *LoopBody = CLI->getBody();
1891   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1892     return I.getMetadata("llvm.access.group") != nullptr;
1893   }));
1894 }
1895 
1896 TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) {
1897   OpenMPIRBuilder OMPBuilder(*M);
1898   IRBuilder<> Builder(BB);
1899   const int AlignmentValue = 32;
1900   AllocaInst *Alloc1 =
1901       Builder.CreateAlloca(Builder.getPtrTy(), Builder.getInt64(1));
1902   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
1903   MapVector<Value *, Value *> AlignedVars;
1904   AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)});
1905 
1906   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1907 
1908   // Simd-ize the loop.
1909   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
1910                        OrderKind::OMP_ORDER_unknown,
1911                        /* Simdlen */ nullptr,
1912                        /* Safelen */ nullptr);
1913 
1914   OMPBuilder.finalize();
1915   EXPECT_FALSE(verifyModule(*M, &errs()));
1916 
1917   PassBuilder PB;
1918   FunctionAnalysisManager FAM;
1919   PB.registerFunctionAnalyses(FAM);
1920   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1921 
1922   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1923   EXPECT_EQ(TopLvl.size(), 1u);
1924 
1925   Loop *L = TopLvl.front();
1926   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1927   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1928 
1929   // Check for llvm.access.group metadata attached to the printf
1930   // function in the loop body.
1931   BasicBlock *LoopBody = CLI->getBody();
1932   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1933     return I.getMetadata("llvm.access.group") != nullptr;
1934   }));
1935 
1936   // Check if number of assumption instructions is equal to number of aligned
1937   // variables
1938   BasicBlock *LoopPreheader = CLI->getPreheader();
1939   size_t NumAssummptionCallsInPreheader = count_if(
1940       *LoopPreheader, [](Instruction &I) { return isa<AssumeInst>(I); });
1941   EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size());
1942 
1943   // Check if variables are correctly aligned
1944   for (Instruction &Instr : *LoopPreheader) {
1945     if (!isa<AssumeInst>(Instr))
1946       continue;
1947     AssumeInst *AssumeInstruction = cast<AssumeInst>(&Instr);
1948     if (AssumeInstruction->getNumTotalBundleOperands()) {
1949       auto Bundle = AssumeInstruction->getOperandBundleAt(0);
1950       if (Bundle.getTagName() == "align") {
1951         EXPECT_TRUE(isa<ConstantInt>(Bundle.Inputs[1]));
1952         auto ConstIntVal = dyn_cast<ConstantInt>(Bundle.Inputs[1]);
1953         EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue);
1954       }
1955     }
1956   }
1957 }
1958 TEST_F(OpenMPIRBuilderTest, ApplySimdlen) {
1959   OpenMPIRBuilder OMPBuilder(*M);
1960   MapVector<Value *, Value *> AlignedVars;
1961   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1962 
1963   // Simd-ize the loop.
1964   OMPBuilder.applySimd(CLI, AlignedVars,
1965                        /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
1966                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
1967                        /* Safelen */ nullptr);
1968 
1969   OMPBuilder.finalize();
1970   EXPECT_FALSE(verifyModule(*M, &errs()));
1971 
1972   PassBuilder PB;
1973   FunctionAnalysisManager FAM;
1974   PB.registerFunctionAnalyses(FAM);
1975   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
1976 
1977   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
1978   EXPECT_EQ(TopLvl.size(), 1u);
1979 
1980   Loop *L = TopLvl.front();
1981   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
1982   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
1983   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
1984 
1985   // Check for llvm.access.group metadata attached to the printf
1986   // function in the loop body.
1987   BasicBlock *LoopBody = CLI->getBody();
1988   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
1989     return I.getMetadata("llvm.access.group") != nullptr;
1990   }));
1991 }
1992 
1993 TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) {
1994   OpenMPIRBuilder OMPBuilder(*M);
1995   MapVector<Value *, Value *> AlignedVars;
1996 
1997   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
1998 
1999   // Simd-ize the loop.
2000   OMPBuilder.applySimd(
2001       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent,
2002       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2003 
2004   OMPBuilder.finalize();
2005   EXPECT_FALSE(verifyModule(*M, &errs()));
2006 
2007   PassBuilder PB;
2008   FunctionAnalysisManager FAM;
2009   PB.registerFunctionAnalyses(FAM);
2010   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2011 
2012   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2013   EXPECT_EQ(TopLvl.size(), 1u);
2014 
2015   Loop *L = TopLvl.front();
2016   // Parallel metadata shoudl be attached because of presence of
2017   // the order(concurrent) OpenMP clause
2018   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2019   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2020   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2021 
2022   // Check for llvm.access.group metadata attached to the printf
2023   // function in the loop body.
2024   BasicBlock *LoopBody = CLI->getBody();
2025   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2026     return I.getMetadata("llvm.access.group") != nullptr;
2027   }));
2028 }
2029 
2030 TEST_F(OpenMPIRBuilderTest, ApplySafelen) {
2031   OpenMPIRBuilder OMPBuilder(*M);
2032   MapVector<Value *, Value *> AlignedVars;
2033 
2034   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2035 
2036   OMPBuilder.applySimd(
2037       CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown,
2038       /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2039 
2040   OMPBuilder.finalize();
2041   EXPECT_FALSE(verifyModule(*M, &errs()));
2042 
2043   PassBuilder PB;
2044   FunctionAnalysisManager FAM;
2045   PB.registerFunctionAnalyses(FAM);
2046   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2047 
2048   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2049   EXPECT_EQ(TopLvl.size(), 1u);
2050 
2051   Loop *L = TopLvl.front();
2052   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2053   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2054   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2055 
2056   // Check for llvm.access.group metadata attached to the printf
2057   // function in the loop body.
2058   BasicBlock *LoopBody = CLI->getBody();
2059   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2060     return I.getMetadata("llvm.access.group") != nullptr;
2061   }));
2062 }
2063 
2064 TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) {
2065   OpenMPIRBuilder OMPBuilder(*M);
2066   MapVector<Value *, Value *> AlignedVars;
2067 
2068   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2069 
2070   OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr,
2071                        OrderKind::OMP_ORDER_unknown,
2072                        ConstantInt::get(Type::getInt32Ty(Ctx), 2),
2073                        ConstantInt::get(Type::getInt32Ty(Ctx), 3));
2074 
2075   OMPBuilder.finalize();
2076   EXPECT_FALSE(verifyModule(*M, &errs()));
2077 
2078   PassBuilder PB;
2079   FunctionAnalysisManager FAM;
2080   PB.registerFunctionAnalyses(FAM);
2081   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2082 
2083   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2084   EXPECT_EQ(TopLvl.size(), 1u);
2085 
2086   Loop *L = TopLvl.front();
2087   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2088   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2089   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 2);
2090 
2091   // Check for llvm.access.group metadata attached to the printf
2092   // function in the loop body.
2093   BasicBlock *LoopBody = CLI->getBody();
2094   EXPECT_FALSE(any_of(*LoopBody, [](Instruction &I) {
2095     return I.getMetadata("llvm.access.group") != nullptr;
2096   }));
2097 }
2098 
2099 TEST_F(OpenMPIRBuilderTest, ApplySimdIf) {
2100   OpenMPIRBuilder OMPBuilder(*M);
2101   IRBuilder<> Builder(BB);
2102   MapVector<Value *, Value *> AlignedVars;
2103   AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty());
2104   AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty());
2105 
2106   // Generation of if condition
2107   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), Alloc1);
2108   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 1U), Alloc2);
2109   LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1);
2110   LoadInst *Load2 = Builder.CreateLoad(Alloc2->getAllocatedType(), Alloc2);
2111 
2112   Value *IfCmp = Builder.CreateICmpNE(Load1, Load2);
2113 
2114   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2115 
2116   // Simd-ize the loop with if condition
2117   OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown,
2118                        ConstantInt::get(Type::getInt32Ty(Ctx), 3),
2119                        /* Safelen */ nullptr);
2120 
2121   OMPBuilder.finalize();
2122   EXPECT_FALSE(verifyModule(*M, &errs()));
2123 
2124   PassBuilder PB;
2125   FunctionAnalysisManager FAM;
2126   PB.registerFunctionAnalyses(FAM);
2127   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2128 
2129   // Check if there are two loops (one with enabled vectorization)
2130   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2131   EXPECT_EQ(TopLvl.size(), 2u);
2132 
2133   Loop *L = TopLvl[0];
2134   EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2135   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2136   EXPECT_EQ(getIntLoopAttribute(L, "llvm.loop.vectorize.width"), 3);
2137 
2138   // The second loop should have disabled vectorization
2139   L = TopLvl[1];
2140   EXPECT_FALSE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses"));
2141   EXPECT_FALSE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable"));
2142   // Check for llvm.access.group metadata attached to the printf
2143   // function in the loop body.
2144   BasicBlock *LoopBody = CLI->getBody();
2145   EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) {
2146     return I.getMetadata("llvm.access.group") != nullptr;
2147   }));
2148 }
2149 
2150 TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) {
2151   OpenMPIRBuilder OMPBuilder(*M);
2152 
2153   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2154 
2155   // Unroll the loop.
2156   OMPBuilder.unrollLoopFull(DL, CLI);
2157 
2158   OMPBuilder.finalize();
2159   EXPECT_FALSE(verifyModule(*M, &errs()));
2160 
2161   PassBuilder PB;
2162   FunctionAnalysisManager FAM;
2163   PB.registerFunctionAnalyses(FAM);
2164   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2165 
2166   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2167   EXPECT_EQ(TopLvl.size(), 1u);
2168 
2169   Loop *L = TopLvl.front();
2170   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2171   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.full"));
2172 }
2173 
2174 TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) {
2175   OpenMPIRBuilder OMPBuilder(*M);
2176   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2177 
2178   // Unroll the loop.
2179   CanonicalLoopInfo *UnrolledLoop = nullptr;
2180   OMPBuilder.unrollLoopPartial(DL, CLI, 5, &UnrolledLoop);
2181   ASSERT_NE(UnrolledLoop, nullptr);
2182 
2183   OMPBuilder.finalize();
2184   EXPECT_FALSE(verifyModule(*M, &errs()));
2185   UnrolledLoop->assertOK();
2186 
2187   PassBuilder PB;
2188   FunctionAnalysisManager FAM;
2189   PB.registerFunctionAnalyses(FAM);
2190   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2191 
2192   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2193   EXPECT_EQ(TopLvl.size(), 1u);
2194   Loop *Outer = TopLvl.front();
2195   EXPECT_EQ(Outer->getHeader(), UnrolledLoop->getHeader());
2196   EXPECT_EQ(Outer->getLoopLatch(), UnrolledLoop->getLatch());
2197   EXPECT_EQ(Outer->getExitingBlock(), UnrolledLoop->getCond());
2198   EXPECT_EQ(Outer->getExitBlock(), UnrolledLoop->getExit());
2199 
2200   EXPECT_EQ(Outer->getSubLoops().size(), 1u);
2201   Loop *Inner = Outer->getSubLoops().front();
2202 
2203   EXPECT_TRUE(getBooleanLoopAttribute(Inner, "llvm.loop.unroll.enable"));
2204   EXPECT_EQ(getIntLoopAttribute(Inner, "llvm.loop.unroll.count"), 5);
2205 }
2206 
2207 TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) {
2208   OpenMPIRBuilder OMPBuilder(*M);
2209 
2210   CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32);
2211 
2212   // Unroll the loop.
2213   OMPBuilder.unrollLoopHeuristic(DL, CLI);
2214 
2215   OMPBuilder.finalize();
2216   EXPECT_FALSE(verifyModule(*M, &errs()));
2217 
2218   PassBuilder PB;
2219   FunctionAnalysisManager FAM;
2220   PB.registerFunctionAnalyses(FAM);
2221   LoopInfo &LI = FAM.getResult<LoopAnalysis>(*F);
2222 
2223   const std::vector<Loop *> &TopLvl = LI.getTopLevelLoops();
2224   EXPECT_EQ(TopLvl.size(), 1u);
2225 
2226   Loop *L = TopLvl.front();
2227   EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"));
2228 }
2229 
2230 TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
2231   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2232   std::string oldDLStr = M->getDataLayoutStr();
2233   M->setDataLayout(
2234       "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:"
2235       "256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:"
2236       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
2237   OpenMPIRBuilder OMPBuilder(*M);
2238   OMPBuilder.Config.IsTargetDevice = true;
2239   OMPBuilder.initialize();
2240   IRBuilder<> Builder(BB);
2241   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2242   InsertPointTy AllocaIP = Builder.saveIP();
2243 
2244   Type *LCTy = Type::getInt32Ty(Ctx);
2245   Value *StartVal = ConstantInt::get(LCTy, 10);
2246   Value *StopVal = ConstantInt::get(LCTy, 52);
2247   Value *StepVal = ConstantInt::get(LCTy, 2);
2248   auto LoopBodyGen = [&](InsertPointTy, Value *) {};
2249 
2250   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2251       Loc, LoopBodyGen, StartVal, StopVal, StepVal, false, false);
2252   BasicBlock *Preheader = CLI->getPreheader();
2253   Value *TripCount = CLI->getTripCount();
2254 
2255   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2256 
2257   IRBuilder<>::InsertPoint AfterIP = OMPBuilder.applyWorkshareLoop(
2258       DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, nullptr, false, false,
2259       false, false, WorksharingLoopType::ForStaticLoop);
2260   Builder.restoreIP(AfterIP);
2261   Builder.CreateRetVoid();
2262 
2263   OMPBuilder.finalize();
2264   EXPECT_FALSE(verifyModule(*M, &errs()));
2265 
2266   CallInst *WorkshareLoopRuntimeCall = nullptr;
2267   int WorkshareLoopRuntimeCallCnt = 0;
2268   for (auto Inst = Preheader->begin(); Inst != Preheader->end(); ++Inst) {
2269     CallInst *Call = dyn_cast<CallInst>(Inst);
2270     if (!Call)
2271       continue;
2272     if (!Call->getCalledFunction())
2273       continue;
2274 
2275     if (Call->getCalledFunction()->getName() == "__kmpc_for_static_loop_4u") {
2276       WorkshareLoopRuntimeCall = Call;
2277       WorkshareLoopRuntimeCallCnt++;
2278     }
2279   }
2280   EXPECT_NE(WorkshareLoopRuntimeCall, nullptr);
2281   // Verify that there is only one call to workshare loop function
2282   EXPECT_EQ(WorkshareLoopRuntimeCallCnt, 1);
2283   // Check that pointer to loop body function is passed as second argument
2284   Value *LoopBodyFuncArg = WorkshareLoopRuntimeCall->getArgOperand(1);
2285   EXPECT_EQ(Builder.getPtrTy(), LoopBodyFuncArg->getType());
2286   Function *ArgFunction = dyn_cast<Function>(LoopBodyFuncArg);
2287   EXPECT_NE(ArgFunction, nullptr);
2288   EXPECT_EQ(ArgFunction->arg_size(), 1u);
2289   EXPECT_EQ(ArgFunction->getArg(0)->getType(), TripCount->getType());
2290   // Check that no variables except for loop counter are used in loop body
2291   EXPECT_EQ(Constant::getNullValue(Builder.getPtrTy()),
2292             WorkshareLoopRuntimeCall->getArgOperand(2));
2293   // Check loop trip count argument
2294   EXPECT_EQ(TripCount, WorkshareLoopRuntimeCall->getArgOperand(3));
2295 }
2296 
2297 TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
2298   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2299   OpenMPIRBuilder OMPBuilder(*M);
2300   OMPBuilder.Config.IsTargetDevice = false;
2301   OMPBuilder.initialize();
2302   IRBuilder<> Builder(BB);
2303   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2304 
2305   Type *LCTy = Type::getInt32Ty(Ctx);
2306   Value *StartVal = ConstantInt::get(LCTy, 10);
2307   Value *StopVal = ConstantInt::get(LCTy, 52);
2308   Value *StepVal = ConstantInt::get(LCTy, 2);
2309   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2310 
2311   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2312       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2313       /*IsSigned=*/false, /*InclusiveStop=*/false);
2314   BasicBlock *Preheader = CLI->getPreheader();
2315   BasicBlock *Body = CLI->getBody();
2316   Value *IV = CLI->getIndVar();
2317   BasicBlock *ExitBlock = CLI->getExit();
2318 
2319   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2320   InsertPointTy AllocaIP = Builder.saveIP();
2321 
2322   OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2323                                 OMP_SCHEDULE_Static);
2324 
2325   BasicBlock *Cond = Body->getSinglePredecessor();
2326   Instruction *Cmp = &*Cond->begin();
2327   Value *TripCount = Cmp->getOperand(1);
2328 
2329   auto AllocaIter = BB->begin();
2330   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2331   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2332   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2333   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2334   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2335   EXPECT_NE(PLastIter, nullptr);
2336   EXPECT_NE(PLowerBound, nullptr);
2337   EXPECT_NE(PUpperBound, nullptr);
2338   EXPECT_NE(PStride, nullptr);
2339 
2340   auto PreheaderIter = Preheader->begin();
2341   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 7);
2342   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2343   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2344   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2345   ASSERT_NE(LowerBoundStore, nullptr);
2346   ASSERT_NE(UpperBoundStore, nullptr);
2347   ASSERT_NE(StrideStore, nullptr);
2348 
2349   auto *OrigLowerBound =
2350       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2351   auto *OrigUpperBound =
2352       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2353   auto *OrigStride = dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2354   ASSERT_NE(OrigLowerBound, nullptr);
2355   ASSERT_NE(OrigUpperBound, nullptr);
2356   ASSERT_NE(OrigStride, nullptr);
2357   EXPECT_EQ(OrigLowerBound->getValue(), 0);
2358   EXPECT_EQ(OrigUpperBound->getValue(), 20);
2359   EXPECT_EQ(OrigStride->getValue(), 1);
2360 
2361   // Check that the loop IV is updated to account for the lower bound returned
2362   // by the OpenMP runtime call.
2363   BinaryOperator *Add = dyn_cast<BinaryOperator>(&Body->front());
2364   EXPECT_EQ(Add->getOperand(0), IV);
2365   auto *LoadedLowerBound = dyn_cast<LoadInst>(Add->getOperand(1));
2366   ASSERT_NE(LoadedLowerBound, nullptr);
2367   EXPECT_EQ(LoadedLowerBound->getPointerOperand(), PLowerBound);
2368 
2369   // Check that the trip count is updated to account for the lower and upper
2370   // bounds return by the OpenMP runtime call.
2371   auto *AddOne = dyn_cast<Instruction>(TripCount);
2372   ASSERT_NE(AddOne, nullptr);
2373   ASSERT_TRUE(AddOne->isBinaryOp());
2374   auto *One = dyn_cast<ConstantInt>(AddOne->getOperand(1));
2375   ASSERT_NE(One, nullptr);
2376   EXPECT_EQ(One->getValue(), 1);
2377   auto *Difference = dyn_cast<Instruction>(AddOne->getOperand(0));
2378   ASSERT_NE(Difference, nullptr);
2379   ASSERT_TRUE(Difference->isBinaryOp());
2380   EXPECT_EQ(Difference->getOperand(1), LoadedLowerBound);
2381   auto *LoadedUpperBound = dyn_cast<LoadInst>(Difference->getOperand(0));
2382   ASSERT_NE(LoadedUpperBound, nullptr);
2383   EXPECT_EQ(LoadedUpperBound->getPointerOperand(), PUpperBound);
2384 
2385   // The original loop iterator should only be used in the condition, in the
2386   // increment and in the statement that adds the lower bound to it.
2387   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2388 
2389   // The exit block should contain the "fini" call and the barrier call,
2390   // plus the call to obtain the thread ID.
2391   size_t NumCallsInExitBlock =
2392       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2393   EXPECT_EQ(NumCallsInExitBlock, 3u);
2394 }
2395 
2396 TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
2397   unsigned IVBits = GetParam();
2398 
2399   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2400   OpenMPIRBuilder OMPBuilder(*M);
2401   OMPBuilder.Config.IsTargetDevice = false;
2402 
2403   BasicBlock *Body;
2404   CallInst *Call;
2405   CanonicalLoopInfo *CLI =
2406       buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body);
2407 
2408   Instruction *OrigIndVar = CLI->getIndVar();
2409   EXPECT_EQ(Call->getOperand(1), OrigIndVar);
2410 
2411   Type *LCTy = Type::getInt32Ty(Ctx);
2412   Value *ChunkSize = ConstantInt::get(LCTy, 5);
2413   InsertPointTy AllocaIP{&F->getEntryBlock(),
2414                          F->getEntryBlock().getFirstInsertionPt()};
2415   OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
2416                                 OMP_SCHEDULE_Static, ChunkSize);
2417 
2418   OMPBuilder.finalize();
2419   EXPECT_FALSE(verifyModule(*M, &errs()));
2420 
2421   BasicBlock *Entry = &F->getEntryBlock();
2422   BasicBlock *Preheader = Entry->getSingleSuccessor();
2423 
2424   BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor();
2425   BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor();
2426   BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor();
2427   BasicBlock *DispatchBody = succ_begin(DispatchCond)[0];
2428   BasicBlock *DispatchExit = succ_begin(DispatchCond)[1];
2429   BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor();
2430   BasicBlock *Return = DispatchAfter->getSingleSuccessor();
2431 
2432   BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor();
2433   BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor();
2434   BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor();
2435   BasicBlock *ChunkBody = succ_begin(ChunkCond)[0];
2436   BasicBlock *ChunkExit = succ_begin(ChunkCond)[1];
2437   BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor();
2438   BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor();
2439 
2440   BasicBlock *DispatchInc = ChunkAfter;
2441 
2442   EXPECT_EQ(ChunkBody, Body);
2443   EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader);
2444   EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader);
2445 
2446   EXPECT_TRUE(isa<ReturnInst>(Return->front()));
2447 
2448   Value *NewIV = Call->getOperand(1);
2449   EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits);
2450 
2451   CallInst *InitCall = findSingleCall(
2452       F,
2453       (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
2454                     : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u,
2455       OMPBuilder);
2456   EXPECT_EQ(InitCall->getParent(), Preheader);
2457   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33);
2458   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1);
2459   EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5);
2460 
2461   CallInst *FiniCall = findSingleCall(
2462       F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder);
2463   EXPECT_EQ(FiniCall->getParent(), DispatchExit);
2464 
2465   CallInst *BarrierCall = findSingleCall(
2466       F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder);
2467   EXPECT_EQ(BarrierCall->getParent(), DispatchExit);
2468 }
2469 
2470 INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits,
2471                          ::testing::Values(8, 16, 32, 64));
2472 
2473 TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
2474   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2475   OpenMPIRBuilder OMPBuilder(*M);
2476   OMPBuilder.Config.IsTargetDevice = false;
2477   OMPBuilder.initialize();
2478   IRBuilder<> Builder(BB);
2479   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2480 
2481   omp::OMPScheduleType SchedType = GetParam();
2482   uint32_t ChunkSize = 1;
2483   switch (SchedType & ~OMPScheduleType::ModifierMask) {
2484   case omp::OMPScheduleType::BaseDynamicChunked:
2485   case omp::OMPScheduleType::BaseGuidedChunked:
2486     ChunkSize = 7;
2487     break;
2488   case omp::OMPScheduleType::BaseAuto:
2489   case omp::OMPScheduleType::BaseRuntime:
2490     ChunkSize = 1;
2491     break;
2492   default:
2493     assert(0 && "unknown type for this test");
2494     break;
2495   }
2496 
2497   Type *LCTy = Type::getInt32Ty(Ctx);
2498   Value *StartVal = ConstantInt::get(LCTy, 10);
2499   Value *StopVal = ConstantInt::get(LCTy, 52);
2500   Value *StepVal = ConstantInt::get(LCTy, 2);
2501   Value *ChunkVal =
2502       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
2503   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2504 
2505   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2506       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2507       /*IsSigned=*/false, /*InclusiveStop=*/false);
2508 
2509   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2510   InsertPointTy AllocaIP = Builder.saveIP();
2511 
2512   // Collect all the info from CLI, as it isn't usable after the call to
2513   // createDynamicWorkshareLoop.
2514   InsertPointTy AfterIP = CLI->getAfterIP();
2515   BasicBlock *Preheader = CLI->getPreheader();
2516   BasicBlock *ExitBlock = CLI->getExit();
2517   BasicBlock *LatchBlock = CLI->getLatch();
2518   Value *IV = CLI->getIndVar();
2519 
2520   InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
2521       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
2522       ChunkVal, /*Simd=*/false,
2523       (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
2524           omp::OMPScheduleType::ModifierMonotonic,
2525       (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
2526           omp::OMPScheduleType::ModifierNonmonotonic,
2527       /*Ordered=*/false);
2528 
2529   // The returned value should be the "after" point.
2530   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
2531   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
2532 
2533   auto AllocaIter = BB->begin();
2534   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
2535   AllocaInst *PLastIter = dyn_cast<AllocaInst>(&*(AllocaIter++));
2536   AllocaInst *PLowerBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2537   AllocaInst *PUpperBound = dyn_cast<AllocaInst>(&*(AllocaIter++));
2538   AllocaInst *PStride = dyn_cast<AllocaInst>(&*(AllocaIter++));
2539   EXPECT_NE(PLastIter, nullptr);
2540   EXPECT_NE(PLowerBound, nullptr);
2541   EXPECT_NE(PUpperBound, nullptr);
2542   EXPECT_NE(PStride, nullptr);
2543 
2544   auto PreheaderIter = Preheader->begin();
2545   ASSERT_GE(std::distance(Preheader->begin(), Preheader->end()), 6);
2546   StoreInst *LowerBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2547   StoreInst *UpperBoundStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2548   StoreInst *StrideStore = dyn_cast<StoreInst>(&*(PreheaderIter++));
2549   ASSERT_NE(LowerBoundStore, nullptr);
2550   ASSERT_NE(UpperBoundStore, nullptr);
2551   ASSERT_NE(StrideStore, nullptr);
2552 
2553   CallInst *ThreadIdCall = dyn_cast<CallInst>(&*(PreheaderIter++));
2554   ASSERT_NE(ThreadIdCall, nullptr);
2555   EXPECT_EQ(ThreadIdCall->getCalledFunction()->getName(),
2556             "__kmpc_global_thread_num");
2557 
2558   CallInst *InitCall = dyn_cast<CallInst>(&*PreheaderIter);
2559 
2560   ASSERT_NE(InitCall, nullptr);
2561   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
2562             "__kmpc_dispatch_init_4u");
2563   EXPECT_EQ(InitCall->arg_size(), 7U);
2564   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
2565   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2566   if ((SchedType & OMPScheduleType::MonotonicityMask) ==
2567       OMPScheduleType::None) {
2568     // Implementation is allowed to add default nonmonotonicity flag
2569     EXPECT_EQ(
2570         static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
2571             OMPScheduleType::ModifierNonmonotonic,
2572         SchedType | OMPScheduleType::ModifierNonmonotonic);
2573   } else {
2574     EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
2575               SchedType);
2576   }
2577 
2578   ConstantInt *OrigLowerBound =
2579       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
2580   ConstantInt *OrigUpperBound =
2581       dyn_cast<ConstantInt>(UpperBoundStore->getValueOperand());
2582   ConstantInt *OrigStride =
2583       dyn_cast<ConstantInt>(StrideStore->getValueOperand());
2584   ASSERT_NE(OrigLowerBound, nullptr);
2585   ASSERT_NE(OrigUpperBound, nullptr);
2586   ASSERT_NE(OrigStride, nullptr);
2587   EXPECT_EQ(OrigLowerBound->getValue(), 1);
2588   EXPECT_EQ(OrigUpperBound->getValue(), 21);
2589   EXPECT_EQ(OrigStride->getValue(), 1);
2590 
2591   CallInst *FiniCall = dyn_cast<CallInst>(
2592       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2593   EXPECT_EQ(FiniCall, nullptr);
2594 
2595   // The original loop iterator should only be used in the condition, in the
2596   // increment and in the statement that adds the lower bound to it.
2597   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2598 
2599   // The exit block should contain the barrier call, plus the call to obtain
2600   // the thread ID.
2601   size_t NumCallsInExitBlock =
2602       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2603   EXPECT_EQ(NumCallsInExitBlock, 2u);
2604 
2605   // Add a termination to our block and check that it is internally consistent.
2606   Builder.restoreIP(EndIP);
2607   Builder.CreateRetVoid();
2608   OMPBuilder.finalize();
2609   EXPECT_FALSE(verifyModule(*M, &errs()));
2610 }
2611 
2612 INSTANTIATE_TEST_SUITE_P(
2613     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
2614     ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
2615                       omp::OMPScheduleType::UnorderedGuidedChunked,
2616                       omp::OMPScheduleType::UnorderedAuto,
2617                       omp::OMPScheduleType::UnorderedRuntime,
2618                       omp::OMPScheduleType::UnorderedDynamicChunked |
2619                           omp::OMPScheduleType::ModifierMonotonic,
2620                       omp::OMPScheduleType::UnorderedDynamicChunked |
2621                           omp::OMPScheduleType::ModifierNonmonotonic,
2622                       omp::OMPScheduleType::UnorderedGuidedChunked |
2623                           omp::OMPScheduleType::ModifierMonotonic,
2624                       omp::OMPScheduleType::UnorderedGuidedChunked |
2625                           omp::OMPScheduleType::ModifierNonmonotonic,
2626                       omp::OMPScheduleType::UnorderedAuto |
2627                           omp::OMPScheduleType::ModifierMonotonic,
2628                       omp::OMPScheduleType::UnorderedRuntime |
2629                           omp::OMPScheduleType::ModifierMonotonic));
2630 
2631 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2632   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2633   OpenMPIRBuilder OMPBuilder(*M);
2634   OMPBuilder.Config.IsTargetDevice = false;
2635   OMPBuilder.initialize();
2636   IRBuilder<> Builder(BB);
2637   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2638 
2639   uint32_t ChunkSize = 1;
2640   Type *LCTy = Type::getInt32Ty(Ctx);
2641   Value *StartVal = ConstantInt::get(LCTy, 10);
2642   Value *StopVal = ConstantInt::get(LCTy, 52);
2643   Value *StepVal = ConstantInt::get(LCTy, 2);
2644   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2645   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2646 
2647   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2648       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2649       /*IsSigned=*/false, /*InclusiveStop=*/false);
2650 
2651   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2652   InsertPointTy AllocaIP = Builder.saveIP();
2653 
2654   // Collect all the info from CLI, as it isn't usable after the call to
2655   // createDynamicWorkshareLoop.
2656   BasicBlock *Preheader = CLI->getPreheader();
2657   BasicBlock *ExitBlock = CLI->getExit();
2658   BasicBlock *LatchBlock = CLI->getLatch();
2659   Value *IV = CLI->getIndVar();
2660 
2661   InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
2662       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal,
2663       /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false,
2664       /*HasNonmonotonicModifier=*/false,
2665       /*HasOrderedClause=*/true);
2666 
2667   // Add a termination to our block and check that it is internally consistent.
2668   Builder.restoreIP(EndIP);
2669   Builder.CreateRetVoid();
2670   OMPBuilder.finalize();
2671   EXPECT_FALSE(verifyModule(*M, &errs()));
2672 
2673   CallInst *InitCall = nullptr;
2674   for (Instruction &EI : *Preheader) {
2675     Instruction *Cur = &EI;
2676     if (isa<CallInst>(Cur)) {
2677       InitCall = cast<CallInst>(Cur);
2678       if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2679         break;
2680       InitCall = nullptr;
2681     }
2682   }
2683   EXPECT_NE(InitCall, nullptr);
2684   EXPECT_EQ(InitCall->arg_size(), 7U);
2685   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2686   EXPECT_EQ(SchedVal->getValue(),
2687             static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));
2688 
2689   CallInst *FiniCall = dyn_cast<CallInst>(
2690       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2691   ASSERT_NE(FiniCall, nullptr);
2692   EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2693             "__kmpc_dispatch_fini_4u");
2694   EXPECT_EQ(FiniCall->arg_size(), 2U);
2695   EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2696   EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2697 
2698   // The original loop iterator should only be used in the condition, in the
2699   // increment and in the statement that adds the lower bound to it.
2700   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2701 
2702   // The exit block should contain the barrier call, plus the call to obtain
2703   // the thread ID.
2704   size_t NumCallsInExitBlock =
2705       count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2706   EXPECT_EQ(NumCallsInExitBlock, 2u);
2707 }
2708 
2709 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
2710   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2711   OpenMPIRBuilder OMPBuilder(*M);
2712   OMPBuilder.initialize();
2713   F->setName("func");
2714   IRBuilder<> Builder(BB);
2715 
2716   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2717 
2718   AllocaInst *PrivAI = nullptr;
2719 
2720   BasicBlock *EntryBB = nullptr;
2721   BasicBlock *ThenBB = nullptr;
2722 
2723   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2724     if (AllocaIP.isSet())
2725       Builder.restoreIP(AllocaIP);
2726     else
2727       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2728     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2729     Builder.CreateStore(F->arg_begin(), PrivAI);
2730 
2731     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2732     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2733     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2734 
2735     Builder.restoreIP(CodeGenIP);
2736 
2737     // collect some info for checks later
2738     ThenBB = Builder.GetInsertBlock();
2739     EntryBB = ThenBB->getUniquePredecessor();
2740 
2741     // simple instructions for body
2742     Value *PrivLoad =
2743         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2744     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2745   };
2746 
2747   auto FiniCB = [&](InsertPointTy IP) {
2748     BasicBlock *IPBB = IP.getBlock();
2749     EXPECT_NE(IPBB->end(), IP.getPoint());
2750   };
2751 
2752   Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
2753   Value *EntryBBTI = EntryBB->getTerminator();
2754   EXPECT_NE(EntryBBTI, nullptr);
2755   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2756   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2757   EXPECT_TRUE(EntryBr->isConditional());
2758   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2759   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2760   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2761 
2762   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2763   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2764 
2765   CallInst *MasterEntryCI = cast<CallInst>(CondInst->getOperand(0));
2766   EXPECT_EQ(MasterEntryCI->arg_size(), 2U);
2767   EXPECT_EQ(MasterEntryCI->getCalledFunction()->getName(), "__kmpc_master");
2768   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
2769 
2770   CallInst *MasterEndCI = nullptr;
2771   for (auto &FI : *ThenBB) {
2772     Instruction *cur = &FI;
2773     if (isa<CallInst>(cur)) {
2774       MasterEndCI = cast<CallInst>(cur);
2775       if (MasterEndCI->getCalledFunction()->getName() == "__kmpc_end_master")
2776         break;
2777       MasterEndCI = nullptr;
2778     }
2779   }
2780   EXPECT_NE(MasterEndCI, nullptr);
2781   EXPECT_EQ(MasterEndCI->arg_size(), 2U);
2782   EXPECT_TRUE(isa<GlobalVariable>(MasterEndCI->getArgOperand(0)));
2783   EXPECT_EQ(MasterEndCI->getArgOperand(1), MasterEntryCI->getArgOperand(1));
2784 }
2785 
2786 TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
2787   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2788   OpenMPIRBuilder OMPBuilder(*M);
2789   OMPBuilder.initialize();
2790   F->setName("func");
2791   IRBuilder<> Builder(BB);
2792 
2793   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2794 
2795   AllocaInst *PrivAI = nullptr;
2796 
2797   BasicBlock *EntryBB = nullptr;
2798   BasicBlock *ThenBB = nullptr;
2799 
2800   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2801     if (AllocaIP.isSet())
2802       Builder.restoreIP(AllocaIP);
2803     else
2804       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
2805     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2806     Builder.CreateStore(F->arg_begin(), PrivAI);
2807 
2808     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2809     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2810     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2811 
2812     Builder.restoreIP(CodeGenIP);
2813 
2814     // collect some info for checks later
2815     ThenBB = Builder.GetInsertBlock();
2816     EntryBB = ThenBB->getUniquePredecessor();
2817 
2818     // simple instructions for body
2819     Value *PrivLoad =
2820         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2821     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2822   };
2823 
2824   auto FiniCB = [&](InsertPointTy IP) {
2825     BasicBlock *IPBB = IP.getBlock();
2826     EXPECT_NE(IPBB->end(), IP.getPoint());
2827   };
2828 
2829   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
2830   Builder.restoreIP(
2831       OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter));
2832   Value *EntryBBTI = EntryBB->getTerminator();
2833   EXPECT_NE(EntryBBTI, nullptr);
2834   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
2835   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
2836   EXPECT_TRUE(EntryBr->isConditional());
2837   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
2838   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
2839   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
2840 
2841   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
2842   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
2843 
2844   CallInst *MaskedEntryCI = cast<CallInst>(CondInst->getOperand(0));
2845   EXPECT_EQ(MaskedEntryCI->arg_size(), 3U);
2846   EXPECT_EQ(MaskedEntryCI->getCalledFunction()->getName(), "__kmpc_masked");
2847   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
2848 
2849   CallInst *MaskedEndCI = nullptr;
2850   for (auto &FI : *ThenBB) {
2851     Instruction *cur = &FI;
2852     if (isa<CallInst>(cur)) {
2853       MaskedEndCI = cast<CallInst>(cur);
2854       if (MaskedEndCI->getCalledFunction()->getName() == "__kmpc_end_masked")
2855         break;
2856       MaskedEndCI = nullptr;
2857     }
2858   }
2859   EXPECT_NE(MaskedEndCI, nullptr);
2860   EXPECT_EQ(MaskedEndCI->arg_size(), 2U);
2861   EXPECT_TRUE(isa<GlobalVariable>(MaskedEndCI->getArgOperand(0)));
2862   EXPECT_EQ(MaskedEndCI->getArgOperand(1), MaskedEntryCI->getArgOperand(1));
2863 }
2864 
2865 TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
2866   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2867   OpenMPIRBuilder OMPBuilder(*M);
2868   OMPBuilder.initialize();
2869   F->setName("func");
2870   IRBuilder<> Builder(BB);
2871 
2872   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2873 
2874   AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
2875 
2876   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
2877     // actual start for bodyCB
2878     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
2879     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
2880     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
2881 
2882     // body begin
2883     Builder.restoreIP(CodeGenIP);
2884     Builder.CreateStore(F->arg_begin(), PrivAI);
2885     Value *PrivLoad =
2886         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
2887     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
2888   };
2889 
2890   auto FiniCB = [&](InsertPointTy IP) {
2891     BasicBlock *IPBB = IP.getBlock();
2892     EXPECT_NE(IPBB->end(), IP.getPoint());
2893   };
2894   BasicBlock *EntryBB = Builder.GetInsertBlock();
2895 
2896   Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
2897                                               "testCRT", nullptr));
2898 
2899   CallInst *CriticalEntryCI = nullptr;
2900   for (auto &EI : *EntryBB) {
2901     Instruction *cur = &EI;
2902     if (isa<CallInst>(cur)) {
2903       CriticalEntryCI = cast<CallInst>(cur);
2904       if (CriticalEntryCI->getCalledFunction()->getName() == "__kmpc_critical")
2905         break;
2906       CriticalEntryCI = nullptr;
2907     }
2908   }
2909   EXPECT_NE(CriticalEntryCI, nullptr);
2910   EXPECT_EQ(CriticalEntryCI->arg_size(), 3U);
2911   EXPECT_EQ(CriticalEntryCI->getCalledFunction()->getName(), "__kmpc_critical");
2912   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
2913 
2914   CallInst *CriticalEndCI = nullptr;
2915   for (auto &FI : *EntryBB) {
2916     Instruction *cur = &FI;
2917     if (isa<CallInst>(cur)) {
2918       CriticalEndCI = cast<CallInst>(cur);
2919       if (CriticalEndCI->getCalledFunction()->getName() ==
2920           "__kmpc_end_critical")
2921         break;
2922       CriticalEndCI = nullptr;
2923     }
2924   }
2925   EXPECT_NE(CriticalEndCI, nullptr);
2926   EXPECT_EQ(CriticalEndCI->arg_size(), 3U);
2927   EXPECT_TRUE(isa<GlobalVariable>(CriticalEndCI->getArgOperand(0)));
2928   EXPECT_EQ(CriticalEndCI->getArgOperand(1), CriticalEntryCI->getArgOperand(1));
2929   PointerType *CriticalNamePtrTy =
2930       PointerType::getUnqual(ArrayType::get(Type::getInt32Ty(Ctx), 8));
2931   EXPECT_EQ(CriticalEndCI->getArgOperand(2), CriticalEntryCI->getArgOperand(2));
2932   GlobalVariable *GV =
2933       dyn_cast<GlobalVariable>(CriticalEndCI->getArgOperand(2));
2934   ASSERT_NE(GV, nullptr);
2935   EXPECT_EQ(GV->getType(), CriticalNamePtrTy);
2936   const DataLayout &DL = M->getDataLayout();
2937   const llvm::Align TypeAlign = DL.getABITypeAlign(CriticalNamePtrTy);
2938   const llvm::Align PtrAlign = DL.getPointerABIAlignment(GV->getAddressSpace());
2939   if (const llvm::MaybeAlign Alignment = GV->getAlign())
2940     EXPECT_EQ(*Alignment, std::max(TypeAlign, PtrAlign));
2941 }
2942 
2943 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
2944   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2945   OpenMPIRBuilder OMPBuilder(*M);
2946   OMPBuilder.initialize();
2947   F->setName("func");
2948   IRBuilder<> Builder(BB);
2949   LLVMContext &Ctx = M->getContext();
2950 
2951   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2952 
2953   InsertPointTy AllocaIP(&F->getEntryBlock(),
2954                          F->getEntryBlock().getFirstInsertionPt());
2955 
2956   unsigned NumLoops = 2;
2957   SmallVector<Value *, 2> StoreValues;
2958   Type *LCTy = Type::getInt64Ty(Ctx);
2959   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
2960   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
2961 
2962   // Test for "#omp ordered depend(source)"
2963   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
2964                                                    StoreValues, ".cnt.addr",
2965                                                    /*IsDependSource=*/true));
2966 
2967   Builder.CreateRetVoid();
2968   OMPBuilder.finalize();
2969   EXPECT_FALSE(verifyModule(*M, &errs()));
2970 
2971   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
2972   ASSERT_NE(AllocInst, nullptr);
2973   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
2974   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
2975   EXPECT_TRUE(
2976       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
2977 
2978   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
2979   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
2980     GetElementPtrInst *DependAddrGEPIter =
2981         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
2982     ASSERT_NE(DependAddrGEPIter, nullptr);
2983     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
2984     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
2985     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
2986     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
2987     ASSERT_NE(FirstIdx, nullptr);
2988     ASSERT_NE(SecondIdx, nullptr);
2989     EXPECT_EQ(FirstIdx->getValue(), 0);
2990     EXPECT_EQ(SecondIdx->getValue(), Iter);
2991     StoreInst *StoreValue =
2992         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
2993     ASSERT_NE(StoreValue, nullptr);
2994     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
2995     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
2996     EXPECT_EQ(StoreValue->getAlign(), Align(8));
2997     IterInst = dyn_cast<Instruction>(StoreValue);
2998   }
2999 
3000   GetElementPtrInst *DependBaseAddrGEP =
3001       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3002   ASSERT_NE(DependBaseAddrGEP, nullptr);
3003   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3004   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3005   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3006   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3007   ASSERT_NE(FirstIdx, nullptr);
3008   ASSERT_NE(SecondIdx, nullptr);
3009   EXPECT_EQ(FirstIdx->getValue(), 0);
3010   EXPECT_EQ(SecondIdx->getValue(), 0);
3011 
3012   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3013   ASSERT_NE(GTID, nullptr);
3014   EXPECT_EQ(GTID->arg_size(), 1U);
3015   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3016   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3017   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3018 
3019   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3020   ASSERT_NE(Depend, nullptr);
3021   EXPECT_EQ(Depend->arg_size(), 3U);
3022   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
3023   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3024   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3025   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3026 }
3027 
3028 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
3029   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3030   OpenMPIRBuilder OMPBuilder(*M);
3031   OMPBuilder.initialize();
3032   F->setName("func");
3033   IRBuilder<> Builder(BB);
3034   LLVMContext &Ctx = M->getContext();
3035 
3036   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3037 
3038   InsertPointTy AllocaIP(&F->getEntryBlock(),
3039                          F->getEntryBlock().getFirstInsertionPt());
3040 
3041   unsigned NumLoops = 2;
3042   SmallVector<Value *, 2> StoreValues;
3043   Type *LCTy = Type::getInt64Ty(Ctx);
3044   StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
3045   StoreValues.emplace_back(ConstantInt::get(LCTy, 2));
3046 
3047   // Test for "#omp ordered depend(sink: vec)"
3048   Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
3049                                                    StoreValues, ".cnt.addr",
3050                                                    /*IsDependSource=*/false));
3051 
3052   Builder.CreateRetVoid();
3053   OMPBuilder.finalize();
3054   EXPECT_FALSE(verifyModule(*M, &errs()));
3055 
3056   AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
3057   ASSERT_NE(AllocInst, nullptr);
3058   ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
3059   EXPECT_EQ(ArrType->getNumElements(), NumLoops);
3060   EXPECT_TRUE(
3061       AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));
3062 
3063   Instruction *IterInst = dyn_cast<Instruction>(AllocInst);
3064   for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
3065     GetElementPtrInst *DependAddrGEPIter =
3066         dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3067     ASSERT_NE(DependAddrGEPIter, nullptr);
3068     EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
3069     EXPECT_EQ(DependAddrGEPIter->getNumIndices(), (unsigned)2);
3070     auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
3071     auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
3072     ASSERT_NE(FirstIdx, nullptr);
3073     ASSERT_NE(SecondIdx, nullptr);
3074     EXPECT_EQ(FirstIdx->getValue(), 0);
3075     EXPECT_EQ(SecondIdx->getValue(), Iter);
3076     StoreInst *StoreValue =
3077         dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
3078     ASSERT_NE(StoreValue, nullptr);
3079     EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
3080     EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
3081     EXPECT_EQ(StoreValue->getAlign(), Align(8));
3082     IterInst = dyn_cast<Instruction>(StoreValue);
3083   }
3084 
3085   GetElementPtrInst *DependBaseAddrGEP =
3086       dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
3087   ASSERT_NE(DependBaseAddrGEP, nullptr);
3088   EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
3089   EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), (unsigned)2);
3090   auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
3091   auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
3092   ASSERT_NE(FirstIdx, nullptr);
3093   ASSERT_NE(SecondIdx, nullptr);
3094   EXPECT_EQ(FirstIdx->getValue(), 0);
3095   EXPECT_EQ(SecondIdx->getValue(), 0);
3096 
3097   CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
3098   ASSERT_NE(GTID, nullptr);
3099   EXPECT_EQ(GTID->arg_size(), 1U);
3100   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
3101   EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
3102   EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());
3103 
3104   CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
3105   ASSERT_NE(Depend, nullptr);
3106   EXPECT_EQ(Depend->arg_size(), 3U);
3107   EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
3108   EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
3109   EXPECT_EQ(Depend->getArgOperand(1), GTID);
3110   EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
3111 }
3112 
3113 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
3114   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3115   OpenMPIRBuilder OMPBuilder(*M);
3116   OMPBuilder.initialize();
3117   F->setName("func");
3118   IRBuilder<> Builder(BB);
3119 
3120   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3121 
3122   AllocaInst *PrivAI =
3123       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3124 
3125   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3126     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3127     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3128     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3129 
3130     Builder.restoreIP(CodeGenIP);
3131     Builder.CreateStore(F->arg_begin(), PrivAI);
3132     Value *PrivLoad =
3133         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3134     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3135   };
3136 
3137   auto FiniCB = [&](InsertPointTy IP) {
3138     BasicBlock *IPBB = IP.getBlock();
3139     EXPECT_NE(IPBB->end(), IP.getPoint());
3140   };
3141 
3142   // Test for "#omp ordered [threads]"
3143   BasicBlock *EntryBB = Builder.GetInsertBlock();
3144   Builder.restoreIP(
3145       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));
3146 
3147   Builder.CreateRetVoid();
3148   OMPBuilder.finalize();
3149   EXPECT_FALSE(verifyModule(*M, &errs()));
3150 
3151   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3152 
3153   CallInst *OrderedEntryCI = nullptr;
3154   for (auto &EI : *EntryBB) {
3155     Instruction *Cur = &EI;
3156     if (isa<CallInst>(Cur)) {
3157       OrderedEntryCI = cast<CallInst>(Cur);
3158       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3159         break;
3160       OrderedEntryCI = nullptr;
3161     }
3162   }
3163   EXPECT_NE(OrderedEntryCI, nullptr);
3164   EXPECT_EQ(OrderedEntryCI->arg_size(), 2U);
3165   EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
3166   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
3167 
3168   CallInst *OrderedEndCI = nullptr;
3169   for (auto &FI : *EntryBB) {
3170     Instruction *Cur = &FI;
3171     if (isa<CallInst>(Cur)) {
3172       OrderedEndCI = cast<CallInst>(Cur);
3173       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3174         break;
3175       OrderedEndCI = nullptr;
3176     }
3177   }
3178   EXPECT_NE(OrderedEndCI, nullptr);
3179   EXPECT_EQ(OrderedEndCI->arg_size(), 2U);
3180   EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
3181   EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
3182 }
3183 
3184 TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
3185   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3186   OpenMPIRBuilder OMPBuilder(*M);
3187   OMPBuilder.initialize();
3188   F->setName("func");
3189   IRBuilder<> Builder(BB);
3190 
3191   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3192 
3193   AllocaInst *PrivAI =
3194       Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
3195 
3196   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3197     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3198     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3199     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3200 
3201     Builder.restoreIP(CodeGenIP);
3202     Builder.CreateStore(F->arg_begin(), PrivAI);
3203     Value *PrivLoad =
3204         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3205     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3206   };
3207 
3208   auto FiniCB = [&](InsertPointTy IP) {
3209     BasicBlock *IPBB = IP.getBlock();
3210     EXPECT_NE(IPBB->end(), IP.getPoint());
3211   };
3212 
3213   // Test for "#omp ordered simd"
3214   BasicBlock *EntryBB = Builder.GetInsertBlock();
3215   Builder.restoreIP(
3216       OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));
3217 
3218   Builder.CreateRetVoid();
3219   OMPBuilder.finalize();
3220   EXPECT_FALSE(verifyModule(*M, &errs()));
3221 
3222   EXPECT_NE(EntryBB->getTerminator(), nullptr);
3223 
3224   CallInst *OrderedEntryCI = nullptr;
3225   for (auto &EI : *EntryBB) {
3226     Instruction *Cur = &EI;
3227     if (isa<CallInst>(Cur)) {
3228       OrderedEntryCI = cast<CallInst>(Cur);
3229       if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
3230         break;
3231       OrderedEntryCI = nullptr;
3232     }
3233   }
3234   EXPECT_EQ(OrderedEntryCI, nullptr);
3235 
3236   CallInst *OrderedEndCI = nullptr;
3237   for (auto &FI : *EntryBB) {
3238     Instruction *Cur = &FI;
3239     if (isa<CallInst>(Cur)) {
3240       OrderedEndCI = cast<CallInst>(Cur);
3241       if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
3242         break;
3243       OrderedEndCI = nullptr;
3244     }
3245   }
3246   EXPECT_EQ(OrderedEndCI, nullptr);
3247 }
3248 
3249 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
3250   OpenMPIRBuilder OMPBuilder(*M);
3251   OMPBuilder.initialize();
3252   F->setName("func");
3253   IRBuilder<> Builder(BB);
3254 
3255   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3256 
3257   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3258   AllocaInst *MasterAddress = Builder.CreateAlloca(Builder.getPtrTy());
3259   AllocaInst *PrivAddress = Builder.CreateAlloca(Builder.getPtrTy());
3260 
3261   BasicBlock *EntryBB = BB;
3262 
3263   OMPBuilder.createCopyinClauseBlocks(Builder.saveIP(), MasterAddress,
3264                                       PrivAddress, Int32, /*BranchtoEnd*/ true);
3265 
3266   BranchInst *EntryBr = dyn_cast_or_null<BranchInst>(EntryBB->getTerminator());
3267 
3268   EXPECT_NE(EntryBr, nullptr);
3269   EXPECT_TRUE(EntryBr->isConditional());
3270 
3271   BasicBlock *NotMasterBB = EntryBr->getSuccessor(0);
3272   BasicBlock *CopyinEnd = EntryBr->getSuccessor(1);
3273   CmpInst *CMP = dyn_cast_or_null<CmpInst>(EntryBr->getCondition());
3274 
3275   EXPECT_NE(CMP, nullptr);
3276   EXPECT_NE(NotMasterBB, nullptr);
3277   EXPECT_NE(CopyinEnd, nullptr);
3278 
3279   BranchInst *NotMasterBr =
3280       dyn_cast_or_null<BranchInst>(NotMasterBB->getTerminator());
3281   EXPECT_NE(NotMasterBr, nullptr);
3282   EXPECT_FALSE(NotMasterBr->isConditional());
3283   EXPECT_EQ(CopyinEnd, NotMasterBr->getSuccessor(0));
3284 }
3285 
3286 TEST_F(OpenMPIRBuilderTest, SingleDirective) {
3287   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3288   OpenMPIRBuilder OMPBuilder(*M);
3289   OMPBuilder.initialize();
3290   F->setName("func");
3291   IRBuilder<> Builder(BB);
3292 
3293   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3294 
3295   AllocaInst *PrivAI = nullptr;
3296 
3297   BasicBlock *EntryBB = nullptr;
3298   BasicBlock *ThenBB = nullptr;
3299 
3300   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3301     if (AllocaIP.isSet())
3302       Builder.restoreIP(AllocaIP);
3303     else
3304       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3305     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3306     Builder.CreateStore(F->arg_begin(), PrivAI);
3307 
3308     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3309     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3310     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3311 
3312     Builder.restoreIP(CodeGenIP);
3313 
3314     // collect some info for checks later
3315     ThenBB = Builder.GetInsertBlock();
3316     EntryBB = ThenBB->getUniquePredecessor();
3317 
3318     // simple instructions for body
3319     Value *PrivLoad =
3320         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3321     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3322   };
3323 
3324   auto FiniCB = [&](InsertPointTy IP) {
3325     BasicBlock *IPBB = IP.getBlock();
3326     EXPECT_NE(IPBB->end(), IP.getPoint());
3327   };
3328 
3329   Builder.restoreIP(
3330       OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ false));
3331   Value *EntryBBTI = EntryBB->getTerminator();
3332   EXPECT_NE(EntryBBTI, nullptr);
3333   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3334   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3335   EXPECT_TRUE(EntryBr->isConditional());
3336   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3337   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3338   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3339 
3340   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3341   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3342 
3343   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3344   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3345   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3346   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3347 
3348   CallInst *SingleEndCI = nullptr;
3349   for (auto &FI : *ThenBB) {
3350     Instruction *cur = &FI;
3351     if (isa<CallInst>(cur)) {
3352       SingleEndCI = cast<CallInst>(cur);
3353       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3354         break;
3355       SingleEndCI = nullptr;
3356     }
3357   }
3358   EXPECT_NE(SingleEndCI, nullptr);
3359   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3360   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3361   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3362 
3363   bool FoundBarrier = false;
3364   for (auto &FI : *ExitBB) {
3365     Instruction *cur = &FI;
3366     if (auto CI = dyn_cast<CallInst>(cur)) {
3367       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3368         FoundBarrier = true;
3369         break;
3370       }
3371     }
3372   }
3373   EXPECT_TRUE(FoundBarrier);
3374 }
3375 
3376 TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
3377   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3378   OpenMPIRBuilder OMPBuilder(*M);
3379   OMPBuilder.initialize();
3380   F->setName("func");
3381   IRBuilder<> Builder(BB);
3382 
3383   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3384 
3385   AllocaInst *PrivAI = nullptr;
3386 
3387   BasicBlock *EntryBB = nullptr;
3388   BasicBlock *ThenBB = nullptr;
3389 
3390   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3391     if (AllocaIP.isSet())
3392       Builder.restoreIP(AllocaIP);
3393     else
3394       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3395     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3396     Builder.CreateStore(F->arg_begin(), PrivAI);
3397 
3398     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3399     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3400     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3401 
3402     Builder.restoreIP(CodeGenIP);
3403 
3404     // collect some info for checks later
3405     ThenBB = Builder.GetInsertBlock();
3406     EntryBB = ThenBB->getUniquePredecessor();
3407 
3408     // simple instructions for body
3409     Value *PrivLoad =
3410         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3411     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3412   };
3413 
3414   auto FiniCB = [&](InsertPointTy IP) {
3415     BasicBlock *IPBB = IP.getBlock();
3416     EXPECT_NE(IPBB->end(), IP.getPoint());
3417   };
3418 
3419   Builder.restoreIP(
3420       OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ true));
3421   Value *EntryBBTI = EntryBB->getTerminator();
3422   EXPECT_NE(EntryBBTI, nullptr);
3423   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3424   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3425   EXPECT_TRUE(EntryBr->isConditional());
3426   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3427   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3428   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3429 
3430   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3431   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3432 
3433   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3434   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3435   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3436   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3437 
3438   CallInst *SingleEndCI = nullptr;
3439   for (auto &FI : *ThenBB) {
3440     Instruction *cur = &FI;
3441     if (isa<CallInst>(cur)) {
3442       SingleEndCI = cast<CallInst>(cur);
3443       if (SingleEndCI->getCalledFunction()->getName() == "__kmpc_end_single")
3444         break;
3445       SingleEndCI = nullptr;
3446     }
3447   }
3448   EXPECT_NE(SingleEndCI, nullptr);
3449   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3450   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3451   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3452 
3453   CallInst *ExitBarrier = nullptr;
3454   for (auto &FI : *ExitBB) {
3455     Instruction *cur = &FI;
3456     if (auto CI = dyn_cast<CallInst>(cur)) {
3457       if (CI->getCalledFunction()->getName() == "__kmpc_barrier") {
3458         ExitBarrier = CI;
3459         break;
3460       }
3461     }
3462   }
3463   EXPECT_EQ(ExitBarrier, nullptr);
3464 }
3465 
3466 // Helper class to check each instruction of a BB.
3467 class BBInstIter {
3468   BasicBlock *BB;
3469   BasicBlock::iterator BBI;
3470 
3471 public:
3472   BBInstIter(BasicBlock *BB) : BB(BB), BBI(BB->begin()) {}
3473 
3474   bool hasNext() const { return BBI != BB->end(); }
3475 
3476   template <typename InstTy> InstTy *next() {
3477     if (!hasNext())
3478       return nullptr;
3479     Instruction *Cur = &*BBI++;
3480     if (!isa<InstTy>(Cur))
3481       return nullptr;
3482     return cast<InstTy>(Cur);
3483   }
3484 };
3485 
3486 TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
3487   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
3488   OpenMPIRBuilder OMPBuilder(*M);
3489   OMPBuilder.initialize();
3490   F->setName("func");
3491   IRBuilder<> Builder(BB);
3492 
3493   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3494 
3495   AllocaInst *PrivAI = nullptr;
3496 
3497   BasicBlock *EntryBB = nullptr;
3498   BasicBlock *ThenBB = nullptr;
3499 
3500   Value *CPVar = Builder.CreateAlloca(F->arg_begin()->getType());
3501   Builder.CreateStore(F->arg_begin(), CPVar);
3502 
3503   FunctionType *CopyFuncTy = FunctionType::get(
3504       Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getPtrTy()}, false);
3505   Function *CopyFunc =
3506       Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
3507 
3508   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
3509     if (AllocaIP.isSet())
3510       Builder.restoreIP(AllocaIP);
3511     else
3512       Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
3513     PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
3514     Builder.CreateStore(F->arg_begin(), PrivAI);
3515 
3516     llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
3517     llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
3518     EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
3519 
3520     Builder.restoreIP(CodeGenIP);
3521 
3522     // collect some info for checks later
3523     ThenBB = Builder.GetInsertBlock();
3524     EntryBB = ThenBB->getUniquePredecessor();
3525 
3526     // simple instructions for body
3527     Value *PrivLoad =
3528         Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
3529     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
3530   };
3531 
3532   auto FiniCB = [&](InsertPointTy IP) {
3533     BasicBlock *IPBB = IP.getBlock();
3534     // IP must be before the unconditional branch to ExitBB
3535     EXPECT_NE(IPBB->end(), IP.getPoint());
3536   };
3537 
3538   Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB,
3539                                             /*IsNowait*/ false, {CPVar},
3540                                             {CopyFunc}));
3541   Value *EntryBBTI = EntryBB->getTerminator();
3542   EXPECT_NE(EntryBBTI, nullptr);
3543   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
3544   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
3545   EXPECT_TRUE(EntryBr->isConditional());
3546   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
3547   BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
3548   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
3549 
3550   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
3551   EXPECT_TRUE(isa<CallInst>(CondInst->getOperand(0)));
3552 
3553   CallInst *SingleEntryCI = cast<CallInst>(CondInst->getOperand(0));
3554   EXPECT_EQ(SingleEntryCI->arg_size(), 2U);
3555   EXPECT_EQ(SingleEntryCI->getCalledFunction()->getName(), "__kmpc_single");
3556   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
3557 
3558   // check ThenBB
3559   BBInstIter ThenBBI(ThenBB);
3560   // load PrivAI
3561   auto *PrivLI = ThenBBI.next<LoadInst>();
3562   EXPECT_NE(PrivLI, nullptr);
3563   EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI);
3564   // icmp
3565   EXPECT_TRUE(ThenBBI.next<ICmpInst>());
3566   // store 1, DidIt
3567   auto *DidItSI = ThenBBI.next<StoreInst>();
3568   EXPECT_NE(DidItSI, nullptr);
3569   EXPECT_EQ(DidItSI->getValueOperand(),
3570             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
3571   Value *DidIt = DidItSI->getPointerOperand();
3572   // call __kmpc_end_single
3573   auto *SingleEndCI = ThenBBI.next<CallInst>();
3574   EXPECT_NE(SingleEndCI, nullptr);
3575   EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single");
3576   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
3577   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
3578   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
3579   // br ExitBB
3580   auto *ExitBBBI = ThenBBI.next<BranchInst>();
3581   EXPECT_NE(ExitBBBI, nullptr);
3582   EXPECT_TRUE(ExitBBBI->isUnconditional());
3583   EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB);
3584   EXPECT_FALSE(ThenBBI.hasNext());
3585 
3586   // check ExitBB
3587   BBInstIter ExitBBI(ExitBB);
3588   // call __kmpc_global_thread_num
3589   auto *ThreadNumCI = ExitBBI.next<CallInst>();
3590   EXPECT_NE(ThreadNumCI, nullptr);
3591   EXPECT_EQ(ThreadNumCI->getCalledFunction()->getName(),
3592             "__kmpc_global_thread_num");
3593   // load DidIt
3594   auto *DidItLI = ExitBBI.next<LoadInst>();
3595   EXPECT_NE(DidItLI, nullptr);
3596   EXPECT_EQ(DidItLI->getPointerOperand(), DidIt);
3597   // call __kmpc_copyprivate
3598   auto *CopyPrivateCI = ExitBBI.next<CallInst>();
3599   EXPECT_NE(CopyPrivateCI, nullptr);
3600   EXPECT_EQ(CopyPrivateCI->arg_size(), 6U);
3601   EXPECT_TRUE(isa<AllocaInst>(CopyPrivateCI->getArgOperand(3)));
3602   EXPECT_EQ(CopyPrivateCI->getArgOperand(3), CPVar);
3603   EXPECT_TRUE(isa<Function>(CopyPrivateCI->getArgOperand(4)));
3604   EXPECT_EQ(CopyPrivateCI->getArgOperand(4), CopyFunc);
3605   EXPECT_TRUE(isa<LoadInst>(CopyPrivateCI->getArgOperand(5)));
3606   DidItLI = cast<LoadInst>(CopyPrivateCI->getArgOperand(5));
3607   EXPECT_EQ(DidItLI->getOperand(0), DidIt);
3608   EXPECT_FALSE(ExitBBI.hasNext());
3609 }
3610 
3611 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadFlt) {
3612   OpenMPIRBuilder OMPBuilder(*M);
3613   OMPBuilder.initialize();
3614   F->setName("func");
3615   IRBuilder<> Builder(BB);
3616 
3617   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3618 
3619   Type *Float32 = Type::getFloatTy(M->getContext());
3620   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3621   XVal->setName("AtomicVar");
3622   AllocaInst *VVal = Builder.CreateAlloca(Float32);
3623   VVal->setName("AtomicRead");
3624   AtomicOrdering AO = AtomicOrdering::Monotonic;
3625   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3626   OpenMPIRBuilder::AtomicOpValue V = {VVal, Float32, false, false};
3627 
3628   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3629 
3630   IntegerType *IntCastTy =
3631       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3632 
3633   LoadInst *AtomicLoad = cast<LoadInst>(VVal->getNextNode());
3634   EXPECT_TRUE(AtomicLoad->isAtomic());
3635   EXPECT_EQ(AtomicLoad->getPointerOperand(), XVal);
3636 
3637   BitCastInst *CastToFlt = cast<BitCastInst>(AtomicLoad->getNextNode());
3638   EXPECT_EQ(CastToFlt->getSrcTy(), IntCastTy);
3639   EXPECT_EQ(CastToFlt->getDestTy(), Float32);
3640   EXPECT_EQ(CastToFlt->getOperand(0), AtomicLoad);
3641 
3642   StoreInst *StoreofAtomic = cast<StoreInst>(CastToFlt->getNextNode());
3643   EXPECT_EQ(StoreofAtomic->getValueOperand(), CastToFlt);
3644   EXPECT_EQ(StoreofAtomic->getPointerOperand(), VVal);
3645 
3646   Builder.CreateRetVoid();
3647   OMPBuilder.finalize();
3648   EXPECT_FALSE(verifyModule(*M, &errs()));
3649 }
3650 
3651 TEST_F(OpenMPIRBuilderTest, OMPAtomicReadInt) {
3652   OpenMPIRBuilder OMPBuilder(*M);
3653   OMPBuilder.initialize();
3654   F->setName("func");
3655   IRBuilder<> Builder(BB);
3656 
3657   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3658 
3659   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3660   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3661   XVal->setName("AtomicVar");
3662   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3663   VVal->setName("AtomicRead");
3664   AtomicOrdering AO = AtomicOrdering::Monotonic;
3665   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3666   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3667 
3668   BasicBlock *EntryBB = BB;
3669 
3670   Builder.restoreIP(OMPBuilder.createAtomicRead(Loc, X, V, AO));
3671   LoadInst *AtomicLoad = nullptr;
3672   StoreInst *StoreofAtomic = nullptr;
3673 
3674   for (Instruction &Cur : *EntryBB) {
3675     if (isa<LoadInst>(Cur)) {
3676       AtomicLoad = cast<LoadInst>(&Cur);
3677       if (AtomicLoad->getPointerOperand() == XVal)
3678         continue;
3679       AtomicLoad = nullptr;
3680     } else if (isa<StoreInst>(Cur)) {
3681       StoreofAtomic = cast<StoreInst>(&Cur);
3682       if (StoreofAtomic->getPointerOperand() == VVal)
3683         continue;
3684       StoreofAtomic = nullptr;
3685     }
3686   }
3687 
3688   EXPECT_NE(AtomicLoad, nullptr);
3689   EXPECT_TRUE(AtomicLoad->isAtomic());
3690 
3691   EXPECT_NE(StoreofAtomic, nullptr);
3692   EXPECT_EQ(StoreofAtomic->getValueOperand(), AtomicLoad);
3693 
3694   Builder.CreateRetVoid();
3695   OMPBuilder.finalize();
3696 
3697   EXPECT_FALSE(verifyModule(*M, &errs()));
3698 }
3699 
3700 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteFlt) {
3701   OpenMPIRBuilder OMPBuilder(*M);
3702   OMPBuilder.initialize();
3703   F->setName("func");
3704   IRBuilder<> Builder(BB);
3705 
3706   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3707 
3708   LLVMContext &Ctx = M->getContext();
3709   Type *Float32 = Type::getFloatTy(Ctx);
3710   AllocaInst *XVal = Builder.CreateAlloca(Float32);
3711   XVal->setName("AtomicVar");
3712   OpenMPIRBuilder::AtomicOpValue X = {XVal, Float32, false, false};
3713   AtomicOrdering AO = AtomicOrdering::Monotonic;
3714   Constant *ValToWrite = ConstantFP::get(Float32, 1.0);
3715 
3716   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3717 
3718   IntegerType *IntCastTy =
3719       IntegerType::get(M->getContext(), Float32->getScalarSizeInBits());
3720 
3721   Value *ExprCast = Builder.CreateBitCast(ValToWrite, IntCastTy);
3722 
3723   StoreInst *StoreofAtomic = cast<StoreInst>(XVal->getNextNode());
3724   EXPECT_EQ(StoreofAtomic->getValueOperand(), ExprCast);
3725   EXPECT_EQ(StoreofAtomic->getPointerOperand(), XVal);
3726   EXPECT_TRUE(StoreofAtomic->isAtomic());
3727 
3728   Builder.CreateRetVoid();
3729   OMPBuilder.finalize();
3730   EXPECT_FALSE(verifyModule(*M, &errs()));
3731 }
3732 
3733 TEST_F(OpenMPIRBuilderTest, OMPAtomicWriteInt) {
3734   OpenMPIRBuilder OMPBuilder(*M);
3735   OMPBuilder.initialize();
3736   F->setName("func");
3737   IRBuilder<> Builder(BB);
3738 
3739   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3740 
3741   LLVMContext &Ctx = M->getContext();
3742   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3743   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3744   XVal->setName("AtomicVar");
3745   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3746   AtomicOrdering AO = AtomicOrdering::Monotonic;
3747   ConstantInt *ValToWrite = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3748 
3749   BasicBlock *EntryBB = BB;
3750 
3751   Builder.restoreIP(OMPBuilder.createAtomicWrite(Loc, X, ValToWrite, AO));
3752 
3753   StoreInst *StoreofAtomic = nullptr;
3754 
3755   for (Instruction &Cur : *EntryBB) {
3756     if (isa<StoreInst>(Cur)) {
3757       StoreofAtomic = cast<StoreInst>(&Cur);
3758       if (StoreofAtomic->getPointerOperand() == XVal)
3759         continue;
3760       StoreofAtomic = nullptr;
3761     }
3762   }
3763 
3764   EXPECT_NE(StoreofAtomic, nullptr);
3765   EXPECT_TRUE(StoreofAtomic->isAtomic());
3766   EXPECT_EQ(StoreofAtomic->getValueOperand(), ValToWrite);
3767 
3768   Builder.CreateRetVoid();
3769   OMPBuilder.finalize();
3770   EXPECT_FALSE(verifyModule(*M, &errs()));
3771 }
3772 
3773 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
3774   OpenMPIRBuilder OMPBuilder(*M);
3775   OMPBuilder.initialize();
3776   F->setName("func");
3777   IRBuilder<> Builder(BB);
3778 
3779   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3780 
3781   IntegerType *Int32 = Type::getInt32Ty(M->getContext());
3782   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3783   XVal->setName("AtomicVar");
3784   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3785   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3786   AtomicOrdering AO = AtomicOrdering::Monotonic;
3787   ConstantInt *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3788   Value *Expr = nullptr;
3789   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Sub;
3790   bool IsXLHSInRHSPart = false;
3791 
3792   BasicBlock *EntryBB = BB;
3793   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3794                                           EntryBB->getFirstInsertionPt());
3795   Value *Sub = nullptr;
3796 
3797   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3798     Sub = IRB.CreateSub(ConstVal, Atomic);
3799     return Sub;
3800   };
3801   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3802       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3803   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3804   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3805   EXPECT_NE(ContTI, nullptr);
3806   BasicBlock *EndBB = ContTI->getSuccessor(0);
3807   EXPECT_TRUE(ContTI->isConditional());
3808   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3809   EXPECT_NE(EndBB, nullptr);
3810 
3811   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3812   EXPECT_NE(Phi, nullptr);
3813   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3814   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3815   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3816 
3817   EXPECT_EQ(Sub->getNumUses(), 1U);
3818   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3819   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3820 
3821   ExtractValueInst *ExVI1 =
3822       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3823   EXPECT_NE(ExVI1, nullptr);
3824   AtomicCmpXchgInst *CmpExchg =
3825       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3826   EXPECT_NE(CmpExchg, nullptr);
3827   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3828   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3829   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3830 
3831   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3832   EXPECT_NE(Ld, nullptr);
3833   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3834 
3835   Builder.CreateRetVoid();
3836   OMPBuilder.finalize();
3837   EXPECT_FALSE(verifyModule(*M, &errs()));
3838 }
3839 
3840 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
3841   OpenMPIRBuilder OMPBuilder(*M);
3842   OMPBuilder.initialize();
3843   F->setName("func");
3844   IRBuilder<> Builder(BB);
3845 
3846   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3847 
3848   Type *FloatTy = Type::getFloatTy(M->getContext());
3849   AllocaInst *XVal = Builder.CreateAlloca(FloatTy);
3850   XVal->setName("AtomicVar");
3851   Builder.CreateStore(ConstantFP::get(Type::getFloatTy(Ctx), 0.0), XVal);
3852   OpenMPIRBuilder::AtomicOpValue X = {XVal, FloatTy, false, false};
3853   AtomicOrdering AO = AtomicOrdering::Monotonic;
3854   Constant *ConstVal = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
3855   Value *Expr = nullptr;
3856   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::FSub;
3857   bool IsXLHSInRHSPart = false;
3858 
3859   BasicBlock *EntryBB = BB;
3860   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3861                                           EntryBB->getFirstInsertionPt());
3862   Value *Sub = nullptr;
3863 
3864   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3865     Sub = IRB.CreateFSub(ConstVal, Atomic);
3866     return Sub;
3867   };
3868   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3869       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3870   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3871   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3872   EXPECT_NE(ContTI, nullptr);
3873   BasicBlock *EndBB = ContTI->getSuccessor(0);
3874   EXPECT_TRUE(ContTI->isConditional());
3875   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3876   EXPECT_NE(EndBB, nullptr);
3877 
3878   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3879   EXPECT_NE(Phi, nullptr);
3880   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3881   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3882   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3883 
3884   EXPECT_EQ(Sub->getNumUses(), 1U);
3885   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3886   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3887 
3888   ExtractValueInst *ExVI1 =
3889       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3890   EXPECT_NE(ExVI1, nullptr);
3891   AtomicCmpXchgInst *CmpExchg =
3892       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3893   EXPECT_NE(CmpExchg, nullptr);
3894   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3895   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3896   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3897 
3898   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3899   EXPECT_NE(Ld, nullptr);
3900   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3901   Builder.CreateRetVoid();
3902   OMPBuilder.finalize();
3903   EXPECT_FALSE(verifyModule(*M, &errs()));
3904 }
3905 
3906 TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
3907   OpenMPIRBuilder OMPBuilder(*M);
3908   OMPBuilder.initialize();
3909   F->setName("func");
3910   IRBuilder<> Builder(BB);
3911 
3912   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3913 
3914   Type *IntTy = Type::getInt32Ty(M->getContext());
3915   AllocaInst *XVal = Builder.CreateAlloca(IntTy);
3916   XVal->setName("AtomicVar");
3917   Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0), XVal);
3918   OpenMPIRBuilder::AtomicOpValue X = {XVal, IntTy, false, false};
3919   AtomicOrdering AO = AtomicOrdering::Monotonic;
3920   Constant *ConstVal = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
3921   Value *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
3922   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::UMax;
3923   bool IsXLHSInRHSPart = false;
3924 
3925   BasicBlock *EntryBB = BB;
3926   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
3927                                           EntryBB->getFirstInsertionPt());
3928   Value *Sub = nullptr;
3929 
3930   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) {
3931     Sub = IRB.CreateSub(ConstVal, Atomic);
3932     return Sub;
3933   };
3934   Builder.restoreIP(OMPBuilder.createAtomicUpdate(
3935       Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
3936   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
3937   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
3938   EXPECT_NE(ContTI, nullptr);
3939   BasicBlock *EndBB = ContTI->getSuccessor(0);
3940   EXPECT_TRUE(ContTI->isConditional());
3941   EXPECT_EQ(ContTI->getSuccessor(1), ContBB);
3942   EXPECT_NE(EndBB, nullptr);
3943 
3944   PHINode *Phi = dyn_cast<PHINode>(&ContBB->front());
3945   EXPECT_NE(Phi, nullptr);
3946   EXPECT_EQ(Phi->getNumIncomingValues(), 2U);
3947   EXPECT_EQ(Phi->getIncomingBlock(0), EntryBB);
3948   EXPECT_EQ(Phi->getIncomingBlock(1), ContBB);
3949 
3950   EXPECT_EQ(Sub->getNumUses(), 1U);
3951   StoreInst *St = dyn_cast<StoreInst>(Sub->user_back());
3952   AllocaInst *UpdateTemp = dyn_cast<AllocaInst>(St->getPointerOperand());
3953 
3954   ExtractValueInst *ExVI1 =
3955       dyn_cast<ExtractValueInst>(Phi->getIncomingValueForBlock(ContBB));
3956   EXPECT_NE(ExVI1, nullptr);
3957   AtomicCmpXchgInst *CmpExchg =
3958       dyn_cast<AtomicCmpXchgInst>(ExVI1->getAggregateOperand());
3959   EXPECT_NE(CmpExchg, nullptr);
3960   EXPECT_EQ(CmpExchg->getPointerOperand(), XVal);
3961   EXPECT_EQ(CmpExchg->getCompareOperand(), Phi);
3962   EXPECT_EQ(CmpExchg->getSuccessOrdering(), AtomicOrdering::Monotonic);
3963 
3964   LoadInst *Ld = dyn_cast<LoadInst>(CmpExchg->getNewValOperand());
3965   EXPECT_NE(Ld, nullptr);
3966   EXPECT_EQ(UpdateTemp, Ld->getPointerOperand());
3967 
3968   Builder.CreateRetVoid();
3969   OMPBuilder.finalize();
3970   EXPECT_FALSE(verifyModule(*M, &errs()));
3971 }
3972 
3973 TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
3974   OpenMPIRBuilder OMPBuilder(*M);
3975   OMPBuilder.initialize();
3976   F->setName("func");
3977   IRBuilder<> Builder(BB);
3978 
3979   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
3980 
3981   LLVMContext &Ctx = M->getContext();
3982   IntegerType *Int32 = Type::getInt32Ty(Ctx);
3983   AllocaInst *XVal = Builder.CreateAlloca(Int32);
3984   XVal->setName("AtomicVar");
3985   AllocaInst *VVal = Builder.CreateAlloca(Int32);
3986   VVal->setName("AtomicCapTar");
3987   StoreInst *Init =
3988       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
3989 
3990   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, false, false};
3991   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
3992   AtomicOrdering AO = AtomicOrdering::Monotonic;
3993   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
3994   AtomicRMWInst::BinOp RMWOp = AtomicRMWInst::Add;
3995   bool IsXLHSInRHSPart = true;
3996   bool IsPostfixUpdate = true;
3997   bool UpdateExpr = true;
3998 
3999   BasicBlock *EntryBB = BB;
4000   OpenMPIRBuilder::InsertPointTy AllocaIP(EntryBB,
4001                                           EntryBB->getFirstInsertionPt());
4002 
4003   // integer update - not used
4004   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
4005 
4006   Builder.restoreIP(OMPBuilder.createAtomicCapture(
4007       Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
4008       IsPostfixUpdate, IsXLHSInRHSPart));
4009   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4010   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4011   EXPECT_NE(ARWM, nullptr);
4012   EXPECT_EQ(ARWM->getPointerOperand(), XVal);
4013   EXPECT_EQ(ARWM->getOperation(), RMWOp);
4014   StoreInst *St = dyn_cast<StoreInst>(ARWM->user_back());
4015   EXPECT_NE(St, nullptr);
4016   EXPECT_EQ(St->getPointerOperand(), VVal);
4017 
4018   Builder.CreateRetVoid();
4019   OMPBuilder.finalize();
4020   EXPECT_FALSE(verifyModule(*M, &errs()));
4021 }
4022 
4023 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompare) {
4024   OpenMPIRBuilder OMPBuilder(*M);
4025   OMPBuilder.initialize();
4026   F->setName("func");
4027   IRBuilder<> Builder(BB);
4028 
4029   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4030 
4031   LLVMContext &Ctx = M->getContext();
4032   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4033   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4034   XVal->setName("x");
4035   StoreInst *Init =
4036       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4037 
4038   OpenMPIRBuilder::AtomicOpValue XSigned = {XVal, Int32, true, false};
4039   OpenMPIRBuilder::AtomicOpValue XUnsigned = {XVal, Int32, false, false};
4040   // V and R are not used in atomic compare
4041   OpenMPIRBuilder::AtomicOpValue V = {nullptr, nullptr, false, false};
4042   OpenMPIRBuilder::AtomicOpValue R = {nullptr, nullptr, false, false};
4043   AtomicOrdering AO = AtomicOrdering::Monotonic;
4044   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4045   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4046   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4047   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4048 
4049   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4050       Builder, XSigned, V, R, Expr, nullptr, AO, OpMax, true, false, false));
4051   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4052       Builder, XUnsigned, V, R, Expr, nullptr, AO, OpMax, false, false, false));
4053   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4054       Builder, XSigned, V, R, Expr, D, AO, OpEQ, true, false, false));
4055 
4056   BasicBlock *EntryBB = BB;
4057   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
4058   EXPECT_EQ(EntryBB->size(), 5U);
4059 
4060   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Init->getNextNode());
4061   EXPECT_NE(ARWM1, nullptr);
4062   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4063   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4064   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4065 
4066   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(ARWM1->getNextNode());
4067   EXPECT_NE(ARWM2, nullptr);
4068   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4069   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4070   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::UMax);
4071 
4072   AtomicCmpXchgInst *AXCHG = dyn_cast<AtomicCmpXchgInst>(ARWM2->getNextNode());
4073   EXPECT_NE(AXCHG, nullptr);
4074   EXPECT_EQ(AXCHG->getPointerOperand(), XVal);
4075   EXPECT_EQ(AXCHG->getCompareOperand(), Expr);
4076   EXPECT_EQ(AXCHG->getNewValOperand(), D);
4077 
4078   Builder.CreateRetVoid();
4079   OMPBuilder.finalize();
4080   EXPECT_FALSE(verifyModule(*M, &errs()));
4081 }
4082 
4083 TEST_F(OpenMPIRBuilderTest, OMPAtomicCompareCapture) {
4084   OpenMPIRBuilder OMPBuilder(*M);
4085   OMPBuilder.initialize();
4086   F->setName("func");
4087   IRBuilder<> Builder(BB);
4088 
4089   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4090 
4091   LLVMContext &Ctx = M->getContext();
4092   IntegerType *Int32 = Type::getInt32Ty(Ctx);
4093   AllocaInst *XVal = Builder.CreateAlloca(Int32);
4094   XVal->setName("x");
4095   AllocaInst *VVal = Builder.CreateAlloca(Int32);
4096   VVal->setName("v");
4097   AllocaInst *RVal = Builder.CreateAlloca(Int32);
4098   RVal->setName("r");
4099 
4100   StoreInst *Init =
4101       Builder.CreateStore(ConstantInt::get(Type::getInt32Ty(Ctx), 0U), XVal);
4102 
4103   OpenMPIRBuilder::AtomicOpValue X = {XVal, Int32, true, false};
4104   OpenMPIRBuilder::AtomicOpValue V = {VVal, Int32, false, false};
4105   OpenMPIRBuilder::AtomicOpValue NoV = {nullptr, nullptr, false, false};
4106   OpenMPIRBuilder::AtomicOpValue R = {RVal, Int32, false, false};
4107   OpenMPIRBuilder::AtomicOpValue NoR = {nullptr, nullptr, false, false};
4108 
4109   AtomicOrdering AO = AtomicOrdering::Monotonic;
4110   ConstantInt *Expr = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4111   ConstantInt *D = ConstantInt::get(Type::getInt32Ty(Ctx), 1U);
4112   OMPAtomicCompareOp OpMax = OMPAtomicCompareOp::MAX;
4113   OMPAtomicCompareOp OpEQ = OMPAtomicCompareOp::EQ;
4114 
4115   // { cond-update-stmt v = x; }
4116   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4117       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4118       /* IsPostfixUpdate */ false,
4119       /* IsFailOnly */ false));
4120   // { v = x; cond-update-stmt }
4121   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4122       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4123       /* IsPostfixUpdate */ true,
4124       /* IsFailOnly */ false));
4125   // if(x == e) { x = d; } else { v = x; }
4126   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4127       Builder, X, V, NoR, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4128       /* IsPostfixUpdate */ false,
4129       /* IsFailOnly */ true));
4130   // { r = x == e; if(r) { x = d; } }
4131   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4132       Builder, X, NoV, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4133       /* IsPostfixUpdate */ false,
4134       /* IsFailOnly */ false));
4135   // { r = x == e; if(r) { x = d; } else { v = x; } }
4136   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4137       Builder, X, V, R, Expr, D, AO, OpEQ, /* IsXBinopExpr */ true,
4138       /* IsPostfixUpdate */ false,
4139       /* IsFailOnly */ true));
4140 
4141   // { v = x; cond-update-stmt }
4142   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4143       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ true,
4144       /* IsPostfixUpdate */ true,
4145       /* IsFailOnly */ false));
4146   // { cond-update-stmt v = x; }
4147   Builder.restoreIP(OMPBuilder.createAtomicCompare(
4148       Builder, X, V, NoR, Expr, nullptr, AO, OpMax, /* IsXBinopExpr */ false,
4149       /* IsPostfixUpdate */ false,
4150       /* IsFailOnly */ false));
4151 
4152   BasicBlock *EntryBB = BB;
4153   EXPECT_EQ(EntryBB->getParent()->size(), 5U);
4154   BasicBlock *Cont1 = dyn_cast<BasicBlock>(EntryBB->getNextNode());
4155   EXPECT_NE(Cont1, nullptr);
4156   BasicBlock *Exit1 = dyn_cast<BasicBlock>(Cont1->getNextNode());
4157   EXPECT_NE(Exit1, nullptr);
4158   BasicBlock *Cont2 = dyn_cast<BasicBlock>(Exit1->getNextNode());
4159   EXPECT_NE(Cont2, nullptr);
4160   BasicBlock *Exit2 = dyn_cast<BasicBlock>(Cont2->getNextNode());
4161   EXPECT_NE(Exit2, nullptr);
4162 
4163   AtomicCmpXchgInst *CmpXchg1 =
4164       dyn_cast<AtomicCmpXchgInst>(Init->getNextNode());
4165   EXPECT_NE(CmpXchg1, nullptr);
4166   EXPECT_EQ(CmpXchg1->getPointerOperand(), XVal);
4167   EXPECT_EQ(CmpXchg1->getCompareOperand(), Expr);
4168   EXPECT_EQ(CmpXchg1->getNewValOperand(), D);
4169   ExtractValueInst *ExtVal1 =
4170       dyn_cast<ExtractValueInst>(CmpXchg1->getNextNode());
4171   EXPECT_NE(ExtVal1, nullptr);
4172   EXPECT_EQ(ExtVal1->getAggregateOperand(), CmpXchg1);
4173   EXPECT_EQ(ExtVal1->getIndices(), ArrayRef<unsigned int>(0U));
4174   ExtractValueInst *ExtVal2 =
4175       dyn_cast<ExtractValueInst>(ExtVal1->getNextNode());
4176   EXPECT_NE(ExtVal2, nullptr);
4177   EXPECT_EQ(ExtVal2->getAggregateOperand(), CmpXchg1);
4178   EXPECT_EQ(ExtVal2->getIndices(), ArrayRef<unsigned int>(1U));
4179   SelectInst *Sel1 = dyn_cast<SelectInst>(ExtVal2->getNextNode());
4180   EXPECT_NE(Sel1, nullptr);
4181   EXPECT_EQ(Sel1->getCondition(), ExtVal2);
4182   EXPECT_EQ(Sel1->getTrueValue(), Expr);
4183   EXPECT_EQ(Sel1->getFalseValue(), ExtVal1);
4184   StoreInst *Store1 = dyn_cast<StoreInst>(Sel1->getNextNode());
4185   EXPECT_NE(Store1, nullptr);
4186   EXPECT_EQ(Store1->getPointerOperand(), VVal);
4187   EXPECT_EQ(Store1->getValueOperand(), Sel1);
4188 
4189   AtomicCmpXchgInst *CmpXchg2 =
4190       dyn_cast<AtomicCmpXchgInst>(Store1->getNextNode());
4191   EXPECT_NE(CmpXchg2, nullptr);
4192   EXPECT_EQ(CmpXchg2->getPointerOperand(), XVal);
4193   EXPECT_EQ(CmpXchg2->getCompareOperand(), Expr);
4194   EXPECT_EQ(CmpXchg2->getNewValOperand(), D);
4195   ExtractValueInst *ExtVal3 =
4196       dyn_cast<ExtractValueInst>(CmpXchg2->getNextNode());
4197   EXPECT_NE(ExtVal3, nullptr);
4198   EXPECT_EQ(ExtVal3->getAggregateOperand(), CmpXchg2);
4199   EXPECT_EQ(ExtVal3->getIndices(), ArrayRef<unsigned int>(0U));
4200   StoreInst *Store2 = dyn_cast<StoreInst>(ExtVal3->getNextNode());
4201   EXPECT_NE(Store2, nullptr);
4202   EXPECT_EQ(Store2->getPointerOperand(), VVal);
4203   EXPECT_EQ(Store2->getValueOperand(), ExtVal3);
4204 
4205   AtomicCmpXchgInst *CmpXchg3 =
4206       dyn_cast<AtomicCmpXchgInst>(Store2->getNextNode());
4207   EXPECT_NE(CmpXchg3, nullptr);
4208   EXPECT_EQ(CmpXchg3->getPointerOperand(), XVal);
4209   EXPECT_EQ(CmpXchg3->getCompareOperand(), Expr);
4210   EXPECT_EQ(CmpXchg3->getNewValOperand(), D);
4211   ExtractValueInst *ExtVal4 =
4212       dyn_cast<ExtractValueInst>(CmpXchg3->getNextNode());
4213   EXPECT_NE(ExtVal4, nullptr);
4214   EXPECT_EQ(ExtVal4->getAggregateOperand(), CmpXchg3);
4215   EXPECT_EQ(ExtVal4->getIndices(), ArrayRef<unsigned int>(0U));
4216   ExtractValueInst *ExtVal5 =
4217       dyn_cast<ExtractValueInst>(ExtVal4->getNextNode());
4218   EXPECT_NE(ExtVal5, nullptr);
4219   EXPECT_EQ(ExtVal5->getAggregateOperand(), CmpXchg3);
4220   EXPECT_EQ(ExtVal5->getIndices(), ArrayRef<unsigned int>(1U));
4221   BranchInst *Br1 = dyn_cast<BranchInst>(ExtVal5->getNextNode());
4222   EXPECT_NE(Br1, nullptr);
4223   EXPECT_EQ(Br1->isConditional(), true);
4224   EXPECT_EQ(Br1->getCondition(), ExtVal5);
4225   EXPECT_EQ(Br1->getSuccessor(0), Exit1);
4226   EXPECT_EQ(Br1->getSuccessor(1), Cont1);
4227 
4228   StoreInst *Store3 = dyn_cast<StoreInst>(&Cont1->front());
4229   EXPECT_NE(Store3, nullptr);
4230   EXPECT_EQ(Store3->getPointerOperand(), VVal);
4231   EXPECT_EQ(Store3->getValueOperand(), ExtVal4);
4232   BranchInst *Br2 = dyn_cast<BranchInst>(Store3->getNextNode());
4233   EXPECT_NE(Br2, nullptr);
4234   EXPECT_EQ(Br2->isUnconditional(), true);
4235   EXPECT_EQ(Br2->getSuccessor(0), Exit1);
4236 
4237   AtomicCmpXchgInst *CmpXchg4 = dyn_cast<AtomicCmpXchgInst>(&Exit1->front());
4238   EXPECT_NE(CmpXchg4, nullptr);
4239   EXPECT_EQ(CmpXchg4->getPointerOperand(), XVal);
4240   EXPECT_EQ(CmpXchg4->getCompareOperand(), Expr);
4241   EXPECT_EQ(CmpXchg4->getNewValOperand(), D);
4242   ExtractValueInst *ExtVal6 =
4243       dyn_cast<ExtractValueInst>(CmpXchg4->getNextNode());
4244   EXPECT_NE(ExtVal6, nullptr);
4245   EXPECT_EQ(ExtVal6->getAggregateOperand(), CmpXchg4);
4246   EXPECT_EQ(ExtVal6->getIndices(), ArrayRef<unsigned int>(1U));
4247   ZExtInst *ZExt1 = dyn_cast<ZExtInst>(ExtVal6->getNextNode());
4248   EXPECT_NE(ZExt1, nullptr);
4249   EXPECT_EQ(ZExt1->getDestTy(), Int32);
4250   StoreInst *Store4 = dyn_cast<StoreInst>(ZExt1->getNextNode());
4251   EXPECT_NE(Store4, nullptr);
4252   EXPECT_EQ(Store4->getPointerOperand(), RVal);
4253   EXPECT_EQ(Store4->getValueOperand(), ZExt1);
4254 
4255   AtomicCmpXchgInst *CmpXchg5 =
4256       dyn_cast<AtomicCmpXchgInst>(Store4->getNextNode());
4257   EXPECT_NE(CmpXchg5, nullptr);
4258   EXPECT_EQ(CmpXchg5->getPointerOperand(), XVal);
4259   EXPECT_EQ(CmpXchg5->getCompareOperand(), Expr);
4260   EXPECT_EQ(CmpXchg5->getNewValOperand(), D);
4261   ExtractValueInst *ExtVal7 =
4262       dyn_cast<ExtractValueInst>(CmpXchg5->getNextNode());
4263   EXPECT_NE(ExtVal7, nullptr);
4264   EXPECT_EQ(ExtVal7->getAggregateOperand(), CmpXchg5);
4265   EXPECT_EQ(ExtVal7->getIndices(), ArrayRef<unsigned int>(0U));
4266   ExtractValueInst *ExtVal8 =
4267       dyn_cast<ExtractValueInst>(ExtVal7->getNextNode());
4268   EXPECT_NE(ExtVal8, nullptr);
4269   EXPECT_EQ(ExtVal8->getAggregateOperand(), CmpXchg5);
4270   EXPECT_EQ(ExtVal8->getIndices(), ArrayRef<unsigned int>(1U));
4271   BranchInst *Br3 = dyn_cast<BranchInst>(ExtVal8->getNextNode());
4272   EXPECT_NE(Br3, nullptr);
4273   EXPECT_EQ(Br3->isConditional(), true);
4274   EXPECT_EQ(Br3->getCondition(), ExtVal8);
4275   EXPECT_EQ(Br3->getSuccessor(0), Exit2);
4276   EXPECT_EQ(Br3->getSuccessor(1), Cont2);
4277 
4278   StoreInst *Store5 = dyn_cast<StoreInst>(&Cont2->front());
4279   EXPECT_NE(Store5, nullptr);
4280   EXPECT_EQ(Store5->getPointerOperand(), VVal);
4281   EXPECT_EQ(Store5->getValueOperand(), ExtVal7);
4282   BranchInst *Br4 = dyn_cast<BranchInst>(Store5->getNextNode());
4283   EXPECT_NE(Br4, nullptr);
4284   EXPECT_EQ(Br4->isUnconditional(), true);
4285   EXPECT_EQ(Br4->getSuccessor(0), Exit2);
4286 
4287   ExtractValueInst *ExtVal9 = dyn_cast<ExtractValueInst>(&Exit2->front());
4288   EXPECT_NE(ExtVal9, nullptr);
4289   EXPECT_EQ(ExtVal9->getAggregateOperand(), CmpXchg5);
4290   EXPECT_EQ(ExtVal9->getIndices(), ArrayRef<unsigned int>(1U));
4291   ZExtInst *ZExt2 = dyn_cast<ZExtInst>(ExtVal9->getNextNode());
4292   EXPECT_NE(ZExt2, nullptr);
4293   EXPECT_EQ(ZExt2->getDestTy(), Int32);
4294   StoreInst *Store6 = dyn_cast<StoreInst>(ZExt2->getNextNode());
4295   EXPECT_NE(Store6, nullptr);
4296   EXPECT_EQ(Store6->getPointerOperand(), RVal);
4297   EXPECT_EQ(Store6->getValueOperand(), ZExt2);
4298 
4299   AtomicRMWInst *ARWM1 = dyn_cast<AtomicRMWInst>(Store6->getNextNode());
4300   EXPECT_NE(ARWM1, nullptr);
4301   EXPECT_EQ(ARWM1->getPointerOperand(), XVal);
4302   EXPECT_EQ(ARWM1->getValOperand(), Expr);
4303   EXPECT_EQ(ARWM1->getOperation(), AtomicRMWInst::Min);
4304   StoreInst *Store7 = dyn_cast<StoreInst>(ARWM1->getNextNode());
4305   EXPECT_NE(Store7, nullptr);
4306   EXPECT_EQ(Store7->getPointerOperand(), VVal);
4307   EXPECT_EQ(Store7->getValueOperand(), ARWM1);
4308 
4309   AtomicRMWInst *ARWM2 = dyn_cast<AtomicRMWInst>(Store7->getNextNode());
4310   EXPECT_NE(ARWM2, nullptr);
4311   EXPECT_EQ(ARWM2->getPointerOperand(), XVal);
4312   EXPECT_EQ(ARWM2->getValOperand(), Expr);
4313   EXPECT_EQ(ARWM2->getOperation(), AtomicRMWInst::Max);
4314   CmpInst *Cmp1 = dyn_cast<CmpInst>(ARWM2->getNextNode());
4315   EXPECT_NE(Cmp1, nullptr);
4316   EXPECT_EQ(Cmp1->getPredicate(), CmpInst::ICMP_SGT);
4317   EXPECT_EQ(Cmp1->getOperand(0), ARWM2);
4318   EXPECT_EQ(Cmp1->getOperand(1), Expr);
4319   SelectInst *Sel2 = dyn_cast<SelectInst>(Cmp1->getNextNode());
4320   EXPECT_NE(Sel2, nullptr);
4321   EXPECT_EQ(Sel2->getCondition(), Cmp1);
4322   EXPECT_EQ(Sel2->getTrueValue(), Expr);
4323   EXPECT_EQ(Sel2->getFalseValue(), ARWM2);
4324   StoreInst *Store8 = dyn_cast<StoreInst>(Sel2->getNextNode());
4325   EXPECT_NE(Store8, nullptr);
4326   EXPECT_EQ(Store8->getPointerOperand(), VVal);
4327   EXPECT_EQ(Store8->getValueOperand(), Sel2);
4328 
4329   Builder.CreateRetVoid();
4330   OMPBuilder.finalize();
4331   EXPECT_FALSE(verifyModule(*M, &errs()));
4332 }
4333 
4334 TEST_F(OpenMPIRBuilderTest, CreateTeams) {
4335   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4336   OpenMPIRBuilder OMPBuilder(*M);
4337   OMPBuilder.Config.IsTargetDevice = false;
4338   OMPBuilder.initialize();
4339   F->setName("func");
4340   IRBuilder<> Builder(BB);
4341 
4342   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
4343   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
4344   Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
4345 
4346   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4347     Builder.restoreIP(AllocaIP);
4348     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
4349                                                 "bodygen.alloca128");
4350 
4351     Builder.restoreIP(CodeGenIP);
4352     // Loading and storing captured pointer and values
4353     Builder.CreateStore(Val128, Local128);
4354     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
4355                                       "bodygen.load32");
4356 
4357     LoadInst *PrivLoad128 = Builder.CreateLoad(
4358         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
4359     Value *Cmp = Builder.CreateICmpNE(
4360         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
4361     Instruction *ThenTerm, *ElseTerm;
4362     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
4363                                   &ThenTerm, &ElseTerm);
4364   };
4365 
4366   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4367   Builder.restoreIP(OMPBuilder.createTeams(
4368       Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
4369       /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4370 
4371   OMPBuilder.finalize();
4372   Builder.CreateRetVoid();
4373 
4374   EXPECT_FALSE(verifyModule(*M, &errs()));
4375 
4376   CallInst *TeamsForkCall = dyn_cast<CallInst>(
4377       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams)
4378           ->user_back());
4379 
4380   // Verify the Ident argument
4381   GlobalVariable *Ident = cast<GlobalVariable>(TeamsForkCall->getArgOperand(0));
4382   ASSERT_NE(Ident, nullptr);
4383   EXPECT_TRUE(Ident->hasInitializer());
4384   Constant *Initializer = Ident->getInitializer();
4385   GlobalVariable *SrcStrGlob =
4386       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
4387   ASSERT_NE(SrcStrGlob, nullptr);
4388   ConstantDataArray *SrcSrc =
4389       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
4390   ASSERT_NE(SrcSrc, nullptr);
4391 
4392   // Verify the outlined function signature.
4393   Function *OutlinedFn =
4394       dyn_cast<Function>(TeamsForkCall->getArgOperand(2)->stripPointerCasts());
4395   ASSERT_NE(OutlinedFn, nullptr);
4396   EXPECT_FALSE(OutlinedFn->isDeclaration());
4397   EXPECT_TRUE(OutlinedFn->arg_size() >= 3);
4398   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getPtrTy()); // global_tid
4399   EXPECT_EQ(OutlinedFn->getArg(1)->getType(), Builder.getPtrTy()); // bound_tid
4400   EXPECT_EQ(OutlinedFn->getArg(2)->getType(),
4401             Builder.getPtrTy()); // captured args
4402 
4403   // Check for TruncInst and ICmpInst in the outlined function.
4404   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4405                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
4406   EXPECT_TRUE(any_of(instructions(OutlinedFn),
4407                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
4408 }
4409 
4410 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
4411   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4412   OpenMPIRBuilder OMPBuilder(*M);
4413   OMPBuilder.Config.IsTargetDevice = false;
4414   OMPBuilder.initialize();
4415   F->setName("func");
4416   IRBuilder<> &Builder = OMPBuilder.Builder;
4417   Builder.SetInsertPoint(BB);
4418 
4419   Function *FakeFunction =
4420       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4421                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4422 
4423   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4424     Builder.restoreIP(CodeGenIP);
4425     Builder.CreateCall(FakeFunction, {});
4426   };
4427 
4428   // `F` has an argument - an integer, so we use that as the thread limit.
4429   Builder.restoreIP(OMPBuilder.createTeams(/*=*/Builder, BodyGenCB,
4430                                            /*NumTeamsLower=*/nullptr,
4431                                            /*NumTeamsUpper=*/nullptr,
4432                                            /*ThreadLimit=*/F->arg_begin(),
4433                                            /*IfExpr=*/nullptr));
4434 
4435   Builder.CreateRetVoid();
4436   OMPBuilder.finalize();
4437 
4438   ASSERT_FALSE(verifyModule(*M));
4439 
4440   CallInst *PushNumTeamsCallInst =
4441       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4442   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4443 
4444   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), Builder.getInt32(0));
4445   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), Builder.getInt32(0));
4446   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), &*F->arg_begin());
4447 
4448   // Verifying that the next instruction to execute is kmpc_fork_teams
4449   BranchInst *BrInst =
4450       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4451   ASSERT_NE(BrInst, nullptr);
4452   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4453   Instruction *NextInstruction =
4454       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4455   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4456   ASSERT_NE(ForkTeamsCI, nullptr);
4457   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4458             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4459 }
4460 
4461 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
4462   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4463   OpenMPIRBuilder OMPBuilder(*M);
4464   OMPBuilder.Config.IsTargetDevice = false;
4465   OMPBuilder.initialize();
4466   F->setName("func");
4467   IRBuilder<> &Builder = OMPBuilder.Builder;
4468   Builder.SetInsertPoint(BB);
4469 
4470   Function *FakeFunction =
4471       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4472                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4473 
4474   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4475     Builder.restoreIP(CodeGenIP);
4476     Builder.CreateCall(FakeFunction, {});
4477   };
4478 
4479   // `F` already has an integer argument, so we use that as upper bound to
4480   // `num_teams`
4481   Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB,
4482                                            /*NumTeamsLower=*/nullptr,
4483                                            /*NumTeamsUpper=*/F->arg_begin(),
4484                                            /*ThreadLimit=*/nullptr,
4485                                            /*IfExpr=*/nullptr));
4486 
4487   Builder.CreateRetVoid();
4488   OMPBuilder.finalize();
4489 
4490   ASSERT_FALSE(verifyModule(*M));
4491 
4492   CallInst *PushNumTeamsCallInst =
4493       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4494   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4495 
4496   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), &*F->arg_begin());
4497   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), &*F->arg_begin());
4498   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4499 
4500   // Verifying that the next instruction to execute is kmpc_fork_teams
4501   BranchInst *BrInst =
4502       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4503   ASSERT_NE(BrInst, nullptr);
4504   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4505   Instruction *NextInstruction =
4506       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4507   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4508   ASSERT_NE(ForkTeamsCI, nullptr);
4509   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4510             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4511 }
4512 
4513 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
4514   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4515   OpenMPIRBuilder OMPBuilder(*M);
4516   OMPBuilder.Config.IsTargetDevice = false;
4517   OMPBuilder.initialize();
4518   F->setName("func");
4519   IRBuilder<> &Builder = OMPBuilder.Builder;
4520   Builder.SetInsertPoint(BB);
4521 
4522   Function *FakeFunction =
4523       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4524                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4525 
4526   Value *NumTeamsLower =
4527       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4528   Value *NumTeamsUpper =
4529       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4530 
4531   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4532     Builder.restoreIP(CodeGenIP);
4533     Builder.CreateCall(FakeFunction, {});
4534   };
4535 
4536   // `F` already has an integer argument, so we use that as upper bound to
4537   // `num_teams`
4538   Builder.restoreIP(
4539       OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
4540                              /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
4541 
4542   Builder.CreateRetVoid();
4543   OMPBuilder.finalize();
4544 
4545   ASSERT_FALSE(verifyModule(*M));
4546 
4547   CallInst *PushNumTeamsCallInst =
4548       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4549   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4550 
4551   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4552   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4553   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), Builder.getInt32(0));
4554 
4555   // Verifying that the next instruction to execute is kmpc_fork_teams
4556   BranchInst *BrInst =
4557       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4558   ASSERT_NE(BrInst, nullptr);
4559   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4560   Instruction *NextInstruction =
4561       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4562   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4563   ASSERT_NE(ForkTeamsCI, nullptr);
4564   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4565             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4566 }
4567 
4568 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
4569   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4570   OpenMPIRBuilder OMPBuilder(*M);
4571   OMPBuilder.Config.IsTargetDevice = false;
4572   OMPBuilder.initialize();
4573   F->setName("func");
4574   IRBuilder<> &Builder = OMPBuilder.Builder;
4575   Builder.SetInsertPoint(BB);
4576 
4577   BasicBlock *CodegenBB = splitBB(Builder, true);
4578   Builder.SetInsertPoint(CodegenBB);
4579 
4580   // Generate values for `num_teams` and `thread_limit` using the first argument
4581   // of the testing function.
4582   Value *NumTeamsLower =
4583       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5), "numTeamsLower");
4584   Value *NumTeamsUpper =
4585       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
4586   Value *ThreadLimit =
4587       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20), "threadLimit");
4588 
4589   Function *FakeFunction =
4590       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4591                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4592 
4593   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4594     Builder.restoreIP(CodeGenIP);
4595     Builder.CreateCall(FakeFunction, {});
4596   };
4597 
4598   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4599   Builder.restoreIP(OMPBuilder.createTeams(
4600       Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr));
4601 
4602   Builder.CreateRetVoid();
4603   OMPBuilder.finalize();
4604 
4605   ASSERT_FALSE(verifyModule(*M));
4606 
4607   CallInst *PushNumTeamsCallInst =
4608       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4609   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4610 
4611   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(2), NumTeamsLower);
4612   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(3), NumTeamsUpper);
4613   EXPECT_EQ(PushNumTeamsCallInst->getArgOperand(4), ThreadLimit);
4614 
4615   // Verifying that the next instruction to execute is kmpc_fork_teams
4616   BranchInst *BrInst =
4617       dyn_cast<BranchInst>(PushNumTeamsCallInst->getNextNonDebugInstruction());
4618   ASSERT_NE(BrInst, nullptr);
4619   ASSERT_EQ(BrInst->getNumSuccessors(), 1U);
4620   Instruction *NextInstruction =
4621       BrInst->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime();
4622   CallInst *ForkTeamsCI = dyn_cast_if_present<CallInst>(NextInstruction);
4623   ASSERT_NE(ForkTeamsCI, nullptr);
4624   EXPECT_EQ(ForkTeamsCI->getCalledFunction(),
4625             OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_teams));
4626 }
4627 
4628 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
4629   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4630   OpenMPIRBuilder OMPBuilder(*M);
4631   OMPBuilder.Config.IsTargetDevice = false;
4632   OMPBuilder.initialize();
4633   F->setName("func");
4634   IRBuilder<> &Builder = OMPBuilder.Builder;
4635   Builder.SetInsertPoint(BB);
4636 
4637   Value *IfExpr = Builder.CreateLoad(Builder.getInt1Ty(),
4638                                      Builder.CreateAlloca(Builder.getInt1Ty()));
4639 
4640   Function *FakeFunction =
4641       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4642                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4643 
4644   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4645     Builder.restoreIP(CodeGenIP);
4646     Builder.CreateCall(FakeFunction, {});
4647   };
4648 
4649   // `F` already has an integer argument, so we use that as upper bound to
4650   // `num_teams`
4651   Builder.restoreIP(OMPBuilder.createTeams(
4652       Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
4653       /*ThreadLimit=*/nullptr, IfExpr));
4654 
4655   Builder.CreateRetVoid();
4656   OMPBuilder.finalize();
4657 
4658   ASSERT_FALSE(verifyModule(*M));
4659 
4660   CallInst *PushNumTeamsCallInst =
4661       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4662   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4663   Value *NumTeamsLower = PushNumTeamsCallInst->getArgOperand(2);
4664   Value *NumTeamsUpper = PushNumTeamsCallInst->getArgOperand(3);
4665   Value *ThreadLimit = PushNumTeamsCallInst->getArgOperand(4);
4666 
4667   // Check the lower_bound
4668   ASSERT_NE(NumTeamsLower, nullptr);
4669   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLower);
4670   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4671   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExpr);
4672   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), Builder.getInt32(0));
4673   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4674 
4675   // Check the upper_bound
4676   ASSERT_NE(NumTeamsUpper, nullptr);
4677   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpper);
4678   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4679   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExpr);
4680   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), Builder.getInt32(0));
4681   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4682 
4683   // Check thread_limit
4684   EXPECT_EQ(ThreadLimit, Builder.getInt32(0));
4685 }
4686 
4687 TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
4688   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4689   OpenMPIRBuilder OMPBuilder(*M);
4690   OMPBuilder.Config.IsTargetDevice = false;
4691   OMPBuilder.initialize();
4692   F->setName("func");
4693   IRBuilder<> &Builder = OMPBuilder.Builder;
4694   Builder.SetInsertPoint(BB);
4695 
4696   Value *IfExpr = Builder.CreateLoad(
4697       Builder.getInt32Ty(), Builder.CreateAlloca(Builder.getInt32Ty()));
4698   Value *NumTeamsLower = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(5));
4699   Value *NumTeamsUpper =
4700       Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10));
4701   Value *ThreadLimit = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(20));
4702 
4703   Function *FakeFunction =
4704       Function::Create(FunctionType::get(Builder.getVoidTy(), false),
4705                        GlobalValue::ExternalLinkage, "fakeFunction", M.get());
4706 
4707   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4708     Builder.restoreIP(CodeGenIP);
4709     Builder.CreateCall(FakeFunction, {});
4710   };
4711 
4712   // `F` already has an integer argument, so we use that as upper bound to
4713   // `num_teams`
4714   Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
4715                                            NumTeamsUpper, ThreadLimit, IfExpr));
4716 
4717   Builder.CreateRetVoid();
4718   OMPBuilder.finalize();
4719 
4720   ASSERT_FALSE(verifyModule(*M));
4721 
4722   CallInst *PushNumTeamsCallInst =
4723       findSingleCall(F, OMPRTL___kmpc_push_num_teams_51, OMPBuilder);
4724   ASSERT_NE(PushNumTeamsCallInst, nullptr);
4725   Value *NumTeamsLowerArg = PushNumTeamsCallInst->getArgOperand(2);
4726   Value *NumTeamsUpperArg = PushNumTeamsCallInst->getArgOperand(3);
4727   Value *ThreadLimitArg = PushNumTeamsCallInst->getArgOperand(4);
4728 
4729   // Get the boolean conversion of if expression
4730   ASSERT_EQ(IfExpr->getNumUses(), 1U);
4731   User *IfExprInst = IfExpr->user_back();
4732   ICmpInst *IfExprCmpInst = dyn_cast<ICmpInst>(IfExprInst);
4733   ASSERT_NE(IfExprCmpInst, nullptr);
4734   EXPECT_EQ(IfExprCmpInst->getPredicate(), ICmpInst::Predicate::ICMP_NE);
4735   EXPECT_EQ(IfExprCmpInst->getOperand(0), IfExpr);
4736   EXPECT_EQ(IfExprCmpInst->getOperand(1), Builder.getInt32(0));
4737 
4738   // Check the lower_bound
4739   ASSERT_NE(NumTeamsLowerArg, nullptr);
4740   SelectInst *NumTeamsLowerSelectInst = dyn_cast<SelectInst>(NumTeamsLowerArg);
4741   ASSERT_NE(NumTeamsLowerSelectInst, nullptr);
4742   EXPECT_EQ(NumTeamsLowerSelectInst->getCondition(), IfExprCmpInst);
4743   EXPECT_EQ(NumTeamsLowerSelectInst->getTrueValue(), NumTeamsLower);
4744   EXPECT_EQ(NumTeamsLowerSelectInst->getFalseValue(), Builder.getInt32(1));
4745 
4746   // Check the upper_bound
4747   ASSERT_NE(NumTeamsUpperArg, nullptr);
4748   SelectInst *NumTeamsUpperSelectInst = dyn_cast<SelectInst>(NumTeamsUpperArg);
4749   ASSERT_NE(NumTeamsUpperSelectInst, nullptr);
4750   EXPECT_EQ(NumTeamsUpperSelectInst->getCondition(), IfExprCmpInst);
4751   EXPECT_EQ(NumTeamsUpperSelectInst->getTrueValue(), NumTeamsUpper);
4752   EXPECT_EQ(NumTeamsUpperSelectInst->getFalseValue(), Builder.getInt32(1));
4753 
4754   // Check thread_limit
4755   EXPECT_EQ(ThreadLimitArg, ThreadLimit);
4756 }
4757 
4758 /// Returns the single instruction of InstTy type in BB that uses the value V.
4759 /// If there is more than one such instruction, returns null.
4760 template <typename InstTy>
4761 static InstTy *findSingleUserInBlock(Value *V, BasicBlock *BB) {
4762   InstTy *Result = nullptr;
4763   for (User *U : V->users()) {
4764     auto *Inst = dyn_cast<InstTy>(U);
4765     if (!Inst || Inst->getParent() != BB)
4766       continue;
4767     if (Result) {
4768       if (auto *SI = dyn_cast<StoreInst>(Inst)) {
4769         if (V == SI->getValueOperand())
4770           continue;
4771       } else {
4772         return nullptr;
4773       }
4774     }
4775     Result = Inst;
4776   }
4777   return Result;
4778 }
4779 
4780 /// Returns true if BB contains a simple binary reduction that loads a value
4781 /// from Accum, performs some binary operation with it, and stores it back to
4782 /// Accum.
4783 static bool isSimpleBinaryReduction(Value *Accum, BasicBlock *BB,
4784                                     Instruction::BinaryOps *OpCode = nullptr) {
4785   StoreInst *Store = findSingleUserInBlock<StoreInst>(Accum, BB);
4786   if (!Store)
4787     return false;
4788   auto *Stored = dyn_cast<BinaryOperator>(Store->getOperand(0));
4789   if (!Stored)
4790     return false;
4791   if (OpCode && *OpCode != Stored->getOpcode())
4792     return false;
4793   auto *Load = dyn_cast<LoadInst>(Stored->getOperand(0));
4794   return Load && Load->getOperand(0) == Accum;
4795 }
4796 
4797 /// Returns true if BB contains a binary reduction that reduces V using a binary
4798 /// operator into an accumulator that is a function argument.
4799 static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
4800   auto *ReductionOp = findSingleUserInBlock<BinaryOperator>(V, BB);
4801   if (!ReductionOp)
4802     return false;
4803 
4804   auto *GlobalLoad = dyn_cast<LoadInst>(ReductionOp->getOperand(0));
4805   if (!GlobalLoad)
4806     return false;
4807 
4808   auto *Store = findSingleUserInBlock<StoreInst>(ReductionOp, BB);
4809   if (!Store)
4810     return false;
4811 
4812   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
4813          isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
4814 }
4815 
4816 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
4817 /// [0, 1], respectively, and assigns results of these instructions to Zero and
4818 /// One. Returns true on success, false on failure or if such instructions are
4819 /// not unique among the users of Ptr.
4820 static bool findGEPZeroOne(Value *Ptr, Value *&Zero, Value *&One) {
4821   Zero = nullptr;
4822   One = nullptr;
4823   for (User *U : Ptr->users()) {
4824     if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
4825       if (GEP->getNumIndices() != 2)
4826         continue;
4827       auto *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
4828       auto *SecondIdx = dyn_cast<ConstantInt>(GEP->getOperand(2));
4829       EXPECT_NE(FirstIdx, nullptr);
4830       EXPECT_NE(SecondIdx, nullptr);
4831 
4832       EXPECT_TRUE(FirstIdx->isZero());
4833       if (SecondIdx->isZero()) {
4834         if (Zero)
4835           return false;
4836         Zero = GEP;
4837       } else if (SecondIdx->isOne()) {
4838         if (One)
4839           return false;
4840         One = GEP;
4841       } else {
4842         return false;
4843       }
4844     }
4845   }
4846   return Zero != nullptr && One != nullptr;
4847 }
4848 
4849 static OpenMPIRBuilder::InsertPointTy
4850 sumReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
4851              Value *&Result) {
4852   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4853   Result = Builder.CreateFAdd(LHS, RHS, "red.add");
4854   return Builder.saveIP();
4855 }
4856 
4857 static OpenMPIRBuilder::InsertPointTy
4858 sumAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
4859                    Value *RHS) {
4860   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4861   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
4862   Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, std::nullopt,
4863                           AtomicOrdering::Monotonic);
4864   return Builder.saveIP();
4865 }
4866 
4867 static OpenMPIRBuilder::InsertPointTy
4868 xorReduction(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
4869              Value *&Result) {
4870   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4871   Result = Builder.CreateXor(LHS, RHS, "red.xor");
4872   return Builder.saveIP();
4873 }
4874 
4875 static OpenMPIRBuilder::InsertPointTy
4876 xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS,
4877                    Value *RHS) {
4878   IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
4879   Value *Partial = Builder.CreateLoad(Ty, RHS, "red.partial");
4880   Builder.CreateAtomicRMW(AtomicRMWInst::Xor, LHS, Partial, std::nullopt,
4881                           AtomicOrdering::Monotonic);
4882   return Builder.saveIP();
4883 }
4884 
4885 TEST_F(OpenMPIRBuilderTest, CreateReductions) {
4886   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
4887   OpenMPIRBuilder OMPBuilder(*M);
4888   OMPBuilder.Config.IsTargetDevice = false;
4889   OMPBuilder.initialize();
4890   F->setName("func");
4891   IRBuilder<> Builder(BB);
4892 
4893   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
4894   Builder.CreateBr(EnterBB);
4895   Builder.SetInsertPoint(EnterBB);
4896   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
4897 
4898   // Create variables to be reduced.
4899   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
4900                               F->getEntryBlock().getFirstInsertionPt());
4901   Type *SumType = Builder.getFloatTy();
4902   Type *XorType = Builder.getInt32Ty();
4903   Value *SumReduced;
4904   Value *XorReduced;
4905   {
4906     IRBuilderBase::InsertPointGuard Guard(Builder);
4907     Builder.restoreIP(OuterAllocaIP);
4908     SumReduced = Builder.CreateAlloca(SumType);
4909     XorReduced = Builder.CreateAlloca(XorType);
4910   }
4911 
4912   // Store initial values of reductions into global variables.
4913   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
4914   Builder.CreateStore(Builder.getInt32(1), XorReduced);
4915 
4916   // The loop body computes two reductions:
4917   //   sum of (float) thread-id;
4918   //   xor of thread-id;
4919   // and store the result in global variables.
4920   InsertPointTy BodyIP, BodyAllocaIP;
4921   auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
4922     IRBuilderBase::InsertPointGuard Guard(Builder);
4923     Builder.restoreIP(CodeGenIP);
4924 
4925     uint32_t StrSize;
4926     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
4927     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
4928     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
4929     Value *SumLocal =
4930         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
4931     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
4932     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
4933     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
4934     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
4935     Builder.CreateStore(Sum, SumReduced);
4936     Builder.CreateStore(Xor, XorReduced);
4937 
4938     BodyIP = Builder.saveIP();
4939     BodyAllocaIP = InnerAllocaIP;
4940   };
4941 
4942   // Privatization for reduction creates local copies of reduction variables and
4943   // initializes them to reduction-neutral values.
4944   Value *SumPrivatized;
4945   Value *XorPrivatized;
4946   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
4947                     Value &Original, Value &Inner, Value *&ReplVal) {
4948     IRBuilderBase::InsertPointGuard Guard(Builder);
4949     Builder.restoreIP(InnerAllocaIP);
4950     if (&Original == SumReduced) {
4951       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
4952       ReplVal = SumPrivatized;
4953     } else if (&Original == XorReduced) {
4954       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
4955       ReplVal = XorPrivatized;
4956     } else {
4957       ReplVal = &Inner;
4958       return CodeGenIP;
4959     }
4960 
4961     Builder.restoreIP(CodeGenIP);
4962     if (&Original == SumReduced)
4963       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
4964                           SumPrivatized);
4965     else if (&Original == XorReduced)
4966       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
4967 
4968     return Builder.saveIP();
4969   };
4970 
4971   // Do nothing in finalization.
4972   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
4973 
4974   InsertPointTy AfterIP =
4975       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
4976                                 /* IfCondition */ nullptr,
4977                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
4978                                 /* IsCancellable */ false);
4979   Builder.restoreIP(AfterIP);
4980 
4981   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
4982       {SumType, SumReduced, SumPrivatized,
4983        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
4984        /*ReductionGenClang=*/nullptr, sumAtomicReduction},
4985       {XorType, XorReduced, XorPrivatized,
4986        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
4987        /*ReductionGenClang=*/nullptr, xorAtomicReduction}};
4988   OMPBuilder.Config.setIsGPU(false);
4989 
4990   bool ReduceVariableByRef[] = {false, false};
4991 
4992   OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos,
4993                               ReduceVariableByRef);
4994 
4995   Builder.restoreIP(AfterIP);
4996   Builder.CreateRetVoid();
4997 
4998   OMPBuilder.finalize(F);
4999 
5000   // The IR must be valid.
5001   EXPECT_FALSE(verifyModule(*M));
5002 
5003   // Outlining must have happened.
5004   SmallVector<CallInst *> ForkCalls;
5005   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5006             ForkCalls);
5007   ASSERT_EQ(ForkCalls.size(), 1u);
5008   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5009   Function *Outlined = dyn_cast<Function>(CalleeVal);
5010   EXPECT_NE(Outlined, nullptr);
5011 
5012   // Check that the lock variable was created with the expected name.
5013   GlobalVariable *LockVar =
5014       M->getGlobalVariable(".gomp_critical_user_.reduction.var");
5015   EXPECT_NE(LockVar, nullptr);
5016 
5017   // Find the allocation of a local array that will be used to call the runtime
5018   // reduciton function.
5019   BasicBlock &AllocBlock = Outlined->getEntryBlock();
5020   Value *LocalArray = nullptr;
5021   for (Instruction &I : AllocBlock) {
5022     if (AllocaInst *Alloc = dyn_cast<AllocaInst>(&I)) {
5023       if (!Alloc->getAllocatedType()->isArrayTy() ||
5024           !Alloc->getAllocatedType()->getArrayElementType()->isPointerTy())
5025         continue;
5026       LocalArray = Alloc;
5027       break;
5028     }
5029   }
5030   ASSERT_NE(LocalArray, nullptr);
5031 
5032   // Find the call to the runtime reduction function.
5033   BasicBlock *BB = AllocBlock.getUniqueSuccessor();
5034   Value *LocalArrayPtr = nullptr;
5035   Value *ReductionFnVal = nullptr;
5036   Value *SwitchArg = nullptr;
5037   for (Instruction &I : *BB) {
5038     if (CallInst *Call = dyn_cast<CallInst>(&I)) {
5039       if (Call->getCalledFunction() !=
5040           OMPBuilder.getOrCreateRuntimeFunctionPtr(
5041               RuntimeFunction::OMPRTL___kmpc_reduce))
5042         continue;
5043       LocalArrayPtr = Call->getOperand(4);
5044       ReductionFnVal = Call->getOperand(5);
5045       SwitchArg = Call;
5046       break;
5047     }
5048   }
5049 
5050   // Check that the local array is passed to the function.
5051   ASSERT_NE(LocalArrayPtr, nullptr);
5052   EXPECT_EQ(LocalArrayPtr, LocalArray);
5053 
5054   // Find the GEP instructions preceding stores to the local array.
5055   Value *FirstArrayElemPtr = nullptr;
5056   Value *SecondArrayElemPtr = nullptr;
5057   EXPECT_EQ(LocalArray->getNumUses(), 3u);
5058   ASSERT_TRUE(
5059       findGEPZeroOne(LocalArray, FirstArrayElemPtr, SecondArrayElemPtr));
5060 
5061   // Check that the values stored into the local array are privatized reduction
5062   // variables.
5063   auto *FirstPrivatized = dyn_cast_or_null<AllocaInst>(
5064       findStoredValue<GetElementPtrInst>(FirstArrayElemPtr));
5065   auto *SecondPrivatized = dyn_cast_or_null<AllocaInst>(
5066       findStoredValue<GetElementPtrInst>(SecondArrayElemPtr));
5067   ASSERT_NE(FirstPrivatized, nullptr);
5068   ASSERT_NE(SecondPrivatized, nullptr);
5069   ASSERT_TRUE(isa<Instruction>(FirstArrayElemPtr));
5070   EXPECT_TRUE(isSimpleBinaryReduction(
5071       FirstPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5072   EXPECT_TRUE(isSimpleBinaryReduction(
5073       SecondPrivatized, cast<Instruction>(FirstArrayElemPtr)->getParent()));
5074 
5075   // Check that the result of the runtime reduction call is used for further
5076   // dispatch.
5077   ASSERT_EQ(SwitchArg->getNumUses(), 1u);
5078   SwitchInst *Switch = dyn_cast<SwitchInst>(*SwitchArg->user_begin());
5079   ASSERT_NE(Switch, nullptr);
5080   EXPECT_EQ(Switch->getNumSuccessors(), 3u);
5081   BasicBlock *NonAtomicBB = Switch->case_begin()->getCaseSuccessor();
5082   BasicBlock *AtomicBB = std::next(Switch->case_begin())->getCaseSuccessor();
5083 
5084   // Non-atomic block contains reductions to the global reduction variable,
5085   // which is passed into the outlined function as an argument.
5086   Value *FirstLoad =
5087       findSingleUserInBlock<LoadInst>(FirstPrivatized, NonAtomicBB);
5088   Value *SecondLoad =
5089       findSingleUserInBlock<LoadInst>(SecondPrivatized, NonAtomicBB);
5090   EXPECT_TRUE(isValueReducedToFuncArg(FirstLoad, NonAtomicBB));
5091   EXPECT_TRUE(isValueReducedToFuncArg(SecondLoad, NonAtomicBB));
5092 
5093   // Atomic block also constains reductions to the global reduction variable.
5094   FirstLoad = findSingleUserInBlock<LoadInst>(FirstPrivatized, AtomicBB);
5095   SecondLoad = findSingleUserInBlock<LoadInst>(SecondPrivatized, AtomicBB);
5096   auto *FirstAtomic = findSingleUserInBlock<AtomicRMWInst>(FirstLoad, AtomicBB);
5097   auto *SecondAtomic =
5098       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
5099   ASSERT_NE(FirstAtomic, nullptr);
5100   Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
5101   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5102   ASSERT_NE(SecondAtomic, nullptr);
5103   AtomicStorePointer = SecondAtomic->getPointerOperand();
5104   EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
5105 
5106   // Check that the separate reduction function also performs (non-atomic)
5107   // reductions after extracting reduction variables from its arguments.
5108   Function *ReductionFn = cast<Function>(ReductionFnVal);
5109   BasicBlock *FnReductionBB = &ReductionFn->getEntryBlock();
5110   Value *FirstLHSPtr;
5111   Value *SecondLHSPtr;
5112   ASSERT_TRUE(
5113       findGEPZeroOne(ReductionFn->getArg(0), FirstLHSPtr, SecondLHSPtr));
5114   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5115   ASSERT_NE(Opaque, nullptr);
5116   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5117   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5118   ASSERT_NE(Opaque, nullptr);
5119   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB));
5120 
5121   Value *FirstRHS;
5122   Value *SecondRHS;
5123   EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
5124 }
5125 
5126 TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
5127   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5128   OpenMPIRBuilder OMPBuilder(*M);
5129   OMPBuilder.Config.IsTargetDevice = false;
5130   OMPBuilder.initialize();
5131   F->setName("func");
5132   IRBuilder<> Builder(BB);
5133 
5134   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
5135   Builder.CreateBr(EnterBB);
5136   Builder.SetInsertPoint(EnterBB);
5137   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5138 
5139   // Create variables to be reduced.
5140   InsertPointTy OuterAllocaIP(&F->getEntryBlock(),
5141                               F->getEntryBlock().getFirstInsertionPt());
5142   Type *SumType = Builder.getFloatTy();
5143   Type *XorType = Builder.getInt32Ty();
5144   Value *SumReduced;
5145   Value *XorReduced;
5146   {
5147     IRBuilderBase::InsertPointGuard Guard(Builder);
5148     Builder.restoreIP(OuterAllocaIP);
5149     SumReduced = Builder.CreateAlloca(SumType);
5150     XorReduced = Builder.CreateAlloca(XorType);
5151   }
5152 
5153   // Store initial values of reductions into global variables.
5154   Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0), SumReduced);
5155   Builder.CreateStore(Builder.getInt32(1), XorReduced);
5156 
5157   InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
5158   auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5159                             InsertPointTy CodeGenIP) {
5160     IRBuilderBase::InsertPointGuard Guard(Builder);
5161     Builder.restoreIP(CodeGenIP);
5162 
5163     uint32_t StrSize;
5164     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5165     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5166     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5167     Value *SumLocal =
5168         Builder.CreateUIToFP(TID, Builder.getFloatTy(), "sum.local");
5169     Value *SumPartial = Builder.CreateLoad(SumType, SumReduced, "sum.partial");
5170     Value *Sum = Builder.CreateFAdd(SumPartial, SumLocal, "sum");
5171     Builder.CreateStore(Sum, SumReduced);
5172 
5173     FirstBodyIP = Builder.saveIP();
5174     FirstBodyAllocaIP = InnerAllocaIP;
5175   };
5176 
5177   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
5178   auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
5179                              InsertPointTy CodeGenIP) {
5180     IRBuilderBase::InsertPointGuard Guard(Builder);
5181     Builder.restoreIP(CodeGenIP);
5182 
5183     uint32_t StrSize;
5184     Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(Loc, StrSize);
5185     Value *Ident = OMPBuilder.getOrCreateIdent(SrcLocStr, StrSize);
5186     Value *TID = OMPBuilder.getOrCreateThreadID(Ident);
5187     Value *XorPartial = Builder.CreateLoad(XorType, XorReduced, "xor.partial");
5188     Value *Xor = Builder.CreateXor(XorPartial, TID, "xor");
5189     Builder.CreateStore(Xor, XorReduced);
5190 
5191     SecondBodyIP = Builder.saveIP();
5192     SecondBodyAllocaIP = InnerAllocaIP;
5193   };
5194 
5195   // Privatization for reduction creates local copies of reduction variables and
5196   // initializes them to reduction-neutral values. The same privatization
5197   // callback is used for both loops, with dispatch based on the value being
5198   // privatized.
5199   Value *SumPrivatized;
5200   Value *XorPrivatized;
5201   auto PrivCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP,
5202                     Value &Original, Value &Inner, Value *&ReplVal) {
5203     IRBuilderBase::InsertPointGuard Guard(Builder);
5204     Builder.restoreIP(InnerAllocaIP);
5205     if (&Original == SumReduced) {
5206       SumPrivatized = Builder.CreateAlloca(Builder.getFloatTy());
5207       ReplVal = SumPrivatized;
5208     } else if (&Original == XorReduced) {
5209       XorPrivatized = Builder.CreateAlloca(Builder.getInt32Ty());
5210       ReplVal = XorPrivatized;
5211     } else {
5212       ReplVal = &Inner;
5213       return CodeGenIP;
5214     }
5215 
5216     Builder.restoreIP(CodeGenIP);
5217     if (&Original == SumReduced)
5218       Builder.CreateStore(ConstantFP::get(Builder.getFloatTy(), 0.0),
5219                           SumPrivatized);
5220     else if (&Original == XorReduced)
5221       Builder.CreateStore(Builder.getInt32(0), XorPrivatized);
5222 
5223     return Builder.saveIP();
5224   };
5225 
5226   // Do nothing in finalization.
5227   auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
5228 
5229   Builder.restoreIP(
5230       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
5231                                 FiniCB, /* IfCondition */ nullptr,
5232                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5233                                 /* IsCancellable */ false));
5234   InsertPointTy AfterIP = OMPBuilder.createParallel(
5235       {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB,
5236       /* IfCondition */ nullptr,
5237       /* NumThreads */ nullptr, OMP_PROC_BIND_default,
5238       /* IsCancellable */ false);
5239 
5240   OMPBuilder.Config.setIsGPU(false);
5241   bool ReduceVariableByRef[] = {false};
5242 
5243   OMPBuilder.createReductions(
5244       FirstBodyIP, FirstBodyAllocaIP,
5245       {{SumType, SumReduced, SumPrivatized,
5246         /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
5247         /*ReductionGenClang=*/nullptr, sumAtomicReduction}},
5248       ReduceVariableByRef);
5249   OMPBuilder.createReductions(
5250       SecondBodyIP, SecondBodyAllocaIP,
5251       {{XorType, XorReduced, XorPrivatized,
5252         /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
5253         /*ReductionGenClang=*/nullptr, xorAtomicReduction}},
5254       ReduceVariableByRef);
5255 
5256   Builder.restoreIP(AfterIP);
5257   Builder.CreateRetVoid();
5258 
5259   OMPBuilder.finalize(F);
5260 
5261   // The IR must be valid.
5262   EXPECT_FALSE(verifyModule(*M));
5263 
5264   // Two different outlined functions must have been created.
5265   SmallVector<CallInst *> ForkCalls;
5266   findCalls(F, omp::RuntimeFunction::OMPRTL___kmpc_fork_call, OMPBuilder,
5267             ForkCalls);
5268   ASSERT_EQ(ForkCalls.size(), 2u);
5269   Value *CalleeVal = ForkCalls[0]->getOperand(2);
5270   Function *FirstCallee = cast<Function>(CalleeVal);
5271   CalleeVal = ForkCalls[1]->getOperand(2);
5272   Function *SecondCallee = cast<Function>(CalleeVal);
5273   EXPECT_NE(FirstCallee, SecondCallee);
5274 
5275   // Two different reduction functions must have been created.
5276   SmallVector<CallInst *> ReduceCalls;
5277   findCalls(FirstCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce, OMPBuilder,
5278             ReduceCalls);
5279   ASSERT_EQ(ReduceCalls.size(), 1u);
5280   auto *AddReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5281   ReduceCalls.clear();
5282   findCalls(SecondCallee, omp::RuntimeFunction::OMPRTL___kmpc_reduce,
5283             OMPBuilder, ReduceCalls);
5284   auto *XorReduction = cast<Function>(ReduceCalls[0]->getOperand(5));
5285   EXPECT_NE(AddReduction, XorReduction);
5286 
5287   // Each reduction function does its own kind of reduction.
5288   BasicBlock *FnReductionBB = &AddReduction->getEntryBlock();
5289   Value *FirstLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5290       AddReduction->getArg(0), FnReductionBB);
5291   ASSERT_NE(FirstLHSPtr, nullptr);
5292   Value *Opaque = findSingleUserInBlock<LoadInst>(FirstLHSPtr, FnReductionBB);
5293   ASSERT_NE(Opaque, nullptr);
5294   Instruction::BinaryOps Opcode = Instruction::FAdd;
5295   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5296 
5297   FnReductionBB = &XorReduction->getEntryBlock();
5298   Value *SecondLHSPtr = findSingleUserInBlock<GetElementPtrInst>(
5299       XorReduction->getArg(0), FnReductionBB);
5300   ASSERT_NE(FirstLHSPtr, nullptr);
5301   Opaque = findSingleUserInBlock<LoadInst>(SecondLHSPtr, FnReductionBB);
5302   ASSERT_NE(Opaque, nullptr);
5303   Opcode = Instruction::Xor;
5304   EXPECT_TRUE(isSimpleBinaryReduction(Opaque, FnReductionBB, &Opcode));
5305 }
5306 
5307 TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
5308   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5309   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5310   OpenMPIRBuilder OMPBuilder(*M);
5311   OMPBuilder.initialize();
5312   F->setName("func");
5313   IRBuilder<> Builder(BB);
5314 
5315   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5316   Builder.CreateBr(EnterBB);
5317   Builder.SetInsertPoint(EnterBB);
5318   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5319 
5320   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5321   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5322 
5323   auto FiniCB = [&](InsertPointTy IP) {};
5324   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
5325   SectionCBVector.push_back(SectionCB);
5326 
5327   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5328                    llvm::Value &, llvm::Value &Val,
5329                    llvm::Value *&ReplVal) { return CodeGenIP; };
5330   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5331                                     F->getEntryBlock().getFirstInsertionPt());
5332   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5333                                               PrivCB, FiniCB, false, false));
5334   Builder.CreateRetVoid(); // Required at the end of the function
5335   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
5336   EXPECT_FALSE(verifyModule(*M, &errs()));
5337 }
5338 
5339 TEST_F(OpenMPIRBuilderTest, CreateSections) {
5340   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5341   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5342   OpenMPIRBuilder OMPBuilder(*M);
5343   OMPBuilder.initialize();
5344   F->setName("func");
5345   IRBuilder<> Builder(BB);
5346 
5347   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5348   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5349   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
5350 
5351   BasicBlock *SwitchBB = nullptr;
5352   AllocaInst *PrivAI = nullptr;
5353   SwitchInst *Switch = nullptr;
5354 
5355   unsigned NumBodiesGenerated = 0;
5356   unsigned NumFiniCBCalls = 0;
5357   PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
5358 
5359   auto FiniCB = [&](InsertPointTy IP) {
5360     ++NumFiniCBCalls;
5361     BasicBlock *IPBB = IP.getBlock();
5362     EXPECT_NE(IPBB->end(), IP.getPoint());
5363   };
5364 
5365   auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
5366     ++NumBodiesGenerated;
5367     CaseBBs.push_back(CodeGenIP.getBlock());
5368     SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
5369     Builder.restoreIP(CodeGenIP);
5370     Builder.CreateStore(F->arg_begin(), PrivAI);
5371     Value *PrivLoad =
5372         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
5373     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
5374   };
5375   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5376                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
5377     // TODO: Privatization not implemented yet
5378     return CodeGenIP;
5379   };
5380 
5381   SectionCBVector.push_back(SectionCB);
5382   SectionCBVector.push_back(SectionCB);
5383 
5384   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5385                                     F->getEntryBlock().getFirstInsertionPt());
5386   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5387                                               PrivCB, FiniCB, false, false));
5388   Builder.CreateRetVoid(); // Required at the end of the function
5389 
5390   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
5391   // loop's exit BB
5392   BasicBlock *ForExitBB =
5393       SwitchBB->getSinglePredecessor()->getTerminator()->getSuccessor(1);
5394   EXPECT_NE(ForExitBB, nullptr);
5395 
5396   EXPECT_NE(PrivAI, nullptr);
5397   Function *OutlinedFn = PrivAI->getFunction();
5398   EXPECT_EQ(F, OutlinedFn);
5399   EXPECT_FALSE(verifyModule(*M, &errs()));
5400   EXPECT_EQ(OutlinedFn->arg_size(), 1U);
5401 
5402   BasicBlock *LoopPreheaderBB =
5403       OutlinedFn->getEntryBlock().getSingleSuccessor();
5404   // loop variables are 5 - lower bound, upper bound, stride, islastiter, and
5405   // iterator/counter
5406   bool FoundForInit = false;
5407   for (Instruction &Inst : *LoopPreheaderBB) {
5408     if (isa<CallInst>(Inst)) {
5409       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5410           "__kmpc_for_static_init_4u") {
5411         FoundForInit = true;
5412       }
5413     }
5414   }
5415   EXPECT_EQ(FoundForInit, true);
5416 
5417   bool FoundForExit = false;
5418   bool FoundBarrier = false;
5419   for (Instruction &Inst : *ForExitBB) {
5420     if (isa<CallInst>(Inst)) {
5421       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5422           "__kmpc_for_static_fini") {
5423         FoundForExit = true;
5424       }
5425       if (cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5426           "__kmpc_barrier") {
5427         FoundBarrier = true;
5428       }
5429       if (FoundForExit && FoundBarrier)
5430         break;
5431     }
5432   }
5433   EXPECT_EQ(FoundForExit, true);
5434   EXPECT_EQ(FoundBarrier, true);
5435 
5436   EXPECT_NE(SwitchBB, nullptr);
5437   EXPECT_NE(SwitchBB->getTerminator(), nullptr);
5438   EXPECT_EQ(isa<SwitchInst>(SwitchBB->getTerminator()), true);
5439   Switch = cast<SwitchInst>(SwitchBB->getTerminator());
5440   EXPECT_EQ(Switch->getNumCases(), 2U);
5441 
5442   EXPECT_EQ(CaseBBs.size(), 2U);
5443   for (auto *&CaseBB : CaseBBs) {
5444     EXPECT_EQ(CaseBB->getParent(), OutlinedFn);
5445   }
5446 
5447   ASSERT_EQ(NumBodiesGenerated, 2U);
5448   ASSERT_EQ(NumFiniCBCalls, 1U);
5449   EXPECT_FALSE(verifyModule(*M, &errs()));
5450 }
5451 
5452 TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
5453   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5454   using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
5455   OpenMPIRBuilder OMPBuilder(*M);
5456   OMPBuilder.initialize();
5457   F->setName("func");
5458   IRBuilder<> Builder(BB);
5459 
5460   BasicBlock *EnterBB = BasicBlock::Create(Ctx, "sections.enter", F);
5461   Builder.CreateBr(EnterBB);
5462   Builder.SetInsertPoint(EnterBB);
5463   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5464 
5465   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5466                                     F->getEntryBlock().getFirstInsertionPt());
5467   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
5468   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
5469                    llvm::Value &, llvm::Value &Val,
5470                    llvm::Value *&ReplVal) { return CodeGenIP; };
5471   auto FiniCB = [&](InsertPointTy IP) {};
5472 
5473   Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
5474                                               PrivCB, FiniCB, false, true));
5475   Builder.CreateRetVoid(); // Required at the end of the function
5476   for (auto &Inst : instructions(*F)) {
5477     EXPECT_FALSE(isa<CallInst>(Inst) &&
5478                  cast<CallInst>(&Inst)->getCalledFunction()->getName() ==
5479                      "__kmpc_barrier" &&
5480                  "call to function __kmpc_barrier found with nowait");
5481   }
5482 }
5483 
5484 TEST_F(OpenMPIRBuilderTest, CreateOffloadMaptypes) {
5485   OpenMPIRBuilder OMPBuilder(*M);
5486   OMPBuilder.initialize();
5487 
5488   IRBuilder<> Builder(BB);
5489 
5490   SmallVector<uint64_t> Mappings = {0, 1};
5491   GlobalVariable *OffloadMaptypesGlobal =
5492       OMPBuilder.createOffloadMaptypes(Mappings, "offload_maptypes");
5493   EXPECT_FALSE(M->global_empty());
5494   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_maptypes");
5495   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5496   EXPECT_TRUE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5497   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5498   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5499   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5500   EXPECT_TRUE(isa<ConstantDataArray>(Initializer));
5501   ConstantDataArray *MappingInit = dyn_cast<ConstantDataArray>(Initializer);
5502   EXPECT_EQ(MappingInit->getNumElements(), Mappings.size());
5503   EXPECT_TRUE(MappingInit->getType()->getElementType()->isIntegerTy(64));
5504   Constant *CA = ConstantDataArray::get(Builder.getContext(), Mappings);
5505   EXPECT_EQ(MappingInit, CA);
5506 }
5507 
5508 TEST_F(OpenMPIRBuilderTest, CreateOffloadMapnames) {
5509   OpenMPIRBuilder OMPBuilder(*M);
5510   OMPBuilder.initialize();
5511 
5512   IRBuilder<> Builder(BB);
5513 
5514   uint32_t StrSize;
5515   Constant *Cst1 =
5516       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5517   Constant *Cst2 =
5518       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5519   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5520 
5521   GlobalVariable *OffloadMaptypesGlobal =
5522       OMPBuilder.createOffloadMapnames(Names, "offload_mapnames");
5523   EXPECT_FALSE(M->global_empty());
5524   EXPECT_EQ(OffloadMaptypesGlobal->getName(), "offload_mapnames");
5525   EXPECT_TRUE(OffloadMaptypesGlobal->isConstant());
5526   EXPECT_FALSE(OffloadMaptypesGlobal->hasGlobalUnnamedAddr());
5527   EXPECT_TRUE(OffloadMaptypesGlobal->hasPrivateLinkage());
5528   EXPECT_TRUE(OffloadMaptypesGlobal->hasInitializer());
5529   Constant *Initializer = OffloadMaptypesGlobal->getInitializer();
5530   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(0)->stripPointerCasts()));
5531   EXPECT_TRUE(isa<Constant>(Initializer->getOperand(1)->stripPointerCasts()));
5532 
5533   GlobalVariable *Name1Gbl =
5534       cast<GlobalVariable>(Initializer->getOperand(0)->stripPointerCasts());
5535   EXPECT_TRUE(isa<ConstantDataArray>(Name1Gbl->getInitializer()));
5536   ConstantDataArray *Name1GblCA =
5537       dyn_cast<ConstantDataArray>(Name1Gbl->getInitializer());
5538   EXPECT_EQ(Name1GblCA->getAsCString(), ";file1;array1;2;5;;");
5539 
5540   GlobalVariable *Name2Gbl =
5541       cast<GlobalVariable>(Initializer->getOperand(1)->stripPointerCasts());
5542   EXPECT_TRUE(isa<ConstantDataArray>(Name2Gbl->getInitializer()));
5543   ConstantDataArray *Name2GblCA =
5544       dyn_cast<ConstantDataArray>(Name2Gbl->getInitializer());
5545   EXPECT_EQ(Name2GblCA->getAsCString(), ";file1;array2;3;5;;");
5546 
5547   EXPECT_TRUE(Initializer->getType()->getArrayElementType()->isPointerTy());
5548   EXPECT_EQ(Initializer->getType()->getArrayNumElements(), Names.size());
5549 }
5550 
5551 TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
5552   OpenMPIRBuilder OMPBuilder(*M);
5553   OMPBuilder.initialize();
5554   F->setName("func");
5555   IRBuilder<> Builder(BB);
5556 
5557   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5558 
5559   unsigned TotalNbOperand = 2;
5560 
5561   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5562   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5563                                     F->getEntryBlock().getFirstInsertionPt());
5564   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5565   EXPECT_NE(MapperAllocas.ArgsBase, nullptr);
5566   EXPECT_NE(MapperAllocas.Args, nullptr);
5567   EXPECT_NE(MapperAllocas.ArgSizes, nullptr);
5568   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()->isArrayTy());
5569   ArrayType *ArrType =
5570       dyn_cast<ArrayType>(MapperAllocas.ArgsBase->getAllocatedType());
5571   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5572   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
5573                   ->getArrayElementType()
5574                   ->isPointerTy());
5575 
5576   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
5577   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
5578   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5579   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
5580                   ->getArrayElementType()
5581                   ->isPointerTy());
5582 
5583   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
5584   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());
5585   EXPECT_EQ(ArrType->getNumElements(), TotalNbOperand);
5586   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()
5587                   ->getArrayElementType()
5588                   ->isIntegerTy(64));
5589 }
5590 
5591 TEST_F(OpenMPIRBuilderTest, EmitMapperCall) {
5592   OpenMPIRBuilder OMPBuilder(*M);
5593   OMPBuilder.initialize();
5594   F->setName("func");
5595   IRBuilder<> Builder(BB);
5596   LLVMContext &Ctx = M->getContext();
5597 
5598   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5599 
5600   unsigned TotalNbOperand = 2;
5601 
5602   OpenMPIRBuilder::MapperAllocas MapperAllocas;
5603   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5604                                     F->getEntryBlock().getFirstInsertionPt());
5605   OMPBuilder.createMapperAllocas(Loc, AllocaIP, TotalNbOperand, MapperAllocas);
5606 
5607   auto *BeginMapperFunc = OMPBuilder.getOrCreateRuntimeFunctionPtr(
5608       omp::OMPRTL___tgt_target_data_begin_mapper);
5609 
5610   SmallVector<uint64_t> Flags = {0, 2};
5611 
5612   uint32_t StrSize;
5613   Constant *SrcLocCst =
5614       OMPBuilder.getOrCreateSrcLocStr("", "file1", 2, 5, StrSize);
5615   Value *SrcLocInfo = OMPBuilder.getOrCreateIdent(SrcLocCst, StrSize);
5616 
5617   Constant *Cst1 =
5618       OMPBuilder.getOrCreateSrcLocStr("array1", "file1", 2, 5, StrSize);
5619   Constant *Cst2 =
5620       OMPBuilder.getOrCreateSrcLocStr("array2", "file1", 3, 5, StrSize);
5621   SmallVector<llvm::Constant *> Names = {Cst1, Cst2};
5622 
5623   GlobalVariable *Maptypes =
5624       OMPBuilder.createOffloadMaptypes(Flags, ".offload_maptypes");
5625   Value *MaptypesArg = Builder.CreateConstInBoundsGEP2_32(
5626       ArrayType::get(Type::getInt64Ty(Ctx), TotalNbOperand), Maptypes,
5627       /*Idx0=*/0, /*Idx1=*/0);
5628 
5629   GlobalVariable *Mapnames =
5630       OMPBuilder.createOffloadMapnames(Names, ".offload_mapnames");
5631   Value *MapnamesArg = Builder.CreateConstInBoundsGEP2_32(
5632       ArrayType::get(PointerType::getUnqual(Ctx), TotalNbOperand), Mapnames,
5633       /*Idx0=*/0, /*Idx1=*/0);
5634 
5635   OMPBuilder.emitMapperCall(Builder.saveIP(), BeginMapperFunc, SrcLocInfo,
5636                             MaptypesArg, MapnamesArg, MapperAllocas, -1,
5637                             TotalNbOperand);
5638 
5639   CallInst *MapperCall = dyn_cast<CallInst>(&BB->back());
5640   EXPECT_NE(MapperCall, nullptr);
5641   EXPECT_EQ(MapperCall->arg_size(), 9U);
5642   EXPECT_EQ(MapperCall->getCalledFunction()->getName(),
5643             "__tgt_target_data_begin_mapper");
5644   EXPECT_EQ(MapperCall->getOperand(0), SrcLocInfo);
5645   EXPECT_TRUE(MapperCall->getOperand(1)->getType()->isIntegerTy(64));
5646   EXPECT_TRUE(MapperCall->getOperand(2)->getType()->isIntegerTy(32));
5647 
5648   EXPECT_EQ(MapperCall->getOperand(6), MaptypesArg);
5649   EXPECT_EQ(MapperCall->getOperand(7), MapnamesArg);
5650   EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy());
5651 }
5652 
5653 TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
5654   OpenMPIRBuilder OMPBuilder(*M);
5655   OMPBuilder.initialize();
5656   F->setName("func");
5657   IRBuilder<> Builder(BB);
5658   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5659 
5660   int64_t DeviceID = 2;
5661 
5662   AllocaInst *Val1 =
5663       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5664   ASSERT_NE(Val1, nullptr);
5665 
5666   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5667                                     F->getEntryBlock().getFirstInsertionPt());
5668 
5669   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5670   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5671   auto GenMapInfoCB =
5672       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5673     // Get map clause information.
5674     Builder.restoreIP(codeGenIP);
5675 
5676     CombinedInfo.BasePointers.emplace_back(Val1);
5677     CombinedInfo.Pointers.emplace_back(Val1);
5678     CombinedInfo.DevicePointers.emplace_back(
5679         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5680     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5681     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(1));
5682     uint32_t temp;
5683     CombinedInfo.Names.emplace_back(
5684         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5685     return CombinedInfo;
5686   };
5687 
5688   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5689       /*RequiresDevicePointerInfo=*/false,
5690       /*SeparateBeginEndCalls=*/true);
5691 
5692   OMPBuilder.Config.setIsGPU(true);
5693 
5694   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper;
5695   Builder.restoreIP(OMPBuilder.createTargetData(
5696       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5697       /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5698 
5699   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5700   EXPECT_NE(TargetDataCall, nullptr);
5701   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5702   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5703             "__tgt_target_data_begin_mapper");
5704   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5705   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5706   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5707 
5708   Builder.CreateRetVoid();
5709   EXPECT_FALSE(verifyModule(*M, &errs()));
5710 }
5711 
5712 TEST_F(OpenMPIRBuilderTest, TargetExitData) {
5713   OpenMPIRBuilder OMPBuilder(*M);
5714   OMPBuilder.initialize();
5715   F->setName("func");
5716   IRBuilder<> Builder(BB);
5717   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5718 
5719   int64_t DeviceID = 2;
5720 
5721   AllocaInst *Val1 =
5722       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5723   ASSERT_NE(Val1, nullptr);
5724 
5725   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5726                                     F->getEntryBlock().getFirstInsertionPt());
5727 
5728   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5729   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5730   auto GenMapInfoCB =
5731       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5732     // Get map clause information.
5733     Builder.restoreIP(codeGenIP);
5734 
5735     CombinedInfo.BasePointers.emplace_back(Val1);
5736     CombinedInfo.Pointers.emplace_back(Val1);
5737     CombinedInfo.DevicePointers.emplace_back(
5738         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
5739     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5740     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(2));
5741     uint32_t temp;
5742     CombinedInfo.Names.emplace_back(
5743         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5744     return CombinedInfo;
5745   };
5746 
5747   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5748       /*RequiresDevicePointerInfo=*/false,
5749       /*SeparateBeginEndCalls=*/true);
5750 
5751   OMPBuilder.Config.setIsGPU(true);
5752 
5753   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper;
5754   Builder.restoreIP(OMPBuilder.createTargetData(
5755       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5756       /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
5757 
5758   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5759   EXPECT_NE(TargetDataCall, nullptr);
5760   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5761   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5762             "__tgt_target_data_end_mapper");
5763   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5764   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5765   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5766 
5767   Builder.CreateRetVoid();
5768   EXPECT_FALSE(verifyModule(*M, &errs()));
5769 }
5770 
5771 TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
5772   OpenMPIRBuilder OMPBuilder(*M);
5773   OMPBuilder.initialize();
5774   F->setName("func");
5775   IRBuilder<> Builder(BB);
5776   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
5777 
5778   int64_t DeviceID = 2;
5779 
5780   AllocaInst *Val1 =
5781       Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1));
5782   ASSERT_NE(Val1, nullptr);
5783 
5784   AllocaInst *Val2 = Builder.CreateAlloca(Builder.getPtrTy());
5785   ASSERT_NE(Val2, nullptr);
5786 
5787   AllocaInst *Val3 = Builder.CreateAlloca(Builder.getPtrTy());
5788   ASSERT_NE(Val3, nullptr);
5789 
5790   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
5791                                     F->getEntryBlock().getFirstInsertionPt());
5792 
5793   using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
5794   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfo;
5795   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5796   auto GenMapInfoCB =
5797       [&](InsertPointTy codeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
5798     // Get map clause information.
5799     Builder.restoreIP(codeGenIP);
5800     uint32_t temp;
5801 
5802     CombinedInfo.BasePointers.emplace_back(Val1);
5803     CombinedInfo.Pointers.emplace_back(Val1);
5804     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::None);
5805     CombinedInfo.Sizes.emplace_back(Builder.getInt64(4));
5806     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(3));
5807     CombinedInfo.Names.emplace_back(
5808         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5809 
5810     CombinedInfo.BasePointers.emplace_back(Val2);
5811     CombinedInfo.Pointers.emplace_back(Val2);
5812     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
5813     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
5814     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
5815     CombinedInfo.Names.emplace_back(
5816         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5817 
5818     CombinedInfo.BasePointers.emplace_back(Val3);
5819     CombinedInfo.Pointers.emplace_back(Val3);
5820     CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Address);
5821     CombinedInfo.Sizes.emplace_back(Builder.getInt64(8));
5822     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(67));
5823     CombinedInfo.Names.emplace_back(
5824         OMPBuilder.getOrCreateSrcLocStr("unknown", temp));
5825     return CombinedInfo;
5826   };
5827 
5828   llvm::OpenMPIRBuilder::TargetDataInfo Info(
5829       /*RequiresDevicePointerInfo=*/true,
5830       /*SeparateBeginEndCalls=*/true);
5831 
5832   OMPBuilder.Config.setIsGPU(true);
5833 
5834   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
5835   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
5836     if (BodyGenType == BodyGenTy::Priv) {
5837       EXPECT_EQ(Info.DevicePtrInfoMap.size(), 2u);
5838       Builder.restoreIP(CodeGenIP);
5839       CallInst *TargetDataCall =
5840           dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
5841       EXPECT_NE(TargetDataCall, nullptr);
5842       EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5843       EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5844                 "__tgt_target_data_begin_mapper");
5845       EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5846       EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5847       EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5848 
5849       LoadInst *LI = dyn_cast<LoadInst>(BB->back().getPrevNode());
5850       EXPECT_NE(LI, nullptr);
5851       StoreInst *SI = dyn_cast<StoreInst>(&BB->back());
5852       EXPECT_NE(SI, nullptr);
5853       EXPECT_EQ(SI->getValueOperand(), LI);
5854       EXPECT_EQ(SI->getPointerOperand(), Info.DevicePtrInfoMap[Val2].second);
5855       EXPECT_TRUE(isa<AllocaInst>(Info.DevicePtrInfoMap[Val2].second));
5856       EXPECT_TRUE(isa<GetElementPtrInst>(Info.DevicePtrInfoMap[Val3].second));
5857       Builder.CreateStore(Builder.getInt32(99), Val1);
5858     }
5859     return Builder.saveIP();
5860   };
5861 
5862   Builder.restoreIP(OMPBuilder.createTargetData(
5863       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5864       /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB));
5865 
5866   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
5867   EXPECT_NE(TargetDataCall, nullptr);
5868   EXPECT_EQ(TargetDataCall->arg_size(), 9U);
5869   EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(),
5870             "__tgt_target_data_end_mapper");
5871   EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64));
5872   EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32));
5873   EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy());
5874 
5875   // Check that BodyGenCB is still made when IsTargetDevice is set to true.
5876   OMPBuilder.Config.setIsTargetDevice(true);
5877   bool CheckDevicePassBodyGen = false;
5878   auto BodyTargetCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
5879     CheckDevicePassBodyGen = true;
5880     Builder.restoreIP(CodeGenIP);
5881     CallInst *TargetDataCall =
5882         dyn_cast<CallInst>(BB->back().getPrevNode()->getPrevNode());
5883     // Make sure no begin_mapper call is present for device pass.
5884     EXPECT_EQ(TargetDataCall, nullptr);
5885     return Builder.saveIP();
5886   };
5887   Builder.restoreIP(OMPBuilder.createTargetData(
5888       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
5889       /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB));
5890   EXPECT_TRUE(CheckDevicePassBodyGen);
5891 
5892   Builder.CreateRetVoid();
5893   EXPECT_FALSE(verifyModule(*M, &errs()));
5894 }
5895 
5896 namespace {
5897 // Some basic handling of argument mapping for the moment
5898 void CreateDefaultMapInfos(llvm::OpenMPIRBuilder &OmpBuilder,
5899                            llvm::SmallVectorImpl<llvm::Value *> &Args,
5900                            llvm::OpenMPIRBuilder::MapInfosTy &CombinedInfo) {
5901   for (auto Arg : Args) {
5902     CombinedInfo.BasePointers.emplace_back(Arg);
5903     CombinedInfo.Pointers.emplace_back(Arg);
5904     uint32_t SrcLocStrSize;
5905     CombinedInfo.Names.emplace_back(OmpBuilder.getOrCreateSrcLocStr(
5906         "Unknown loc - stub implementation", SrcLocStrSize));
5907     CombinedInfo.Types.emplace_back(llvm::omp::OpenMPOffloadMappingFlags(
5908         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
5909         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM |
5910         llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM));
5911     CombinedInfo.Sizes.emplace_back(OmpBuilder.Builder.getInt64(
5912         OmpBuilder.M.getDataLayout().getTypeAllocSize(Arg->getType())));
5913   }
5914 }
5915 } // namespace
5916 
5917 TEST_F(OpenMPIRBuilderTest, TargetRegion) {
5918   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
5919   OpenMPIRBuilder OMPBuilder(*M);
5920   OMPBuilder.initialize();
5921   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
5922   OMPBuilder.setConfig(Config);
5923   F->setName("func");
5924   IRBuilder<> Builder(BB);
5925   auto Int32Ty = Builder.getInt32Ty();
5926 
5927   AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr");
5928   AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr");
5929   AllocaInst *CPtr = Builder.CreateAlloca(Int32Ty, nullptr, "c_ptr");
5930 
5931   Builder.CreateStore(Builder.getInt32(10), APtr);
5932   Builder.CreateStore(Builder.getInt32(20), BPtr);
5933   auto BodyGenCB = [&](InsertPointTy AllocaIP,
5934                        InsertPointTy CodeGenIP) -> InsertPointTy {
5935     Builder.restoreIP(CodeGenIP);
5936     LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr);
5937     LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr);
5938     Value *Sum = Builder.CreateAdd(AVal, BVal);
5939     Builder.CreateStore(Sum, CPtr);
5940     return Builder.saveIP();
5941   };
5942 
5943   llvm::SmallVector<llvm::Value *> Inputs;
5944   Inputs.push_back(APtr);
5945   Inputs.push_back(BPtr);
5946   Inputs.push_back(CPtr);
5947 
5948   auto SimpleArgAccessorCB =
5949       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
5950           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5951           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
5952         if (!OMPBuilder.Config.isTargetDevice()) {
5953           RetVal = cast<llvm::Value>(&Arg);
5954           return CodeGenIP;
5955         }
5956 
5957         Builder.restoreIP(AllocaIP);
5958 
5959         llvm::Value *Addr = Builder.CreateAlloca(
5960             Arg.getType()->isPointerTy()
5961                 ? Arg.getType()
5962                 : Type::getInt64Ty(Builder.getContext()),
5963             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
5964         llvm::Value *AddrAscast =
5965             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
5966         Builder.CreateStore(&Arg, AddrAscast);
5967 
5968         Builder.restoreIP(CodeGenIP);
5969 
5970         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
5971 
5972         return Builder.saveIP();
5973       };
5974 
5975   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
5976   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
5977       -> llvm::OpenMPIRBuilder::MapInfosTy & {
5978     CreateDefaultMapInfos(OMPBuilder, Inputs, CombinedInfos);
5979     return CombinedInfos;
5980   };
5981 
5982   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
5983   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
5984   Builder.restoreIP(OMPBuilder.createTarget(
5985       OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(),
5986       EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
5987   OMPBuilder.finalize();
5988   Builder.CreateRetVoid();
5989 
5990   // Check the kernel launch sequence
5991   auto Iter = F->getEntryBlock().rbegin();
5992   EXPECT_TRUE(isa<BranchInst>(&*(Iter)));
5993   BranchInst *Branch = dyn_cast<BranchInst>(&*(Iter));
5994   EXPECT_TRUE(isa<CmpInst>(&*(++Iter)));
5995   EXPECT_TRUE(isa<CallInst>(&*(++Iter)));
5996   CallInst *Call = dyn_cast<CallInst>(&*(Iter));
5997 
5998   // Check that the kernel launch function is called
5999   Function *KernelLaunchFunc = Call->getCalledFunction();
6000   EXPECT_NE(KernelLaunchFunc, nullptr);
6001   StringRef FunctionName = KernelLaunchFunc->getName();
6002   EXPECT_TRUE(FunctionName.starts_with("__tgt_target_kernel"));
6003 
6004   // Check the fallback call
6005   BasicBlock *FallbackBlock = Branch->getSuccessor(0);
6006   Iter = FallbackBlock->rbegin();
6007   CallInst *FCall = dyn_cast<CallInst>(&*(++Iter));
6008   // 'F' has a dummy DISubprogram which causes OutlinedFunc to also
6009   // have a DISubprogram. In this case, the call to OutlinedFunc needs
6010   // to have a debug loc, otherwise verifier will complain.
6011   FCall->setDebugLoc(DL);
6012   EXPECT_NE(FCall, nullptr);
6013 
6014   // Check that the correct aguments are passed in
6015   for (auto ArgInput : zip(FCall->args(), Inputs)) {
6016     EXPECT_EQ(std::get<0>(ArgInput), std::get<1>(ArgInput));
6017   }
6018 
6019   // Check that the outlined function exists with the expected prefix
6020   Function *OutlinedFunc = FCall->getCalledFunction();
6021   EXPECT_NE(OutlinedFunc, nullptr);
6022   StringRef FunctionName2 = OutlinedFunc->getName();
6023   EXPECT_TRUE(FunctionName2.starts_with("__omp_offloading"));
6024 
6025   EXPECT_FALSE(verifyModule(*M, &errs()));
6026 }
6027 
6028 TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
6029   OpenMPIRBuilder OMPBuilder(*M);
6030   OMPBuilder.setConfig(
6031       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6032   OMPBuilder.initialize();
6033 
6034   F->setName("func");
6035   IRBuilder<> Builder(BB);
6036   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6037 
6038   LoadInst *Value = nullptr;
6039   StoreInst *TargetStore = nullptr;
6040   llvm::SmallVector<llvm::Value *, 2> CapturedArgs = {
6041       Constant::getNullValue(PointerType::get(Ctx, 0)),
6042       Constant::getNullValue(PointerType::get(Ctx, 0))};
6043 
6044   auto SimpleArgAccessorCB =
6045       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6046           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6047           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6048         if (!OMPBuilder.Config.isTargetDevice()) {
6049           RetVal = cast<llvm::Value>(&Arg);
6050           return CodeGenIP;
6051         }
6052 
6053         Builder.restoreIP(AllocaIP);
6054 
6055         llvm::Value *Addr = Builder.CreateAlloca(
6056             Arg.getType()->isPointerTy()
6057                 ? Arg.getType()
6058                 : Type::getInt64Ty(Builder.getContext()),
6059             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6060         llvm::Value *AddrAscast =
6061             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6062         Builder.CreateStore(&Arg, AddrAscast);
6063 
6064         Builder.restoreIP(CodeGenIP);
6065 
6066         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6067 
6068         return Builder.saveIP();
6069       };
6070 
6071   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6072   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6073       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6074     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6075     return CombinedInfos;
6076   };
6077 
6078   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6079                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6080       -> OpenMPIRBuilder::InsertPointTy {
6081     Builder.restoreIP(CodeGenIP);
6082     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6083     TargetStore = Builder.CreateStore(Value, CapturedArgs[1]);
6084     return Builder.saveIP();
6085   };
6086 
6087   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6088                                    F->getEntryBlock().getFirstInsertionPt());
6089   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6090                                   /*Line=*/3, /*Count=*/0);
6091 
6092   Builder.restoreIP(
6093       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6094                               EntryInfo, /*NumTeams=*/-1,
6095                               /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
6096                               BodyGenCB, SimpleArgAccessorCB));
6097 
6098   Builder.CreateRetVoid();
6099   OMPBuilder.finalize();
6100 
6101   // Check outlined function
6102   EXPECT_FALSE(verifyModule(*M, &errs()));
6103   EXPECT_NE(TargetStore, nullptr);
6104   Function *OutlinedFn = TargetStore->getFunction();
6105   EXPECT_NE(F, OutlinedFn);
6106 
6107   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6108   // Account for the "implicit" first argument.
6109   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6110   EXPECT_EQ(OutlinedFn->arg_size(), 3U);
6111   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6112   EXPECT_TRUE(OutlinedFn->getArg(2)->getType()->isPointerTy());
6113 
6114   // Check entry block
6115   auto &EntryBlock = OutlinedFn->getEntryBlock();
6116   Instruction *Alloca1 = EntryBlock.getFirstNonPHI();
6117   EXPECT_NE(Alloca1, nullptr);
6118 
6119   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6120   auto *Store1 = Alloca1->getNextNode();
6121   EXPECT_TRUE(isa<StoreInst>(Store1));
6122   auto *Alloca2 = Store1->getNextNode();
6123   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6124   auto *Store2 = Alloca2->getNextNode();
6125   EXPECT_TRUE(isa<StoreInst>(Store2));
6126 
6127   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6128   EXPECT_NE(InitCall, nullptr);
6129   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6130   EXPECT_EQ(InitCall->arg_size(), 2U);
6131   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6132   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6133   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6134   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6135   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6136   auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6137   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6138             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6139   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6140             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6141   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6142             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6143 
6144   auto *EntryBlockBranch = EntryBlock.getTerminator();
6145   EXPECT_NE(EntryBlockBranch, nullptr);
6146   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6147 
6148   // Check user code block
6149   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6150   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6151   auto *Load1 = UserCodeBlock->getFirstNonPHI();
6152   EXPECT_TRUE(isa<LoadInst>(Load1));
6153   auto *Load2 = Load1->getNextNode();
6154   EXPECT_TRUE(isa<LoadInst>(Load2));
6155 
6156   auto *Value1 = Load2->getNextNode();
6157   EXPECT_EQ(Value1, Value);
6158   EXPECT_EQ(Value1->getNextNode(), TargetStore);
6159   auto *Deinit = TargetStore->getNextNode();
6160   EXPECT_NE(Deinit, nullptr);
6161 
6162   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6163   EXPECT_NE(DeinitCall, nullptr);
6164   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6165   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6166 
6167   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6168 
6169   // Check exit block
6170   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6171   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6172   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI()));
6173 }
6174 
6175 TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
6176   OpenMPIRBuilder OMPBuilder(*M);
6177   OMPBuilder.setConfig(
6178       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6179   OMPBuilder.initialize();
6180 
6181   F->setName("func");
6182   IRBuilder<> Builder(BB);
6183   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
6184 
6185   LoadInst *Value = nullptr;
6186   StoreInst *TargetStore = nullptr;
6187   llvm::SmallVector<llvm::Value *, 1> CapturedArgs = {
6188       Constant::getNullValue(PointerType::get(Ctx, 0))};
6189 
6190   auto SimpleArgAccessorCB =
6191       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
6192           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6193           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
6194         if (!OMPBuilder.Config.isTargetDevice()) {
6195           RetVal = cast<llvm::Value>(&Arg);
6196           return CodeGenIP;
6197         }
6198 
6199         Builder.restoreIP(AllocaIP);
6200 
6201         llvm::Value *Addr = Builder.CreateAlloca(
6202             Arg.getType()->isPointerTy()
6203                 ? Arg.getType()
6204                 : Type::getInt64Ty(Builder.getContext()),
6205             OMPBuilder.M.getDataLayout().getAllocaAddrSpace());
6206         llvm::Value *AddrAscast =
6207             Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Input->getType());
6208         Builder.CreateStore(&Arg, AddrAscast);
6209 
6210         Builder.restoreIP(CodeGenIP);
6211 
6212         RetVal = Builder.CreateLoad(Arg.getType(), AddrAscast);
6213 
6214         return Builder.saveIP();
6215       };
6216 
6217   llvm::OpenMPIRBuilder::MapInfosTy CombinedInfos;
6218   auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy codeGenIP)
6219       -> llvm::OpenMPIRBuilder::MapInfosTy & {
6220     CreateDefaultMapInfos(OMPBuilder, CapturedArgs, CombinedInfos);
6221     return CombinedInfos;
6222   };
6223 
6224   llvm::Value *RaiseAlloca = nullptr;
6225 
6226   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
6227                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
6228       -> OpenMPIRBuilder::InsertPointTy {
6229     Builder.restoreIP(CodeGenIP);
6230     RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty());
6231     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
6232     TargetStore = Builder.CreateStore(Value, RaiseAlloca);
6233     return Builder.saveIP();
6234   };
6235 
6236   IRBuilder<>::InsertPoint EntryIP(&F->getEntryBlock(),
6237                                    F->getEntryBlock().getFirstInsertionPt());
6238   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
6239                                   /*Line=*/3, /*Count=*/0);
6240 
6241   Builder.restoreIP(
6242       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
6243                               EntryInfo, /*NumTeams=*/-1,
6244                               /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
6245                               BodyGenCB, SimpleArgAccessorCB));
6246 
6247   Builder.CreateRetVoid();
6248   OMPBuilder.finalize();
6249 
6250   // Check outlined function
6251   EXPECT_FALSE(verifyModule(*M, &errs()));
6252   EXPECT_NE(TargetStore, nullptr);
6253   Function *OutlinedFn = TargetStore->getFunction();
6254   EXPECT_NE(F, OutlinedFn);
6255 
6256   EXPECT_TRUE(OutlinedFn->hasWeakODRLinkage());
6257   // Account for the "implicit" first argument.
6258   EXPECT_EQ(OutlinedFn->getName(), "__omp_offloading_1_2_parent_l3");
6259   EXPECT_EQ(OutlinedFn->arg_size(), 2U);
6260   EXPECT_TRUE(OutlinedFn->getArg(1)->getType()->isPointerTy());
6261 
6262   // Check entry block, to see if we have raised our alloca
6263   // from the body to the entry block.
6264   auto &EntryBlock = OutlinedFn->getEntryBlock();
6265 
6266   // Check that we have moved our alloca created in the
6267   // BodyGenCB function, to the top of the function.
6268   Instruction *Alloca1 = EntryBlock.getFirstNonPHI();
6269   EXPECT_NE(Alloca1, nullptr);
6270   EXPECT_TRUE(isa<AllocaInst>(Alloca1));
6271   EXPECT_EQ(Alloca1, RaiseAlloca);
6272 
6273   // Verify we have not altered the rest of the function
6274   // inappropriately with our alloca movement.
6275   auto *Alloca2 = Alloca1->getNextNode();
6276   EXPECT_TRUE(isa<AllocaInst>(Alloca2));
6277   auto *Store2 = Alloca2->getNextNode();
6278   EXPECT_TRUE(isa<StoreInst>(Store2));
6279 
6280   auto *InitCall = dyn_cast<CallInst>(Store2->getNextNode());
6281   EXPECT_NE(InitCall, nullptr);
6282   EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
6283   EXPECT_EQ(InitCall->arg_size(), 2U);
6284   EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
6285   auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
6286   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
6287   auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
6288   EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
6289   auto *ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
6290   EXPECT_EQ(ConfigC->getAggregateElement(0U),
6291             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6292   EXPECT_EQ(ConfigC->getAggregateElement(1U),
6293             ConstantInt::get(Type::getInt8Ty(Ctx), true));
6294   EXPECT_EQ(ConfigC->getAggregateElement(2U),
6295             ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
6296 
6297   auto *EntryBlockBranch = EntryBlock.getTerminator();
6298   EXPECT_NE(EntryBlockBranch, nullptr);
6299   EXPECT_EQ(EntryBlockBranch->getNumSuccessors(), 2U);
6300 
6301   // Check user code block
6302   auto *UserCodeBlock = EntryBlockBranch->getSuccessor(0);
6303   EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
6304   auto *Load1 = UserCodeBlock->getFirstNonPHI();
6305   EXPECT_TRUE(isa<LoadInst>(Load1));
6306   auto *Load2 = Load1->getNextNode();
6307   EXPECT_TRUE(isa<LoadInst>(Load2));
6308   EXPECT_EQ(Load2, Value);
6309   EXPECT_EQ(Load2->getNextNode(), TargetStore);
6310   auto *Deinit = TargetStore->getNextNode();
6311   EXPECT_NE(Deinit, nullptr);
6312 
6313   auto *DeinitCall = dyn_cast<CallInst>(Deinit);
6314   EXPECT_NE(DeinitCall, nullptr);
6315   EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
6316   EXPECT_EQ(DeinitCall->arg_size(), 0U);
6317 
6318   EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
6319 
6320   // Check exit block
6321   auto *ExitBlock = EntryBlockBranch->getSuccessor(1);
6322   EXPECT_EQ(ExitBlock->getName(), "worker.exit");
6323   EXPECT_TRUE(isa<ReturnInst>(ExitBlock->getFirstNonPHI()));
6324 }
6325 
6326 TEST_F(OpenMPIRBuilderTest, CreateTask) {
6327   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6328   OpenMPIRBuilder OMPBuilder(*M);
6329   OMPBuilder.Config.IsTargetDevice = false;
6330   OMPBuilder.initialize();
6331   F->setName("func");
6332   IRBuilder<> Builder(BB);
6333 
6334   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6335   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6336   Value *Val128 =
6337       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6338 
6339   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6340     Builder.restoreIP(AllocaIP);
6341     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6342                                                 "bodygen.alloca128");
6343 
6344     Builder.restoreIP(CodeGenIP);
6345     // Loading and storing captured pointer and values
6346     Builder.CreateStore(Val128, Local128);
6347     Value *Val32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6348                                       "bodygen.load32");
6349 
6350     LoadInst *PrivLoad128 = Builder.CreateLoad(
6351         Local128->getAllocatedType(), Local128, "bodygen.local.load128");
6352     Value *Cmp = Builder.CreateICmpNE(
6353         Val32, Builder.CreateTrunc(PrivLoad128, Val32->getType()));
6354     Instruction *ThenTerm, *ElseTerm;
6355     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
6356                                   &ThenTerm, &ElseTerm);
6357   };
6358 
6359   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6360   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6361   OpenMPIRBuilder::LocationDescription Loc(
6362       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6363   Builder.restoreIP(OMPBuilder.createTask(
6364       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6365       BodyGenCB));
6366   OMPBuilder.finalize();
6367   Builder.CreateRetVoid();
6368 
6369   EXPECT_FALSE(verifyModule(*M, &errs()));
6370 
6371   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6372       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6373           ->user_back());
6374 
6375   // Verify the Ident argument
6376   GlobalVariable *Ident = cast<GlobalVariable>(TaskAllocCall->getArgOperand(0));
6377   ASSERT_NE(Ident, nullptr);
6378   EXPECT_TRUE(Ident->hasInitializer());
6379   Constant *Initializer = Ident->getInitializer();
6380   GlobalVariable *SrcStrGlob =
6381       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6382   ASSERT_NE(SrcStrGlob, nullptr);
6383   ConstantDataArray *SrcSrc =
6384       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6385   ASSERT_NE(SrcSrc, nullptr);
6386 
6387   // Verify the num_threads argument.
6388   CallInst *GTID = dyn_cast<CallInst>(TaskAllocCall->getArgOperand(1));
6389   ASSERT_NE(GTID, nullptr);
6390   EXPECT_EQ(GTID->arg_size(), 1U);
6391   EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
6392 
6393   // Verify the flags
6394   // TODO: Check for others flags. Currently testing only for tiedness.
6395   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6396   ASSERT_NE(Flags, nullptr);
6397   EXPECT_EQ(Flags->getSExtValue(), 1);
6398 
6399   // Verify the data size
6400   ConstantInt *DataSize =
6401       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6402   ASSERT_NE(DataSize, nullptr);
6403   EXPECT_EQ(DataSize->getSExtValue(), 40);
6404 
6405   ConstantInt *SharedsSize =
6406       dyn_cast<ConstantInt>(TaskAllocCall->getOperand(4));
6407   EXPECT_EQ(SharedsSize->getSExtValue(),
6408             24); // 64-bit pointer + 128-bit integer
6409 
6410   // Verify Wrapper function
6411   Function *OutlinedFn =
6412       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6413   ASSERT_NE(OutlinedFn, nullptr);
6414 
6415   LoadInst *SharedsLoad = dyn_cast<LoadInst>(OutlinedFn->begin()->begin());
6416   ASSERT_NE(SharedsLoad, nullptr);
6417   EXPECT_EQ(SharedsLoad->getPointerOperand(), OutlinedFn->getArg(1));
6418 
6419   EXPECT_FALSE(OutlinedFn->isDeclaration());
6420   EXPECT_EQ(OutlinedFn->getArg(0)->getType(), Builder.getInt32Ty());
6421 
6422   // Verify that the data argument is used only once, and that too in the load
6423   // instruction that is then used for accessing shared data.
6424   Value *DataPtr = OutlinedFn->getArg(1);
6425   EXPECT_EQ(DataPtr->getNumUses(), 1U);
6426   EXPECT_TRUE(isa<LoadInst>(DataPtr->uses().begin()->getUser()));
6427   Value *Data = DataPtr->uses().begin()->getUser();
6428   EXPECT_TRUE(all_of(Data->uses(), [](Use &U) {
6429     return isa<GetElementPtrInst>(U.getUser());
6430   }));
6431 
6432   // Verify the presence of `trunc` and `icmp` instructions in Outlined function
6433   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6434                      [](Instruction &inst) { return isa<TruncInst>(&inst); }));
6435   EXPECT_TRUE(any_of(instructions(OutlinedFn),
6436                      [](Instruction &inst) { return isa<ICmpInst>(&inst); }));
6437 
6438   // Verify the execution of the task
6439   CallInst *TaskCall = dyn_cast<CallInst>(
6440       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6441           ->user_back());
6442   ASSERT_NE(TaskCall, nullptr);
6443   EXPECT_EQ(TaskCall->getArgOperand(0), Ident);
6444   EXPECT_EQ(TaskCall->getArgOperand(1), GTID);
6445   EXPECT_EQ(TaskCall->getArgOperand(2), TaskAllocCall);
6446 
6447   // Verify that the argument data has been copied
6448   for (User *in : TaskAllocCall->users()) {
6449     if (MemCpyInst *memCpyInst = dyn_cast<MemCpyInst>(in)) {
6450       EXPECT_EQ(memCpyInst->getDest(), TaskAllocCall);
6451     }
6452   }
6453 }
6454 
6455 TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
6456   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6457   OpenMPIRBuilder OMPBuilder(*M);
6458   OMPBuilder.Config.IsTargetDevice = false;
6459   OMPBuilder.initialize();
6460   F->setName("func");
6461   IRBuilder<> Builder(BB);
6462 
6463   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6464 
6465   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6466   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6467   OpenMPIRBuilder::LocationDescription Loc(
6468       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6469   Builder.restoreIP(OMPBuilder.createTask(
6470       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6471       BodyGenCB));
6472   OMPBuilder.finalize();
6473   Builder.CreateRetVoid();
6474 
6475   EXPECT_FALSE(verifyModule(*M, &errs()));
6476 
6477   // Check that the outlined function has only one argument.
6478   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6479       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6480           ->user_back());
6481   Function *OutlinedFn = dyn_cast<Function>(TaskAllocCall->getArgOperand(5));
6482   ASSERT_NE(OutlinedFn, nullptr);
6483   ASSERT_EQ(OutlinedFn->arg_size(), 1U);
6484 }
6485 
6486 TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
6487   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6488   OpenMPIRBuilder OMPBuilder(*M);
6489   OMPBuilder.Config.IsTargetDevice = false;
6490   OMPBuilder.initialize();
6491   F->setName("func");
6492   IRBuilder<> Builder(BB);
6493   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6494   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6495   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6496   OpenMPIRBuilder::LocationDescription Loc(
6497       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6498   Builder.restoreIP(OMPBuilder.createTask(
6499       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
6500       /*Tied=*/false));
6501   OMPBuilder.finalize();
6502   Builder.CreateRetVoid();
6503 
6504   // Check for the `Tied` argument
6505   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6506       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6507           ->user_back());
6508   ASSERT_NE(TaskAllocCall, nullptr);
6509   ConstantInt *Flags = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(2));
6510   ASSERT_NE(Flags, nullptr);
6511   EXPECT_EQ(Flags->getZExtValue() & 1U, 0U);
6512 
6513   EXPECT_FALSE(verifyModule(*M, &errs()));
6514 }
6515 
6516 TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
6517   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6518   OpenMPIRBuilder OMPBuilder(*M);
6519   OMPBuilder.Config.IsTargetDevice = false;
6520   OMPBuilder.initialize();
6521   F->setName("func");
6522   IRBuilder<> Builder(BB);
6523   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6524   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6525   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6526   OpenMPIRBuilder::LocationDescription Loc(
6527       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6528   AllocaInst *InDep = Builder.CreateAlloca(Type::getInt32Ty(M->getContext()));
6529   SmallVector<OpenMPIRBuilder::DependData> DDS;
6530   {
6531     OpenMPIRBuilder::DependData DDIn(RTLDependenceKindTy::DepIn,
6532                                      Type::getInt32Ty(M->getContext()), InDep);
6533     DDS.push_back(DDIn);
6534   }
6535   Builder.restoreIP(OMPBuilder.createTask(
6536       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
6537       /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
6538   OMPBuilder.finalize();
6539   Builder.CreateRetVoid();
6540 
6541   // Check for the `NumDeps` argument
6542   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6543       OMPBuilder
6544           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps)
6545           ->user_back());
6546   ASSERT_NE(TaskAllocCall, nullptr);
6547   ConstantInt *NumDeps = dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(3));
6548   ASSERT_NE(NumDeps, nullptr);
6549   EXPECT_EQ(NumDeps->getZExtValue(), 1U);
6550 
6551   // Check for the `DepInfo` array argument
6552   AllocaInst *DepArray = dyn_cast<AllocaInst>(TaskAllocCall->getOperand(4));
6553   ASSERT_NE(DepArray, nullptr);
6554   Value::user_iterator DepArrayI = DepArray->user_begin();
6555   ++DepArrayI;
6556   Value::user_iterator DepInfoI = DepArrayI->user_begin();
6557   // Check for the `DependKind` flag in the `DepInfo` array
6558   Value *Flag = findStoredValue<GetElementPtrInst>(*DepInfoI);
6559   ASSERT_NE(Flag, nullptr);
6560   ConstantInt *FlagInt = dyn_cast<ConstantInt>(Flag);
6561   ASSERT_NE(FlagInt, nullptr);
6562   EXPECT_EQ(FlagInt->getZExtValue(),
6563             static_cast<unsigned int>(RTLDependenceKindTy::DepIn));
6564   ++DepInfoI;
6565   // Check for the size in the `DepInfo` array
6566   Value *Size = findStoredValue<GetElementPtrInst>(*DepInfoI);
6567   ASSERT_NE(Size, nullptr);
6568   ConstantInt *SizeInt = dyn_cast<ConstantInt>(Size);
6569   ASSERT_NE(SizeInt, nullptr);
6570   EXPECT_EQ(SizeInt->getZExtValue(), 4U);
6571   ++DepInfoI;
6572   // Check for the variable address in the `DepInfo` array
6573   Value *AddrStored = findStoredValue<GetElementPtrInst>(*DepInfoI);
6574   ASSERT_NE(AddrStored, nullptr);
6575   PtrToIntInst *AddrInt = dyn_cast<PtrToIntInst>(AddrStored);
6576   ASSERT_NE(AddrInt, nullptr);
6577   Value *Addr = AddrInt->getPointerOperand();
6578   EXPECT_EQ(Addr, InDep);
6579 
6580   ConstantInt *NumDepsNoAlias =
6581       dyn_cast<ConstantInt>(TaskAllocCall->getArgOperand(5));
6582   ASSERT_NE(NumDepsNoAlias, nullptr);
6583   EXPECT_EQ(NumDepsNoAlias->getZExtValue(), 0U);
6584   EXPECT_EQ(TaskAllocCall->getOperand(6),
6585             ConstantPointerNull::get(PointerType::getUnqual(M->getContext())));
6586 
6587   EXPECT_FALSE(verifyModule(*M, &errs()));
6588 }
6589 
6590 TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
6591   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6592   OpenMPIRBuilder OMPBuilder(*M);
6593   OMPBuilder.Config.IsTargetDevice = false;
6594   OMPBuilder.initialize();
6595   F->setName("func");
6596   IRBuilder<> Builder(BB);
6597   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6598   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6599   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
6600   Builder.SetInsertPoint(BodyBB);
6601   Value *Final = Builder.CreateICmp(
6602       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
6603       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
6604   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6605   Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
6606                                           /*Tied=*/false, Final));
6607   OMPBuilder.finalize();
6608   Builder.CreateRetVoid();
6609 
6610   // Check for the `Tied` argument
6611   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6612       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6613           ->user_back());
6614   ASSERT_NE(TaskAllocCall, nullptr);
6615   BinaryOperator *OrInst =
6616       dyn_cast<BinaryOperator>(TaskAllocCall->getArgOperand(2));
6617   ASSERT_NE(OrInst, nullptr);
6618   EXPECT_EQ(OrInst->getOpcode(), BinaryOperator::BinaryOps::Or);
6619 
6620   // One of the arguments to `or` instruction is the tied flag, which is equal
6621   // to zero.
6622   EXPECT_TRUE(any_of(OrInst->operands(), [](Value *op) {
6623     if (ConstantInt *TiedValue = dyn_cast<ConstantInt>(op))
6624       return TiedValue->getSExtValue() == 0;
6625     return false;
6626   }));
6627 
6628   // One of the arguments to `or` instruction is the final condition.
6629   EXPECT_TRUE(any_of(OrInst->operands(), [Final](Value *op) {
6630     if (SelectInst *Select = dyn_cast<SelectInst>(op)) {
6631       ConstantInt *TrueValue = dyn_cast<ConstantInt>(Select->getTrueValue());
6632       ConstantInt *FalseValue = dyn_cast<ConstantInt>(Select->getFalseValue());
6633       if (!TrueValue || !FalseValue)
6634         return false;
6635       return Select->getCondition() == Final &&
6636              TrueValue->getSExtValue() == 2 && FalseValue->getSExtValue() == 0;
6637     }
6638     return false;
6639   }));
6640 
6641   EXPECT_FALSE(verifyModule(*M, &errs()));
6642 }
6643 
6644 TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
6645   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6646   OpenMPIRBuilder OMPBuilder(*M);
6647   OMPBuilder.Config.IsTargetDevice = false;
6648   OMPBuilder.initialize();
6649   F->setName("func");
6650   IRBuilder<> Builder(BB);
6651   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
6652   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6653   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
6654   Builder.SetInsertPoint(BodyBB);
6655   Value *IfCondition = Builder.CreateICmp(
6656       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
6657       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
6658   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6659   Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
6660                                           /*Tied=*/false, /*Final=*/nullptr,
6661                                           IfCondition));
6662   OMPBuilder.finalize();
6663   Builder.CreateRetVoid();
6664 
6665   EXPECT_FALSE(verifyModule(*M, &errs()));
6666 
6667   CallInst *TaskAllocCall = dyn_cast<CallInst>(
6668       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
6669           ->user_back());
6670   ASSERT_NE(TaskAllocCall, nullptr);
6671 
6672   // Check the branching is based on the if condition argument.
6673   BranchInst *IfConditionBranchInst =
6674       dyn_cast<BranchInst>(TaskAllocCall->getParent()->getTerminator());
6675   ASSERT_NE(IfConditionBranchInst, nullptr);
6676   ASSERT_TRUE(IfConditionBranchInst->isConditional());
6677   EXPECT_EQ(IfConditionBranchInst->getCondition(), IfCondition);
6678 
6679   // Check that the `__kmpc_omp_task` executes only in the then branch.
6680   CallInst *TaskCall = dyn_cast<CallInst>(
6681       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task)
6682           ->user_back());
6683   ASSERT_NE(TaskCall, nullptr);
6684   EXPECT_EQ(TaskCall->getParent(), IfConditionBranchInst->getSuccessor(0));
6685 
6686   // Check that the OpenMP Runtime Functions specific to `if` clause execute
6687   // only in the else branch. Also check that the function call is between the
6688   // `__kmpc_omp_task_begin_if0` and `__kmpc_omp_task_complete_if0` calls.
6689   CallInst *TaskBeginIfCall = dyn_cast<CallInst>(
6690       OMPBuilder
6691           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0)
6692           ->user_back());
6693   CallInst *TaskCompleteCall = dyn_cast<CallInst>(
6694       OMPBuilder
6695           .getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0)
6696           ->user_back());
6697   ASSERT_NE(TaskBeginIfCall, nullptr);
6698   ASSERT_NE(TaskCompleteCall, nullptr);
6699   Function *OulinedFn =
6700       dyn_cast<Function>(TaskAllocCall->getArgOperand(5)->stripPointerCasts());
6701   ASSERT_NE(OulinedFn, nullptr);
6702   CallInst *OulinedFnCall = dyn_cast<CallInst>(OulinedFn->user_back());
6703   ASSERT_NE(OulinedFnCall, nullptr);
6704   EXPECT_EQ(TaskBeginIfCall->getParent(),
6705             IfConditionBranchInst->getSuccessor(1));
6706 
6707   EXPECT_EQ(TaskBeginIfCall->getNextNonDebugInstruction(), OulinedFnCall);
6708   EXPECT_EQ(OulinedFnCall->getNextNonDebugInstruction(), TaskCompleteCall);
6709 }
6710 
6711 TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
6712   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6713   OpenMPIRBuilder OMPBuilder(*M);
6714   OMPBuilder.initialize();
6715   F->setName("func");
6716   IRBuilder<> Builder(BB);
6717 
6718   AllocaInst *ValPtr32 = Builder.CreateAlloca(Builder.getInt32Ty());
6719   AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
6720   Value *Val128 =
6721       Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
6722   Instruction *ThenTerm, *ElseTerm;
6723 
6724   Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
6725 
6726   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6727     Builder.restoreIP(AllocaIP);
6728     AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
6729                                                 "bodygen.alloca128");
6730 
6731     Builder.restoreIP(CodeGenIP);
6732     // Loading and storing captured pointer and values
6733     InternalStoreInst = Builder.CreateStore(Val128, Local128);
6734     InternalLoad32 = Builder.CreateLoad(ValPtr32->getAllocatedType(), ValPtr32,
6735                                         "bodygen.load32");
6736 
6737     InternalLoad128 = Builder.CreateLoad(Local128->getAllocatedType(), Local128,
6738                                          "bodygen.local.load128");
6739     InternalIfCmp = Builder.CreateICmpNE(
6740         InternalLoad32,
6741         Builder.CreateTrunc(InternalLoad128, InternalLoad32->getType()));
6742     SplitBlockAndInsertIfThenElse(InternalIfCmp,
6743                                   CodeGenIP.getBlock()->getTerminator(),
6744                                   &ThenTerm, &ElseTerm);
6745   };
6746 
6747   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6748   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6749   OpenMPIRBuilder::LocationDescription Loc(
6750       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6751   Builder.restoreIP(OMPBuilder.createTaskgroup(
6752       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6753       BodyGenCB));
6754   OMPBuilder.finalize();
6755   Builder.CreateRetVoid();
6756 
6757   EXPECT_FALSE(verifyModule(*M, &errs()));
6758 
6759   CallInst *TaskgroupCall = dyn_cast<CallInst>(
6760       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
6761           ->user_back());
6762   ASSERT_NE(TaskgroupCall, nullptr);
6763   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
6764       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
6765           ->user_back());
6766   ASSERT_NE(EndTaskgroupCall, nullptr);
6767 
6768   // Verify the Ident argument
6769   GlobalVariable *Ident = cast<GlobalVariable>(TaskgroupCall->getArgOperand(0));
6770   ASSERT_NE(Ident, nullptr);
6771   EXPECT_TRUE(Ident->hasInitializer());
6772   Constant *Initializer = Ident->getInitializer();
6773   GlobalVariable *SrcStrGlob =
6774       cast<GlobalVariable>(Initializer->getOperand(4)->stripPointerCasts());
6775   ASSERT_NE(SrcStrGlob, nullptr);
6776   ConstantDataArray *SrcSrc =
6777       dyn_cast<ConstantDataArray>(SrcStrGlob->getInitializer());
6778   ASSERT_NE(SrcSrc, nullptr);
6779 
6780   // Verify the num_threads argument.
6781   CallInst *GTID = dyn_cast<CallInst>(TaskgroupCall->getArgOperand(1));
6782   ASSERT_NE(GTID, nullptr);
6783   EXPECT_EQ(GTID->arg_size(), 1U);
6784   EXPECT_EQ(GTID->getCalledFunction(), OMPBuilder.getOrCreateRuntimeFunctionPtr(
6785                                            OMPRTL___kmpc_global_thread_num));
6786 
6787   // Checking the general structure of the IR generated is same as expected.
6788   Instruction *GeneratedStoreInst = TaskgroupCall->getNextNonDebugInstruction();
6789   EXPECT_EQ(GeneratedStoreInst, InternalStoreInst);
6790   Instruction *GeneratedLoad32 =
6791       GeneratedStoreInst->getNextNonDebugInstruction();
6792   EXPECT_EQ(GeneratedLoad32, InternalLoad32);
6793   Instruction *GeneratedLoad128 = GeneratedLoad32->getNextNonDebugInstruction();
6794   EXPECT_EQ(GeneratedLoad128, InternalLoad128);
6795 
6796   // Checking the ordering because of the if statements and that
6797   // `__kmp_end_taskgroup` call is after the if branching.
6798   BasicBlock *RefOrder[] = {TaskgroupCall->getParent(), ThenTerm->getParent(),
6799                             ThenTerm->getSuccessor(0),
6800                             EndTaskgroupCall->getParent(),
6801                             ElseTerm->getParent()};
6802   verifyDFSOrder(F, RefOrder);
6803 }
6804 
6805 TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
6806   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
6807   OpenMPIRBuilder OMPBuilder(*M);
6808   OMPBuilder.Config.IsTargetDevice = false;
6809   OMPBuilder.initialize();
6810   F->setName("func");
6811   IRBuilder<> Builder(BB);
6812 
6813   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6814     Builder.restoreIP(AllocaIP);
6815     AllocaInst *Alloca32 =
6816         Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
6817     AllocaInst *Alloca64 =
6818         Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
6819     Builder.restoreIP(CodeGenIP);
6820     auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6821       Builder.restoreIP(CodeGenIP);
6822       LoadInst *LoadValue =
6823           Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
6824       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64));
6825       Builder.CreateStore(AddInst, Alloca64);
6826     };
6827     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
6828     Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
6829 
6830     auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
6831       Builder.restoreIP(CodeGenIP);
6832       LoadInst *LoadValue =
6833           Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
6834       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32));
6835       Builder.CreateStore(AddInst, Alloca32);
6836     };
6837     OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
6838     Builder.restoreIP(OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
6839   };
6840 
6841   BasicBlock *AllocaBB = Builder.GetInsertBlock();
6842   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
6843   OpenMPIRBuilder::LocationDescription Loc(
6844       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
6845   Builder.restoreIP(OMPBuilder.createTaskgroup(
6846       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
6847       BodyGenCB));
6848   OMPBuilder.finalize();
6849   Builder.CreateRetVoid();
6850 
6851   EXPECT_FALSE(verifyModule(*M, &errs()));
6852 
6853   CallInst *TaskgroupCall = dyn_cast<CallInst>(
6854       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup)
6855           ->user_back());
6856   ASSERT_NE(TaskgroupCall, nullptr);
6857   CallInst *EndTaskgroupCall = dyn_cast<CallInst>(
6858       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup)
6859           ->user_back());
6860   ASSERT_NE(EndTaskgroupCall, nullptr);
6861 
6862   Function *TaskAllocFn =
6863       OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
6864   ASSERT_EQ(TaskAllocFn->getNumUses(), 2u);
6865 
6866   CallInst *FirstTaskAllocCall =
6867       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin());
6868   CallInst *SecondTaskAllocCall =
6869       dyn_cast_or_null<CallInst>(*TaskAllocFn->users().begin()++);
6870   ASSERT_NE(FirstTaskAllocCall, nullptr);
6871   ASSERT_NE(SecondTaskAllocCall, nullptr);
6872 
6873   // Verify that the tasks have been generated in order and inside taskgroup
6874   // construct.
6875   BasicBlock *RefOrder[] = {
6876       TaskgroupCall->getParent(), FirstTaskAllocCall->getParent(),
6877       SecondTaskAllocCall->getParent(), EndTaskgroupCall->getParent()};
6878   verifyDFSOrder(F, RefOrder);
6879 }
6880 
6881 TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
6882   OpenMPIRBuilder OMPBuilder(*M);
6883   OMPBuilder.initialize();
6884 
6885   IRBuilder<> Builder(BB);
6886 
6887   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
6888   OpenMPIRBuilder::TargetDataInfo Info(true, false);
6889 
6890   auto VoidPtrTy = PointerType::getUnqual(Builder.getContext());
6891   auto VoidPtrPtrTy = PointerType::getUnqual(Builder.getContext());
6892   auto Int64Ty = Type::getInt64Ty(Builder.getContext());
6893   auto Int64PtrTy = PointerType::getUnqual(Builder.getContext());
6894   auto Array4VoidPtrTy = ArrayType::get(VoidPtrTy, 4);
6895   auto Array4Int64PtrTy = ArrayType::get(Int64Ty, 4);
6896 
6897   Info.RTArgs.BasePointersArray =
6898       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo(0));
6899   Info.RTArgs.PointersArray =
6900       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
6901   Info.RTArgs.SizesArray =
6902       ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo());
6903   Info.RTArgs.MapTypesArray =
6904       ConstantPointerNull::get(Array4Int64PtrTy->getPointerTo());
6905   Info.RTArgs.MapNamesArray =
6906       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
6907   Info.RTArgs.MappersArray =
6908       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
6909   Info.NumberOfPtrs = 4;
6910   Info.EmitDebug = false;
6911   OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
6912 
6913   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
6914   EXPECT_NE(RTArgs.PointersArray, nullptr);
6915   EXPECT_NE(RTArgs.SizesArray, nullptr);
6916   EXPECT_NE(RTArgs.MapTypesArray, nullptr);
6917   EXPECT_NE(RTArgs.MappersArray, nullptr);
6918   EXPECT_NE(RTArgs.MapNamesArray, nullptr);
6919   EXPECT_EQ(RTArgs.MapTypesArrayEnd, nullptr);
6920 
6921   EXPECT_EQ(RTArgs.BasePointersArray->getType(), VoidPtrPtrTy);
6922   EXPECT_EQ(RTArgs.PointersArray->getType(), VoidPtrPtrTy);
6923   EXPECT_EQ(RTArgs.SizesArray->getType(), Int64PtrTy);
6924   EXPECT_EQ(RTArgs.MapTypesArray->getType(), Int64PtrTy);
6925   EXPECT_EQ(RTArgs.MappersArray->getType(), VoidPtrPtrTy);
6926   EXPECT_EQ(RTArgs.MapNamesArray->getType(), VoidPtrPtrTy);
6927 }
6928 
6929 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
6930   OpenMPIRBuilder OMPBuilder(*M);
6931   OMPBuilder.setConfig(
6932       OpenMPIRBuilderConfig(true, false, false, false, false, false, false));
6933   OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager;
6934   TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0);
6935   InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0);
6936   EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo));
6937   InfoManager.initializeDeviceGlobalVarEntryInfo(
6938       "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0);
6939   InfoManager.registerTargetRegionEntryInfo(
6940       EntryInfo, nullptr, nullptr,
6941       OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
6942   InfoManager.registerDeviceGlobalVarEntryInfo(
6943       "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
6944       GlobalValue::WeakAnyLinkage);
6945   EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar"));
6946 }
6947 
6948 // Tests both registerTargetGlobalVariable and getAddrOfDeclareTargetVar as they
6949 // call each other (recursively in some cases). The test case test these
6950 // functions by utilising them for host code generation for declare target
6951 // global variables
6952 TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) {
6953   OpenMPIRBuilder OMPBuilder(*M);
6954   OMPBuilder.initialize();
6955   OpenMPIRBuilderConfig Config(false, false, false, false, false, false, false);
6956   OMPBuilder.setConfig(Config);
6957 
6958   std::vector<llvm::Triple> TargetTriple;
6959   TargetTriple.emplace_back("amdgcn-amd-amdhsa");
6960 
6961   TargetRegionEntryInfo EntryInfo("", 42, 4711, 17);
6962   std::vector<GlobalVariable *> RefsGathered;
6963 
6964   std::vector<Constant *> Globals;
6965   auto *IntTy = Type::getInt32Ty(Ctx);
6966   for (int I = 0; I < 2; ++I) {
6967     Globals.push_back(M->getOrInsertGlobal(
6968         "test_data_int_" + std::to_string(I), IntTy, [&]() -> GlobalVariable * {
6969           return new GlobalVariable(
6970               *M, IntTy, false, GlobalValue::LinkageTypes::WeakAnyLinkage,
6971               ConstantInt::get(IntTy, I), "test_data_int_" + std::to_string(I));
6972         }));
6973   }
6974 
6975   OMPBuilder.registerTargetGlobalVariable(
6976       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo,
6977       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
6978       EntryInfo, Globals[0]->getName(), RefsGathered, false, TargetTriple,
6979       nullptr, nullptr, Globals[0]->getType(), Globals[0]);
6980 
6981   OMPBuilder.registerTargetGlobalVariable(
6982       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink,
6983       OffloadEntriesInfoManager::OMPTargetDeviceClauseAny, false, true,
6984       EntryInfo, Globals[1]->getName(), RefsGathered, false, TargetTriple,
6985       nullptr, nullptr, Globals[1]->getType(), Globals[1]);
6986 
6987   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportfn =
6988       [](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
6989          const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
6990     // If this is invoked, then we want to emit an error, even if it is not
6991     // neccesarily the most readable, as something has went wrong. The
6992     // test-suite unfortunately eats up all error output
6993     ASSERT_EQ(Kind, Kind);
6994   };
6995 
6996   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportfn);
6997 
6998   // Clauses for data_int_0 with To + Any clauses for the host
6999   std::vector<GlobalVariable *> OffloadEntries;
7000   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name"));
7001   OffloadEntries.push_back(
7002       M->getNamedGlobal(".offloading.entry.test_data_int_0"));
7003 
7004   // Clauses for data_int_1 with Link + Any clauses for the host
7005   OffloadEntries.push_back(
7006       M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr"));
7007   OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1"));
7008   OffloadEntries.push_back(
7009       M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr"));
7010 
7011   for (unsigned I = 0; I < OffloadEntries.size(); ++I)
7012     EXPECT_NE(OffloadEntries[I], nullptr);
7013 
7014   // Metadata generated for the host offload module
7015   NamedMDNode *OffloadMetadata = M->getNamedMetadata("omp_offload.info");
7016   ASSERT_THAT(OffloadMetadata, testing::NotNull());
7017   StringRef Nodes[2] = {
7018       cast<MDString>(OffloadMetadata->getOperand(0)->getOperand(1))
7019           ->getString(),
7020       cast<MDString>(OffloadMetadata->getOperand(1)->getOperand(1))
7021           ->getString()};
7022   EXPECT_THAT(
7023       Nodes, testing::UnorderedElementsAre("test_data_int_0",
7024                                            "test_data_int_1_decl_tgt_ref_ptr"));
7025 }
7026 
7027 TEST_F(OpenMPIRBuilderTest, createGPUOffloadEntry) {
7028   OpenMPIRBuilder OMPBuilder(*M);
7029   OMPBuilder.initialize();
7030   OpenMPIRBuilderConfig Config(/* IsTargetDevice = */ true,
7031                                /* IsGPU = */ true,
7032                                /* OpenMPOffloadMandatory = */ false,
7033                                /* HasRequiresReverseOffload = */ false,
7034                                /* HasRequiresUnifiedAddress = */ false,
7035                                /* HasRequiresUnifiedSharedMemory = */ false,
7036                                /* HasRequiresDynamicAllocators = */ false);
7037   OMPBuilder.setConfig(Config);
7038 
7039   FunctionCallee FnTypeAndCallee =
7040       M->getOrInsertFunction("test_kernel", Type::getVoidTy(Ctx));
7041 
7042   auto *Fn = cast<Function>(FnTypeAndCallee.getCallee());
7043   OMPBuilder.createOffloadEntry(/* ID = */ nullptr, Fn,
7044                                 /* Size = */ 0,
7045                                 /* Flags = */ 0, GlobalValue::WeakAnyLinkage);
7046 
7047   // Check nvvm.annotations only created for GPU kernels
7048   NamedMDNode *MD = M->getNamedMetadata("nvvm.annotations");
7049   EXPECT_NE(MD, nullptr);
7050   EXPECT_EQ(MD->getNumOperands(), 1u);
7051 
7052   MDNode *Annotations = MD->getOperand(0);
7053   EXPECT_EQ(Annotations->getNumOperands(), 3u);
7054 
7055   Constant *ConstVal =
7056       dyn_cast<ConstantAsMetadata>(Annotations->getOperand(0))->getValue();
7057   EXPECT_TRUE(isa<Function>(Fn));
7058   EXPECT_EQ(ConstVal, cast<Function>(Fn));
7059 
7060   EXPECT_TRUE(Annotations->getOperand(1).equalsStr("kernel"));
7061 
7062   EXPECT_TRUE(mdconst::hasa<ConstantInt>(Annotations->getOperand(2)));
7063   APInt IntVal =
7064       mdconst::extract<ConstantInt>(Annotations->getOperand(2))->getValue();
7065   EXPECT_EQ(IntVal, 1);
7066 
7067   // Check kernel attributes
7068   EXPECT_TRUE(Fn->hasFnAttribute("kernel"));
7069   EXPECT_TRUE(Fn->hasFnAttribute(Attribute::MustProgress));
7070 }
7071 
7072 } // namespace
7073