1*f4a2713aSLionel Sambuc //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===// 2*f4a2713aSLionel Sambuc // 3*f4a2713aSLionel Sambuc // The LLVM Compiler Infrastructure 4*f4a2713aSLionel Sambuc // 5*f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source 6*f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details. 7*f4a2713aSLionel Sambuc // 8*f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===// 9*f4a2713aSLionel Sambuc // 10*f4a2713aSLionel Sambuc // This provides a class for CUDA code generation targeting the NVIDIA CUDA 11*f4a2713aSLionel Sambuc // runtime library. 12*f4a2713aSLionel Sambuc // 13*f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===// 14*f4a2713aSLionel Sambuc 15*f4a2713aSLionel Sambuc #include "CGCUDARuntime.h" 16*f4a2713aSLionel Sambuc #include "CodeGenFunction.h" 17*f4a2713aSLionel Sambuc #include "CodeGenModule.h" 18*f4a2713aSLionel Sambuc #include "clang/AST/Decl.h" 19*f4a2713aSLionel Sambuc #include "llvm/IR/BasicBlock.h" 20*f4a2713aSLionel Sambuc #include "llvm/IR/Constants.h" 21*f4a2713aSLionel Sambuc #include "llvm/IR/DerivedTypes.h" 22*f4a2713aSLionel Sambuc #include "llvm/Support/CallSite.h" 23*f4a2713aSLionel Sambuc #include <vector> 24*f4a2713aSLionel Sambuc 25*f4a2713aSLionel Sambuc using namespace clang; 26*f4a2713aSLionel Sambuc using namespace CodeGen; 27*f4a2713aSLionel Sambuc 28*f4a2713aSLionel Sambuc namespace { 29*f4a2713aSLionel Sambuc 30*f4a2713aSLionel Sambuc class CGNVCUDARuntime : public CGCUDARuntime { 31*f4a2713aSLionel Sambuc 32*f4a2713aSLionel Sambuc private: 33*f4a2713aSLionel Sambuc llvm::Type *IntTy, *SizeTy; 34*f4a2713aSLionel Sambuc llvm::PointerType *CharPtrTy, *VoidPtrTy; 35*f4a2713aSLionel Sambuc 36*f4a2713aSLionel Sambuc llvm::Constant *getSetupArgumentFn() const; 37*f4a2713aSLionel Sambuc llvm::Constant *getLaunchFn() const; 38*f4a2713aSLionel Sambuc 39*f4a2713aSLionel Sambuc public: 40*f4a2713aSLionel Sambuc CGNVCUDARuntime(CodeGenModule &CGM); 41*f4a2713aSLionel Sambuc 42*f4a2713aSLionel Sambuc void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); 43*f4a2713aSLionel Sambuc }; 44*f4a2713aSLionel Sambuc 45*f4a2713aSLionel Sambuc } 46*f4a2713aSLionel Sambuc 47*f4a2713aSLionel Sambuc CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) { 48*f4a2713aSLionel Sambuc CodeGen::CodeGenTypes &Types = CGM.getTypes(); 49*f4a2713aSLionel Sambuc ASTContext &Ctx = CGM.getContext(); 50*f4a2713aSLionel Sambuc 51*f4a2713aSLionel Sambuc IntTy = Types.ConvertType(Ctx.IntTy); 52*f4a2713aSLionel Sambuc SizeTy = Types.ConvertType(Ctx.getSizeType()); 53*f4a2713aSLionel Sambuc 54*f4a2713aSLionel Sambuc CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); 55*f4a2713aSLionel Sambuc VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); 56*f4a2713aSLionel Sambuc } 57*f4a2713aSLionel Sambuc 58*f4a2713aSLionel Sambuc llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { 59*f4a2713aSLionel Sambuc // cudaError_t cudaSetupArgument(void *, size_t, size_t) 60*f4a2713aSLionel Sambuc std::vector<llvm::Type*> Params; 61*f4a2713aSLionel Sambuc Params.push_back(VoidPtrTy); 62*f4a2713aSLionel Sambuc Params.push_back(SizeTy); 63*f4a2713aSLionel Sambuc Params.push_back(SizeTy); 64*f4a2713aSLionel Sambuc return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, 65*f4a2713aSLionel Sambuc Params, false), 66*f4a2713aSLionel Sambuc "cudaSetupArgument"); 67*f4a2713aSLionel Sambuc } 68*f4a2713aSLionel Sambuc 69*f4a2713aSLionel Sambuc llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { 70*f4a2713aSLionel Sambuc // cudaError_t cudaLaunch(char *) 71*f4a2713aSLionel Sambuc std::vector<llvm::Type*> Params; 72*f4a2713aSLionel Sambuc Params.push_back(CharPtrTy); 73*f4a2713aSLionel Sambuc return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, 74*f4a2713aSLionel Sambuc Params, false), 75*f4a2713aSLionel Sambuc "cudaLaunch"); 76*f4a2713aSLionel Sambuc } 77*f4a2713aSLionel Sambuc 78*f4a2713aSLionel Sambuc void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, 79*f4a2713aSLionel Sambuc FunctionArgList &Args) { 80*f4a2713aSLionel Sambuc // Build the argument value list and the argument stack struct type. 81*f4a2713aSLionel Sambuc SmallVector<llvm::Value *, 16> ArgValues; 82*f4a2713aSLionel Sambuc std::vector<llvm::Type *> ArgTypes; 83*f4a2713aSLionel Sambuc for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); 84*f4a2713aSLionel Sambuc I != E; ++I) { 85*f4a2713aSLionel Sambuc llvm::Value *V = CGF.GetAddrOfLocalVar(*I); 86*f4a2713aSLionel Sambuc ArgValues.push_back(V); 87*f4a2713aSLionel Sambuc assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType"); 88*f4a2713aSLionel Sambuc ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType()); 89*f4a2713aSLionel Sambuc } 90*f4a2713aSLionel Sambuc llvm::StructType *ArgStackTy = llvm::StructType::get( 91*f4a2713aSLionel Sambuc CGF.getLLVMContext(), ArgTypes); 92*f4a2713aSLionel Sambuc 93*f4a2713aSLionel Sambuc llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); 94*f4a2713aSLionel Sambuc 95*f4a2713aSLionel Sambuc // Emit the calls to cudaSetupArgument 96*f4a2713aSLionel Sambuc llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); 97*f4a2713aSLionel Sambuc for (unsigned I = 0, E = Args.size(); I != E; ++I) { 98*f4a2713aSLionel Sambuc llvm::Value *Args[3]; 99*f4a2713aSLionel Sambuc llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); 100*f4a2713aSLionel Sambuc Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy); 101*f4a2713aSLionel Sambuc Args[1] = CGF.Builder.CreateIntCast( 102*f4a2713aSLionel Sambuc llvm::ConstantExpr::getSizeOf(ArgTypes[I]), 103*f4a2713aSLionel Sambuc SizeTy, false); 104*f4a2713aSLionel Sambuc Args[2] = CGF.Builder.CreateIntCast( 105*f4a2713aSLionel Sambuc llvm::ConstantExpr::getOffsetOf(ArgStackTy, I), 106*f4a2713aSLionel Sambuc SizeTy, false); 107*f4a2713aSLionel Sambuc llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); 108*f4a2713aSLionel Sambuc llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); 109*f4a2713aSLionel Sambuc llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); 110*f4a2713aSLionel Sambuc CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); 111*f4a2713aSLionel Sambuc CGF.EmitBlock(NextBlock); 112*f4a2713aSLionel Sambuc } 113*f4a2713aSLionel Sambuc 114*f4a2713aSLionel Sambuc // Emit the call to cudaLaunch 115*f4a2713aSLionel Sambuc llvm::Constant *cudaLaunchFn = getLaunchFn(); 116*f4a2713aSLionel Sambuc llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); 117*f4a2713aSLionel Sambuc CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); 118*f4a2713aSLionel Sambuc CGF.EmitBranch(EndBlock); 119*f4a2713aSLionel Sambuc 120*f4a2713aSLionel Sambuc CGF.EmitBlock(EndBlock); 121*f4a2713aSLionel Sambuc } 122*f4a2713aSLionel Sambuc 123*f4a2713aSLionel Sambuc CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { 124*f4a2713aSLionel Sambuc return new CGNVCUDARuntime(CGM); 125*f4a2713aSLionel Sambuc } 126