10b57cec5SDimitry Andric //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Generates code for built-in GPU calls which are not runtime-specific. 100b57cec5SDimitry Andric // (Runtime-specific codegen lives in programming model specific files.) 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "CodeGenFunction.h" 150b57cec5SDimitry Andric #include "clang/Basic/Builtins.h" 160b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 170b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 180b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 195ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric using namespace clang; 220b57cec5SDimitry Andric using namespace CodeGen; 230b57cec5SDimitry Andric 24349cc55cSDimitry Andric namespace { 25349cc55cSDimitry Andric llvm::Function *GetVprintfDeclaration(llvm::Module &M) { 265f757f3fSDimitry Andric llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), 275f757f3fSDimitry Andric llvm::PointerType::getUnqual(M.getContext())}; 280b57cec5SDimitry Andric llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( 290b57cec5SDimitry Andric llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric if (auto *F = M.getFunction("vprintf")) { 320b57cec5SDimitry Andric // Our CUDA system header declares vprintf with the right signature, so 330b57cec5SDimitry Andric // nobody else should have been able to declare vprintf with a bogus 340b57cec5SDimitry Andric // signature. 350b57cec5SDimitry Andric assert(F->getFunctionType() == VprintfFuncType); 360b57cec5SDimitry Andric return F; 370b57cec5SDimitry Andric } 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric // vprintf doesn't already exist; create a declaration and insert it into the 400b57cec5SDimitry Andric // module. 410b57cec5SDimitry Andric return llvm::Function::Create( 420b57cec5SDimitry Andric VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric 45349cc55cSDimitry Andric llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) { 46349cc55cSDimitry Andric const char *Name = "__llvm_omp_vprintf"; 47349cc55cSDimitry Andric llvm::Module &M = CGM.getModule(); 485f757f3fSDimitry Andric llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()), 495f757f3fSDimitry Andric llvm::PointerType::getUnqual(M.getContext()), 50349cc55cSDimitry Andric llvm::Type::getInt32Ty(M.getContext())}; 51349cc55cSDimitry Andric llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( 52349cc55cSDimitry Andric llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); 53349cc55cSDimitry Andric 54349cc55cSDimitry Andric if (auto *F = M.getFunction(Name)) { 55349cc55cSDimitry Andric if (F->getFunctionType() != VprintfFuncType) { 56349cc55cSDimitry Andric CGM.Error(SourceLocation(), 57349cc55cSDimitry Andric "Invalid type declaration for __llvm_omp_vprintf"); 58349cc55cSDimitry Andric return nullptr; 59349cc55cSDimitry Andric } 60349cc55cSDimitry Andric return F; 61349cc55cSDimitry Andric } 62349cc55cSDimitry Andric 63349cc55cSDimitry Andric return llvm::Function::Create( 64349cc55cSDimitry Andric VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M); 65349cc55cSDimitry Andric } 66349cc55cSDimitry Andric 670b57cec5SDimitry Andric // Transforms a call to printf into a call to the NVPTX vprintf syscall (which 680b57cec5SDimitry Andric // isn't particularly special; it's invoked just like a regular function). 690b57cec5SDimitry Andric // vprintf takes two args: A format string, and a pointer to a buffer containing 700b57cec5SDimitry Andric // the varargs. 710b57cec5SDimitry Andric // 720b57cec5SDimitry Andric // For example, the call 730b57cec5SDimitry Andric // 740b57cec5SDimitry Andric // printf("format string", arg1, arg2, arg3); 750b57cec5SDimitry Andric // 760b57cec5SDimitry Andric // is converted into something resembling 770b57cec5SDimitry Andric // 780b57cec5SDimitry Andric // struct Tmp { 790b57cec5SDimitry Andric // Arg1 a1; 800b57cec5SDimitry Andric // Arg2 a2; 810b57cec5SDimitry Andric // Arg3 a3; 820b57cec5SDimitry Andric // }; 830b57cec5SDimitry Andric // char* buf = alloca(sizeof(Tmp)); 840b57cec5SDimitry Andric // *(Tmp*)buf = {a1, a2, a3}; 850b57cec5SDimitry Andric // vprintf("format string", buf); 860b57cec5SDimitry Andric // 870b57cec5SDimitry Andric // buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the 880b57cec5SDimitry Andric // args is itself aligned to its preferred alignment. 890b57cec5SDimitry Andric // 900b57cec5SDimitry Andric // Note that by the time this function runs, E's args have already undergone the 910b57cec5SDimitry Andric // standard C vararg promotion (short -> int, float -> double, etc.). 920b57cec5SDimitry Andric 93349cc55cSDimitry Andric std::pair<llvm::Value *, llvm::TypeSize> 94349cc55cSDimitry Andric packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) { 95349cc55cSDimitry Andric const llvm::DataLayout &DL = CGF->CGM.getDataLayout(); 96349cc55cSDimitry Andric llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext(); 97349cc55cSDimitry Andric CGBuilderTy &Builder = CGF->Builder; 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric // Construct and fill the args buffer that we'll pass to vprintf. 1000b57cec5SDimitry Andric if (Args.size() <= 1) { 101349cc55cSDimitry Andric // If there are no args, pass a null pointer and size 0 1025f757f3fSDimitry Andric llvm::Value *BufferPtr = 1035f757f3fSDimitry Andric llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx)); 1045f757f3fSDimitry Andric return {BufferPtr, llvm::TypeSize::getFixed(0)}; 1050b57cec5SDimitry Andric } else { 1060b57cec5SDimitry Andric llvm::SmallVector<llvm::Type *, 8> ArgTypes; 1070b57cec5SDimitry Andric for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) 108349cc55cSDimitry Andric ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType()); 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric // Using llvm::StructType is correct only because printf doesn't accept 1110b57cec5SDimitry Andric // aggregates. If we had to handle aggregates here, we'd have to manually 1120b57cec5SDimitry Andric // compute the offsets within the alloca -- we wouldn't be able to assume 1130b57cec5SDimitry Andric // that the alignment of the llvm type was the same as the alignment of the 1140b57cec5SDimitry Andric // clang type. 1150b57cec5SDimitry Andric llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); 116349cc55cSDimitry Andric llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy); 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { 1190b57cec5SDimitry Andric llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); 120349cc55cSDimitry Andric llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal(); 1215ffd83dbSDimitry Andric Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); 1220b57cec5SDimitry Andric } 123349cc55cSDimitry Andric llvm::Value *BufferPtr = 1245f757f3fSDimitry Andric Builder.CreatePointerCast(Alloca, llvm::PointerType::getUnqual(Ctx)); 125349cc55cSDimitry Andric return {BufferPtr, DL.getTypeAllocSize(AllocaTy)}; 126349cc55cSDimitry Andric } 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric 12906c3fb27SDimitry Andric bool containsNonScalarVarargs(CodeGenFunction *CGF, const CallArgList &Args) { 130349cc55cSDimitry Andric return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) { 131349cc55cSDimitry Andric return !A.getRValue(*CGF).isScalar(); 132349cc55cSDimitry Andric }); 1330b57cec5SDimitry Andric } 1345ffd83dbSDimitry Andric 135349cc55cSDimitry Andric RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF, 136349cc55cSDimitry Andric llvm::Function *Decl, bool WithSizeArg) { 137349cc55cSDimitry Andric CodeGenModule &CGM = CGF->CGM; 138349cc55cSDimitry Andric CGBuilderTy &Builder = CGF->Builder; 139*0fca6ea1SDimitry Andric assert(E->getBuiltinCallee() == Builtin::BIprintf || 140*0fca6ea1SDimitry Andric E->getBuiltinCallee() == Builtin::BI__builtin_printf); 141349cc55cSDimitry Andric assert(E->getNumArgs() >= 1); // printf always has at least one arg. 142349cc55cSDimitry Andric 143349cc55cSDimitry Andric // Uses the same format as nvptx for the argument packing, but also passes 144349cc55cSDimitry Andric // an i32 for the total size of the passed pointer 145349cc55cSDimitry Andric CallArgList Args; 146349cc55cSDimitry Andric CGF->EmitCallArgs(Args, 147349cc55cSDimitry Andric E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), 148349cc55cSDimitry Andric E->arguments(), E->getDirectCallee(), 149349cc55cSDimitry Andric /* ParamsToSkip = */ 0); 150349cc55cSDimitry Andric 151349cc55cSDimitry Andric // We don't know how to emit non-scalar varargs. 152349cc55cSDimitry Andric if (containsNonScalarVarargs(CGF, Args)) { 153349cc55cSDimitry Andric CGM.ErrorUnsupported(E, "non-scalar arg to printf"); 154349cc55cSDimitry Andric return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0)); 155349cc55cSDimitry Andric } 156349cc55cSDimitry Andric 157349cc55cSDimitry Andric auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args); 158349cc55cSDimitry Andric llvm::Value *BufferPtr = r.first; 159349cc55cSDimitry Andric 160349cc55cSDimitry Andric llvm::SmallVector<llvm::Value *, 3> Vec = { 161349cc55cSDimitry Andric Args[0].getRValue(*CGF).getScalarVal(), BufferPtr}; 162349cc55cSDimitry Andric if (WithSizeArg) { 163349cc55cSDimitry Andric // Passing > 32bit of data as a local alloca doesn't work for nvptx or 164349cc55cSDimitry Andric // amdgpu 165349cc55cSDimitry Andric llvm::Constant *Size = 166349cc55cSDimitry Andric llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()), 167bdd1243dSDimitry Andric static_cast<uint32_t>(r.second.getFixedValue())); 168349cc55cSDimitry Andric 169349cc55cSDimitry Andric Vec.push_back(Size); 170349cc55cSDimitry Andric } 171349cc55cSDimitry Andric return RValue::get(Builder.CreateCall(Decl, Vec)); 172349cc55cSDimitry Andric } 173349cc55cSDimitry Andric } // namespace 174349cc55cSDimitry Andric 175349cc55cSDimitry Andric RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) { 176349cc55cSDimitry Andric assert(getTarget().getTriple().isNVPTX()); 177349cc55cSDimitry Andric return EmitDevicePrintfCallExpr( 178349cc55cSDimitry Andric E, this, GetVprintfDeclaration(CGM.getModule()), false); 179349cc55cSDimitry Andric } 180349cc55cSDimitry Andric 181349cc55cSDimitry Andric RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) { 182*0fca6ea1SDimitry Andric assert(getTarget().getTriple().isAMDGCN() || 183*0fca6ea1SDimitry Andric (getTarget().getTriple().isSPIRV() && 184*0fca6ea1SDimitry Andric getTarget().getTriple().getVendor() == llvm::Triple::AMD)); 1855ffd83dbSDimitry Andric assert(E->getBuiltinCallee() == Builtin::BIprintf || 1865ffd83dbSDimitry Andric E->getBuiltinCallee() == Builtin::BI__builtin_printf); 1875ffd83dbSDimitry Andric assert(E->getNumArgs() >= 1); // printf always has at least one arg. 1885ffd83dbSDimitry Andric 1895ffd83dbSDimitry Andric CallArgList CallArgs; 1905ffd83dbSDimitry Andric EmitCallArgs(CallArgs, 1915ffd83dbSDimitry Andric E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), 1925ffd83dbSDimitry Andric E->arguments(), E->getDirectCallee(), 1935ffd83dbSDimitry Andric /* ParamsToSkip = */ 0); 1945ffd83dbSDimitry Andric 1955ffd83dbSDimitry Andric SmallVector<llvm::Value *, 8> Args; 19606c3fb27SDimitry Andric for (const auto &A : CallArgs) { 1975ffd83dbSDimitry Andric // We don't know how to emit non-scalar varargs. 1985ffd83dbSDimitry Andric if (!A.getRValue(*this).isScalar()) { 1995ffd83dbSDimitry Andric CGM.ErrorUnsupported(E, "non-scalar arg to printf"); 2005ffd83dbSDimitry Andric return RValue::get(llvm::ConstantInt::get(IntTy, -1)); 2015ffd83dbSDimitry Andric } 2025ffd83dbSDimitry Andric 2035ffd83dbSDimitry Andric llvm::Value *Arg = A.getRValue(*this).getScalarVal(); 2045ffd83dbSDimitry Andric Args.push_back(Arg); 2055ffd83dbSDimitry Andric } 2065ffd83dbSDimitry Andric 2075ffd83dbSDimitry Andric llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); 2085ffd83dbSDimitry Andric IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); 20906c3fb27SDimitry Andric 21006c3fb27SDimitry Andric bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal == 21106c3fb27SDimitry Andric clang::TargetOptions::AMDGPUPrintfKind::Buffered); 21206c3fb27SDimitry Andric auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered); 2135ffd83dbSDimitry Andric Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); 2145ffd83dbSDimitry Andric return RValue::get(Printf); 2155ffd83dbSDimitry Andric } 216349cc55cSDimitry Andric 217349cc55cSDimitry Andric RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) { 218349cc55cSDimitry Andric assert(getTarget().getTriple().isNVPTX() || 219349cc55cSDimitry Andric getTarget().getTriple().isAMDGCN()); 220349cc55cSDimitry Andric return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM), 221349cc55cSDimitry Andric true); 222349cc55cSDimitry Andric } 223