xref: /freebsd-src/contrib/llvm-project/clang/lib/CodeGen/CGGPUBuiltin.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Generates code for built-in GPU calls which are not runtime-specific.
100b57cec5SDimitry Andric // (Runtime-specific codegen lives in programming model specific files.)
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "CodeGenFunction.h"
150b57cec5SDimitry Andric #include "clang/Basic/Builtins.h"
160b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
170b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
180b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
195ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric using namespace clang;
220b57cec5SDimitry Andric using namespace CodeGen;
230b57cec5SDimitry Andric 
24349cc55cSDimitry Andric namespace {
25349cc55cSDimitry Andric llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
265f757f3fSDimitry Andric   llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()),
275f757f3fSDimitry Andric                             llvm::PointerType::getUnqual(M.getContext())};
280b57cec5SDimitry Andric   llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
290b57cec5SDimitry Andric       llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric   if (auto *F = M.getFunction("vprintf")) {
320b57cec5SDimitry Andric     // Our CUDA system header declares vprintf with the right signature, so
330b57cec5SDimitry Andric     // nobody else should have been able to declare vprintf with a bogus
340b57cec5SDimitry Andric     // signature.
350b57cec5SDimitry Andric     assert(F->getFunctionType() == VprintfFuncType);
360b57cec5SDimitry Andric     return F;
370b57cec5SDimitry Andric   }
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric   // vprintf doesn't already exist; create a declaration and insert it into the
400b57cec5SDimitry Andric   // module.
410b57cec5SDimitry Andric   return llvm::Function::Create(
420b57cec5SDimitry Andric       VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric 
45349cc55cSDimitry Andric llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
46349cc55cSDimitry Andric   const char *Name = "__llvm_omp_vprintf";
47349cc55cSDimitry Andric   llvm::Module &M = CGM.getModule();
485f757f3fSDimitry Andric   llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()),
495f757f3fSDimitry Andric                             llvm::PointerType::getUnqual(M.getContext()),
50349cc55cSDimitry Andric                             llvm::Type::getInt32Ty(M.getContext())};
51349cc55cSDimitry Andric   llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
52349cc55cSDimitry Andric       llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
53349cc55cSDimitry Andric 
54349cc55cSDimitry Andric   if (auto *F = M.getFunction(Name)) {
55349cc55cSDimitry Andric     if (F->getFunctionType() != VprintfFuncType) {
56349cc55cSDimitry Andric       CGM.Error(SourceLocation(),
57349cc55cSDimitry Andric                 "Invalid type declaration for __llvm_omp_vprintf");
58349cc55cSDimitry Andric       return nullptr;
59349cc55cSDimitry Andric     }
60349cc55cSDimitry Andric     return F;
61349cc55cSDimitry Andric   }
62349cc55cSDimitry Andric 
63349cc55cSDimitry Andric   return llvm::Function::Create(
64349cc55cSDimitry Andric       VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
65349cc55cSDimitry Andric }
66349cc55cSDimitry Andric 
670b57cec5SDimitry Andric // Transforms a call to printf into a call to the NVPTX vprintf syscall (which
680b57cec5SDimitry Andric // isn't particularly special; it's invoked just like a regular function).
690b57cec5SDimitry Andric // vprintf takes two args: A format string, and a pointer to a buffer containing
700b57cec5SDimitry Andric // the varargs.
710b57cec5SDimitry Andric //
720b57cec5SDimitry Andric // For example, the call
730b57cec5SDimitry Andric //
740b57cec5SDimitry Andric //   printf("format string", arg1, arg2, arg3);
750b57cec5SDimitry Andric //
760b57cec5SDimitry Andric // is converted into something resembling
770b57cec5SDimitry Andric //
780b57cec5SDimitry Andric //   struct Tmp {
790b57cec5SDimitry Andric //     Arg1 a1;
800b57cec5SDimitry Andric //     Arg2 a2;
810b57cec5SDimitry Andric //     Arg3 a3;
820b57cec5SDimitry Andric //   };
830b57cec5SDimitry Andric //   char* buf = alloca(sizeof(Tmp));
840b57cec5SDimitry Andric //   *(Tmp*)buf = {a1, a2, a3};
850b57cec5SDimitry Andric //   vprintf("format string", buf);
860b57cec5SDimitry Andric //
870b57cec5SDimitry Andric // buf is aligned to the max of {alignof(Arg1), ...}.  Furthermore, each of the
880b57cec5SDimitry Andric // args is itself aligned to its preferred alignment.
890b57cec5SDimitry Andric //
900b57cec5SDimitry Andric // Note that by the time this function runs, E's args have already undergone the
910b57cec5SDimitry Andric // standard C vararg promotion (short -> int, float -> double, etc.).
920b57cec5SDimitry Andric 
93349cc55cSDimitry Andric std::pair<llvm::Value *, llvm::TypeSize>
94349cc55cSDimitry Andric packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
95349cc55cSDimitry Andric   const llvm::DataLayout &DL = CGF->CGM.getDataLayout();
96349cc55cSDimitry Andric   llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext();
97349cc55cSDimitry Andric   CGBuilderTy &Builder = CGF->Builder;
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric   // Construct and fill the args buffer that we'll pass to vprintf.
1000b57cec5SDimitry Andric   if (Args.size() <= 1) {
101349cc55cSDimitry Andric     // If there are no args, pass a null pointer and size 0
1025f757f3fSDimitry Andric     llvm::Value *BufferPtr =
1035f757f3fSDimitry Andric         llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx));
1045f757f3fSDimitry Andric     return {BufferPtr, llvm::TypeSize::getFixed(0)};
1050b57cec5SDimitry Andric   } else {
1060b57cec5SDimitry Andric     llvm::SmallVector<llvm::Type *, 8> ArgTypes;
1070b57cec5SDimitry Andric     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
108349cc55cSDimitry Andric       ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType());
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric     // Using llvm::StructType is correct only because printf doesn't accept
1110b57cec5SDimitry Andric     // aggregates.  If we had to handle aggregates here, we'd have to manually
1120b57cec5SDimitry Andric     // compute the offsets within the alloca -- we wouldn't be able to assume
1130b57cec5SDimitry Andric     // that the alignment of the llvm type was the same as the alignment of the
1140b57cec5SDimitry Andric     // clang type.
1150b57cec5SDimitry Andric     llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
116349cc55cSDimitry Andric     llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy);
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
1190b57cec5SDimitry Andric       llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
120349cc55cSDimitry Andric       llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal();
1215ffd83dbSDimitry Andric       Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
1220b57cec5SDimitry Andric     }
123349cc55cSDimitry Andric     llvm::Value *BufferPtr =
1245f757f3fSDimitry Andric         Builder.CreatePointerCast(Alloca, llvm::PointerType::getUnqual(Ctx));
125349cc55cSDimitry Andric     return {BufferPtr, DL.getTypeAllocSize(AllocaTy)};
126349cc55cSDimitry Andric   }
1270b57cec5SDimitry Andric }
1280b57cec5SDimitry Andric 
12906c3fb27SDimitry Andric bool containsNonScalarVarargs(CodeGenFunction *CGF, const CallArgList &Args) {
130349cc55cSDimitry Andric   return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) {
131349cc55cSDimitry Andric     return !A.getRValue(*CGF).isScalar();
132349cc55cSDimitry Andric   });
1330b57cec5SDimitry Andric }
1345ffd83dbSDimitry Andric 
135349cc55cSDimitry Andric RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
136349cc55cSDimitry Andric                                 llvm::Function *Decl, bool WithSizeArg) {
137349cc55cSDimitry Andric   CodeGenModule &CGM = CGF->CGM;
138349cc55cSDimitry Andric   CGBuilderTy &Builder = CGF->Builder;
139*0fca6ea1SDimitry Andric   assert(E->getBuiltinCallee() == Builtin::BIprintf ||
140*0fca6ea1SDimitry Andric          E->getBuiltinCallee() == Builtin::BI__builtin_printf);
141349cc55cSDimitry Andric   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
142349cc55cSDimitry Andric 
143349cc55cSDimitry Andric   // Uses the same format as nvptx for the argument packing, but also passes
144349cc55cSDimitry Andric   // an i32 for the total size of the passed pointer
145349cc55cSDimitry Andric   CallArgList Args;
146349cc55cSDimitry Andric   CGF->EmitCallArgs(Args,
147349cc55cSDimitry Andric                     E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
148349cc55cSDimitry Andric                     E->arguments(), E->getDirectCallee(),
149349cc55cSDimitry Andric                     /* ParamsToSkip = */ 0);
150349cc55cSDimitry Andric 
151349cc55cSDimitry Andric   // We don't know how to emit non-scalar varargs.
152349cc55cSDimitry Andric   if (containsNonScalarVarargs(CGF, Args)) {
153349cc55cSDimitry Andric     CGM.ErrorUnsupported(E, "non-scalar arg to printf");
154349cc55cSDimitry Andric     return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0));
155349cc55cSDimitry Andric   }
156349cc55cSDimitry Andric 
157349cc55cSDimitry Andric   auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
158349cc55cSDimitry Andric   llvm::Value *BufferPtr = r.first;
159349cc55cSDimitry Andric 
160349cc55cSDimitry Andric   llvm::SmallVector<llvm::Value *, 3> Vec = {
161349cc55cSDimitry Andric       Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
162349cc55cSDimitry Andric   if (WithSizeArg) {
163349cc55cSDimitry Andric     // Passing > 32bit of data as a local alloca doesn't work for nvptx or
164349cc55cSDimitry Andric     // amdgpu
165349cc55cSDimitry Andric     llvm::Constant *Size =
166349cc55cSDimitry Andric         llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()),
167bdd1243dSDimitry Andric                                static_cast<uint32_t>(r.second.getFixedValue()));
168349cc55cSDimitry Andric 
169349cc55cSDimitry Andric     Vec.push_back(Size);
170349cc55cSDimitry Andric   }
171349cc55cSDimitry Andric   return RValue::get(Builder.CreateCall(Decl, Vec));
172349cc55cSDimitry Andric }
173349cc55cSDimitry Andric } // namespace
174349cc55cSDimitry Andric 
175349cc55cSDimitry Andric RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
176349cc55cSDimitry Andric   assert(getTarget().getTriple().isNVPTX());
177349cc55cSDimitry Andric   return EmitDevicePrintfCallExpr(
178349cc55cSDimitry Andric       E, this, GetVprintfDeclaration(CGM.getModule()), false);
179349cc55cSDimitry Andric }
180349cc55cSDimitry Andric 
181349cc55cSDimitry Andric RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
182*0fca6ea1SDimitry Andric   assert(getTarget().getTriple().isAMDGCN() ||
183*0fca6ea1SDimitry Andric          (getTarget().getTriple().isSPIRV() &&
184*0fca6ea1SDimitry Andric           getTarget().getTriple().getVendor() == llvm::Triple::AMD));
1855ffd83dbSDimitry Andric   assert(E->getBuiltinCallee() == Builtin::BIprintf ||
1865ffd83dbSDimitry Andric          E->getBuiltinCallee() == Builtin::BI__builtin_printf);
1875ffd83dbSDimitry Andric   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
1885ffd83dbSDimitry Andric 
1895ffd83dbSDimitry Andric   CallArgList CallArgs;
1905ffd83dbSDimitry Andric   EmitCallArgs(CallArgs,
1915ffd83dbSDimitry Andric                E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
1925ffd83dbSDimitry Andric                E->arguments(), E->getDirectCallee(),
1935ffd83dbSDimitry Andric                /* ParamsToSkip = */ 0);
1945ffd83dbSDimitry Andric 
1955ffd83dbSDimitry Andric   SmallVector<llvm::Value *, 8> Args;
19606c3fb27SDimitry Andric   for (const auto &A : CallArgs) {
1975ffd83dbSDimitry Andric     // We don't know how to emit non-scalar varargs.
1985ffd83dbSDimitry Andric     if (!A.getRValue(*this).isScalar()) {
1995ffd83dbSDimitry Andric       CGM.ErrorUnsupported(E, "non-scalar arg to printf");
2005ffd83dbSDimitry Andric       return RValue::get(llvm::ConstantInt::get(IntTy, -1));
2015ffd83dbSDimitry Andric     }
2025ffd83dbSDimitry Andric 
2035ffd83dbSDimitry Andric     llvm::Value *Arg = A.getRValue(*this).getScalarVal();
2045ffd83dbSDimitry Andric     Args.push_back(Arg);
2055ffd83dbSDimitry Andric   }
2065ffd83dbSDimitry Andric 
2075ffd83dbSDimitry Andric   llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
2085ffd83dbSDimitry Andric   IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
20906c3fb27SDimitry Andric 
21006c3fb27SDimitry Andric   bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
21106c3fb27SDimitry Andric                      clang::TargetOptions::AMDGPUPrintfKind::Buffered);
21206c3fb27SDimitry Andric   auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered);
2135ffd83dbSDimitry Andric   Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
2145ffd83dbSDimitry Andric   return RValue::get(Printf);
2155ffd83dbSDimitry Andric }
216349cc55cSDimitry Andric 
217349cc55cSDimitry Andric RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
218349cc55cSDimitry Andric   assert(getTarget().getTriple().isNVPTX() ||
219349cc55cSDimitry Andric          getTarget().getTriple().isAMDGCN());
220349cc55cSDimitry Andric   return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
221349cc55cSDimitry Andric                                   true);
222349cc55cSDimitry Andric }
223