xref: /llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp (revision d4216b5d0b111879f153c53caecf8ea011296cec)
//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Utility function to lower a printf call into a series of device
// library calls on the AMDGPU target.
//
// WARNING: This file knows about certain library functions. It recognizes them
// by name, and hardwires knowledge of their semantics.
//
//===----------------------------------------------------------------------===//
16ed181efaSSameer Sahasrabuddhe 
17ed181efaSSameer Sahasrabuddhe #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
18ed181efaSSameer Sahasrabuddhe #include "llvm/ADT/SparseBitVector.h"
19b0abd489SElliot Goodrich #include "llvm/ADT/StringExtras.h"
20ed181efaSSameer Sahasrabuddhe #include "llvm/Analysis/ValueTracking.h"
2174deadf1SNikita Popov #include "llvm/IR/Module.h"
22631c9654SVikram #include "llvm/Support/DataExtractor.h"
23631c9654SVikram #include "llvm/Support/MD5.h"
24631c9654SVikram #include "llvm/Support/MathExtras.h"
25ed181efaSSameer Sahasrabuddhe 
26ed181efaSSameer Sahasrabuddhe using namespace llvm;
27ed181efaSSameer Sahasrabuddhe 
28ed181efaSSameer Sahasrabuddhe #define DEBUG_TYPE "amdgpu-emit-printf"
29ed181efaSSameer Sahasrabuddhe 
fitArgInto64Bits(IRBuilder<> & Builder,Value * Arg)30ed181efaSSameer Sahasrabuddhe static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
31ed181efaSSameer Sahasrabuddhe   auto Int64Ty = Builder.getInt64Ty();
32ed181efaSSameer Sahasrabuddhe   auto Ty = Arg->getType();
33ed181efaSSameer Sahasrabuddhe 
34ed181efaSSameer Sahasrabuddhe   if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
35ed181efaSSameer Sahasrabuddhe     switch (IntTy->getBitWidth()) {
36ed181efaSSameer Sahasrabuddhe     case 32:
37ed181efaSSameer Sahasrabuddhe       return Builder.CreateZExt(Arg, Int64Ty);
38ed181efaSSameer Sahasrabuddhe     case 64:
39ed181efaSSameer Sahasrabuddhe       return Arg;
40ed181efaSSameer Sahasrabuddhe     }
41ed181efaSSameer Sahasrabuddhe   }
42ed181efaSSameer Sahasrabuddhe 
43ed181efaSSameer Sahasrabuddhe   if (Ty->getTypeID() == Type::DoubleTyID) {
44ed181efaSSameer Sahasrabuddhe     return Builder.CreateBitCast(Arg, Int64Ty);
45ed181efaSSameer Sahasrabuddhe   }
46ed181efaSSameer Sahasrabuddhe 
4723a887b0SSimon Pilgrim   if (isa<PointerType>(Ty)) {
48ed181efaSSameer Sahasrabuddhe     return Builder.CreatePtrToInt(Arg, Int64Ty);
49ed181efaSSameer Sahasrabuddhe   }
50ed181efaSSameer Sahasrabuddhe 
51ed181efaSSameer Sahasrabuddhe   llvm_unreachable("unexpected type");
52ed181efaSSameer Sahasrabuddhe }
53ed181efaSSameer Sahasrabuddhe 
callPrintfBegin(IRBuilder<> & Builder,Value * Version)54ed181efaSSameer Sahasrabuddhe static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
55ed181efaSSameer Sahasrabuddhe   auto Int64Ty = Builder.getInt64Ty();
56ed181efaSSameer Sahasrabuddhe   auto M = Builder.GetInsertBlock()->getModule();
57ed181efaSSameer Sahasrabuddhe   auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
58ed181efaSSameer Sahasrabuddhe   return Builder.CreateCall(Fn, Version);
59ed181efaSSameer Sahasrabuddhe }
60ed181efaSSameer Sahasrabuddhe 
callAppendArgs(IRBuilder<> & Builder,Value * Desc,int NumArgs,Value * Arg0,Value * Arg1,Value * Arg2,Value * Arg3,Value * Arg4,Value * Arg5,Value * Arg6,bool IsLast)61ed181efaSSameer Sahasrabuddhe static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
62ed181efaSSameer Sahasrabuddhe                              Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
63ed181efaSSameer Sahasrabuddhe                              Value *Arg4, Value *Arg5, Value *Arg6,
64ed181efaSSameer Sahasrabuddhe                              bool IsLast) {
65ed181efaSSameer Sahasrabuddhe   auto Int64Ty = Builder.getInt64Ty();
66ed181efaSSameer Sahasrabuddhe   auto Int32Ty = Builder.getInt32Ty();
67ed181efaSSameer Sahasrabuddhe   auto M = Builder.GetInsertBlock()->getModule();
68ed181efaSSameer Sahasrabuddhe   auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
69ed181efaSSameer Sahasrabuddhe                                    Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
70ed181efaSSameer Sahasrabuddhe                                    Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
71ed181efaSSameer Sahasrabuddhe   auto IsLastValue = Builder.getInt32(IsLast);
72ed181efaSSameer Sahasrabuddhe   auto NumArgsValue = Builder.getInt32(NumArgs);
73ed181efaSSameer Sahasrabuddhe   return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
74ed181efaSSameer Sahasrabuddhe                                  Arg4, Arg5, Arg6, IsLastValue});
75ed181efaSSameer Sahasrabuddhe }
76ed181efaSSameer Sahasrabuddhe 
appendArg(IRBuilder<> & Builder,Value * Desc,Value * Arg,bool IsLast)77ed181efaSSameer Sahasrabuddhe static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
78ed181efaSSameer Sahasrabuddhe                         bool IsLast) {
79ed181efaSSameer Sahasrabuddhe   auto Arg0 = fitArgInto64Bits(Builder, Arg);
80ed181efaSSameer Sahasrabuddhe   auto Zero = Builder.getInt64(0);
81ed181efaSSameer Sahasrabuddhe   return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
82ed181efaSSameer Sahasrabuddhe                         Zero, IsLast);
83ed181efaSSameer Sahasrabuddhe }
84ed181efaSSameer Sahasrabuddhe 
85ed181efaSSameer Sahasrabuddhe // The device library does not provide strlen, so we build our own loop
86ed181efaSSameer Sahasrabuddhe // here. While we are at it, we also include the terminating null in the length.
getStrlenWithNull(IRBuilder<> & Builder,Value * Str)87ed181efaSSameer Sahasrabuddhe static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
88ed181efaSSameer Sahasrabuddhe   auto *Prev = Builder.GetInsertBlock();
89ed181efaSSameer Sahasrabuddhe   Module *M = Prev->getModule();
90ed181efaSSameer Sahasrabuddhe 
91ed181efaSSameer Sahasrabuddhe   auto CharZero = Builder.getInt8(0);
92ed181efaSSameer Sahasrabuddhe   auto One = Builder.getInt64(1);
93ed181efaSSameer Sahasrabuddhe   auto Zero = Builder.getInt64(0);
94ed181efaSSameer Sahasrabuddhe   auto Int64Ty = Builder.getInt64Ty();
95ed181efaSSameer Sahasrabuddhe 
96ed181efaSSameer Sahasrabuddhe   // The length is either zero for a null pointer, or the computed value for an
97ed181efaSSameer Sahasrabuddhe   // actual string. We need a join block for a phi that represents the final
98ed181efaSSameer Sahasrabuddhe   // value.
99ed181efaSSameer Sahasrabuddhe   //
100ed181efaSSameer Sahasrabuddhe   //  Strictly speaking, the zero does not matter since
101ed181efaSSameer Sahasrabuddhe   // __ockl_printf_append_string_n ignores the length if the pointer is null.
102ed181efaSSameer Sahasrabuddhe   BasicBlock *Join = nullptr;
103ed181efaSSameer Sahasrabuddhe   if (Prev->getTerminator()) {
104ed181efaSSameer Sahasrabuddhe     Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
105ed181efaSSameer Sahasrabuddhe                                  "strlen.join");
106ed181efaSSameer Sahasrabuddhe     Prev->getTerminator()->eraseFromParent();
107ed181efaSSameer Sahasrabuddhe   } else {
108ed181efaSSameer Sahasrabuddhe     Join = BasicBlock::Create(M->getContext(), "strlen.join",
109ed181efaSSameer Sahasrabuddhe                               Prev->getParent());
110ed181efaSSameer Sahasrabuddhe   }
111ed181efaSSameer Sahasrabuddhe   BasicBlock *While =
112ed181efaSSameer Sahasrabuddhe       BasicBlock::Create(M->getContext(), "strlen.while",
113ed181efaSSameer Sahasrabuddhe                          Prev->getParent(), Join);
114ed181efaSSameer Sahasrabuddhe   BasicBlock *WhileDone = BasicBlock::Create(
115ed181efaSSameer Sahasrabuddhe       M->getContext(), "strlen.while.done",
116ed181efaSSameer Sahasrabuddhe       Prev->getParent(), Join);
117ed181efaSSameer Sahasrabuddhe 
118ed181efaSSameer Sahasrabuddhe   // Emit an early return for when the pointer is null.
119ed181efaSSameer Sahasrabuddhe   Builder.SetInsertPoint(Prev);
120ed181efaSSameer Sahasrabuddhe   auto CmpNull =
121ed181efaSSameer Sahasrabuddhe       Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
122ed181efaSSameer Sahasrabuddhe   BranchInst::Create(Join, While, CmpNull, Prev);
123ed181efaSSameer Sahasrabuddhe 
124ed181efaSSameer Sahasrabuddhe   // Entry to the while loop.
125ed181efaSSameer Sahasrabuddhe   Builder.SetInsertPoint(While);
126ed181efaSSameer Sahasrabuddhe 
127ed181efaSSameer Sahasrabuddhe   auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
128ed181efaSSameer Sahasrabuddhe   PtrPhi->addIncoming(Str, Prev);
1292983053dSArthur Eubanks   auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
130ed181efaSSameer Sahasrabuddhe   PtrPhi->addIncoming(PtrNext, While);
131ed181efaSSameer Sahasrabuddhe 
132ed181efaSSameer Sahasrabuddhe   // Condition for the while loop.
13346354bacSNikita Popov   auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
134ed181efaSSameer Sahasrabuddhe   auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
135ed181efaSSameer Sahasrabuddhe   Builder.CreateCondBr(Cmp, WhileDone, While);
136ed181efaSSameer Sahasrabuddhe 
137ed181efaSSameer Sahasrabuddhe   // Add one to the computed length.
138d75f9dd1SStephen Tozer   Builder.SetInsertPoint(WhileDone, WhileDone->begin());
139ed181efaSSameer Sahasrabuddhe   auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
140ed181efaSSameer Sahasrabuddhe   auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
141ed181efaSSameer Sahasrabuddhe   auto Len = Builder.CreateSub(End, Begin);
142ed181efaSSameer Sahasrabuddhe   Len = Builder.CreateAdd(Len, One);
143ed181efaSSameer Sahasrabuddhe 
144ed181efaSSameer Sahasrabuddhe   // Final join.
145ed181efaSSameer Sahasrabuddhe   BranchInst::Create(Join, WhileDone);
146d75f9dd1SStephen Tozer   Builder.SetInsertPoint(Join, Join->begin());
147ed181efaSSameer Sahasrabuddhe   auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
148ed181efaSSameer Sahasrabuddhe   LenPhi->addIncoming(Len, WhileDone);
149ed181efaSSameer Sahasrabuddhe   LenPhi->addIncoming(Zero, Prev);
150ed181efaSSameer Sahasrabuddhe 
151ed181efaSSameer Sahasrabuddhe   return LenPhi;
152ed181efaSSameer Sahasrabuddhe }
153ed181efaSSameer Sahasrabuddhe 
callAppendStringN(IRBuilder<> & Builder,Value * Desc,Value * Str,Value * Length,bool isLast)154ed181efaSSameer Sahasrabuddhe static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
155ed181efaSSameer Sahasrabuddhe                                 Value *Length, bool isLast) {
156ed181efaSSameer Sahasrabuddhe   auto Int64Ty = Builder.getInt64Ty();
157*d4216b5dSAlex Voicu   auto IsLastInt32 = Builder.getInt32(isLast);
158ed181efaSSameer Sahasrabuddhe   auto M = Builder.GetInsertBlock()->getModule();
159ed181efaSSameer Sahasrabuddhe   auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
160*d4216b5dSAlex Voicu                                    Desc->getType(), Str->getType(),
161*d4216b5dSAlex Voicu                                    Length->getType(), IsLastInt32->getType());
162ed181efaSSameer Sahasrabuddhe   return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
163ed181efaSSameer Sahasrabuddhe }
164ed181efaSSameer Sahasrabuddhe 
appendString(IRBuilder<> & Builder,Value * Desc,Value * Arg,bool IsLast)165ed181efaSSameer Sahasrabuddhe static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
166ed181efaSSameer Sahasrabuddhe                            bool IsLast) {
167ed181efaSSameer Sahasrabuddhe   auto Length = getStrlenWithNull(Builder, Arg);
168ed181efaSSameer Sahasrabuddhe   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
169ed181efaSSameer Sahasrabuddhe }
170ed181efaSSameer Sahasrabuddhe 
processArg(IRBuilder<> & Builder,Value * Desc,Value * Arg,bool SpecIsCString,bool IsLast)171ed181efaSSameer Sahasrabuddhe static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
172ed181efaSSameer Sahasrabuddhe                          bool SpecIsCString, bool IsLast) {
173de8867a0SNikita Popov   if (SpecIsCString && isa<PointerType>(Arg->getType())) {
174ed181efaSSameer Sahasrabuddhe     return appendString(Builder, Desc, Arg, IsLast);
175ed181efaSSameer Sahasrabuddhe   }
176ed181efaSSameer Sahasrabuddhe   // If the format specifies a string but the argument is not, the frontend will
177ed181efaSSameer Sahasrabuddhe   // have printed a warning. We just rely on undefined behaviour and send the
178ed181efaSSameer Sahasrabuddhe   // argument anyway.
179ed181efaSSameer Sahasrabuddhe   return appendArg(Builder, Desc, Arg, IsLast);
180ed181efaSSameer Sahasrabuddhe }
181ed181efaSSameer Sahasrabuddhe 
182ed181efaSSameer Sahasrabuddhe // Scan the format string to locate all specifiers, and mark the ones that
183ed181efaSSameer Sahasrabuddhe // specify a string, i.e, the "%s" specifier with optional '*' characters.
locateCStrings(SparseBitVector<8> & BV,StringRef Str)184631c9654SVikram static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
185ed181efaSSameer Sahasrabuddhe   static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
186ed181efaSSameer Sahasrabuddhe   size_t SpecPos = 0;
187ed181efaSSameer Sahasrabuddhe   // Skip the first argument, the format string.
188ed181efaSSameer Sahasrabuddhe   unsigned ArgIdx = 1;
189ed181efaSSameer Sahasrabuddhe 
190ed181efaSSameer Sahasrabuddhe   while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
191ed181efaSSameer Sahasrabuddhe     if (Str[SpecPos + 1] == '%') {
192ed181efaSSameer Sahasrabuddhe       SpecPos += 2;
193ed181efaSSameer Sahasrabuddhe       continue;
194ed181efaSSameer Sahasrabuddhe     }
195ed181efaSSameer Sahasrabuddhe     auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
196ed181efaSSameer Sahasrabuddhe     if (SpecEnd == StringRef::npos)
197ed181efaSSameer Sahasrabuddhe       return;
198ed181efaSSameer Sahasrabuddhe     auto Spec = Str.slice(SpecPos, SpecEnd + 1);
199ed181efaSSameer Sahasrabuddhe     ArgIdx += Spec.count('*');
200ed181efaSSameer Sahasrabuddhe     if (Str[SpecEnd] == 's') {
201ed181efaSSameer Sahasrabuddhe       BV.set(ArgIdx);
202ed181efaSSameer Sahasrabuddhe     }
203ed181efaSSameer Sahasrabuddhe     SpecPos = SpecEnd + 1;
204ed181efaSSameer Sahasrabuddhe     ++ArgIdx;
205ed181efaSSameer Sahasrabuddhe   }
206ed181efaSSameer Sahasrabuddhe }
207ed181efaSSameer Sahasrabuddhe 
208631c9654SVikram // helper struct to package the string related data
209631c9654SVikram struct StringData {
210631c9654SVikram   StringRef Str;
211631c9654SVikram   Value *RealSize = nullptr;
212631c9654SVikram   Value *AlignedSize = nullptr;
213631c9654SVikram   bool IsConst = true;
214631c9654SVikram 
StringDataStringData215631c9654SVikram   StringData(StringRef ST, Value *RS, Value *AS, bool IC)
216631c9654SVikram       : Str(ST), RealSize(RS), AlignedSize(AS), IsConst(IC) {}
217631c9654SVikram };
218631c9654SVikram 
219631c9654SVikram // Calculates frame size required for current printf expansion and allocates
220631c9654SVikram // space on printf buffer. Printf frame includes following contents
221631c9654SVikram // [ ControlDWord , format string/Hash , Arguments (each aligned to 8 byte) ]
callBufferedPrintfStart(IRBuilder<> & Builder,ArrayRef<Value * > Args,Value * Fmt,bool isConstFmtStr,SparseBitVector<8> & SpecIsCString,SmallVectorImpl<StringData> & StringContents,Value * & ArgSize)222631c9654SVikram static Value *callBufferedPrintfStart(
223631c9654SVikram     IRBuilder<> &Builder, ArrayRef<Value *> Args, Value *Fmt,
224631c9654SVikram     bool isConstFmtStr, SparseBitVector<8> &SpecIsCString,
225631c9654SVikram     SmallVectorImpl<StringData> &StringContents, Value *&ArgSize) {
226631c9654SVikram   Module *M = Builder.GetInsertBlock()->getModule();
227631c9654SVikram   Value *NonConstStrLen = nullptr;
228631c9654SVikram   Value *LenWithNull = nullptr;
229631c9654SVikram   Value *LenWithNullAligned = nullptr;
230631c9654SVikram   Value *TempAdd = nullptr;
231631c9654SVikram 
232631c9654SVikram   // First 4 bytes to be reserved for control dword
233631c9654SVikram   size_t BufSize = 4;
234631c9654SVikram   if (isConstFmtStr)
235631c9654SVikram     // First 8 bytes of MD5 hash
236631c9654SVikram     BufSize += 8;
237631c9654SVikram   else {
238631c9654SVikram     LenWithNull = getStrlenWithNull(Builder, Fmt);
239631c9654SVikram 
240631c9654SVikram     // Align the computed length to next 8 byte boundary
241631c9654SVikram     TempAdd = Builder.CreateAdd(LenWithNull,
242631c9654SVikram                                 ConstantInt::get(LenWithNull->getType(), 7U));
243631c9654SVikram     NonConstStrLen = Builder.CreateAnd(
244631c9654SVikram         TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
245631c9654SVikram 
246631c9654SVikram     StringContents.push_back(
247631c9654SVikram         StringData(StringRef(), LenWithNull, NonConstStrLen, false));
248631c9654SVikram   }
249631c9654SVikram 
250631c9654SVikram   for (size_t i = 1; i < Args.size(); i++) {
251631c9654SVikram     if (SpecIsCString.test(i)) {
252631c9654SVikram       StringRef ArgStr;
253631c9654SVikram       if (getConstantStringInfo(Args[i], ArgStr)) {
254631c9654SVikram         auto alignedLen = alignTo(ArgStr.size() + 1, 8);
255631c9654SVikram         StringContents.push_back(StringData(
256631c9654SVikram             ArgStr,
257631c9654SVikram             /*RealSize*/ nullptr, /*AlignedSize*/ nullptr, /*IsConst*/ true));
258631c9654SVikram         BufSize += alignedLen;
259631c9654SVikram       } else {
260631c9654SVikram         LenWithNull = getStrlenWithNull(Builder, Args[i]);
261631c9654SVikram 
262631c9654SVikram         // Align the computed length to next 8 byte boundary
263631c9654SVikram         TempAdd = Builder.CreateAdd(
264631c9654SVikram             LenWithNull, ConstantInt::get(LenWithNull->getType(), 7U));
265631c9654SVikram         LenWithNullAligned = Builder.CreateAnd(
266631c9654SVikram             TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
267631c9654SVikram 
268631c9654SVikram         if (NonConstStrLen) {
269631c9654SVikram           auto Val = Builder.CreateAdd(LenWithNullAligned, NonConstStrLen,
270631c9654SVikram                                        "cumulativeAdd");
271631c9654SVikram           NonConstStrLen = Val;
272631c9654SVikram         } else
273631c9654SVikram           NonConstStrLen = LenWithNullAligned;
274631c9654SVikram 
275631c9654SVikram         StringContents.push_back(
276631c9654SVikram             StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
277631c9654SVikram       }
278631c9654SVikram     } else {
279631c9654SVikram       int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
280631c9654SVikram       // We end up expanding non string arguments to 8 bytes
281631c9654SVikram       // (args smaller than 8 bytes)
282631c9654SVikram       BufSize += std::max(AllocSize, 8);
283631c9654SVikram     }
284631c9654SVikram   }
285631c9654SVikram 
286631c9654SVikram   // calculate final size value to be passed to printf_alloc
287631c9654SVikram   Value *SizeToReserve = ConstantInt::get(Builder.getInt64Ty(), BufSize, false);
288631c9654SVikram   SmallVector<Value *, 1> Alloc_args;
289631c9654SVikram   if (NonConstStrLen)
290631c9654SVikram     SizeToReserve = Builder.CreateAdd(NonConstStrLen, SizeToReserve);
291631c9654SVikram 
292631c9654SVikram   ArgSize = Builder.CreateTrunc(SizeToReserve, Builder.getInt32Ty());
293631c9654SVikram   Alloc_args.push_back(ArgSize);
294631c9654SVikram 
295631c9654SVikram   // call the printf_alloc function
296631c9654SVikram   AttributeList Attr = AttributeList::get(
297631c9654SVikram       Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind);
298631c9654SVikram 
299631c9654SVikram   Type *Tys_alloc[1] = {Builder.getInt32Ty()};
300aba04018SCraig Topper   Type *PtrTy =
301aba04018SCraig Topper       Builder.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
302aba04018SCraig Topper   FunctionType *FTy_alloc = FunctionType::get(PtrTy, Tys_alloc, false);
303631c9654SVikram   auto PrintfAllocFn =
304631c9654SVikram       M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
305631c9654SVikram 
306631c9654SVikram   return Builder.CreateCall(PrintfAllocFn, Alloc_args, "printf_alloc_fn");
307631c9654SVikram }
308631c9654SVikram 
309631c9654SVikram // Prepare constant string argument to push onto the buffer
processConstantStringArg(StringData * SD,IRBuilder<> & Builder,SmallVectorImpl<Value * > & WhatToStore)310631c9654SVikram static void processConstantStringArg(StringData *SD, IRBuilder<> &Builder,
311631c9654SVikram                                      SmallVectorImpl<Value *> &WhatToStore) {
312631c9654SVikram   std::string Str(SD->Str.str() + '\0');
313631c9654SVikram 
314631c9654SVikram   DataExtractor Extractor(Str, /*IsLittleEndian=*/true, 8);
315631c9654SVikram   DataExtractor::Cursor Offset(0);
316631c9654SVikram   while (Offset && Offset.tell() < Str.size()) {
317631c9654SVikram     const uint64_t ReadSize = 4;
318631c9654SVikram     uint64_t ReadNow = std::min(ReadSize, Str.size() - Offset.tell());
319631c9654SVikram     uint64_t ReadBytes = 0;
320631c9654SVikram     switch (ReadNow) {
321631c9654SVikram     default:
322631c9654SVikram       llvm_unreachable("min(4, X) > 4?");
323631c9654SVikram     case 1:
324631c9654SVikram       ReadBytes = Extractor.getU8(Offset);
325631c9654SVikram       break;
326631c9654SVikram     case 2:
327631c9654SVikram       ReadBytes = Extractor.getU16(Offset);
328631c9654SVikram       break;
329631c9654SVikram     case 3:
330631c9654SVikram       ReadBytes = Extractor.getU24(Offset);
331631c9654SVikram       break;
332631c9654SVikram     case 4:
333631c9654SVikram       ReadBytes = Extractor.getU32(Offset);
334631c9654SVikram       break;
335631c9654SVikram     }
336631c9654SVikram     cantFail(Offset.takeError(), "failed to read bytes from constant array");
337631c9654SVikram 
338631c9654SVikram     APInt IntVal(8 * ReadSize, ReadBytes);
339631c9654SVikram 
340631c9654SVikram     // TODO: Should not bother aligning up.
341631c9654SVikram     if (ReadNow < ReadSize)
342631c9654SVikram       IntVal = IntVal.zext(8 * ReadSize);
343631c9654SVikram 
344631c9654SVikram     Type *IntTy = Type::getIntNTy(Builder.getContext(), IntVal.getBitWidth());
345631c9654SVikram     WhatToStore.push_back(ConstantInt::get(IntTy, IntVal));
346631c9654SVikram   }
347631c9654SVikram   // Additional padding for 8 byte alignment
348631c9654SVikram   int Rem = (Str.size() % 8);
349631c9654SVikram   if (Rem > 0 && Rem <= 4)
350631c9654SVikram     WhatToStore.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
351631c9654SVikram }
352631c9654SVikram 
processNonStringArg(Value * Arg,IRBuilder<> & Builder)353631c9654SVikram static Value *processNonStringArg(Value *Arg, IRBuilder<> &Builder) {
3542d209d96SNikita Popov   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
355631c9654SVikram   auto Ty = Arg->getType();
356631c9654SVikram 
357631c9654SVikram   if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
358631c9654SVikram     if (IntTy->getBitWidth() < 64) {
359631c9654SVikram       return Builder.CreateZExt(Arg, Builder.getInt64Ty());
360631c9654SVikram     }
361631c9654SVikram   }
362631c9654SVikram 
363631c9654SVikram   if (Ty->isFloatingPointTy()) {
364631c9654SVikram     if (DL.getTypeAllocSize(Ty) < 8) {
365631c9654SVikram       return Builder.CreateFPExt(Arg, Builder.getDoubleTy());
366631c9654SVikram     }
367631c9654SVikram   }
368631c9654SVikram 
369631c9654SVikram   return Arg;
370631c9654SVikram }
371631c9654SVikram 
372631c9654SVikram static void
callBufferedPrintfArgPush(IRBuilder<> & Builder,ArrayRef<Value * > Args,Value * PtrToStore,SparseBitVector<8> & SpecIsCString,SmallVectorImpl<StringData> & StringContents,bool IsConstFmtStr)373631c9654SVikram callBufferedPrintfArgPush(IRBuilder<> &Builder, ArrayRef<Value *> Args,
374631c9654SVikram                           Value *PtrToStore, SparseBitVector<8> &SpecIsCString,
375631c9654SVikram                           SmallVectorImpl<StringData> &StringContents,
376631c9654SVikram                           bool IsConstFmtStr) {
377631c9654SVikram   Module *M = Builder.GetInsertBlock()->getModule();
378631c9654SVikram   const DataLayout &DL = M->getDataLayout();
379631c9654SVikram   auto StrIt = StringContents.begin();
380631c9654SVikram   size_t i = IsConstFmtStr ? 1 : 0;
381631c9654SVikram   for (; i < Args.size(); i++) {
382631c9654SVikram     SmallVector<Value *, 32> WhatToStore;
383631c9654SVikram     if ((i == 0) || SpecIsCString.test(i)) {
384631c9654SVikram       if (StrIt->IsConst) {
385631c9654SVikram         processConstantStringArg(StrIt, Builder, WhatToStore);
386631c9654SVikram         StrIt++;
387631c9654SVikram       } else {
388631c9654SVikram         // This copies the contents of the string, however the next offset
389631c9654SVikram         // is at aligned length, the extra space that might be created due
390631c9654SVikram         // to alignment padding is not populated with any specific value
391631c9654SVikram         // here. This would be safe as long as runtime is sync with
392631c9654SVikram         // the offsets.
393631c9654SVikram         Builder.CreateMemCpy(PtrToStore, /*DstAlign*/ Align(1), Args[i],
394631c9654SVikram                              /*SrcAlign*/ Args[i]->getPointerAlignment(DL),
395631c9654SVikram                              StrIt->RealSize);
396631c9654SVikram 
397631c9654SVikram         PtrToStore =
398631c9654SVikram             Builder.CreateInBoundsGEP(Builder.getInt8Ty(), PtrToStore,
399631c9654SVikram                                       {StrIt->AlignedSize}, "PrintBuffNextPtr");
400631c9654SVikram         LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:"
401631c9654SVikram                           << *PtrToStore << '\n');
402631c9654SVikram 
403631c9654SVikram         // done with current argument, move to next
404631c9654SVikram         StrIt++;
405631c9654SVikram         continue;
406631c9654SVikram       }
407631c9654SVikram     } else {
408631c9654SVikram       WhatToStore.push_back(processNonStringArg(Args[i], Builder));
409631c9654SVikram     }
410631c9654SVikram 
4114b28b3faSKazu Hirata     for (Value *toStore : WhatToStore) {
412631c9654SVikram       StoreInst *StBuff = Builder.CreateStore(toStore, PtrToStore);
413631c9654SVikram       LLVM_DEBUG(dbgs() << "inserting store to printf buffer:" << *StBuff
414631c9654SVikram                         << '\n');
41598183da6SKazu Hirata       (void)StBuff;
416631c9654SVikram       PtrToStore = Builder.CreateConstInBoundsGEP1_32(
417631c9654SVikram           Builder.getInt8Ty(), PtrToStore,
418631c9654SVikram           M->getDataLayout().getTypeAllocSize(toStore->getType()),
419631c9654SVikram           "PrintBuffNextPtr");
420631c9654SVikram       LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:" << *PtrToStore
421631c9654SVikram                         << '\n');
422631c9654SVikram     }
423631c9654SVikram   }
424631c9654SVikram }
425631c9654SVikram 
emitAMDGPUPrintfCall(IRBuilder<> & Builder,ArrayRef<Value * > Args,bool IsBuffered)426631c9654SVikram Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args,
427631c9654SVikram                                   bool IsBuffered) {
428ed181efaSSameer Sahasrabuddhe   auto NumOps = Args.size();
429ed181efaSSameer Sahasrabuddhe   assert(NumOps >= 1);
430ed181efaSSameer Sahasrabuddhe 
431ed181efaSSameer Sahasrabuddhe   auto Fmt = Args[0];
432ed181efaSSameer Sahasrabuddhe   SparseBitVector<8> SpecIsCString;
433631c9654SVikram   StringRef FmtStr;
434631c9654SVikram 
435631c9654SVikram   if (getConstantStringInfo(Fmt, FmtStr))
436631c9654SVikram     locateCStrings(SpecIsCString, FmtStr);
437631c9654SVikram 
438631c9654SVikram   if (IsBuffered) {
439631c9654SVikram     SmallVector<StringData, 8> StringContents;
440631c9654SVikram     Module *M = Builder.GetInsertBlock()->getModule();
441631c9654SVikram     LLVMContext &Ctx = Builder.getContext();
442631c9654SVikram     auto Int8Ty = Builder.getInt8Ty();
443631c9654SVikram     auto Int32Ty = Builder.getInt32Ty();
444631c9654SVikram     bool IsConstFmtStr = !FmtStr.empty();
445631c9654SVikram 
446631c9654SVikram     Value *ArgSize = nullptr;
447631c9654SVikram     Value *Ptr =
448631c9654SVikram         callBufferedPrintfStart(Builder, Args, Fmt, IsConstFmtStr,
449631c9654SVikram                                 SpecIsCString, StringContents, ArgSize);
450631c9654SVikram 
451631c9654SVikram     // The buffered version still follows OpenCL printf standards for
452631c9654SVikram     // printf return value, i.e 0 on success, -1 on failure.
453631c9654SVikram     ConstantPointerNull *zeroIntPtr =
454631c9654SVikram         ConstantPointerNull::get(cast<PointerType>(Ptr->getType()));
455631c9654SVikram 
456631c9654SVikram     auto *Cmp = cast<ICmpInst>(Builder.CreateICmpNE(Ptr, zeroIntPtr, ""));
457631c9654SVikram 
458631c9654SVikram     BasicBlock *End = BasicBlock::Create(Ctx, "end.block",
459631c9654SVikram                                          Builder.GetInsertBlock()->getParent());
460631c9654SVikram     BasicBlock *ArgPush = BasicBlock::Create(
461631c9654SVikram         Ctx, "argpush.block", Builder.GetInsertBlock()->getParent());
462631c9654SVikram 
463631c9654SVikram     BranchInst::Create(ArgPush, End, Cmp, Builder.GetInsertBlock());
464631c9654SVikram     Builder.SetInsertPoint(ArgPush);
465631c9654SVikram 
466631c9654SVikram     // Create controlDWord and store as the first entry, format as follows
467631c9654SVikram     // Bit 0 (LSB) -> stream (1 if stderr, 0 if stdout, printf always outputs to
468631c9654SVikram     // stdout) Bit 1 -> constant format string (1 if constant) Bits 2-31 -> size
469631c9654SVikram     // of printf data frame
470631c9654SVikram     auto ConstantTwo = Builder.getInt32(2);
471631c9654SVikram     auto ControlDWord = Builder.CreateShl(ArgSize, ConstantTwo);
472631c9654SVikram     if (IsConstFmtStr)
473631c9654SVikram       ControlDWord = Builder.CreateOr(ControlDWord, ConstantTwo);
474631c9654SVikram 
475631c9654SVikram     Builder.CreateStore(ControlDWord, Ptr);
476631c9654SVikram 
477631c9654SVikram     Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 4);
478631c9654SVikram 
479631c9654SVikram     // Create MD5 hash for costant format string, push low 64 bits of the
480631c9654SVikram     // same onto buffer and metadata.
481631c9654SVikram     NamedMDNode *metaD = M->getOrInsertNamedMetadata("llvm.printf.fmts");
482631c9654SVikram     if (IsConstFmtStr) {
483631c9654SVikram       MD5 Hasher;
484631c9654SVikram       MD5::MD5Result Hash;
485631c9654SVikram       Hasher.update(FmtStr);
486631c9654SVikram       Hasher.final(Hash);
487631c9654SVikram 
488631c9654SVikram       // Try sticking to llvm.printf.fmts format, although we are not going to
489631c9654SVikram       // use the ID and argument size fields while printing,
490631c9654SVikram       std::string MetadataStr =
491631c9654SVikram           "0:0:" + llvm::utohexstr(Hash.low(), /*LowerCase=*/true) + "," +
492631c9654SVikram           FmtStr.str();
493631c9654SVikram       MDString *fmtStrArray = MDString::get(Ctx, MetadataStr);
494631c9654SVikram       MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
495631c9654SVikram       metaD->addOperand(myMD);
496631c9654SVikram 
497631c9654SVikram       Builder.CreateStore(Builder.getInt64(Hash.low()), Ptr);
498631c9654SVikram       Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 8);
499631c9654SVikram     } else {
500631c9654SVikram       // Include a dummy metadata instance in case of only non constant
501631c9654SVikram       // format string usage, This might be an absurd usecase but needs to
502631c9654SVikram       // be done for completeness
503631c9654SVikram       if (metaD->getNumOperands() == 0) {
504631c9654SVikram         MDString *fmtStrArray =
505631c9654SVikram             MDString::get(Ctx, "0:0:ffffffff,\"Non const format string\"");
506631c9654SVikram         MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
507631c9654SVikram         metaD->addOperand(myMD);
508631c9654SVikram       }
509631c9654SVikram     }
510631c9654SVikram 
511631c9654SVikram     // Push The printf arguments onto buffer
512631c9654SVikram     callBufferedPrintfArgPush(Builder, Args, Ptr, SpecIsCString, StringContents,
513631c9654SVikram                               IsConstFmtStr);
514631c9654SVikram 
515631c9654SVikram     // End block, returns -1 on failure
516631c9654SVikram     BranchInst::Create(End, ArgPush);
517631c9654SVikram     Builder.SetInsertPoint(End);
518631c9654SVikram     return Builder.CreateSExt(Builder.CreateNot(Cmp), Int32Ty, "printf_result");
519631c9654SVikram   }
520ed181efaSSameer Sahasrabuddhe 
521ed181efaSSameer Sahasrabuddhe   auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
522ed181efaSSameer Sahasrabuddhe   Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
523ed181efaSSameer Sahasrabuddhe 
524ed181efaSSameer Sahasrabuddhe   // FIXME: This invokes hostcall once for each argument. We can pack up to
525ed181efaSSameer Sahasrabuddhe   // seven scalar printf arguments in a single hostcall. See the signature of
526ed181efaSSameer Sahasrabuddhe   // callAppendArgs().
527ed181efaSSameer Sahasrabuddhe   for (unsigned int i = 1; i != NumOps; ++i) {
528ed181efaSSameer Sahasrabuddhe     bool IsLast = i == NumOps - 1;
529ed181efaSSameer Sahasrabuddhe     bool IsCString = SpecIsCString.test(i);
530ed181efaSSameer Sahasrabuddhe     Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
531ed181efaSSameer Sahasrabuddhe   }
532ed181efaSSameer Sahasrabuddhe 
533ed181efaSSameer Sahasrabuddhe   return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
534ed181efaSSameer Sahasrabuddhe }
535