//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "FixedPoint.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Bail out on manually created functions that haven't been assigned
  // proper parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
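    // Illustrative example: given
    //   auto L = [](auto X) { return X; };
    //   int (*Fp)(int) = L;
    // the invoker for 'int (*)(int)' forwards to the 'int' specialization
    // of the call operator, which is what we pick below.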
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
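    // A static invoker is only generated for lambdas without captures.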
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return type is not primitive, a pointer to the storage where the
  // value is initialized is passed as the first argument. See 'RVO'
  // elsewhere in the code.
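  // E.g. a constexpr function returning a struct gets a hidden PT_Ptr
  // parameter through which the caller's result storage is initialized.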
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up the mapping from lambda captures to closure record fields.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete; we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

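      // LC maps each captured variable to its field in the closure record;
      // LTC is the field for the captured 'this', if any.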
      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only bail out.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
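  // E.g. an 'int' parameter is classified as PT_Sint32, while a parameter
  // of class type has no PrimType and is passed as PT_Ptr.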
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T.has_value()}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    unsigned BuiltinID = FuncDecl->getBuiltinID();
    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, BuiltinID);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

/// Compile an ObjC block, i.e. ^(){}.
///
/// FIXME: We do not support calling the block yet, so we create a Function
/// here but do not compile any code for it.
Function *ByteCodeEmitter::compileObjCBlock(const BlockExpr *BE) {
  const BlockDecl *BD = BE->getBlockDecl();
  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : BD->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T.has_value()}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

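  // Blocks with captures are not supported.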
  if (BD->hasCaptures())
    return nullptr;

  // Create a handle over the emitted code.
  Function *Func =
      P.createFunction(BE, ParamOffset, std::move(ParamTypes),
                       std::move(ParamDescriptors), std::move(ParamOffsets),
                       /*HasThisPointer=*/false, /*HasRVO=*/false,
                       /*BuiltinID=*/0);

  assert(Func);
  Func->setDefined(true);
  // We don't compile the BlockDecl's code at all right now.
  Func->setIsFullyCompiled(true);
  return Func;
}

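/// Reserves storage for a local: a Block header followed by the (aligned)
/// payload. The returned location points past the header, at the payload.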
Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

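  // Backpatch all forward jumps to this label that were emitted before its
  // position was known.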
  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If the target is known, compute the jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record a relocation and return a dummy offset.
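  // emitLabel() will rewrite the operand with the real offset once the
  // label is placed.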
  LabelRelocs[Label].push_back(Position);
  return 0;
}

/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
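/// (The Program instance owns the native pointers; only their 32-bit IDs
/// end up in the bytecode stream.)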
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}

/// Emits a serializable value. These may contain heap-allocated memory and
/// aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &...Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

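  // The comma fold emits the arguments in order, left to right.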
  (..., emit(P, Code, Args, Success));
  return Success;
}

bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL