//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

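/// Compiles a FunctionDecl into an interp::Function: lays out the parameter
/// area (including the implicit RVO and 'this' slots), maps lambda captures
/// to closure-record fields, and, if the declaration is eligible for
/// constant evaluation, emits bytecode for its body.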
Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return is not a primitive, a pointer to the storage where the
  // value is initialized is passed as the first argument. See 'RVO'
  // elsewhere in the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete; we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    unsigned BuiltinID = FuncDecl->getBuiltinID();
    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, BuiltinID);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case for which we emit custom code.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

/// Compile an ObjC block, i.e. a ^(){} block literal.
///
/// FIXME: We do not support calling the block yet, so we create a function
/// here but do not compile any code for it.
Function *ByteCodeEmitter::compileObjCBlock(const BlockExpr *BE) {
  const BlockDecl *BD = BE->getBlockDecl();
  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : BD->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  if (BD->hasCaptures())
    return nullptr;

  // Create a handle over the emitted code.
  Function *Func =
      P.createFunction(BE, ParamOffset, std::move(ParamTypes),
                       std::move(ParamDescriptors), std::move(ParamOffsets),
                       /*HasThisPointer=*/false, /*HasRVO=*/false,
                       /*IsUnevaluatedBuiltin=*/false);

  assert(Func);
  Func->setDefined(true);
  // We don't compile the BlockDecl code at all right now.
  Func->setIsFullyCompiled(true);
  return Func;
}

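/// Reserves storage for a local variable: a Block header is laid out in
/// front of the (aligned) payload described by \p D, and the returned
/// location points just past that header.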
Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

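/// Binds \p Label to the current end of the bytecode stream and
/// backpatches every previously recorded jump to it, rewriting the
/// jump's 32-bit relative offset operand in place.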
void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

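/// Computes the jump offset from the instruction about to be emitted to
/// \p Label. Backward references are resolved immediately; forward
/// references return a dummy offset of 0 and record a relocation that
/// emitLabel() patches once the label's position is known.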
int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If target is known, compute jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record relocation and return dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0;
}

264 
265 /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
266 /// Pointers will be automatically marshalled as 32-bit IDs.
267 template <typename T>
268 static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
269                  bool &Success) {
270   size_t Size;
271 
272   if constexpr (std::is_pointer_v<T>)
273     Size = sizeof(uint32_t);
274   else
275     Size = sizeof(T);
276 
277   if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
278     Success = false;
279     return;
280   }
281 
282   // Access must be aligned!
283   size_t ValPos = align(Code.size());
284   Size = align(Size);
285   assert(aligned(ValPos + Size));
286   Code.resize(ValPos + Size);
287 
288   if constexpr (!std::is_pointer_v<T>) {
289     new (Code.data() + ValPos) T(Val);
290   } else {
291     uint32_t ID = P.getOrCreateNativePointer(Val);
292     new (Code.data() + ValPos) uint32_t(ID);
293   }
294 }

/// Emits a serializable value. Such values potentially contain
/// heap-allocated memory and aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

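// Floating, IntegralAP and FixedPoint values may own out-of-line storage,
// so they go through the serialized path instead of the generic
// placement-new emit() above.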
template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const FixedPoint &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

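/// Emits a single opcode followed by its operands. Source information, if
/// present, is keyed to the code offset immediately after the opcode so a
/// PC can later be mapped back to a source location.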
template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &...Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  (..., emit(P, Code, Args, Success));
  return Success;
}

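// Jump helpers: each emits a jump opcode whose 32-bit operand is the label
// offset computed (or deferred) by getOffset().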
bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

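// Pull in the tablegen-generated implementations of the per-opcode emit*
// methods (emitJt, emitJmp, etc.) used above.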
#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL