// xref: /llvm-project/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp (revision ca148b21505e97f07787c13ec00ffc086d4658d0)
1 //===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ByteCodeEmitter.h"
10 #include "Context.h"
11 #include "Floating.h"
12 #include "IntegralAP.h"
13 #include "Opcode.h"
14 #include "Program.h"
15 #include "clang/AST/ASTLambda.h"
16 #include "clang/AST/Attr.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/Basic/Builtins.h"
19 #include <type_traits>
20 
21 using namespace clang;
22 using namespace clang::interp;
23 
24 /// Unevaluated builtins don't get their arguments put on the stack
25 /// automatically. They instead operate on the AST of their Call
26 /// Expression.
27 /// Similar information is available via ASTContext::BuiltinInfo,
28 /// but that is not correct for our use cases.
29 static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
30   return BuiltinID == Builtin::BI__builtin_classify_type ||
31          BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size ||
32          BuiltinID == Builtin::BI__builtin_constant_p;
33 }
34 
/// Compile FuncDecl into an interpreter Function.
///
/// Sets up the parameter layout (offsets, primitive types, descriptors),
/// reserves slots for the RVO pointer and the 'this' pointer where needed,
/// records lambda capture -> closure-field mappings, and finally emits the
/// body's bytecode. Returns nullptr when the function cannot be represented
/// at all; returns a Function marked not-defined when only a declaration is
/// available so the body can be compiled later.
Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, this (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      // Look up the call-operator specialization that matches the invoker's
      // template arguments, and compile that instead of the invoker itself.
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return is not a primitive, a pointer to the storage where the
  // value is initialized in is passed as the first argument. See 'RVO'
  // elsewhere in the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      // Explicit-object member functions ('deducing this') get their object
      // as a regular parameter, so only implicit-object members reserve an
      // extra slot here.
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete, we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        // Map the captured variable to its field offset inside the closure
        // record, remembering whether it is a by-reference capture.
        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      // LTC is the field capturing 'this' (if any).
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    // The bool records whether the parameter is a primitive (classifiable)
    // type, i.e. passed by value rather than as a pointer.
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body. On failure (or when ineligible) the Function
  // is still returned, just without code attached.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}
197 
198 /// Compile an ObjC block, i.e. ^(){}, that thing.
199 ///
200 /// FIXME: We do not support calling the block though, so we create a function
201 /// here but do not compile any code for it.
202 Function *ByteCodeEmitter::compileObjCBlock(const BlockExpr *BE) {
203   const BlockDecl *BD = BE->getBlockDecl();
204   // Set up argument indices.
205   unsigned ParamOffset = 0;
206   SmallVector<PrimType, 8> ParamTypes;
207   SmallVector<unsigned, 8> ParamOffsets;
208   llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;
209 
210   // Assign descriptors to all parameters.
211   // Composite objects are lowered to pointers.
212   for (const ParmVarDecl *PD : BD->parameters()) {
213     std::optional<PrimType> T = Ctx.classify(PD->getType());
214     PrimType PT = T.value_or(PT_Ptr);
215     Descriptor *Desc = P.createDescriptor(PD, PT);
216     ParamDescriptors.insert({ParamOffset, {PT, Desc}});
217     Params.insert({PD, {ParamOffset, T != std::nullopt}});
218     ParamOffsets.push_back(ParamOffset);
219     ParamOffset += align(primSize(PT));
220     ParamTypes.push_back(PT);
221   }
222 
223   if (BD->hasCaptures())
224     return nullptr;
225 
226   // Create a handle over the emitted code.
227   Function *Func =
228       P.createFunction(BE, ParamOffset, std::move(ParamTypes),
229                        std::move(ParamDescriptors), std::move(ParamOffsets),
230                        /*HasThisPointer=*/false, /*HasRVO=*/false,
231                        /*IsUnevaluatedBuiltin=*/false);
232 
233   assert(Func);
234   Func->setDefined(true);
235   // We don't compile the BlockDecl code at all right now.
236   Func->setIsFullyCompiled(true);
237   return Func;
238 }
239 
240 Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
241   NextLocalOffset += sizeof(Block);
242   unsigned Location = NextLocalOffset;
243   NextLocalOffset += align(D->getAllocSize());
244   return {Location, D};
245 }
246 
/// Bind Label to the current end of the code stream and back-patch every
/// previously emitted jump that targets it.
void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      // Each recorded Reloc is the PC the jump is relative to, i.e. the
      // position just past its int32_t operand; step back to the operand.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    // All pending jumps to this label are resolved now.
    LabelRelocs.erase(It);
  }
}
264 
265 int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
266   // Compute the PC offset which the jump is relative to.
267   const int64_t Position =
268       Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
269   assert(aligned(Position));
270 
271   // If target is known, compute jump offset.
272   if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
273     return It->second - Position;
274 
275   // Otherwise, record relocation and return dummy offset.
276   LabelRelocs[Label].push_back(Position);
277   return 0ull;
278 }
279 
280 /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
281 /// Pointers will be automatically marshalled as 32-bit IDs.
282 template <typename T>
283 static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
284                  bool &Success) {
285   size_t Size;
286 
287   if constexpr (std::is_pointer_v<T>)
288     Size = sizeof(uint32_t);
289   else
290     Size = sizeof(T);
291 
292   if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
293     Success = false;
294     return;
295   }
296 
297   // Access must be aligned!
298   size_t ValPos = align(Code.size());
299   Size = align(Size);
300   assert(aligned(ValPos + Size));
301   Code.resize(ValPos + Size);
302 
303   if constexpr (!std::is_pointer_v<T>) {
304     new (Code.data() + ValPos) T(Val);
305   } else {
306     uint32_t ID = P.getOrCreateNativePointer(Val);
307     new (Code.data() + ValPos) uint32_t(ID);
308   }
309 }
310 
311 /// Emits a serializable value. These usually (potentially) contain
312 /// heap-allocated memory and aren't trivially copyable.
313 template <typename T>
314 static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
315                            bool &Success) {
316   size_t Size = Val.bytesToSerialize();
317 
318   if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
319     Success = false;
320     return;
321   }
322 
323   // Access must be aligned!
324   size_t ValPos = align(Code.size());
325   Size = align(Size);
326   assert(aligned(ValPos + Size));
327   Code.resize(ValPos + Size);
328 
329   Val.serialize(Code.data() + ValPos);
330 }
331 
// Floating and IntegralAP values may own heap memory and are not trivially
// copyable, so these specializations route them through emitSerialized()
// instead of the generic byte-copy path above.
template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}
349 
/// Emit a single opcode followed by its operands, recording source info.
/// Returns false if the code stream exceeded the 32-bit offset limit.
template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &...Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  // Emit each argument in order (left-to-right fold over the pack).
  (..., emit(P, Code, Args, Success));
  return Success;
}
364 
// Emit a conditional jump taken when the top of the stack is true.
bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

// Emit a conditional jump taken when the top of the stack is false.
bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

// Emit an unconditional jump to Label.
bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

// No jump needed: just bind the label at the current position.
bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}
381 
382 //===----------------------------------------------------------------------===//
383 // Opcode emitters
384 //===----------------------------------------------------------------------===//
385 
386 #define GET_LINK_IMPL
387 #include "Opcodes.inc"
388 #undef GET_LINK_IMPL
389