//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
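    //
    // Illustrative example (names assumed, not from this file):
    //
    //   auto L = [](auto X) { return X; };
    //   int (*FP)(int) = L;   // conversion goes through the static invoker
    //
    // Here the invoker is a specialization of __invoke<int>, and the call
    // operator we redirect to below is the specialization operator()<int>.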
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return type is not a primitive, a pointer to the storage in
  // which the value is initialized is passed as the first argument. See
  // 'RVO' elsewhere in the code.
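  //
  // Illustrative example (type name assumed): given
  //
  //   constexpr Pair makePair();   // Pair is a non-primitive class type
  //
  // the caller allocates storage for the Pair, and an extra PT_Ptr argument
  // pointing at that storage is prepended to the parameter list here.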
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
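  // Note that only implicit object member functions get this extra slot;
  // static member functions take no 'this' argument, and a C++23 explicit
  // object parameter is handled like any other declared parameter below.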
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete, since we need to know
      // about all the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

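      // getCaptureFields() maps every captured variable to the closure
      // class field that stores it, and points LTC at the field for a
      // captured 'this', if any. E.g. (illustrative) for [X, this] {...},
      // LC contains a single entry for X and LTC is non-null.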
      for (const auto &Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T.has_value()}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }
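
  // The argument area is now laid out, at increasing (aligned) offsets, as:
  //   [RVO pointer, if HasRVO][this pointer, if any][declared parameters]
  // Illustrative example: `constexpr Pair S::get(int I) const;` with a
  // non-primitive Pair gets the RVO pointer, then 'this', then I.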

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    unsigned BuiltinID = FuncDecl->getBuiltinID();
    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, BuiltinID);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

/// Compile an ObjC block, i.e. a ^(){...} block literal.
///
/// FIXME: We do not support calling the block though, so we create a function
/// here but do not compile any code for it.
Function *ByteCodeEmitter::compileObjCBlock(const BlockExpr *BE) {
  const BlockDecl *BD = BE->getBlockDecl();
  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : BD->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T.has_value()}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  if (BD->hasCaptures())
    return nullptr;

  // Create a handle over the emitted code.
  Function *Func =
      P.createFunction(BE, ParamOffset, std::move(ParamTypes),
                       std::move(ParamDescriptors), std::move(ParamOffsets),
                       /*HasThisPointer=*/false, /*HasRVO=*/false,
                       /*BuiltinID=*/0);

  assert(Func);
  Func->setDefined(true);
  // We don't compile the BlockDecl code at all right now.
  Func->setIsFullyCompiled(true);
  return Func;
}

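/// Reserves frame space for a single local: an inline Block header
/// immediately followed by the (aligned) storage described by D. The
/// returned location is the offset of the storage, just past the header.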
Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

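/// Binds a label to the current end of the bytecode stream and
/// back-patches every forward jump recorded against it by getOffset().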
void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

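/// Computes the operand for a jump to Label. Jump offsets are relative to
/// the PC after the opcode and its 32-bit operand have been read. If the
/// label is not bound yet, a relocation is recorded and 0 is emitted as a
/// placeholder for emitLabel() to patch later.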
int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If target is known, compute jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record relocation and return dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0;
}

/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}

/// Emits a serializable value. These potentially contain heap-allocated
/// memory and aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &...Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

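  // Emit the remaining operands in order via a comma-operator fold;
  // Success latches to false if any write would overflow 32-bit offsets.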
  (..., emit(P, Code, Args, Success));
  return Success;
}

bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL