//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

/// Unevaluated builtins don't get their arguments put on the stack
/// automatically. They instead operate on the AST of their Call
/// Expression.
/// Similar information is available via ASTContext::BuiltinInfo,
/// but that is not correct for our use cases.
static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
  return BuiltinID == Builtin::BI__builtin_classify_type ||
         BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size ||
         BuiltinID == Builtin::BI__builtin_constant_p;
}
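// Illustrative note (not part of the upstream sources): for these builtins
// the interpreter inspects the call's AST rather than evaluating operands.
// For example, in
//
//   constexpr int K = __builtin_constant_p(1 + 1); // K == 1
//
// the argument expression `1 + 1` is never pushed onto the InterpStack; the
// builtin's implementation examines the CallExpr's argument directly.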
Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp =
          ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return is not a primitive, a pointer to the storage where the
  // value is initialized is passed as the first argument. See 'RVO'
  // elsewhere in the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete; we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and all we can do is ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}
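// Illustrative layout (a sketch, not upstream documentation): for a
// hypothetical member function
//
//   struct S { NonPrimitive f(int A); };
//
// where NonPrimitive has no PrimType classification, the parameter region
// built above is
//
//   [RVO pointer][this pointer][A]
//
// with each slot placed at an align(primSize(...)) boundary, in exactly the
// order the ParamOffset accumulation runs: RVO slot first, then 'this',
// then the declared parameters.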
/// Compile an Objective-C block, i.e. a ^(){} block expression.
///
/// FIXME: We do not support calling the block though, so we create a function
/// here but do not compile any code for it.
Function *ByteCodeEmitter::compileObjCBlock(const BlockExpr *BE) {
  const BlockDecl *BD = BE->getBlockDecl();
  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : BD->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  if (BD->hasCaptures())
    return nullptr;

  // Create a handle over the emitted code.
  Function *Func =
      P.createFunction(BE, ParamOffset, std::move(ParamTypes),
                       std::move(ParamDescriptors), std::move(ParamOffsets),
                       /*HasThisPointer=*/false, /*HasRVO=*/false,
                       /*IsUnevaluatedBuiltin=*/false);

  assert(Func);
  Func->setDefined(true);
  // We don't compile the BlockDecl code at all right now.
  Func->setIsFullyCompiled(true);
  return Func;
}

Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If the target is known, compute the jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record the relocation and return a dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0;
}
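// Worked example (illustrative, assuming align(sizeof(Opcode)) ==
// align(sizeof(int32_t)) == 4): a jump emitted when Code.size() == 16 calls
// getOffset(), which records Position == 16 + 4 + 4 == 24, the PC just past
// the 32-bit operand. If the label is later bound at Target == 40,
// emitLabel() patches the operand at 24 - 4 == 20 with 40 - 24 == 16, the
// jump distance relative to the PC following the jump instruction.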
/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}

/// Emits a serializable value. These potentially contain heap-allocated
/// memory and aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<true> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &...Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  (..., emit(P, Code, Args, Success));
  return Success;
}

bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL
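// Illustrative expansion (hypothetical sketch; the actual bodies are
// generated into Opcodes.inc by the opcode tablegen backend): a linked
// emitter looks roughly like
//
//   bool ByteCodeEmitter::emitJmp(int32_t T0, const SourceInfo &I) {
//     return emitOp<int32_t>(OP_Jmp, T0, I);
//   }
//
// i.e. each generated method forwards its operands to emitOp with the
// operand types supplied as explicit template arguments.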