1 //===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines manifest constants for the wasm object file format. 10 // See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_BINARYFORMAT_WASM_H 15 #define LLVM_BINARYFORMAT_WASM_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include <optional> 21 22 namespace llvm { 23 namespace wasm { 24 25 // Object file magic string. 26 const char WasmMagic[] = {'\0', 'a', 's', 'm'}; 27 // Wasm binary format version 28 const uint32_t WasmVersion = 0x1; 29 // Wasm linking metadata version 30 const uint32_t WasmMetadataVersion = 0x2; 31 // Wasm uses a 64k page size 32 const uint32_t WasmPageSize = 65536; 33 34 enum : unsigned { 35 WASM_SEC_CUSTOM = 0, // Custom / User-defined section 36 WASM_SEC_TYPE = 1, // Function signature declarations 37 WASM_SEC_IMPORT = 2, // Import declarations 38 WASM_SEC_FUNCTION = 3, // Function declarations 39 WASM_SEC_TABLE = 4, // Indirect function table and other tables 40 WASM_SEC_MEMORY = 5, // Memory attributes 41 WASM_SEC_GLOBAL = 6, // Global declarations 42 WASM_SEC_EXPORT = 7, // Exports 43 WASM_SEC_START = 8, // Start function declaration 44 WASM_SEC_ELEM = 9, // Elements section 45 WASM_SEC_CODE = 10, // Function bodies (code) 46 WASM_SEC_DATA = 11, // Data segments 47 WASM_SEC_DATACOUNT = 12, // Data segment count 48 WASM_SEC_TAG = 13, // Tag declarations 49 WASM_SEC_LAST_KNOWN = WASM_SEC_TAG, 50 }; 51 52 // Type immediate encodings used in various contexts. 
enum : unsigned {
  WASM_TYPE_I32 = 0x7f,
  WASM_TYPE_I64 = 0x7e,
  WASM_TYPE_F32 = 0x7d,
  WASM_TYPE_F64 = 0x7c,
  WASM_TYPE_V128 = 0x7b,
  WASM_TYPE_NULLFUNCREF = 0x73,
  WASM_TYPE_NULLEXTERNREF = 0x72,
  WASM_TYPE_NULLEXNREF = 0x74,
  WASM_TYPE_NULLREF = 0x71,
  WASM_TYPE_FUNCREF = 0x70,
  WASM_TYPE_EXTERNREF = 0x6f,
  WASM_TYPE_EXNREF = 0x69,
  WASM_TYPE_ANYREF = 0x6e,
  WASM_TYPE_EQREF = 0x6d,
  WASM_TYPE_I31REF = 0x6c,
  WASM_TYPE_STRUCTREF = 0x6b,
  WASM_TYPE_ARRAYREF = 0x6a,
  WASM_TYPE_NONNULLABLE = 0x64,
  WASM_TYPE_NULLABLE = 0x63,
  WASM_TYPE_FUNC = 0x60,
  WASM_TYPE_ARRAY = 0x5e,
  WASM_TYPE_STRUCT = 0x5f,
  WASM_TYPE_SUB = 0x50,
  WASM_TYPE_SUB_FINAL = 0x4f,
  WASM_TYPE_REC = 0x4e,
  // Used for blocks that produce no result values.
  WASM_TYPE_NORESULT = 0x40,
};

// External kinds, shared by imports and exports.
enum : unsigned {
  WASM_EXTERNAL_FUNCTION = 0x0,
  WASM_EXTERNAL_TABLE = 0x1,
  WASM_EXTERNAL_MEMORY = 0x2,
  WASM_EXTERNAL_GLOBAL = 0x3,
  WASM_EXTERNAL_TAG = 0x4,
};

// Opcodes used in initializer expressions.
enum : unsigned {
  WASM_OPCODE_END = 0x0b,
  WASM_OPCODE_CALL = 0x10,
  WASM_OPCODE_LOCAL_GET = 0x20,
  WASM_OPCODE_LOCAL_SET = 0x21,
  WASM_OPCODE_LOCAL_TEE = 0x22,
  WASM_OPCODE_GLOBAL_GET = 0x23,
  WASM_OPCODE_GLOBAL_SET = 0x24,
  WASM_OPCODE_I32_STORE = 0x36,
  WASM_OPCODE_I64_STORE = 0x37,
  WASM_OPCODE_I32_CONST = 0x41,
  WASM_OPCODE_I64_CONST = 0x42,
  WASM_OPCODE_F32_CONST = 0x43,
  WASM_OPCODE_F64_CONST = 0x44,
  WASM_OPCODE_I32_ADD = 0x6a,
  WASM_OPCODE_I32_SUB = 0x6b,
  WASM_OPCODE_I32_MUL = 0x6c,
  WASM_OPCODE_I64_ADD = 0x7c,
  WASM_OPCODE_I64_SUB = 0x7d,
  WASM_OPCODE_I64_MUL = 0x7e,
  WASM_OPCODE_REF_NULL = 0xd0,
  WASM_OPCODE_REF_FUNC = 0xd2,
  // Prefix byte; the opcodes of the enum below live in this prefixed space.
  WASM_OPCODE_GC_PREFIX = 0xfb,
};

// Opcodes in the GC-prefixed space (0xfb)
enum : unsigned {
  WASM_OPCODE_STRUCT_NEW = 0x00,
  WASM_OPCODE_STRUCT_NEW_DEFAULT = 0x01,
  WASM_OPCODE_ARRAY_NEW = 0x06,
  WASM_OPCODE_ARRAY_NEW_DEFAULT = 0x07,
  WASM_OPCODE_ARRAY_NEW_FIXED = 0x08,
  WASM_OPCODE_REF_I31 = 0x1c,
  // any.convert_extern and extern.convert_any don't seem to be supported by
  // Binaryen.
};

// Opcodes used in synthetic functions.
enum : unsigned {
  WASM_OPCODE_BLOCK = 0x02,
  WASM_OPCODE_BR = 0x0c,
  WASM_OPCODE_BR_TABLE = 0x0e,
  WASM_OPCODE_RETURN = 0x0f,
  WASM_OPCODE_DROP = 0x1a,
  // Prefix byte followed by the sub-opcodes listed directly after it.
  WASM_OPCODE_MISC_PREFIX = 0xfc,
  WASM_OPCODE_MEMORY_INIT = 0x08,
  WASM_OPCODE_MEMORY_FILL = 0x0b,
  WASM_OPCODE_DATA_DROP = 0x09,
  // Prefix byte followed by the sub-opcodes listed directly after it.
  WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
  WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
  WASM_OPCODE_I32_ATOMIC_WAIT = 0x01,
  WASM_OPCODE_I32_ATOMIC_STORE = 0x17,
  WASM_OPCODE_I32_RMW_CMPXCHG = 0x48,
};

// Sub-opcodes for catch clauses in a try_table instruction
enum : unsigned {
  WASM_OPCODE_CATCH = 0x00,
  WASM_OPCODE_CATCH_REF = 0x01,
  WASM_OPCODE_CATCH_ALL = 0x02,
  WASM_OPCODE_CATCH_ALL_REF = 0x03,
};

// Bit flags stored in WasmLimits::Flags.
enum : unsigned {
  WASM_LIMITS_FLAG_NONE = 0x0,
  WASM_LIMITS_FLAG_HAS_MAX = 0x1,
  WASM_LIMITS_FLAG_IS_SHARED = 0x2,
  WASM_LIMITS_FLAG_IS_64 = 0x4,
};

// Bit flags describing a data segment.
enum : unsigned {
  WASM_DATA_SEGMENT_IS_PASSIVE = 0x01,
  WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02,
};

// Bit flags describing an element segment. Bit 0x02 is shared by two flags;
// which one applies depends on the passive bit.
enum : unsigned {
  WASM_ELEM_SEGMENT_IS_PASSIVE = 0x01,
  WASM_ELEM_SEGMENT_IS_DECLARATIVE = 0x02,   // if passive == 1
  WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
  WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
};
const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3;

// Feature policy prefixes used in the custom "target_features" section
enum : uint8_t {
  WASM_FEATURE_PREFIX_USED = '+',
  WASM_FEATURE_PREFIX_DISALLOWED = '-',
};

// Kind codes used in the custom "name" section
enum : unsigned {
  WASM_NAMES_MODULE = 0,
  WASM_NAMES_FUNCTION = 1,
  WASM_NAMES_LOCAL = 2,
  WASM_NAMES_GLOBAL = 7,
  WASM_NAMES_DATA_SEGMENT = 9,
};

// Kind codes used in the custom "linking" section
enum : unsigned {
  WASM_SEGMENT_INFO = 0x5,
  WASM_INIT_FUNCS = 0x6,
  WASM_COMDAT_INFO = 0x7,
  WASM_SYMBOL_TABLE = 0x8,
};

// Kind codes used in
the custom "dylink" section 199 enum : unsigned { 200 WASM_DYLINK_MEM_INFO = 0x1, 201 WASM_DYLINK_NEEDED = 0x2, 202 WASM_DYLINK_EXPORT_INFO = 0x3, 203 WASM_DYLINK_IMPORT_INFO = 0x4, 204 }; 205 206 // Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO 207 enum : unsigned { 208 WASM_COMDAT_DATA = 0x0, 209 WASM_COMDAT_FUNCTION = 0x1, 210 // GLOBAL, TAG, and TABLE are in here but LLVM doesn't use them yet. 211 WASM_COMDAT_SECTION = 0x5, 212 }; 213 214 // Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE 215 enum WasmSymbolType : unsigned { 216 WASM_SYMBOL_TYPE_FUNCTION = 0x0, 217 WASM_SYMBOL_TYPE_DATA = 0x1, 218 WASM_SYMBOL_TYPE_GLOBAL = 0x2, 219 WASM_SYMBOL_TYPE_SECTION = 0x3, 220 WASM_SYMBOL_TYPE_TAG = 0x4, 221 WASM_SYMBOL_TYPE_TABLE = 0x5, 222 }; 223 224 enum WasmSegmentFlag : unsigned { 225 WASM_SEG_FLAG_STRINGS = 0x1, 226 WASM_SEG_FLAG_TLS = 0x2, 227 WASM_SEG_FLAG_RETAIN = 0x4, 228 }; 229 230 // Kinds of tag attributes. 231 enum WasmTagAttribute : uint8_t { 232 WASM_TAG_ATTRIBUTE_EXCEPTION = 0x0, 233 }; 234 235 const unsigned WASM_SYMBOL_BINDING_MASK = 0x3; 236 const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0xc; 237 238 const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0; 239 const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1; 240 const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2; 241 const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0; 242 const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; 243 const unsigned WASM_SYMBOL_UNDEFINED = 0x10; 244 const unsigned WASM_SYMBOL_EXPORTED = 0x20; 245 const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; 246 const unsigned WASM_SYMBOL_NO_STRIP = 0x80; 247 const unsigned WASM_SYMBOL_TLS = 0x100; 248 const unsigned WASM_SYMBOL_ABSOLUTE = 0x200; 249 250 #define WASM_RELOC(name, value) name = value, 251 252 enum : unsigned { 253 #include "WasmRelocs.def" 254 }; 255 256 #undef WASM_RELOC 257 258 struct WasmObjectHeader { 259 StringRef Magic; 260 uint32_t Version; 261 }; 262 263 // Subset of types that a 
value can have 264 enum class ValType { 265 I32 = WASM_TYPE_I32, 266 I64 = WASM_TYPE_I64, 267 F32 = WASM_TYPE_F32, 268 F64 = WASM_TYPE_F64, 269 V128 = WASM_TYPE_V128, 270 FUNCREF = WASM_TYPE_FUNCREF, 271 EXTERNREF = WASM_TYPE_EXTERNREF, 272 EXNREF = WASM_TYPE_EXNREF, 273 // Unmodeled value types include ref types with heap types other than 274 // func, extern or exn, and type-specialized funcrefs 275 OTHERREF = 0xff, 276 }; 277 278 struct WasmDylinkImportInfo { 279 StringRef Module; 280 StringRef Field; 281 uint32_t Flags; 282 }; 283 284 struct WasmDylinkExportInfo { 285 StringRef Name; 286 uint32_t Flags; 287 }; 288 289 struct WasmDylinkInfo { 290 uint32_t MemorySize; // Memory size in bytes 291 uint32_t MemoryAlignment; // P2 alignment of memory 292 uint32_t TableSize; // Table size in elements 293 uint32_t TableAlignment; // P2 alignment of table 294 std::vector<StringRef> Needed; // Shared library dependencies 295 std::vector<WasmDylinkImportInfo> ImportInfo; 296 std::vector<WasmDylinkExportInfo> ExportInfo; 297 }; 298 299 struct WasmProducerInfo { 300 std::vector<std::pair<std::string, std::string>> Languages; 301 std::vector<std::pair<std::string, std::string>> Tools; 302 std::vector<std::pair<std::string, std::string>> SDKs; 303 }; 304 305 struct WasmFeatureEntry { 306 uint8_t Prefix; 307 std::string Name; 308 }; 309 310 struct WasmExport { 311 StringRef Name; 312 uint8_t Kind; 313 uint32_t Index; 314 }; 315 316 struct WasmLimits { 317 uint8_t Flags; 318 uint64_t Minimum; 319 uint64_t Maximum; 320 }; 321 322 struct WasmTableType { 323 ValType ElemType; 324 WasmLimits Limits; 325 }; 326 327 struct WasmTable { 328 uint32_t Index; 329 WasmTableType Type; 330 StringRef SymbolName; // from the "linking" section 331 }; 332 333 struct WasmInitExprMVP { 334 uint8_t Opcode; 335 union { 336 int32_t Int32; 337 int64_t Int64; 338 uint32_t Float32; 339 uint64_t Float64; 340 uint32_t Global; 341 } Value; 342 }; 343 344 // Extended-const init exprs and exprs with GC types 
are not explicitly 345 // modeled, but the raw body of the expr is attached. 346 struct WasmInitExpr { 347 uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than 348 // one instruction) 349 WasmInitExprMVP Inst; 350 ArrayRef<uint8_t> Body; 351 }; 352 353 struct WasmGlobalType { 354 uint8_t Type; // TODO: make this a ValType? 355 bool Mutable; 356 }; 357 358 struct WasmGlobal { 359 uint32_t Index; 360 WasmGlobalType Type; 361 WasmInitExpr InitExpr; 362 StringRef SymbolName; // from the "linking" section 363 uint32_t Offset; // Offset of the definition in the binary's Global section 364 uint32_t Size; // Size of the definition in the binary's Global section 365 }; 366 367 struct WasmTag { 368 uint32_t Index; 369 uint32_t SigIndex; 370 StringRef SymbolName; // from the "linking" section 371 }; 372 373 struct WasmImport { 374 StringRef Module; 375 StringRef Field; 376 uint8_t Kind; 377 union { 378 uint32_t SigIndex; 379 WasmGlobalType Global; 380 WasmTableType Table; 381 WasmLimits Memory; 382 }; 383 }; 384 385 struct WasmLocalDecl { 386 uint8_t Type; 387 uint32_t Count; 388 }; 389 390 struct WasmFunction { 391 uint32_t Index; 392 uint32_t SigIndex; 393 std::vector<WasmLocalDecl> Locals; 394 ArrayRef<uint8_t> Body; 395 uint32_t CodeSectionOffset; 396 uint32_t Size; 397 uint32_t CodeOffset; // start of Locals and Body 398 std::optional<StringRef> ExportName; // from the "export" section 399 StringRef SymbolName; // from the "linking" section 400 StringRef DebugName; // from the "name" section 401 uint32_t Comdat; // from the "comdat info" section 402 }; 403 404 struct WasmDataSegment { 405 uint32_t InitFlags; 406 // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX. 407 uint32_t MemoryIndex; 408 // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0. 
409 WasmInitExpr Offset; 410 411 ArrayRef<uint8_t> Content; 412 StringRef Name; // from the "segment info" section 413 uint32_t Alignment; 414 uint32_t LinkingFlags; 415 uint32_t Comdat; // from the "comdat info" section 416 }; 417 418 // 3 different element segment modes are encodable. This class is currently 419 // only used during decoding (see WasmElemSegment below). 420 enum class ElemSegmentMode { Active, Passive, Declarative }; 421 422 // Represents a Wasm element segment, with some limitations compared the spec: 423 // 1) Does not model passive or declarative segments (Segment will end up with 424 // an Offset field of i32.const 0) 425 // 2) Does not model init exprs (Segment will get an empty Functions list) 426 // 3) Does not model types other than basic funcref/externref/exnref (see 427 // ValType) 428 struct WasmElemSegment { 429 uint32_t Flags; 430 uint32_t TableNumber; 431 ValType ElemKind; 432 WasmInitExpr Offset; 433 std::vector<uint32_t> Functions; 434 }; 435 436 // Represents the location of a Wasm data symbol within a WasmDataSegment, as 437 // the index of the segment, and the offset and size within the segment. 438 struct WasmDataReference { 439 uint32_t Segment; 440 uint64_t Offset; 441 uint64_t Size; 442 }; 443 444 struct WasmRelocation { 445 uint8_t Type; // The type of the relocation. 446 uint32_t Index; // Index into either symbol or type index space. 447 uint64_t Offset; // Offset from the start of the section. 448 int64_t Addend; // A value to add to the symbol. 
449 }; 450 451 struct WasmInitFunc { 452 uint32_t Priority; 453 uint32_t Symbol; 454 }; 455 456 struct WasmSymbolInfo { 457 StringRef Name; 458 uint8_t Kind; 459 uint32_t Flags; 460 // For undefined symbols the module of the import 461 std::optional<StringRef> ImportModule; 462 // For undefined symbols the name of the import 463 std::optional<StringRef> ImportName; 464 // For symbols to be exported from the final module 465 std::optional<StringRef> ExportName; 466 union { 467 // For function, table, or global symbols, the index in function, table, or 468 // global index space. 469 uint32_t ElementIndex; 470 // For a data symbols, the address of the data relative to segment. 471 WasmDataReference DataRef; 472 }; 473 }; 474 475 enum class NameType { 476 FUNCTION, 477 GLOBAL, 478 DATA_SEGMENT, 479 }; 480 481 struct WasmDebugName { 482 NameType Type; 483 uint32_t Index; 484 StringRef Name; 485 }; 486 487 // Info from the linking metadata section of a wasm object file. 488 struct WasmLinkingData { 489 uint32_t Version; 490 std::vector<WasmInitFunc> InitFunctions; 491 std::vector<StringRef> Comdats; 492 // The linking section also contains a symbol table. This info (represented 493 // in a WasmSymbolInfo struct) is stored inside the WasmSymbol object instead 494 // of in this structure; this allows vectors of WasmSymbols and 495 // WasmLinkingDatas to be reallocated. 496 }; 497 498 struct WasmSignature { 499 SmallVector<ValType, 1> Returns; 500 SmallVector<ValType, 4> Params; 501 // LLVM can parse types other than functions encoded in the type section, 502 // but does not actually model them. Instead a placeholder signature is 503 // created in the Object's signature list. 504 enum { Function, Tag, Placeholder } Kind = Function; 505 // Support empty and tombstone instances, needed by DenseMap. 
506 enum { Plain, Empty, Tombstone } State = Plain; 507 508 WasmSignature(SmallVector<ValType, 1> &&InReturns, 509 SmallVector<ValType, 4> &&InParams) 510 : Returns(InReturns), Params(InParams) {} 511 WasmSignature() = default; 512 }; 513 514 // Useful comparison operators 515 inline bool operator==(const WasmSignature &LHS, const WasmSignature &RHS) { 516 return LHS.State == RHS.State && LHS.Returns == RHS.Returns && 517 LHS.Params == RHS.Params; 518 } 519 520 inline bool operator!=(const WasmSignature &LHS, const WasmSignature &RHS) { 521 return !(LHS == RHS); 522 } 523 524 inline bool operator==(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { 525 return LHS.Type == RHS.Type && LHS.Mutable == RHS.Mutable; 526 } 527 528 inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { 529 return !(LHS == RHS); 530 } 531 532 inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) { 533 return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum && 534 (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum 535 : true); 536 } 537 538 inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) { 539 return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits; 540 } 541 542 llvm::StringRef toString(WasmSymbolType type); 543 llvm::StringRef relocTypetoString(uint32_t type); 544 llvm::StringRef sectionTypeToString(uint32_t type); 545 bool relocTypeHasAddend(uint32_t type); 546 547 } // end namespace wasm 548 } // end namespace llvm 549 550 #endif 551