1f3acb54cSRiver Riddle //===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===// 2f3acb54cSRiver Riddle // 3f3acb54cSRiver Riddle // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4f3acb54cSRiver Riddle // See https://llvm.org/LICENSE.txt for license information. 5f3acb54cSRiver Riddle // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6f3acb54cSRiver Riddle // 7f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 8f3acb54cSRiver Riddle 9f3acb54cSRiver Riddle #include "mlir/Bytecode/BytecodeWriter.h" 10f3acb54cSRiver Riddle #include "IRNumbering.h" 1102c2ecb9SRiver Riddle #include "mlir/Bytecode/BytecodeImplementation.h" 12660f714eSMehdi Amini #include "mlir/Bytecode/BytecodeOpInterface.h" 1361278191SMatteo Franciolini #include "mlir/Bytecode/Encoding.h" 14660f714eSMehdi Amini #include "mlir/IR/Attributes.h" 15660f714eSMehdi Amini #include "mlir/IR/Diagnostics.h" 16f3acb54cSRiver Riddle #include "mlir/IR/OpImplementation.h" 17660f714eSMehdi Amini #include "llvm/ADT/ArrayRef.h" 18f3acb54cSRiver Riddle #include "llvm/ADT/CachedHashString.h" 19f3acb54cSRiver Riddle #include "llvm/ADT/MapVector.h" 20660f714eSMehdi Amini #include "llvm/ADT/SmallVector.h" 21*d1578848SKevin Gleason #include "llvm/Support/Debug.h" 22b86a1321SMehdi Amini #include "llvm/Support/Endian.h" 23bff6a429SMatteo Franciolini #include "llvm/Support/raw_ostream.h" 24660f714eSMehdi Amini #include <optional> 25f3acb54cSRiver Riddle 26f3acb54cSRiver Riddle #define DEBUG_TYPE "mlir-bytecode-writer" 27f3acb54cSRiver Riddle 28f3acb54cSRiver Riddle using namespace mlir; 29f3acb54cSRiver Riddle using namespace mlir::bytecode::detail; 30f3acb54cSRiver Riddle 31f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 326ab2bcffSRiver Riddle // BytecodeWriterConfig 336ab2bcffSRiver Riddle //===----------------------------------------------------------------------===// 346ab2bcffSRiver Riddle 356ab2bcffSRiver Riddle struct BytecodeWriterConfig::Impl { 366ab2bcffSRiver Riddle Impl(StringRef producer) : producer(producer) {} 376ab2bcffSRiver Riddle 380610e2f6SJacques Pienaar /// Version to use when writing. 390610e2f6SJacques Pienaar /// Note: This only differs from kVersion if a specific version is set. 400610e2f6SJacques Pienaar int64_t bytecodeVersion = bytecode::kVersion; 410610e2f6SJacques Pienaar 424488f493SMatteo Franciolini /// A flag specifying whether to elide emission of resources into the bytecode 434488f493SMatteo Franciolini /// file. 444488f493SMatteo Franciolini bool shouldElideResourceData = false; 454488f493SMatteo Franciolini 467ad9e9dcSMatteo Franciolini /// A map containing dialect version information for each dialect to emit. 477ad9e9dcSMatteo Franciolini llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap; 487ad9e9dcSMatteo Franciolini 496ab2bcffSRiver Riddle /// The producer of the bytecode. 506ab2bcffSRiver Riddle StringRef producer; 516ab2bcffSRiver Riddle 52bff6a429SMatteo Franciolini /// Printer callbacks used to emit custom type and attribute encodings. 53bff6a429SMatteo Franciolini llvm::SmallVector<std::unique_ptr<AttrTypeBytecodeWriter<Attribute>>> 54bff6a429SMatteo Franciolini attributeWriterCallbacks; 55bff6a429SMatteo Franciolini llvm::SmallVector<std::unique_ptr<AttrTypeBytecodeWriter<Type>>> 56bff6a429SMatteo Franciolini typeWriterCallbacks; 57bff6a429SMatteo Franciolini 586ab2bcffSRiver Riddle /// A collection of non-dialect resource printers. 596ab2bcffSRiver Riddle SmallVector<std::unique_ptr<AsmResourcePrinter>> externalResourcePrinters; 606ab2bcffSRiver Riddle }; 616ab2bcffSRiver Riddle 626ab2bcffSRiver Riddle BytecodeWriterConfig::BytecodeWriterConfig(StringRef producer) 636ab2bcffSRiver Riddle : impl(std::make_unique<Impl>(producer)) {} 6434300ee3SRiver Riddle BytecodeWriterConfig::BytecodeWriterConfig(FallbackAsmResourceMap &map, 6534300ee3SRiver Riddle StringRef producer) 6634300ee3SRiver Riddle : BytecodeWriterConfig(producer) { 6734300ee3SRiver Riddle attachFallbackResourcePrinter(map); 6834300ee3SRiver Riddle } 696ab2bcffSRiver Riddle BytecodeWriterConfig::~BytecodeWriterConfig() = default; 706ab2bcffSRiver Riddle 71bff6a429SMatteo Franciolini ArrayRef<std::unique_ptr<AttrTypeBytecodeWriter<Attribute>>> 72bff6a429SMatteo Franciolini BytecodeWriterConfig::getAttributeWriterCallbacks() const { 73bff6a429SMatteo Franciolini return impl->attributeWriterCallbacks; 74bff6a429SMatteo Franciolini } 75bff6a429SMatteo Franciolini 76bff6a429SMatteo Franciolini ArrayRef<std::unique_ptr<AttrTypeBytecodeWriter<Type>>> 77bff6a429SMatteo Franciolini BytecodeWriterConfig::getTypeWriterCallbacks() const { 78bff6a429SMatteo Franciolini return impl->typeWriterCallbacks; 79bff6a429SMatteo Franciolini } 80bff6a429SMatteo Franciolini 81bff6a429SMatteo Franciolini void BytecodeWriterConfig::attachAttributeCallback( 82bff6a429SMatteo Franciolini std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) { 83bff6a429SMatteo Franciolini impl->attributeWriterCallbacks.emplace_back(std::move(callback)); 84bff6a429SMatteo Franciolini } 85bff6a429SMatteo Franciolini 86bff6a429SMatteo Franciolini void BytecodeWriterConfig::attachTypeCallback( 87bff6a429SMatteo Franciolini std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) { 88bff6a429SMatteo Franciolini impl->typeWriterCallbacks.emplace_back(std::move(callback)); 89bff6a429SMatteo Franciolini } 90bff6a429SMatteo Franciolini 916ab2bcffSRiver Riddle void BytecodeWriterConfig::attachResourcePrinter( 926ab2bcffSRiver Riddle std::unique_ptr<AsmResourcePrinter> printer) { 936ab2bcffSRiver Riddle impl->externalResourcePrinters.emplace_back(std::move(printer)); 946ab2bcffSRiver Riddle } 956ab2bcffSRiver Riddle 964488f493SMatteo Franciolini void BytecodeWriterConfig::setElideResourceDataFlag( 974488f493SMatteo Franciolini bool shouldElideResourceData) { 984488f493SMatteo Franciolini impl->shouldElideResourceData = shouldElideResourceData; 994488f493SMatteo Franciolini } 1004488f493SMatteo Franciolini 1010610e2f6SJacques Pienaar void BytecodeWriterConfig::setDesiredBytecodeVersion(int64_t bytecodeVersion) { 1020ee4875dSKevin Gleason impl->bytecodeVersion = bytecodeVersion; 1030610e2f6SJacques Pienaar } 1040610e2f6SJacques Pienaar 105660f714eSMehdi Amini int64_t BytecodeWriterConfig::getDesiredBytecodeVersion() const { 106660f714eSMehdi Amini return impl->bytecodeVersion; 107660f714eSMehdi Amini } 108660f714eSMehdi Amini 1097ad9e9dcSMatteo Franciolini llvm::StringMap<std::unique_ptr<DialectVersion>> & 1107ad9e9dcSMatteo Franciolini BytecodeWriterConfig::getDialectVersionMap() const { 1117ad9e9dcSMatteo Franciolini return impl->dialectVersionMap; 1127ad9e9dcSMatteo Franciolini } 1137ad9e9dcSMatteo Franciolini 1147ad9e9dcSMatteo Franciolini void BytecodeWriterConfig::setDialectVersion( 1157ad9e9dcSMatteo Franciolini llvm::StringRef dialectName, 1167ad9e9dcSMatteo Franciolini std::unique_ptr<DialectVersion> dialectVersion) const { 1177ad9e9dcSMatteo Franciolini assert(!impl->dialectVersionMap.contains(dialectName) && 1187ad9e9dcSMatteo Franciolini "cannot override a previously set dialect version"); 1197ad9e9dcSMatteo Franciolini impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)}); 1207ad9e9dcSMatteo Franciolini } 1217ad9e9dcSMatteo Franciolini 1226ab2bcffSRiver Riddle //===----------------------------------------------------------------------===// 123f3acb54cSRiver Riddle // EncodingEmitter 124f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 125f3acb54cSRiver Riddle 126f3acb54cSRiver Riddle namespace { 127f3acb54cSRiver Riddle /// This class functions as the underlying encoding emitter for the bytecode 128f3acb54cSRiver Riddle /// writer. This class is a bit different compared to other types of encoders; 129f3acb54cSRiver Riddle /// it does not use a single buffer, but instead may contain several buffers 130f3acb54cSRiver Riddle /// (some owned by the writer, and some not) that get concatted during the final 131f3acb54cSRiver Riddle /// emission. 132f3acb54cSRiver Riddle class EncodingEmitter { 133f3acb54cSRiver Riddle public: 134f3acb54cSRiver Riddle EncodingEmitter() = default; 135f3acb54cSRiver Riddle EncodingEmitter(const EncodingEmitter &) = delete; 136f3acb54cSRiver Riddle EncodingEmitter &operator=(const EncodingEmitter &) = delete; 137f3acb54cSRiver Riddle 138f3acb54cSRiver Riddle /// Write the current contents to the provided stream. 139f3acb54cSRiver Riddle void writeTo(raw_ostream &os) const; 140f3acb54cSRiver Riddle 141f3acb54cSRiver Riddle /// Return the current size of the encoded buffer. 142f3acb54cSRiver Riddle size_t size() const { return prevResultSize + currentResult.size(); } 143f3acb54cSRiver Riddle 144f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 145f3acb54cSRiver Riddle // Emission 146f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 147f3acb54cSRiver Riddle 148f3acb54cSRiver Riddle /// Backpatch a byte in the result buffer at the given offset. 149*d1578848SKevin Gleason void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) { 150*d1578848SKevin Gleason LLVM_DEBUG(llvm::dbgs() << "patchByte(" << offset << ',' << uint64_t(value) 151*d1578848SKevin Gleason << ")\t" << desc << '\n'); 152f3acb54cSRiver Riddle assert(offset < size() && offset >= prevResultSize && 153f3acb54cSRiver Riddle "cannot patch previously emitted data"); 154f3acb54cSRiver Riddle currentResult[offset - prevResultSize] = value; 155f3acb54cSRiver Riddle } 156f3acb54cSRiver Riddle 1576ab2bcffSRiver Riddle /// Emit the provided blob of data, which is owned by the caller and is 1586ab2bcffSRiver Riddle /// guaranteed to not die before the end of the bytecode process. 159*d1578848SKevin Gleason void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) { 160*d1578848SKevin Gleason LLVM_DEBUG(llvm::dbgs() 161*d1578848SKevin Gleason << "emitOwnedBlob(" << data.size() << "b)\t" << desc << '\n'); 1626ab2bcffSRiver Riddle // Push the current buffer before adding the provided data. 1636ab2bcffSRiver Riddle appendResult(std::move(currentResult)); 1646ab2bcffSRiver Riddle appendOwnedResult(data); 1656ab2bcffSRiver Riddle } 1666ab2bcffSRiver Riddle 1676ab2bcffSRiver Riddle /// Emit the provided blob of data that has the given alignment, which is 1686ab2bcffSRiver Riddle /// owned by the caller and is guaranteed to not die before the end of the 1696ab2bcffSRiver Riddle /// bytecode process. The alignment value is also encoded, making it available 1706ab2bcffSRiver Riddle /// on load. 171*d1578848SKevin Gleason void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment, 172*d1578848SKevin Gleason StringLiteral desc) { 173*d1578848SKevin Gleason emitVarInt(alignment, desc); 174*d1578848SKevin Gleason emitVarInt(data.size(), desc); 1756ab2bcffSRiver Riddle 1766ab2bcffSRiver Riddle alignTo(alignment); 177*d1578848SKevin Gleason emitOwnedBlob(data, desc); 1786ab2bcffSRiver Riddle } 179*d1578848SKevin Gleason void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment, 180*d1578848SKevin Gleason StringLiteral desc) { 1816ab2bcffSRiver Riddle ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()), 1826ab2bcffSRiver Riddle data.size()); 183*d1578848SKevin Gleason emitOwnedBlobAndAlignment(castedData, alignment, desc); 1846ab2bcffSRiver Riddle } 1856ab2bcffSRiver Riddle 1866ab2bcffSRiver Riddle /// Align the emitter to the given alignment. 1876ab2bcffSRiver Riddle void alignTo(unsigned alignment) { 1886ab2bcffSRiver Riddle if (alignment < 2) 1896ab2bcffSRiver Riddle return; 1906ab2bcffSRiver Riddle assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment"); 1916ab2bcffSRiver Riddle 1926ab2bcffSRiver Riddle // Check to see if we need to emit any padding bytes to meet the desired 1936ab2bcffSRiver Riddle // alignment. 1946ab2bcffSRiver Riddle size_t curOffset = size(); 1956ab2bcffSRiver Riddle size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset; 1966ab2bcffSRiver Riddle while (paddingSize--) 197*d1578848SKevin Gleason emitByte(bytecode::kAlignmentByte, "alignment byte"); 1986ab2bcffSRiver Riddle 1996ab2bcffSRiver Riddle // Keep track of the maximum required alignment. 2006ab2bcffSRiver Riddle requiredAlignment = std::max(requiredAlignment, alignment); 2016ab2bcffSRiver Riddle } 2026ab2bcffSRiver Riddle 203f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 204f3acb54cSRiver Riddle // Integer Emission 205f3acb54cSRiver Riddle 206f3acb54cSRiver Riddle /// Emit a single byte. 207f3acb54cSRiver Riddle template <typename T> 208*d1578848SKevin Gleason void emitByte(T byte, StringLiteral desc) { 209*d1578848SKevin Gleason LLVM_DEBUG(llvm::dbgs() 210*d1578848SKevin Gleason << "emitByte(" << uint64_t(byte) << ")\t" << desc << '\n'); 211f3acb54cSRiver Riddle currentResult.push_back(static_cast<uint8_t>(byte)); 212f3acb54cSRiver Riddle } 213f3acb54cSRiver Riddle 214f3acb54cSRiver Riddle /// Emit a range of bytes. 215*d1578848SKevin Gleason void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) { 216*d1578848SKevin Gleason LLVM_DEBUG(llvm::dbgs() 217*d1578848SKevin Gleason << "emitBytes(" << bytes.size() << "b)\t" << desc << '\n'); 218f3acb54cSRiver Riddle llvm::append_range(currentResult, bytes); 219f3acb54cSRiver Riddle } 220f3acb54cSRiver Riddle 221f3acb54cSRiver Riddle /// Emit a variable length integer. The first encoded byte contains a prefix 222f3acb54cSRiver Riddle /// in the low bits indicating the encoded length of the value. This length 223f3acb54cSRiver Riddle /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits 224f3acb54cSRiver Riddle /// indicate the number of _additional_ bytes (not including the prefix byte). 225f3acb54cSRiver Riddle /// All remaining bits in the first byte, along with all of the bits in 226f3acb54cSRiver Riddle /// additional bytes, provide the value of the integer encoded in 227f3acb54cSRiver Riddle /// little-endian order. 228*d1578848SKevin Gleason void emitVarInt(uint64_t value, StringLiteral desc) { 229*d1578848SKevin Gleason LLVM_DEBUG(llvm::dbgs() << "emitVarInt(" << value << ")\t" << desc << '\n'); 230*d1578848SKevin Gleason 231f3acb54cSRiver Riddle // In the most common case, the value can be represented in a single byte. 232f3acb54cSRiver Riddle // Given how hot this case is, explicitly handle that here. 233f3acb54cSRiver Riddle if ((value >> 7) == 0) 234*d1578848SKevin Gleason return emitByte((value << 1) | 0x1, desc); 235*d1578848SKevin Gleason emitMultiByteVarInt(value, desc); 236f3acb54cSRiver Riddle } 237f3acb54cSRiver Riddle 2382f90764cSRiver Riddle /// Emit a signed variable length integer. Signed varints are encoded using 2392f90764cSRiver Riddle /// a varint with zigzag encoding, meaning that we use the low bit of the 2402f90764cSRiver Riddle /// value to indicate the sign of the value. This allows for more efficient 2412f90764cSRiver Riddle /// encoding of negative values by limiting the number of active bits 242*d1578848SKevin Gleason void emitSignedVarInt(uint64_t value, StringLiteral desc) { 243*d1578848SKevin Gleason emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc); 2442f90764cSRiver Riddle } 2452f90764cSRiver Riddle 246f3acb54cSRiver Riddle /// Emit a variable length integer whose low bit is used to encode the 247f3acb54cSRiver Riddle /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0). 248*d1578848SKevin Gleason void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) { 249*d1578848SKevin Gleason emitVarInt((value << 1) | (flag ? 1 : 0), desc); 250f3acb54cSRiver Riddle } 251f3acb54cSRiver Riddle 252f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 253f3acb54cSRiver Riddle // String Emission 254f3acb54cSRiver Riddle 255f3acb54cSRiver Riddle /// Emit the given string as a nul terminated string. 256*d1578848SKevin Gleason void emitNulTerminatedString(StringRef str, StringLiteral desc) { 257*d1578848SKevin Gleason emitString(str, desc); 258*d1578848SKevin Gleason emitByte(0, "null terminator"); 259f3acb54cSRiver Riddle } 260f3acb54cSRiver Riddle 261f3acb54cSRiver Riddle /// Emit the given string without a nul terminator. 262*d1578848SKevin Gleason void emitString(StringRef str, StringLiteral desc) { 263*d1578848SKevin Gleason emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()}, 264*d1578848SKevin Gleason desc); 265f3acb54cSRiver Riddle } 266f3acb54cSRiver Riddle 267f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 268f3acb54cSRiver Riddle // Section Emission 269f3acb54cSRiver Riddle 270f3acb54cSRiver Riddle /// Emit a nested section of the given code, whose contents are encoded in the 271f3acb54cSRiver Riddle /// provided emitter. 272f3acb54cSRiver Riddle void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) { 2736ab2bcffSRiver Riddle // Emit the section code and length. The high bit of the code is used to 2746ab2bcffSRiver Riddle // indicate whether the section alignment is present, so save an offset to 2756ab2bcffSRiver Riddle // it. 2766ab2bcffSRiver Riddle uint64_t codeOffset = currentResult.size(); 277*d1578848SKevin Gleason emitByte(code, "section code"); 278*d1578848SKevin Gleason emitVarInt(emitter.size(), "section size"); 279f3acb54cSRiver Riddle 2806ab2bcffSRiver Riddle // Integrate the alignment of the section into this emitter if necessary. 2816ab2bcffSRiver Riddle unsigned emitterAlign = emitter.requiredAlignment; 2826ab2bcffSRiver Riddle if (emitterAlign > 1) { 2836ab2bcffSRiver Riddle if (size() & (emitterAlign - 1)) { 284*d1578848SKevin Gleason emitVarInt(emitterAlign, "section alignment"); 2856ab2bcffSRiver Riddle alignTo(emitterAlign); 2866ab2bcffSRiver Riddle 2876ab2bcffSRiver Riddle // Indicate that we needed to align the section, the high bit of the 2886ab2bcffSRiver Riddle // code field is used for this. 2896ab2bcffSRiver Riddle currentResult[codeOffset] |= 0b10000000; 2906ab2bcffSRiver Riddle } else { 2916ab2bcffSRiver Riddle // Otherwise, if we happen to be at a compatible offset, we just 2926ab2bcffSRiver Riddle // remember that we need this alignment. 2936ab2bcffSRiver Riddle requiredAlignment = std::max(requiredAlignment, emitterAlign); 2946ab2bcffSRiver Riddle } 2956ab2bcffSRiver Riddle } 2966ab2bcffSRiver Riddle 297f3acb54cSRiver Riddle // Push our current buffer and then merge the provided section body into 298f3acb54cSRiver Riddle // ours. 299f3acb54cSRiver Riddle appendResult(std::move(currentResult)); 300f3acb54cSRiver Riddle for (std::vector<uint8_t> &result : emitter.prevResultStorage) 3016ab2bcffSRiver Riddle prevResultStorage.push_back(std::move(result)); 3026ab2bcffSRiver Riddle llvm::append_range(prevResultList, emitter.prevResultList); 3036ab2bcffSRiver Riddle prevResultSize += emitter.prevResultSize; 304f3acb54cSRiver Riddle appendResult(std::move(emitter.currentResult)); 305f3acb54cSRiver Riddle } 306f3acb54cSRiver Riddle 307f3acb54cSRiver Riddle private: 308f3acb54cSRiver Riddle /// Emit the given value using a variable width encoding. This method is a 309f3acb54cSRiver Riddle /// fallback when the number of bytes needed to encode the value is greater 310f3acb54cSRiver Riddle /// than 1. We mark it noinline here so that the single byte hot path isn't 311f3acb54cSRiver Riddle /// pessimized. 312*d1578848SKevin Gleason LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value, 313*d1578848SKevin Gleason StringLiteral desc); 314f3acb54cSRiver Riddle 315f3acb54cSRiver Riddle /// Append a new result buffer to the current contents. 316f3acb54cSRiver Riddle void appendResult(std::vector<uint8_t> &&result) { 3176ab2bcffSRiver Riddle if (result.empty()) 3186ab2bcffSRiver Riddle return; 319f3acb54cSRiver Riddle prevResultStorage.emplace_back(std::move(result)); 3206ab2bcffSRiver Riddle appendOwnedResult(prevResultStorage.back()); 3216ab2bcffSRiver Riddle } 3226ab2bcffSRiver Riddle void appendOwnedResult(ArrayRef<uint8_t> result) { 3236ab2bcffSRiver Riddle if (result.empty()) 3246ab2bcffSRiver Riddle return; 3256ab2bcffSRiver Riddle prevResultSize += result.size(); 3266ab2bcffSRiver Riddle prevResultList.emplace_back(result); 327f3acb54cSRiver Riddle } 328f3acb54cSRiver Riddle 329f3acb54cSRiver Riddle /// The result of the emitter currently being built. We refrain from building 330f3acb54cSRiver Riddle /// a single buffer to simplify emitting sections, large data, and more. The 331f3acb54cSRiver Riddle /// result is thus represented using multiple distinct buffers, some of which 332f3acb54cSRiver Riddle /// we own (via prevResultStorage), and some of which are just pointers into 333f3acb54cSRiver Riddle /// externally owned buffers. 334f3acb54cSRiver Riddle std::vector<uint8_t> currentResult; 335f3acb54cSRiver Riddle std::vector<ArrayRef<uint8_t>> prevResultList; 336f3acb54cSRiver Riddle std::vector<std::vector<uint8_t>> prevResultStorage; 337f3acb54cSRiver Riddle 338f3acb54cSRiver Riddle /// An up-to-date total size of all of the buffers within `prevResultList`. 339f3acb54cSRiver Riddle /// This enables O(1) size checks of the current encoding. 340f3acb54cSRiver Riddle size_t prevResultSize = 0; 3416ab2bcffSRiver Riddle 3426ab2bcffSRiver Riddle /// The highest required alignment for the start of this section. 3436ab2bcffSRiver Riddle unsigned requiredAlignment = 1; 344f3acb54cSRiver Riddle }; 345f3acb54cSRiver Riddle 3460e0b6070SMatteo Franciolini //===----------------------------------------------------------------------===// 3470e0b6070SMatteo Franciolini // StringSectionBuilder 3480e0b6070SMatteo Franciolini //===----------------------------------------------------------------------===// 3490e0b6070SMatteo Franciolini 3500e0b6070SMatteo Franciolini namespace { 3510e0b6070SMatteo Franciolini /// This class is used to simplify the process of emitting the string section. 3520e0b6070SMatteo Franciolini class StringSectionBuilder { 3530e0b6070SMatteo Franciolini public: 3540e0b6070SMatteo Franciolini /// Add the given string to the string section, and return the index of the 3550e0b6070SMatteo Franciolini /// string within the section. 3560e0b6070SMatteo Franciolini size_t insert(StringRef str) { 3570e0b6070SMatteo Franciolini auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()}); 3580e0b6070SMatteo Franciolini return it.first->second; 3590e0b6070SMatteo Franciolini } 3600e0b6070SMatteo Franciolini 3610e0b6070SMatteo Franciolini /// Write the current set of strings to the given emitter. 3620e0b6070SMatteo Franciolini void write(EncodingEmitter &emitter) { 363*d1578848SKevin Gleason emitter.emitVarInt(strings.size(), "string section size"); 3640e0b6070SMatteo Franciolini 3650e0b6070SMatteo Franciolini // Emit the sizes in reverse order, so that we don't need to backpatch an 3660e0b6070SMatteo Franciolini // offset to the string data or have a separate section. 3670e0b6070SMatteo Franciolini for (const auto &it : llvm::reverse(strings)) 368*d1578848SKevin Gleason emitter.emitVarInt(it.first.size() + 1, "string size"); 3690e0b6070SMatteo Franciolini // Emit the string data itself. 3700e0b6070SMatteo Franciolini for (const auto &it : strings) 371*d1578848SKevin Gleason emitter.emitNulTerminatedString(it.first.val(), "string"); 3720e0b6070SMatteo Franciolini } 3730e0b6070SMatteo Franciolini 3740e0b6070SMatteo Franciolini private: 3750e0b6070SMatteo Franciolini /// A set of strings referenced within the bytecode. The value of the map is 3760e0b6070SMatteo Franciolini /// unused. 3770e0b6070SMatteo Franciolini llvm::MapVector<llvm::CachedHashStringRef, size_t> strings; 3780e0b6070SMatteo Franciolini }; 3790e0b6070SMatteo Franciolini } // namespace 3800e0b6070SMatteo Franciolini 3810e0b6070SMatteo Franciolini class DialectWriter : public DialectBytecodeWriter { 3827ad9e9dcSMatteo Franciolini using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>; 3837ad9e9dcSMatteo Franciolini 3840e0b6070SMatteo Franciolini public: 3850610e2f6SJacques Pienaar DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter, 3860610e2f6SJacques Pienaar IRNumberingState &numberingState, 3877ad9e9dcSMatteo Franciolini StringSectionBuilder &stringSection, 3887ad9e9dcSMatteo Franciolini const DialectVersionMapT &dialectVersionMap) 3892ef44aa4SMehdi Amini : bytecodeVersion(bytecodeVersion), emitter(emitter), 3907ad9e9dcSMatteo Franciolini numberingState(numberingState), stringSection(stringSection), 3917ad9e9dcSMatteo Franciolini dialectVersionMap(dialectVersionMap) {} 3920e0b6070SMatteo Franciolini 3930e0b6070SMatteo Franciolini //===--------------------------------------------------------------------===// 3940e0b6070SMatteo Franciolini // IR 3950e0b6070SMatteo Franciolini //===--------------------------------------------------------------------===// 3960e0b6070SMatteo Franciolini 3970e0b6070SMatteo Franciolini void writeAttribute(Attribute attr) override { 398*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr"); 3990e0b6070SMatteo Franciolini } 400660f714eSMehdi Amini void writeOptionalAttribute(Attribute attr) override { 401660f714eSMehdi Amini if (!attr) { 402*d1578848SKevin Gleason emitter.emitVarInt(0, "dialect optional attr none"); 403660f714eSMehdi Amini return; 404660f714eSMehdi Amini } 405*d1578848SKevin Gleason emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true, 406*d1578848SKevin Gleason "dialect optional attr"); 407660f714eSMehdi Amini } 408660f714eSMehdi Amini 4090e0b6070SMatteo Franciolini void writeType(Type type) override { 410*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(type), "dialect type"); 4110e0b6070SMatteo Franciolini } 4120e0b6070SMatteo Franciolini 4130e0b6070SMatteo Franciolini void writeResourceHandle(const AsmDialectResourceHandle &resource) override { 414*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource"); 4150e0b6070SMatteo Franciolini } 4160e0b6070SMatteo Franciolini 4170e0b6070SMatteo Franciolini //===--------------------------------------------------------------------===// 4180e0b6070SMatteo Franciolini // Primitives 4190e0b6070SMatteo Franciolini //===--------------------------------------------------------------------===// 4200e0b6070SMatteo Franciolini 421*d1578848SKevin Gleason void writeVarInt(uint64_t value) override { 422*d1578848SKevin Gleason emitter.emitVarInt(value, "dialect writer"); 423*d1578848SKevin Gleason } 4240e0b6070SMatteo Franciolini 4250e0b6070SMatteo Franciolini void writeSignedVarInt(int64_t value) override { 426*d1578848SKevin Gleason emitter.emitSignedVarInt(value, "dialect writer"); 4270e0b6070SMatteo Franciolini } 4280e0b6070SMatteo Franciolini 4290e0b6070SMatteo Franciolini void writeAPIntWithKnownWidth(const APInt &value) override { 4300e0b6070SMatteo Franciolini size_t bitWidth = value.getBitWidth(); 4310e0b6070SMatteo Franciolini 4320e0b6070SMatteo Franciolini // If the value is a single byte, just emit it directly without going 4330e0b6070SMatteo Franciolini // through a varint. 4340e0b6070SMatteo Franciolini if (bitWidth <= 8) 435*d1578848SKevin Gleason return emitter.emitByte(value.getLimitedValue(), "dialect APInt"); 4360e0b6070SMatteo Franciolini 4370e0b6070SMatteo Franciolini // If the value fits within a single varint, emit it directly. 4380e0b6070SMatteo Franciolini if (bitWidth <= 64) 439*d1578848SKevin Gleason return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt"); 4400e0b6070SMatteo Franciolini 4410e0b6070SMatteo Franciolini // Otherwise, we need to encode a variable number of active words. We use 4420e0b6070SMatteo Franciolini // active words instead of the number of total words under the observation 4430e0b6070SMatteo Franciolini // that smaller values will be more common. 4440e0b6070SMatteo Franciolini unsigned numActiveWords = value.getActiveWords(); 445*d1578848SKevin Gleason emitter.emitVarInt(numActiveWords, "dialect APInt word count"); 4460e0b6070SMatteo Franciolini 4470e0b6070SMatteo Franciolini const uint64_t *rawValueData = value.getRawData(); 4480e0b6070SMatteo Franciolini for (unsigned i = 0; i < numActiveWords; ++i) 449*d1578848SKevin Gleason emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word"); 4500e0b6070SMatteo Franciolini } 4510e0b6070SMatteo Franciolini 4520e0b6070SMatteo Franciolini void writeAPFloatWithKnownSemantics(const APFloat &value) override { 4530e0b6070SMatteo Franciolini writeAPIntWithKnownWidth(value.bitcastToAPInt()); 4540e0b6070SMatteo Franciolini } 4550e0b6070SMatteo Franciolini 4560e0b6070SMatteo Franciolini void writeOwnedString(StringRef str) override { 457*d1578848SKevin Gleason emitter.emitVarInt(stringSection.insert(str), "dialect string"); 4580e0b6070SMatteo Franciolini } 4590e0b6070SMatteo Franciolini 4600e0b6070SMatteo Franciolini void writeOwnedBlob(ArrayRef<char> blob) override { 461*d1578848SKevin Gleason emitter.emitVarInt(blob.size(), "dialect blob"); 462*d1578848SKevin Gleason emitter.emitOwnedBlob( 463*d1578848SKevin Gleason ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()), 464*d1578848SKevin Gleason blob.size()), 465*d1578848SKevin Gleason "dialect blob"); 4660e0b6070SMatteo Franciolini } 4670e0b6070SMatteo Franciolini 468*d1578848SKevin Gleason void writeOwnedBool(bool value) override { 469*d1578848SKevin Gleason emitter.emitByte(value, "dialect bool"); 470*d1578848SKevin Gleason } 47179c83e12SAndrzej Warzynski 4720610e2f6SJacques Pienaar int64_t getBytecodeVersion() const override { return bytecodeVersion; } 4730610e2f6SJacques Pienaar 4747ad9e9dcSMatteo Franciolini FailureOr<const DialectVersion *> 4757ad9e9dcSMatteo Franciolini getDialectVersion(StringRef dialectName) const override { 4767ad9e9dcSMatteo Franciolini auto dialectEntry = dialectVersionMap.find(dialectName); 4777ad9e9dcSMatteo Franciolini if (dialectEntry == dialectVersionMap.end()) 4787ad9e9dcSMatteo Franciolini return failure(); 4797ad9e9dcSMatteo Franciolini return dialectEntry->getValue().get(); 4807ad9e9dcSMatteo Franciolini } 4817ad9e9dcSMatteo Franciolini 4820e0b6070SMatteo Franciolini private: 4830610e2f6SJacques Pienaar int64_t bytecodeVersion; 4840e0b6070SMatteo Franciolini EncodingEmitter &emitter; 4850e0b6070SMatteo Franciolini IRNumberingState &numberingState; 4860e0b6070SMatteo Franciolini StringSectionBuilder &stringSection; 4877ad9e9dcSMatteo Franciolini const DialectVersionMapT &dialectVersionMap; 4880e0b6070SMatteo Franciolini }; 4890e0b6070SMatteo Franciolini 490660f714eSMehdi Amini namespace { 491660f714eSMehdi Amini class PropertiesSectionBuilder { 492660f714eSMehdi Amini public: 493660f714eSMehdi Amini PropertiesSectionBuilder(IRNumberingState &numberingState, 494660f714eSMehdi Amini StringSectionBuilder &stringSection, 495660f714eSMehdi Amini const BytecodeWriterConfig::Impl &config) 496660f714eSMehdi Amini : numberingState(numberingState), stringSection(stringSection), 497660f714eSMehdi Amini config(config) {} 498660f714eSMehdi Amini 499660f714eSMehdi Amini /// Emit the op properties in the properties section and return the index of 500660f714eSMehdi Amini /// the properties within the section. Return -1 if no properties was emitted. 501660f714eSMehdi Amini std::optional<ssize_t> emit(Operation *op) { 502660f714eSMehdi Amini EncodingEmitter propertiesEmitter; 503660f714eSMehdi Amini if (!op->getPropertiesStorageSize()) 504660f714eSMehdi Amini return std::nullopt; 505660f714eSMehdi Amini if (!op->isRegistered()) { 506660f714eSMehdi Amini // Unregistered op are storing properties as an optional attribute. 507660f714eSMehdi Amini Attribute prop = *op->getPropertiesStorage().as<Attribute *>(); 508660f714eSMehdi Amini if (!prop) 509660f714eSMehdi Amini return std::nullopt; 510660f714eSMehdi Amini EncodingEmitter sizeEmitter; 511*d1578848SKevin Gleason sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size"); 512660f714eSMehdi Amini scratch.clear(); 513660f714eSMehdi Amini llvm::raw_svector_ostream os(scratch); 514660f714eSMehdi Amini sizeEmitter.writeTo(os); 515660f714eSMehdi Amini return emit(scratch); 516660f714eSMehdi Amini } 517660f714eSMehdi Amini 518660f714eSMehdi Amini EncodingEmitter emitter; 519660f714eSMehdi Amini DialectWriter propertiesWriter(config.bytecodeVersion, emitter, 5207ad9e9dcSMatteo Franciolini numberingState, stringSection, 5217ad9e9dcSMatteo Franciolini config.dialectVersionMap); 522660f714eSMehdi Amini auto iface = cast<BytecodeOpInterface>(op); 523660f714eSMehdi Amini iface.writeProperties(propertiesWriter); 524660f714eSMehdi Amini scratch.clear(); 525660f714eSMehdi Amini llvm::raw_svector_ostream os(scratch); 526660f714eSMehdi Amini emitter.writeTo(os); 527660f714eSMehdi Amini return emit(scratch); 528660f714eSMehdi Amini } 529660f714eSMehdi Amini 530660f714eSMehdi Amini /// Write the current set of properties to the given emitter. 531660f714eSMehdi Amini void write(EncodingEmitter &emitter) { 532*d1578848SKevin Gleason emitter.emitVarInt(propertiesStorage.size(), "properties size"); 533660f714eSMehdi Amini if (propertiesStorage.empty()) 534660f714eSMehdi Amini return; 535660f714eSMehdi Amini for (const auto &storage : propertiesStorage) { 536660f714eSMehdi Amini if (storage.empty()) { 537*d1578848SKevin Gleason emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties"); 538660f714eSMehdi Amini continue; 539660f714eSMehdi Amini } 540660f714eSMehdi Amini emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]), 541*d1578848SKevin Gleason storage.size()), 542*d1578848SKevin Gleason "property"); 543660f714eSMehdi Amini } 544660f714eSMehdi Amini } 545660f714eSMehdi Amini 546660f714eSMehdi Amini /// Returns true if the section is empty. 547660f714eSMehdi Amini bool empty() { return propertiesStorage.empty(); } 548660f714eSMehdi Amini 549660f714eSMehdi Amini private: 550660f714eSMehdi Amini /// Emit raw data and returns the offset in the internal buffer. 551660f714eSMehdi Amini /// Data are deduplicated and will be copied in the internal buffer only if 552660f714eSMehdi Amini /// they don't exist there already. 553660f714eSMehdi Amini ssize_t emit(ArrayRef<char> rawProperties) { 554660f714eSMehdi Amini // Populate a scratch buffer with the properties size. 555660f714eSMehdi Amini SmallVector<char> sizeScratch; 556660f714eSMehdi Amini { 557660f714eSMehdi Amini EncodingEmitter sizeEmitter; 558*d1578848SKevin Gleason sizeEmitter.emitVarInt(rawProperties.size(), "properties"); 559660f714eSMehdi Amini llvm::raw_svector_ostream os(sizeScratch); 560660f714eSMehdi Amini sizeEmitter.writeTo(os); 561660f714eSMehdi Amini } 562660f714eSMehdi Amini // Append a new storage to the table now. 563660f714eSMehdi Amini size_t index = propertiesStorage.size(); 564660f714eSMehdi Amini propertiesStorage.emplace_back(); 565660f714eSMehdi Amini std::vector<char> &newStorage = propertiesStorage.back(); 566660f714eSMehdi Amini size_t propertiesSize = sizeScratch.size() + rawProperties.size(); 567660f714eSMehdi Amini newStorage.reserve(propertiesSize); 568660f714eSMehdi Amini newStorage.insert(newStorage.end(), sizeScratch.begin(), sizeScratch.end()); 569660f714eSMehdi Amini newStorage.insert(newStorage.end(), rawProperties.begin(), 570660f714eSMehdi Amini rawProperties.end()); 571660f714eSMehdi Amini 572660f714eSMehdi Amini // Try to de-duplicate the new serialized properties. 573660f714eSMehdi Amini // If the properties is a duplicate, pop it back from the storage. 574660f714eSMehdi Amini auto inserted = propertiesUniquing.insert( 575660f714eSMehdi Amini std::make_pair(ArrayRef<char>(newStorage), index)); 576660f714eSMehdi Amini if (!inserted.second) 577660f714eSMehdi Amini propertiesStorage.pop_back(); 578660f714eSMehdi Amini return inserted.first->getSecond(); 579660f714eSMehdi Amini } 580660f714eSMehdi Amini 581660f714eSMehdi Amini /// Storage for properties. 582660f714eSMehdi Amini std::vector<std::vector<char>> propertiesStorage; 583660f714eSMehdi Amini SmallVector<char> scratch; 584660f714eSMehdi Amini DenseMap<ArrayRef<char>, int64_t> propertiesUniquing; 585660f714eSMehdi Amini IRNumberingState &numberingState; 586660f714eSMehdi Amini StringSectionBuilder &stringSection; 587660f714eSMehdi Amini const BytecodeWriterConfig::Impl &config; 588660f714eSMehdi Amini }; 589660f714eSMehdi Amini } // namespace 590660f714eSMehdi Amini 591f3acb54cSRiver Riddle /// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need 592f3acb54cSRiver Riddle /// to go through an intermediate buffer when interacting with code that wants a 593f3acb54cSRiver Riddle /// raw_ostream. 594221f7853SMehdi Amini class RawEmitterOstream : public raw_ostream { 595f3acb54cSRiver Riddle public: 596221f7853SMehdi Amini explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) { 597f3acb54cSRiver Riddle SetUnbuffered(); 598f3acb54cSRiver Riddle } 599f3acb54cSRiver Riddle 600f3acb54cSRiver Riddle private: 601f3acb54cSRiver Riddle void write_impl(const char *ptr, size_t size) override { 602*d1578848SKevin Gleason emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size}, 603*d1578848SKevin Gleason "raw emitter"); 604f3acb54cSRiver Riddle } 605f3acb54cSRiver Riddle uint64_t current_pos() const override { return emitter.size(); } 606f3acb54cSRiver Riddle 607f3acb54cSRiver Riddle /// The section being emitted to. 608f3acb54cSRiver Riddle EncodingEmitter &emitter; 609f3acb54cSRiver Riddle }; 610f3acb54cSRiver Riddle } // namespace 611f3acb54cSRiver Riddle 612f3acb54cSRiver Riddle void EncodingEmitter::writeTo(raw_ostream &os) const { 613f3acb54cSRiver Riddle for (auto &prevResult : prevResultList) 614f3acb54cSRiver Riddle os.write((const char *)prevResult.data(), prevResult.size()); 615f3acb54cSRiver Riddle os.write((const char *)currentResult.data(), currentResult.size()); 616f3acb54cSRiver Riddle } 617f3acb54cSRiver Riddle 618*d1578848SKevin Gleason void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) { 619f3acb54cSRiver Riddle // Compute the number of bytes needed to encode the value. Each byte can hold 620f3acb54cSRiver Riddle // up to 7-bits of data. We only check up to the number of bits we can encode 621f3acb54cSRiver Riddle // in the first byte (8). 622f3acb54cSRiver Riddle uint64_t it = value >> 7; 623f3acb54cSRiver Riddle for (size_t numBytes = 2; numBytes < 9; ++numBytes) { 624f3acb54cSRiver Riddle if (LLVM_LIKELY(it >>= 7) == 0) { 625f3acb54cSRiver Riddle uint64_t encodedValue = (value << 1) | 0x1; 626f3acb54cSRiver Riddle encodedValue <<= (numBytes - 1); 627bb0bbed6SUlrich Weigand llvm::support::ulittle64_t encodedValueLE(encodedValue); 628*d1578848SKevin Gleason emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc); 629f3acb54cSRiver Riddle return; 630f3acb54cSRiver Riddle } 631f3acb54cSRiver Riddle } 632f3acb54cSRiver Riddle 633f3acb54cSRiver Riddle // If the value is too large to encode in a single byte, emit a special all 634f3acb54cSRiver Riddle // zero marker byte and splat the value directly. 635*d1578848SKevin Gleason emitByte(0, desc); 636bb0bbed6SUlrich Weigand llvm::support::ulittle64_t valueLE(value); 637*d1578848SKevin Gleason emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc); 638f3acb54cSRiver Riddle } 639f3acb54cSRiver Riddle 640f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 641f3acb54cSRiver Riddle // Bytecode Writer 642f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 643f3acb54cSRiver Riddle 644f3acb54cSRiver Riddle namespace { 645f3acb54cSRiver Riddle class BytecodeWriter { 646f3acb54cSRiver Riddle public: 647660f714eSMehdi Amini BytecodeWriter(Operation *op, const BytecodeWriterConfig &config) 648660f714eSMehdi Amini : numberingState(op, config), config(config.getImpl()), 649660f714eSMehdi Amini propertiesSection(numberingState, stringSection, config.getImpl()) {} 650f3acb54cSRiver Riddle 651f3acb54cSRiver Riddle /// Write the bytecode for the given root operation. 652660f714eSMehdi Amini LogicalResult write(Operation *rootOp, raw_ostream &os); 653f3acb54cSRiver Riddle 654f3acb54cSRiver Riddle private: 655f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 656f3acb54cSRiver Riddle // Dialects 657f3acb54cSRiver Riddle 658f3acb54cSRiver Riddle void writeDialectSection(EncodingEmitter &emitter); 659f3acb54cSRiver Riddle 660f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 661f3acb54cSRiver Riddle // Attributes and Types 662f3acb54cSRiver Riddle 663f3acb54cSRiver Riddle void writeAttrTypeSection(EncodingEmitter &emitter); 664f3acb54cSRiver Riddle 665f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 666f3acb54cSRiver Riddle // Operations 667f3acb54cSRiver Riddle 668660f714eSMehdi Amini LogicalResult writeBlock(EncodingEmitter &emitter, Block *block); 669660f714eSMehdi Amini LogicalResult writeOp(EncodingEmitter &emitter, Operation *op); 670660f714eSMehdi Amini LogicalResult writeRegion(EncodingEmitter &emitter, Region *region); 671660f714eSMehdi Amini LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op); 672f3acb54cSRiver Riddle 6735ab65895SRiver Riddle LogicalResult writeRegions(EncodingEmitter &emitter, 6745ab65895SRiver Riddle MutableArrayRef<Region> regions) { 6755ab65895SRiver Riddle return success(llvm::all_of(regions, [&](Region ®ion) { 6765ab65895SRiver Riddle return succeeded(writeRegion(emitter, ®ion)); 6775ab65895SRiver Riddle })); 6785ab65895SRiver Riddle } 6795ab65895SRiver Riddle 680f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 6816ab2bcffSRiver Riddle // Resources 6826ab2bcffSRiver Riddle 6830610e2f6SJacques Pienaar void writeResourceSection(Operation *op, EncodingEmitter &emitter); 6846ab2bcffSRiver Riddle 6856ab2bcffSRiver Riddle //===--------------------------------------------------------------------===// 686f3acb54cSRiver Riddle // Strings 687f3acb54cSRiver Riddle 688f3acb54cSRiver Riddle void writeStringSection(EncodingEmitter &emitter); 689f3acb54cSRiver Riddle 690f3acb54cSRiver Riddle //===--------------------------------------------------------------------===// 691660f714eSMehdi Amini // Properties 692660f714eSMehdi Amini 693660f714eSMehdi Amini void writePropertiesSection(EncodingEmitter &emitter); 694660f714eSMehdi Amini 695660f714eSMehdi Amini //===--------------------------------------------------------------------===// 69661278191SMatteo Franciolini // Helpers 69761278191SMatteo Franciolini 69861278191SMatteo Franciolini void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask, 69961278191SMatteo Franciolini ValueRange range); 70061278191SMatteo Franciolini 70161278191SMatteo Franciolini //===--------------------------------------------------------------------===// 702f3acb54cSRiver Riddle // Fields 703f3acb54cSRiver Riddle 70483dc9999SRiver Riddle /// The builder used for the string section. 70583dc9999SRiver Riddle StringSectionBuilder stringSection; 70683dc9999SRiver Riddle 707f3acb54cSRiver Riddle /// The IR numbering state generated for the root operation. 708f3acb54cSRiver Riddle IRNumberingState numberingState; 7090610e2f6SJacques Pienaar 7100610e2f6SJacques Pienaar /// Configuration dictating bytecode emission. 7110610e2f6SJacques Pienaar const BytecodeWriterConfig::Impl &config; 712660f714eSMehdi Amini 713660f714eSMehdi Amini /// Storage for the properties section 714660f714eSMehdi Amini PropertiesSectionBuilder propertiesSection; 715f3acb54cSRiver Riddle }; 716f3acb54cSRiver Riddle } // namespace 717f3acb54cSRiver Riddle 718660f714eSMehdi Amini LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) { 719f3acb54cSRiver Riddle EncodingEmitter emitter; 720f3acb54cSRiver Riddle 721f3acb54cSRiver Riddle // Emit the bytecode file header. This is how we identify the output as a 722f3acb54cSRiver Riddle // bytecode file. 723*d1578848SKevin Gleason emitter.emitString("ML\xefR", "bytecode header"); 724f3acb54cSRiver Riddle 725f3acb54cSRiver Riddle // Emit the bytecode version. 7260ee4875dSKevin Gleason if (config.bytecodeVersion < bytecode::kMinSupportedVersion || 7270ee4875dSKevin Gleason config.bytecodeVersion > bytecode::kVersion) 7280ee4875dSKevin Gleason return rootOp->emitError() 7290ee4875dSKevin Gleason << "unsupported version requested " << config.bytecodeVersion 7300ee4875dSKevin Gleason << ", must be in range [" 7310ee4875dSKevin Gleason << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", " 7320ee4875dSKevin Gleason << static_cast<int64_t>(bytecode::kVersion) << ']'; 733*d1578848SKevin Gleason emitter.emitVarInt(config.bytecodeVersion, "bytecode version"); 734f3acb54cSRiver Riddle 735f3acb54cSRiver Riddle // Emit the producer. 736*d1578848SKevin Gleason emitter.emitNulTerminatedString(config.producer, "bytecode producer"); 737f3acb54cSRiver Riddle 738f3acb54cSRiver Riddle // Emit the dialect section. 739f3acb54cSRiver Riddle writeDialectSection(emitter); 740f3acb54cSRiver Riddle 741f3acb54cSRiver Riddle // Emit the attributes and types section. 742f3acb54cSRiver Riddle writeAttrTypeSection(emitter); 743f3acb54cSRiver Riddle 744f3acb54cSRiver Riddle // Emit the IR section. 745660f714eSMehdi Amini if (failed(writeIRSection(emitter, rootOp))) 746660f714eSMehdi Amini return failure(); 747f3acb54cSRiver Riddle 7486ab2bcffSRiver Riddle // Emit the resources section. 7490610e2f6SJacques Pienaar writeResourceSection(rootOp, emitter); 7506ab2bcffSRiver Riddle 751f3acb54cSRiver Riddle // Emit the string section. 752f3acb54cSRiver Riddle writeStringSection(emitter); 753f3acb54cSRiver Riddle 754660f714eSMehdi Amini // Emit the properties section. 7559c1e5587SMehdi Amini if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) 756660f714eSMehdi Amini writePropertiesSection(emitter); 757660f714eSMehdi Amini else if (!propertiesSection.empty()) 758660f714eSMehdi Amini return rootOp->emitError( 759660f714eSMehdi Amini "unexpected properties emitted incompatible with bytecode <5"); 760660f714eSMehdi Amini 761f3acb54cSRiver Riddle // Write the generated bytecode to the provided output stream. 762f3acb54cSRiver Riddle emitter.writeTo(os); 763660f714eSMehdi Amini 764660f714eSMehdi Amini return success(); 765f3acb54cSRiver Riddle } 766f3acb54cSRiver Riddle 767f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 768f3acb54cSRiver Riddle // Dialects 769f3acb54cSRiver Riddle 770f3acb54cSRiver Riddle /// Write the given entries in contiguous groups with the same parent dialect. 771f3acb54cSRiver Riddle /// Each dialect sub-group is encoded with the parent dialect and number of 772f3acb54cSRiver Riddle /// elements, followed by the encoding for the entries. The given callback is 773f3acb54cSRiver Riddle /// invoked to encode each individual entry. 774f3acb54cSRiver Riddle template <typename EntriesT, typename EntryCallbackT> 775f3acb54cSRiver Riddle static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, 776f3acb54cSRiver Riddle EntryCallbackT &&callback) { 777f3acb54cSRiver Riddle for (auto it = entries.begin(), e = entries.end(); it != e;) { 778f3acb54cSRiver Riddle auto groupStart = it++; 779f3acb54cSRiver Riddle 780f3acb54cSRiver Riddle // Find the end of the group that shares the same parent dialect. 781f3acb54cSRiver Riddle DialectNumbering *currentDialect = groupStart->dialect; 782f3acb54cSRiver Riddle it = std::find_if(it, e, [&](const auto &entry) { 783f3acb54cSRiver Riddle return entry.dialect != currentDialect; 784f3acb54cSRiver Riddle }); 785f3acb54cSRiver Riddle 786f3acb54cSRiver Riddle // Emit the dialect and number of elements. 787*d1578848SKevin Gleason emitter.emitVarInt(currentDialect->number, "dialect number"); 788*d1578848SKevin Gleason emitter.emitVarInt(std::distance(groupStart, it), "dialect offset"); 789f3acb54cSRiver Riddle 790f3acb54cSRiver Riddle // Emit the entries within the group. 791f3acb54cSRiver Riddle for (auto &entry : llvm::make_range(groupStart, it)) 792f3acb54cSRiver Riddle callback(entry); 793f3acb54cSRiver Riddle } 794f3acb54cSRiver Riddle } 795f3acb54cSRiver Riddle 796f3acb54cSRiver Riddle void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) { 797f3acb54cSRiver Riddle EncodingEmitter dialectEmitter; 798f3acb54cSRiver Riddle 799f3acb54cSRiver Riddle // Emit the referenced dialects. 800f3acb54cSRiver Riddle auto dialects = numberingState.getDialects(); 801*d1578848SKevin Gleason dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count"); 8020e0b6070SMatteo Franciolini for (DialectNumbering &dialect : dialects) { 8030e0b6070SMatteo Franciolini // Write the string section and get the ID. 8040e0b6070SMatteo Franciolini size_t nameID = stringSection.insert(dialect.name); 8050e0b6070SMatteo Franciolini 8069c1e5587SMehdi Amini if (config.bytecodeVersion < bytecode::kDialectVersioning) { 807*d1578848SKevin Gleason dialectEmitter.emitVarInt(nameID, "dialect name ID"); 8080610e2f6SJacques Pienaar continue; 8090610e2f6SJacques Pienaar } 8100610e2f6SJacques Pienaar 8110e0b6070SMatteo Franciolini // Try writing the version to the versionEmitter. 8120e0b6070SMatteo Franciolini EncodingEmitter versionEmitter; 8130e0b6070SMatteo Franciolini if (dialect.interface) { 8140e0b6070SMatteo Franciolini // The writer used when emitting using a custom bytecode encoding. 8150610e2f6SJacques Pienaar DialectWriter versionWriter(config.bytecodeVersion, versionEmitter, 8167ad9e9dcSMatteo Franciolini numberingState, stringSection, 8177ad9e9dcSMatteo Franciolini config.dialectVersionMap); 8180e0b6070SMatteo Franciolini dialect.interface->writeVersion(versionWriter); 8190e0b6070SMatteo Franciolini } 8200e0b6070SMatteo Franciolini 8210e0b6070SMatteo Franciolini // If the version emitter is empty, version is not available. We can encode 8220e0b6070SMatteo Franciolini // this in the dialect ID, so if there is no version, we don't write the 8230e0b6070SMatteo Franciolini // section. 8240e0b6070SMatteo Franciolini size_t versionAvailable = versionEmitter.size() > 0; 825*d1578848SKevin Gleason dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable, 826*d1578848SKevin Gleason "dialect version"); 8270e0b6070SMatteo Franciolini if (versionAvailable) 8280e0b6070SMatteo Franciolini dialectEmitter.emitSection(bytecode::Section::kDialectVersions, 8290e0b6070SMatteo Franciolini std::move(versionEmitter)); 8300e0b6070SMatteo Franciolini } 831f3acb54cSRiver Riddle 8329c1e5587SMehdi Amini if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) 833*d1578848SKevin Gleason dialectEmitter.emitVarInt(size(numberingState.getOpNames()), 834*d1578848SKevin Gleason "op names count"); 8351826fadbSJacques Pienaar 836f3acb54cSRiver Riddle // Emit the referenced operation names grouped by dialect. 837f3acb54cSRiver Riddle auto emitOpName = [&](OpNameNumbering &name) { 838660f714eSMehdi Amini size_t stringId = stringSection.insert(name.name.stripDialect()); 8399c1e5587SMehdi Amini if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding) 840*d1578848SKevin Gleason dialectEmitter.emitVarInt(stringId, "dialect op name"); 841660f714eSMehdi Amini else 842*d1578848SKevin Gleason dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(), 843*d1578848SKevin Gleason "dialect op name"); 844f3acb54cSRiver Riddle }; 845f3acb54cSRiver Riddle writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName); 846f3acb54cSRiver Riddle 847f3acb54cSRiver Riddle emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter)); 848f3acb54cSRiver Riddle } 849f3acb54cSRiver Riddle 850f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 851f3acb54cSRiver Riddle // Attributes and Types 852f3acb54cSRiver Riddle 853f3acb54cSRiver Riddle void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) { 854f3acb54cSRiver Riddle EncodingEmitter attrTypeEmitter; 855f3acb54cSRiver Riddle EncodingEmitter offsetEmitter; 856*d1578848SKevin Gleason offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()), 857*d1578848SKevin Gleason "attributes count"); 858*d1578848SKevin Gleason offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()), 859*d1578848SKevin Gleason "types count"); 860f3acb54cSRiver Riddle 861f3acb54cSRiver Riddle // A functor used to emit an attribute or type entry. 862f3acb54cSRiver Riddle uint64_t prevOffset = 0; 863f3acb54cSRiver Riddle auto emitAttrOrType = [&](auto &entry) { 86402c2ecb9SRiver Riddle auto entryValue = entry.getValue(); 865f3acb54cSRiver Riddle 866bff6a429SMatteo Franciolini auto emitAttrOrTypeRawImpl = [&]() -> void { 867b86a1321SMehdi Amini RawEmitterOstream(attrTypeEmitter) << entryValue; 868*d1578848SKevin Gleason attrTypeEmitter.emitByte(0, "attr/type separator"); 869bff6a429SMatteo Franciolini }; 870bff6a429SMatteo Franciolini auto emitAttrOrTypeImpl = [&]() -> bool { 871bff6a429SMatteo Franciolini // TODO: We don't currently support custom encoded mutable types and 872bff6a429SMatteo Franciolini // attributes. 873bff6a429SMatteo Franciolini if (entryValue.template hasTrait<TypeTrait::IsMutable>() || 874bff6a429SMatteo Franciolini entryValue.template hasTrait<AttributeTrait::IsMutable>()) { 875bff6a429SMatteo Franciolini emitAttrOrTypeRawImpl(); 876bff6a429SMatteo Franciolini return false; 877b86a1321SMehdi Amini } 878b299ec16SMehdi Amini 879bff6a429SMatteo Franciolini DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter, 8807ad9e9dcSMatteo Franciolini numberingState, stringSection, 8817ad9e9dcSMatteo Franciolini config.dialectVersionMap); 882bff6a429SMatteo Franciolini if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) { 883bff6a429SMatteo Franciolini for (const auto &callback : config.typeWriterCallbacks) { 884bff6a429SMatteo Franciolini if (succeeded(callback->write(entryValue, dialectWriter))) 885bff6a429SMatteo Franciolini return true; 886bff6a429SMatteo Franciolini } 887bff6a429SMatteo Franciolini if (const BytecodeDialectInterface *interface = 888bff6a429SMatteo Franciolini entry.dialect->interface) { 889bff6a429SMatteo Franciolini if (succeeded(interface->writeType(entryValue, dialectWriter))) 890bff6a429SMatteo Franciolini return true; 891bff6a429SMatteo Franciolini } 892bff6a429SMatteo Franciolini } else { 893bff6a429SMatteo Franciolini for (const auto &callback : config.attributeWriterCallbacks) { 894bff6a429SMatteo Franciolini if (succeeded(callback->write(entryValue, dialectWriter))) 895bff6a429SMatteo Franciolini return true; 896bff6a429SMatteo Franciolini } 897bff6a429SMatteo Franciolini if (const BytecodeDialectInterface *interface = 898bff6a429SMatteo Franciolini entry.dialect->interface) { 899bff6a429SMatteo Franciolini if (succeeded(interface->writeAttribute(entryValue, dialectWriter))) 900bff6a429SMatteo Franciolini return true; 901bff6a429SMatteo Franciolini } 902bff6a429SMatteo Franciolini } 903bff6a429SMatteo Franciolini 904bff6a429SMatteo Franciolini // If the entry was not emitted using a callback or a dialect interface, 905bff6a429SMatteo Franciolini // emit it using the textual format. 906bff6a429SMatteo Franciolini emitAttrOrTypeRawImpl(); 907bff6a429SMatteo Franciolini return false; 908bff6a429SMatteo Franciolini }; 909bff6a429SMatteo Franciolini 910bff6a429SMatteo Franciolini bool hasCustomEncoding = emitAttrOrTypeImpl(); 911bff6a429SMatteo Franciolini 912f3acb54cSRiver Riddle // Record the offset of this entry. 913f3acb54cSRiver Riddle uint64_t curOffset = attrTypeEmitter.size(); 914*d1578848SKevin Gleason offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding, 915*d1578848SKevin Gleason "attr/type offset"); 916f3acb54cSRiver Riddle prevOffset = curOffset; 917f3acb54cSRiver Riddle }; 918f3acb54cSRiver Riddle 919f3acb54cSRiver Riddle // Emit the attribute and type entries for each dialect. 920f3acb54cSRiver Riddle writeDialectGrouping(offsetEmitter, numberingState.getAttributes(), 921f3acb54cSRiver Riddle emitAttrOrType); 922f3acb54cSRiver Riddle writeDialectGrouping(offsetEmitter, numberingState.getTypes(), 923f3acb54cSRiver Riddle emitAttrOrType); 924f3acb54cSRiver Riddle 925f3acb54cSRiver Riddle // Emit the sections to the stream. 926f3acb54cSRiver Riddle emitter.emitSection(bytecode::Section::kAttrTypeOffset, 927f3acb54cSRiver Riddle std::move(offsetEmitter)); 928f3acb54cSRiver Riddle emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter)); 929f3acb54cSRiver Riddle } 930f3acb54cSRiver Riddle 931f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 932f3acb54cSRiver Riddle // Operations 933f3acb54cSRiver Riddle 934660f714eSMehdi Amini LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter, 935660f714eSMehdi Amini Block *block) { 936f3acb54cSRiver Riddle ArrayRef<BlockArgument> args = block->getArguments(); 937f3acb54cSRiver Riddle bool hasArgs = !args.empty(); 938f3acb54cSRiver Riddle 939f3acb54cSRiver Riddle // Emit the number of operations in this block, and if it has arguments. We 940f3acb54cSRiver Riddle // use the low bit of the operation count to indicate if the block has 941f3acb54cSRiver Riddle // arguments. 942f3acb54cSRiver Riddle unsigned numOps = numberingState.getOperationCount(block); 943*d1578848SKevin Gleason emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops"); 944f3acb54cSRiver Riddle 945f3acb54cSRiver Riddle // Emit the arguments of the block. 946f3acb54cSRiver Riddle if (hasArgs) { 947*d1578848SKevin Gleason emitter.emitVarInt(args.size(), "block args count"); 948f3acb54cSRiver Riddle for (BlockArgument arg : args) { 9491826fadbSJacques Pienaar Location argLoc = arg.getLoc(); 9509c1e5587SMehdi Amini if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) { 9511826fadbSJacques Pienaar emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()), 952*d1578848SKevin Gleason !isa<UnknownLoc>(argLoc), "block arg type"); 9531826fadbSJacques Pienaar if (!isa<UnknownLoc>(argLoc)) 954*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(argLoc), 955*d1578848SKevin Gleason "block arg location"); 9561826fadbSJacques Pienaar } else { 957*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(arg.getType()), 958*d1578848SKevin Gleason "block arg type"); 959*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(argLoc), 960*d1578848SKevin Gleason "block arg location"); 9611826fadbSJacques Pienaar } 962f3acb54cSRiver Riddle } 9639c1e5587SMehdi Amini if (config.bytecodeVersion >= bytecode::kUseListOrdering) { 96461278191SMatteo Franciolini uint64_t maskOffset = emitter.size(); 96561278191SMatteo Franciolini uint8_t encodingMask = 0; 966*d1578848SKevin Gleason emitter.emitByte(0, "use-list separator"); 96761278191SMatteo Franciolini writeUseListOrders(emitter, encodingMask, args); 96861278191SMatteo Franciolini if (encodingMask) 969*d1578848SKevin Gleason emitter.patchByte(maskOffset, encodingMask, "block patch encoding"); 97061278191SMatteo Franciolini } 971f3acb54cSRiver Riddle } 972f3acb54cSRiver Riddle 973f3acb54cSRiver Riddle // Emit the operations within the block. 974f3acb54cSRiver Riddle for (Operation &op : *block) 975660f714eSMehdi Amini if (failed(writeOp(emitter, &op))) 976660f714eSMehdi Amini return failure(); 977660f714eSMehdi Amini return success(); 978f3acb54cSRiver Riddle } 979f3acb54cSRiver Riddle 980660f714eSMehdi Amini LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) { 981*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID"); 982f3acb54cSRiver Riddle 983f3acb54cSRiver Riddle // Emit a mask for the operation components. We need to fill this in later 984f3acb54cSRiver Riddle // (when we actually know what needs to be emitted), so emit a placeholder for 985f3acb54cSRiver Riddle // now. 986f3acb54cSRiver Riddle uint64_t maskOffset = emitter.size(); 987f3acb54cSRiver Riddle uint8_t opEncodingMask = 0; 988*d1578848SKevin Gleason emitter.emitByte(0, "op separator"); 989f3acb54cSRiver Riddle 990f3acb54cSRiver Riddle // Emit the location for this operation. 991*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location"); 992f3acb54cSRiver Riddle 993f3acb54cSRiver Riddle // Emit the attributes of this operation. 994660f714eSMehdi Amini DictionaryAttr attrs = op->getDiscardableAttrDictionary(); 9959c1e5587SMehdi Amini // Allow deployment to version <kNativePropertiesEncoding by merging inherent 9969c1e5587SMehdi Amini // attribute with the discardable ones. We should fail if there are any 997985bb3a2SAlex Zinenko // conflicts. When properties are not used by the op, also store everything as 998985bb3a2SAlex Zinenko // attributes. 999985bb3a2SAlex Zinenko if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding || 1000985bb3a2SAlex Zinenko !op->getPropertiesStorage()) { 1001660f714eSMehdi Amini attrs = op->getAttrDictionary(); 1002985bb3a2SAlex Zinenko } 1003f3acb54cSRiver Riddle if (!attrs.empty()) { 1004f3acb54cSRiver Riddle opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs; 1005*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count"); 1006660f714eSMehdi Amini } 1007660f714eSMehdi Amini 1008660f714eSMehdi Amini // Emit the properties of this operation, for now we still support deployment 10099c1e5587SMehdi Amini // to version <kNativePropertiesEncoding. 10109c1e5587SMehdi Amini if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) { 1011660f714eSMehdi Amini std::optional<ssize_t> propertiesId = propertiesSection.emit(op); 1012660f714eSMehdi Amini if (propertiesId.has_value()) { 1013660f714eSMehdi Amini opEncodingMask |= bytecode::OpEncodingMask::kHasProperties; 1014*d1578848SKevin Gleason emitter.emitVarInt(*propertiesId, "op properties ID"); 1015660f714eSMehdi Amini } 1016f3acb54cSRiver Riddle } 1017f3acb54cSRiver Riddle 1018f3acb54cSRiver Riddle // Emit the result types of the operation. 1019f3acb54cSRiver Riddle if (unsigned numResults = op->getNumResults()) { 1020f3acb54cSRiver Riddle opEncodingMask |= bytecode::OpEncodingMask::kHasResults; 1021*d1578848SKevin Gleason emitter.emitVarInt(numResults, "op results count"); 1022f3acb54cSRiver Riddle for (Type type : op->getResultTypes()) 1023*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(type), "op result type"); 1024f3acb54cSRiver Riddle } 1025f3acb54cSRiver Riddle 1026f3acb54cSRiver Riddle // Emit the operands of the operation. 1027f3acb54cSRiver Riddle if (unsigned numOperands = op->getNumOperands()) { 1028f3acb54cSRiver Riddle opEncodingMask |= bytecode::OpEncodingMask::kHasOperands; 1029*d1578848SKevin Gleason emitter.emitVarInt(numOperands, "op operands count"); 1030f3acb54cSRiver Riddle for (Value operand : op->getOperands()) 1031*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(operand), "op operand types"); 1032f3acb54cSRiver Riddle } 1033f3acb54cSRiver Riddle 1034f3acb54cSRiver Riddle // Emit the successors of the operation. 1035f3acb54cSRiver Riddle if (unsigned numSuccessors = op->getNumSuccessors()) { 1036f3acb54cSRiver Riddle opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors; 1037*d1578848SKevin Gleason emitter.emitVarInt(numSuccessors, "op successors count"); 1038f3acb54cSRiver Riddle for (Block *successor : op->getSuccessors()) 1039*d1578848SKevin Gleason emitter.emitVarInt(numberingState.getNumber(successor), "op successor"); 1040f3acb54cSRiver Riddle } 1041f3acb54cSRiver Riddle 104261278191SMatteo Franciolini // Emit the use-list orders to bytecode, so we can reconstruct the same order 104361278191SMatteo Franciolini // at parsing. 10449c1e5587SMehdi Amini if (config.bytecodeVersion >= bytecode::kUseListOrdering) 104561278191SMatteo Franciolini writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults())); 104661278191SMatteo Franciolini 1047f3acb54cSRiver Riddle // Check for regions. 1048f3acb54cSRiver Riddle unsigned numRegions = op->getNumRegions(); 1049f3acb54cSRiver Riddle if (numRegions) 1050f3acb54cSRiver Riddle opEncodingMask |= bytecode::OpEncodingMask::kHasInlineRegions; 1051f3acb54cSRiver Riddle 1052f3acb54cSRiver Riddle // Update the mask for the operation. 1053*d1578848SKevin Gleason emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask"); 1054f3acb54cSRiver Riddle 1055f3acb54cSRiver Riddle // With the mask emitted, we can now emit the regions of the operation. We do 1056f3acb54cSRiver Riddle // this after mask emission to avoid offset complications that may arise by 1057f3acb54cSRiver Riddle // emitting the regions first (e.g. if the regions are huge, backpatching the 1058f3acb54cSRiver Riddle // op encoding mask is more annoying). 1059f3acb54cSRiver Riddle if (numRegions) { 10604af01bf9SRiver Riddle bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op); 1061*d1578848SKevin Gleason emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove, 1062*d1578848SKevin Gleason "op regions count"); 1063f3acb54cSRiver Riddle 10643128b310SMehdi Amini // If the region is not isolated from above, or we are emitting bytecode 10659c1e5587SMehdi Amini // targeting version <kLazyLoading, we don't use a section. 10665ab65895SRiver Riddle if (isIsolatedFromAbove && 10675ab65895SRiver Riddle config.bytecodeVersion >= bytecode::kLazyLoading) { 10683128b310SMehdi Amini EncodingEmitter regionEmitter; 10695ab65895SRiver Riddle if (failed(writeRegions(regionEmitter, op->getRegions()))) 1070660f714eSMehdi Amini return failure(); 10713128b310SMehdi Amini emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter)); 10725ab65895SRiver Riddle 10735ab65895SRiver Riddle } else if (failed(writeRegions(emitter, op->getRegions()))) { 10745ab65895SRiver Riddle return failure(); 10753128b310SMehdi Amini } 1076f3acb54cSRiver Riddle } 1077660f714eSMehdi Amini return success(); 1078f3acb54cSRiver Riddle } 1079f3acb54cSRiver Riddle 108061278191SMatteo Franciolini void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter, 108161278191SMatteo Franciolini uint8_t &opEncodingMask, 108261278191SMatteo Franciolini ValueRange range) { 108361278191SMatteo Franciolini // Loop over the results and store the use-list order per result index. 108461278191SMatteo Franciolini DenseMap<unsigned, llvm::SmallVector<unsigned>> map; 108561278191SMatteo Franciolini for (auto item : llvm::enumerate(range)) { 108661278191SMatteo Franciolini auto value = item.value(); 108761278191SMatteo Franciolini // No need to store a custom use-list order if the result does not have 108861278191SMatteo Franciolini // multiple uses. 108961278191SMatteo Franciolini if (value.use_empty() || value.hasOneUse()) 109061278191SMatteo Franciolini continue; 109161278191SMatteo Franciolini 109261278191SMatteo Franciolini // For each result, assemble the list of pairs (use-list-index, 109361278191SMatteo Franciolini // global-value-index). While doing so, detect if the global-value-index is 109461278191SMatteo Franciolini // already ordered with respect to the use-list-index. 109561278191SMatteo Franciolini bool alreadyOrdered = true; 109661278191SMatteo Franciolini auto &firstUse = *value.use_begin(); 109761278191SMatteo Franciolini uint64_t prevID = bytecode::getUseID( 109861278191SMatteo Franciolini firstUse, numberingState.getNumber(firstUse.getOwner())); 109961278191SMatteo Franciolini llvm::SmallVector<std::pair<unsigned, uint64_t>> useListPairs( 110061278191SMatteo Franciolini {{0, prevID}}); 110161278191SMatteo Franciolini 110261278191SMatteo Franciolini for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) { 110361278191SMatteo Franciolini uint64_t currentID = bytecode::getUseID( 110461278191SMatteo Franciolini use.value(), numberingState.getNumber(use.value().getOwner())); 110561278191SMatteo Franciolini // The use-list order achieved when building the IR at parsing always 110661278191SMatteo Franciolini // pushes new uses on front. Hence, if the order by unique ID is 110761278191SMatteo Franciolini // monotonically decreasing, a roundtrip to bytecode preserves such order. 110861278191SMatteo Franciolini alreadyOrdered &= (prevID > currentID); 110961278191SMatteo Franciolini useListPairs.push_back({use.index(), currentID}); 111061278191SMatteo Franciolini prevID = currentID; 111161278191SMatteo Franciolini } 111261278191SMatteo Franciolini 111361278191SMatteo Franciolini // Do not emit if the order is already sorted. 111461278191SMatteo Franciolini if (alreadyOrdered) 111561278191SMatteo Franciolini continue; 111661278191SMatteo Franciolini 111761278191SMatteo Franciolini // Sort the use indices by the unique ID indices in descending order. 111861278191SMatteo Franciolini std::sort( 111961278191SMatteo Franciolini useListPairs.begin(), useListPairs.end(), 112061278191SMatteo Franciolini [](auto elem1, auto elem2) { return elem1.second > elem2.second; }); 112161278191SMatteo Franciolini 112261278191SMatteo Franciolini map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) { 112361278191SMatteo Franciolini return elem.first; 112461278191SMatteo Franciolini })); 112561278191SMatteo Franciolini } 112661278191SMatteo Franciolini 112761278191SMatteo Franciolini if (map.empty()) 112861278191SMatteo Franciolini return; 112961278191SMatteo Franciolini 113061278191SMatteo Franciolini opEncodingMask |= bytecode::OpEncodingMask::kHasUseListOrders; 113161278191SMatteo Franciolini // Emit the number of results that have a custom use-list order if the number 113261278191SMatteo Franciolini // of results is greater than one. 1133*d1578848SKevin Gleason if (range.size() != 1) { 1134*d1578848SKevin Gleason emitter.emitVarInt(map.size(), "custom use-list size"); 1135*d1578848SKevin Gleason } 113661278191SMatteo Franciolini 113761278191SMatteo Franciolini for (const auto &item : map) { 113861278191SMatteo Franciolini auto resultIdx = item.getFirst(); 113961278191SMatteo Franciolini auto useListOrder = item.getSecond(); 114061278191SMatteo Franciolini 114161278191SMatteo Franciolini // Compute the number of uses that are actually shuffled. If those are less 114261278191SMatteo Franciolini // than half of the total uses, encoding the index pair `(src, dst)` is more 114361278191SMatteo Franciolini // space efficient. 114461278191SMatteo Franciolini size_t shuffledElements = 114561278191SMatteo Franciolini llvm::count_if(llvm::enumerate(useListOrder), 114661278191SMatteo Franciolini [](auto item) { return item.index() != item.value(); }); 114761278191SMatteo Franciolini bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2); 114861278191SMatteo Franciolini 114961278191SMatteo Franciolini // For single result, we don't need to store the result index. 115061278191SMatteo Franciolini if (range.size() != 1) 1151*d1578848SKevin Gleason emitter.emitVarInt(resultIdx, "use-list result index"); 115261278191SMatteo Franciolini 115361278191SMatteo Franciolini if (indexPairEncoding) { 1154*d1578848SKevin Gleason emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding, 1155*d1578848SKevin Gleason "use-list index pair size"); 115661278191SMatteo Franciolini for (auto pair : llvm::enumerate(useListOrder)) { 115761278191SMatteo Franciolini if (pair.index() != pair.value()) { 1158*d1578848SKevin Gleason emitter.emitVarInt(pair.value(), "use-list index pair first"); 1159*d1578848SKevin Gleason emitter.emitVarInt(pair.index(), "use-list index pair second"); 116061278191SMatteo Franciolini } 116161278191SMatteo Franciolini } 116261278191SMatteo Franciolini } else { 1163*d1578848SKevin Gleason emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding, 1164*d1578848SKevin Gleason "use-list size"); 116561278191SMatteo Franciolini for (const auto &index : useListOrder) 1166*d1578848SKevin Gleason emitter.emitVarInt(index, "use-list order"); 116761278191SMatteo Franciolini } 116861278191SMatteo Franciolini } 116961278191SMatteo Franciolini } 117061278191SMatteo Franciolini 1171660f714eSMehdi Amini LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter, 1172660f714eSMehdi Amini Region *region) { 1173f3acb54cSRiver Riddle // If the region is empty, we only need to emit the number of blocks (which is 1174f3acb54cSRiver Riddle // zero). 1175660f714eSMehdi Amini if (region->empty()) { 1176*d1578848SKevin Gleason emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty"); 1177660f714eSMehdi Amini return success(); 1178660f714eSMehdi Amini } 1179f3acb54cSRiver Riddle 1180f3acb54cSRiver Riddle // Emit the number of blocks and values within the region. 1181f3acb54cSRiver Riddle unsigned numBlocks, numValues; 1182f3acb54cSRiver Riddle std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region); 1183*d1578848SKevin Gleason emitter.emitVarInt(numBlocks, "region block count"); 1184*d1578848SKevin Gleason emitter.emitVarInt(numValues, "region value count"); 1185f3acb54cSRiver Riddle 1186f3acb54cSRiver Riddle // Emit the blocks within the region. 1187f3acb54cSRiver Riddle for (Block &block : *region) 1188660f714eSMehdi Amini if (failed(writeBlock(emitter, &block))) 1189660f714eSMehdi Amini return failure(); 1190660f714eSMehdi Amini return success(); 1191f3acb54cSRiver Riddle } 1192f3acb54cSRiver Riddle 1193660f714eSMehdi Amini LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter, 1194660f714eSMehdi Amini Operation *op) { 1195f3acb54cSRiver Riddle EncodingEmitter irEmitter; 1196f3acb54cSRiver Riddle 1197f3acb54cSRiver Riddle // Write the IR section the same way as a block with no arguments. Note that 1198f3acb54cSRiver Riddle // the low-bit of the operation count for a block is used to indicate if the 1199f3acb54cSRiver Riddle // block has arguments, which in this case is always false. 1200*d1578848SKevin Gleason irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section"); 1201f3acb54cSRiver Riddle 1202f3acb54cSRiver Riddle // Emit the operations. 1203660f714eSMehdi Amini if (failed(writeOp(irEmitter, op))) 1204660f714eSMehdi Amini return failure(); 1205f3acb54cSRiver Riddle 1206f3acb54cSRiver Riddle emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter)); 1207660f714eSMehdi Amini return success(); 1208f3acb54cSRiver Riddle } 1209f3acb54cSRiver Riddle 1210f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 12116ab2bcffSRiver Riddle // Resources 12126ab2bcffSRiver Riddle 12136ab2bcffSRiver Riddle namespace { 12146ab2bcffSRiver Riddle /// This class represents a resource builder implementation for the MLIR 12156ab2bcffSRiver Riddle /// bytecode format. 12166ab2bcffSRiver Riddle class ResourceBuilder : public AsmResourceBuilder { 12176ab2bcffSRiver Riddle public: 12186ab2bcffSRiver Riddle using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>; 12196ab2bcffSRiver Riddle 12206ab2bcffSRiver Riddle ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection, 12214488f493SMatteo Franciolini PostProcessFn postProcessFn, bool shouldElideData) 12226ab2bcffSRiver Riddle : emitter(emitter), stringSection(stringSection), 12234488f493SMatteo Franciolini postProcessFn(postProcessFn), shouldElideData(shouldElideData) {} 12246ab2bcffSRiver Riddle ~ResourceBuilder() override = default; 12256ab2bcffSRiver Riddle 12266ab2bcffSRiver Riddle void buildBlob(StringRef key, ArrayRef<char> data, 12276ab2bcffSRiver Riddle uint32_t dataAlignment) final { 12284488f493SMatteo Franciolini if (!shouldElideData) 1229*d1578848SKevin Gleason emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob"); 12306ab2bcffSRiver Riddle postProcessFn(key, AsmResourceEntryKind::Blob); 12316ab2bcffSRiver Riddle } 12326ab2bcffSRiver Riddle void buildBool(StringRef key, bool data) final { 12334488f493SMatteo Franciolini if (!shouldElideData) 1234*d1578848SKevin Gleason emitter.emitByte(data, "resource bool"); 12356ab2bcffSRiver Riddle postProcessFn(key, AsmResourceEntryKind::Bool); 12366ab2bcffSRiver Riddle } 12376ab2bcffSRiver Riddle void buildString(StringRef key, StringRef data) final { 12384488f493SMatteo Franciolini if (!shouldElideData) 1239*d1578848SKevin Gleason emitter.emitVarInt(stringSection.insert(data), "resource string"); 12406ab2bcffSRiver Riddle postProcessFn(key, AsmResourceEntryKind::String); 12416ab2bcffSRiver Riddle } 12426ab2bcffSRiver Riddle 12436ab2bcffSRiver Riddle private: 12446ab2bcffSRiver Riddle EncodingEmitter &emitter; 12456ab2bcffSRiver Riddle StringSectionBuilder &stringSection; 12466ab2bcffSRiver Riddle PostProcessFn postProcessFn; 12474488f493SMatteo Franciolini bool shouldElideData = false; 12486ab2bcffSRiver Riddle }; 12496ab2bcffSRiver Riddle } // namespace 12506ab2bcffSRiver Riddle 12510610e2f6SJacques Pienaar void BytecodeWriter::writeResourceSection(Operation *op, 12520610e2f6SJacques Pienaar EncodingEmitter &emitter) { 12536ab2bcffSRiver Riddle EncodingEmitter resourceEmitter; 12546ab2bcffSRiver Riddle EncodingEmitter resourceOffsetEmitter; 12556ab2bcffSRiver Riddle uint64_t prevOffset = 0; 12566ab2bcffSRiver Riddle SmallVector<std::tuple<StringRef, AsmResourceEntryKind, uint64_t>> 12576ab2bcffSRiver Riddle curResourceEntries; 12586ab2bcffSRiver Riddle 12596ab2bcffSRiver Riddle // Functor used to process the offset for a resource of `kind` defined by 12606ab2bcffSRiver Riddle // 'key'. 12616ab2bcffSRiver Riddle auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) { 12626ab2bcffSRiver Riddle uint64_t curOffset = resourceEmitter.size(); 12636ab2bcffSRiver Riddle curResourceEntries.emplace_back(key, kind, curOffset - prevOffset); 12646ab2bcffSRiver Riddle prevOffset = curOffset; 12656ab2bcffSRiver Riddle }; 12666ab2bcffSRiver Riddle 12676ab2bcffSRiver Riddle // Functor used to emit a resource group defined by 'key'. 12686ab2bcffSRiver Riddle auto emitResourceGroup = [&](uint64_t key) { 1269*d1578848SKevin Gleason resourceOffsetEmitter.emitVarInt(key, "resource group key"); 1270*d1578848SKevin Gleason resourceOffsetEmitter.emitVarInt(curResourceEntries.size(), 1271*d1578848SKevin Gleason "resource group size"); 12726ab2bcffSRiver Riddle for (auto [key, kind, size] : curResourceEntries) { 1273*d1578848SKevin Gleason resourceOffsetEmitter.emitVarInt(stringSection.insert(key), 1274*d1578848SKevin Gleason "resource key"); 1275*d1578848SKevin Gleason resourceOffsetEmitter.emitVarInt(size, "resource size"); 1276*d1578848SKevin Gleason resourceOffsetEmitter.emitByte(kind, "resource kind"); 12776ab2bcffSRiver Riddle } 12786ab2bcffSRiver Riddle }; 12796ab2bcffSRiver Riddle 12806ab2bcffSRiver Riddle // Builder used to emit resources. 12816ab2bcffSRiver Riddle ResourceBuilder entryBuilder(resourceEmitter, stringSection, 12824488f493SMatteo Franciolini appendResourceOffset, 12834488f493SMatteo Franciolini config.shouldElideResourceData); 12846ab2bcffSRiver Riddle 12856ab2bcffSRiver Riddle // Emit the external resource entries. 1286*d1578848SKevin Gleason resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(), 1287*d1578848SKevin Gleason "external resource printer count"); 12886ab2bcffSRiver Riddle for (const auto &printer : config.externalResourcePrinters) { 12896ab2bcffSRiver Riddle curResourceEntries.clear(); 12906ab2bcffSRiver Riddle printer->buildResources(op, entryBuilder); 12916ab2bcffSRiver Riddle emitResourceGroup(stringSection.insert(printer->getName())); 12926ab2bcffSRiver Riddle } 12936ab2bcffSRiver Riddle 12946ab2bcffSRiver Riddle // Emit the dialect resource entries. 12956ab2bcffSRiver Riddle for (DialectNumbering &dialect : numberingState.getDialects()) { 12966ab2bcffSRiver Riddle if (!dialect.asmInterface) 12976ab2bcffSRiver Riddle continue; 12986ab2bcffSRiver Riddle curResourceEntries.clear(); 12996ab2bcffSRiver Riddle dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder); 13006ab2bcffSRiver Riddle 13016ab2bcffSRiver Riddle // Emit the declaration resources for this dialect, these didn't get emitted 13026ab2bcffSRiver Riddle // by the interface. These resources don't have data attached, so just use a 13036ab2bcffSRiver Riddle // "blob" kind as a placeholder. 13046ab2bcffSRiver Riddle for (const auto &resource : dialect.resourceMap) 13056ab2bcffSRiver Riddle if (resource.second->isDeclaration) 13066ab2bcffSRiver Riddle appendResourceOffset(resource.first, AsmResourceEntryKind::Blob); 13076ab2bcffSRiver Riddle 13086ab2bcffSRiver Riddle // Emit the resource group for this dialect. 13096ab2bcffSRiver Riddle if (!curResourceEntries.empty()) 13106ab2bcffSRiver Riddle emitResourceGroup(dialect.number); 13116ab2bcffSRiver Riddle } 13126ab2bcffSRiver Riddle 13136ab2bcffSRiver Riddle // If we didn't emit any resource groups, elide the resource sections. 13146ab2bcffSRiver Riddle if (resourceOffsetEmitter.size() == 0) 13156ab2bcffSRiver Riddle return; 13166ab2bcffSRiver Riddle 13176ab2bcffSRiver Riddle emitter.emitSection(bytecode::Section::kResourceOffset, 13186ab2bcffSRiver Riddle std::move(resourceOffsetEmitter)); 13196ab2bcffSRiver Riddle emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter)); 13206ab2bcffSRiver Riddle } 13216ab2bcffSRiver Riddle 13226ab2bcffSRiver Riddle //===----------------------------------------------------------------------===// 1323f3acb54cSRiver Riddle // Strings 1324f3acb54cSRiver Riddle 1325f3acb54cSRiver Riddle void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) { 1326f3acb54cSRiver Riddle EncodingEmitter stringEmitter; 132783dc9999SRiver Riddle stringSection.write(stringEmitter); 1328f3acb54cSRiver Riddle emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter)); 1329f3acb54cSRiver Riddle } 1330f3acb54cSRiver Riddle 1331f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 1332660f714eSMehdi Amini // Properties 1333660f714eSMehdi Amini 1334660f714eSMehdi Amini void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) { 1335660f714eSMehdi Amini EncodingEmitter propertiesEmitter; 1336660f714eSMehdi Amini propertiesSection.write(propertiesEmitter); 1337660f714eSMehdi Amini emitter.emitSection(bytecode::Section::kProperties, 1338660f714eSMehdi Amini std::move(propertiesEmitter)); 1339660f714eSMehdi Amini } 1340660f714eSMehdi Amini 1341660f714eSMehdi Amini //===----------------------------------------------------------------------===// 1342f3acb54cSRiver Riddle // Entry Points 1343f3acb54cSRiver Riddle //===----------------------------------------------------------------------===// 1344f3acb54cSRiver Riddle 13455c90e1ffSJacques Pienaar LogicalResult mlir::writeBytecodeToFile(Operation *op, raw_ostream &os, 13466ab2bcffSRiver Riddle const BytecodeWriterConfig &config) { 1347660f714eSMehdi Amini BytecodeWriter writer(op, config); 1348660f714eSMehdi Amini return writer.write(op, os); 1349f3acb54cSRiver Riddle } 1350