//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header defines the BitstreamReader class.  This class can be used to
// read an arbitrary bitstream, regardless of its contents.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
#define LLVM_BITSTREAM_BITSTREAMREADER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace llvm {

360b57cec5SDimitry Andric /// This class maintains the abbreviations read from a block info block. 370b57cec5SDimitry Andric class BitstreamBlockInfo { 380b57cec5SDimitry Andric public: 390b57cec5SDimitry Andric /// This contains information emitted to BLOCKINFO_BLOCK blocks. These 400b57cec5SDimitry Andric /// describe abbreviations that all blocks of the specified ID inherit. 410b57cec5SDimitry Andric struct BlockInfo { 42480093f4SDimitry Andric unsigned BlockID = 0; 430b57cec5SDimitry Andric std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; 440b57cec5SDimitry Andric std::string Name; 450b57cec5SDimitry Andric std::vector<std::pair<unsigned, std::string>> RecordNames; 460b57cec5SDimitry Andric }; 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric private: 490b57cec5SDimitry Andric std::vector<BlockInfo> BlockInfoRecords; 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric public: 520b57cec5SDimitry Andric /// If there is block info for the specified ID, return it, otherwise return 530b57cec5SDimitry Andric /// null. getBlockInfo(unsigned BlockID)540b57cec5SDimitry Andric const BlockInfo *getBlockInfo(unsigned BlockID) const { 550b57cec5SDimitry Andric // Common case, the most recent entry matches BlockID. 
560b57cec5SDimitry Andric if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) 570b57cec5SDimitry Andric return &BlockInfoRecords.back(); 580b57cec5SDimitry Andric 59bdd1243dSDimitry Andric for (const BlockInfo &BI : BlockInfoRecords) 60bdd1243dSDimitry Andric if (BI.BlockID == BlockID) 61bdd1243dSDimitry Andric return &BI; 620b57cec5SDimitry Andric return nullptr; 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric getOrCreateBlockInfo(unsigned BlockID)650b57cec5SDimitry Andric BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { 660b57cec5SDimitry Andric if (const BlockInfo *BI = getBlockInfo(BlockID)) 670b57cec5SDimitry Andric return *const_cast<BlockInfo*>(BI); 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric // Otherwise, add a new record. 700b57cec5SDimitry Andric BlockInfoRecords.emplace_back(); 710b57cec5SDimitry Andric BlockInfoRecords.back().BlockID = BlockID; 720b57cec5SDimitry Andric return BlockInfoRecords.back(); 730b57cec5SDimitry Andric } 740b57cec5SDimitry Andric }; 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric /// This represents a position within a bitstream. There may be multiple 770b57cec5SDimitry Andric /// independent cursors reading within one bitstream, each maintaining their 780b57cec5SDimitry Andric /// own local state. 790b57cec5SDimitry Andric class SimpleBitstreamCursor { 800b57cec5SDimitry Andric ArrayRef<uint8_t> BitcodeBytes; 810b57cec5SDimitry Andric size_t NextChar = 0; 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric public: 840b57cec5SDimitry Andric /// This is the current data we have pulled from the stream but have not 850b57cec5SDimitry Andric /// returned to the client. This is specifically and intentionally defined to 860b57cec5SDimitry Andric /// follow the word size of the host machine for efficiency. We use word_t in 870b57cec5SDimitry Andric /// places that are aware of this to make it perfectly explicit what is going 880b57cec5SDimitry Andric /// on. 
890b57cec5SDimitry Andric using word_t = size_t; 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric private: 920b57cec5SDimitry Andric word_t CurWord = 0; 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric /// This is the number of bits in CurWord that are valid. This is always from 950b57cec5SDimitry Andric /// [0...bits_of(size_t)-1] inclusive. 960b57cec5SDimitry Andric unsigned BitsInCurWord = 0; 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric public: 990b57cec5SDimitry Andric SimpleBitstreamCursor() = default; SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)1000b57cec5SDimitry Andric explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) 1010b57cec5SDimitry Andric : BitcodeBytes(BitcodeBytes) {} SimpleBitstreamCursor(StringRef BitcodeBytes)1020b57cec5SDimitry Andric explicit SimpleBitstreamCursor(StringRef BitcodeBytes) 1030b57cec5SDimitry Andric : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {} SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)1040b57cec5SDimitry Andric explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes) 1050b57cec5SDimitry Andric : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {} 1060b57cec5SDimitry Andric canSkipToPos(size_t pos)1070b57cec5SDimitry Andric bool canSkipToPos(size_t pos) const { 1080b57cec5SDimitry Andric // pos can be skipped to if it is a valid address or one byte past the end. 1090b57cec5SDimitry Andric return pos <= BitcodeBytes.size(); 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric AtEndOfStream()1120b57cec5SDimitry Andric bool AtEndOfStream() { 1130b57cec5SDimitry Andric return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar; 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric /// Return the bit # of the bit we are reading. 
GetCurrentBitNo()1170b57cec5SDimitry Andric uint64_t GetCurrentBitNo() const { 118*5f757f3fSDimitry Andric return uint64_t(NextChar)*CHAR_BIT - BitsInCurWord; 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Return the byte # of the current bit. getCurrentByteNo()1220b57cec5SDimitry Andric uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } 1230b57cec5SDimitry Andric getBitcodeBytes()1240b57cec5SDimitry Andric ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric /// Reset the stream to the specified bit number. JumpToBit(uint64_t BitNo)1270b57cec5SDimitry Andric Error JumpToBit(uint64_t BitNo) { 1280b57cec5SDimitry Andric size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); 1290b57cec5SDimitry Andric unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); 1300b57cec5SDimitry Andric assert(canSkipToPos(ByteNo) && "Invalid location"); 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // Move the cursor to the right word. 1330b57cec5SDimitry Andric NextChar = ByteNo; 1340b57cec5SDimitry Andric BitsInCurWord = 0; 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric // Skip over any bits that are already consumed. 1370b57cec5SDimitry Andric if (WordBitNo) { 1380b57cec5SDimitry Andric if (Expected<word_t> Res = Read(WordBitNo)) 1390b57cec5SDimitry Andric return Error::success(); 1400b57cec5SDimitry Andric else 1410b57cec5SDimitry Andric return Res.takeError(); 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric return Error::success(); 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric /// Get a pointer into the bitstream at the specified byte offset. 
getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)1480b57cec5SDimitry Andric const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { 1490b57cec5SDimitry Andric return BitcodeBytes.data() + ByteNo; 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric /// Get a pointer into the bitstream at the specified bit offset. 1530b57cec5SDimitry Andric /// 1540b57cec5SDimitry Andric /// The bit offset must be on a byte boundary. getPointerToBit(uint64_t BitNo,uint64_t NumBytes)1550b57cec5SDimitry Andric const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { 1560b57cec5SDimitry Andric assert(!(BitNo % 8) && "Expected bit on byte boundary"); 1570b57cec5SDimitry Andric return getPointerToByte(BitNo / 8, NumBytes); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric fillCurWord()1600b57cec5SDimitry Andric Error fillCurWord() { 1610b57cec5SDimitry Andric if (NextChar >= BitcodeBytes.size()) 1620b57cec5SDimitry Andric return createStringError(std::errc::io_error, 1630b57cec5SDimitry Andric "Unexpected end of file reading %u of %u bytes", 1640b57cec5SDimitry Andric NextChar, BitcodeBytes.size()); 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric // Read the next word from the stream. 1670b57cec5SDimitry Andric const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar; 1680b57cec5SDimitry Andric unsigned BytesRead; 1690b57cec5SDimitry Andric if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { 1700b57cec5SDimitry Andric BytesRead = sizeof(word_t); 1710b57cec5SDimitry Andric CurWord = 172*5f757f3fSDimitry Andric support::endian::read<word_t, llvm::endianness::little>(NextCharPtr); 1730b57cec5SDimitry Andric } else { 1740b57cec5SDimitry Andric // Short read. 
1750b57cec5SDimitry Andric BytesRead = BitcodeBytes.size() - NextChar; 1760b57cec5SDimitry Andric CurWord = 0; 1770b57cec5SDimitry Andric for (unsigned B = 0; B != BytesRead; ++B) 1780b57cec5SDimitry Andric CurWord |= uint64_t(NextCharPtr[B]) << (B * 8); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric NextChar += BytesRead; 1810b57cec5SDimitry Andric BitsInCurWord = BytesRead * 8; 1820b57cec5SDimitry Andric return Error::success(); 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric Read(unsigned NumBits)1850b57cec5SDimitry Andric Expected<word_t> Read(unsigned NumBits) { 18681ad6265SDimitry Andric static const unsigned BitsInWord = sizeof(word_t) * 8; 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric assert(NumBits && NumBits <= BitsInWord && 1890b57cec5SDimitry Andric "Cannot return zero or more than BitsInWord bits!"); 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric // If the field is fully contained by CurWord, return it quickly. 1940b57cec5SDimitry Andric if (BitsInCurWord >= NumBits) { 1950b57cec5SDimitry Andric word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric // Use a mask to avoid undefined behavior. 1980b57cec5SDimitry Andric CurWord >>= (NumBits & Mask); 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric BitsInCurWord -= NumBits; 2010b57cec5SDimitry Andric return R; 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric word_t R = BitsInCurWord ? CurWord : 0; 2050b57cec5SDimitry Andric unsigned BitsLeft = NumBits - BitsInCurWord; 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric if (Error fillResult = fillCurWord()) 2080b57cec5SDimitry Andric return std::move(fillResult); 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric // If we run out of data, abort. 
2110b57cec5SDimitry Andric if (BitsLeft > BitsInCurWord) 2120b57cec5SDimitry Andric return createStringError(std::errc::io_error, 2130b57cec5SDimitry Andric "Unexpected end of file reading %u of %u bits", 2140b57cec5SDimitry Andric BitsInCurWord, BitsLeft); 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric // Use a mask to avoid undefined behavior. 2190b57cec5SDimitry Andric CurWord >>= (BitsLeft & Mask); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric BitsInCurWord -= BitsLeft; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric R |= R2 << (NumBits - BitsLeft); 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric return R; 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric ReadVBR(const unsigned NumBits)22881ad6265SDimitry Andric Expected<uint32_t> ReadVBR(const unsigned NumBits) { 2290b57cec5SDimitry Andric Expected<unsigned> MaybeRead = Read(NumBits); 2300b57cec5SDimitry Andric if (!MaybeRead) 2310b57cec5SDimitry Andric return MaybeRead; 2320b57cec5SDimitry Andric uint32_t Piece = MaybeRead.get(); 2330b57cec5SDimitry Andric 23481ad6265SDimitry Andric assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value"); 23581ad6265SDimitry Andric const uint32_t MaskBitOrder = (NumBits - 1); 23681ad6265SDimitry Andric const uint32_t Mask = 1UL << MaskBitOrder; 23781ad6265SDimitry Andric 23881ad6265SDimitry Andric if ((Piece & Mask) == 0) 2390b57cec5SDimitry Andric return Piece; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric uint32_t Result = 0; 2420b57cec5SDimitry Andric unsigned NextBit = 0; 2430b57cec5SDimitry Andric while (true) { 24481ad6265SDimitry Andric Result |= (Piece & (Mask - 1)) << NextBit; 2450b57cec5SDimitry Andric 24681ad6265SDimitry Andric if ((Piece & Mask) == 0) 2470b57cec5SDimitry Andric return Result; 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric NextBit += NumBits-1; 25081ad6265SDimitry Andric if 
(NextBit >= 32) 25181ad6265SDimitry Andric return createStringError(std::errc::illegal_byte_sequence, 25281ad6265SDimitry Andric "Unterminated VBR"); 25381ad6265SDimitry Andric 2540b57cec5SDimitry Andric MaybeRead = Read(NumBits); 2550b57cec5SDimitry Andric if (!MaybeRead) 2560b57cec5SDimitry Andric return MaybeRead; 2570b57cec5SDimitry Andric Piece = MaybeRead.get(); 2580b57cec5SDimitry Andric } 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric // Read a VBR that may have a value up to 64-bits in size. The chunk size of 2620b57cec5SDimitry Andric // the VBR must still be <= 32 bits though. ReadVBR64(const unsigned NumBits)26381ad6265SDimitry Andric Expected<uint64_t> ReadVBR64(const unsigned NumBits) { 2640b57cec5SDimitry Andric Expected<uint64_t> MaybeRead = Read(NumBits); 2650b57cec5SDimitry Andric if (!MaybeRead) 2660b57cec5SDimitry Andric return MaybeRead; 2670b57cec5SDimitry Andric uint32_t Piece = MaybeRead.get(); 26881ad6265SDimitry Andric assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value"); 26981ad6265SDimitry Andric const uint32_t MaskBitOrder = (NumBits - 1); 27081ad6265SDimitry Andric const uint32_t Mask = 1UL << MaskBitOrder; 2710b57cec5SDimitry Andric 27281ad6265SDimitry Andric if ((Piece & Mask) == 0) 2730b57cec5SDimitry Andric return uint64_t(Piece); 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric uint64_t Result = 0; 2760b57cec5SDimitry Andric unsigned NextBit = 0; 2770b57cec5SDimitry Andric while (true) { 27881ad6265SDimitry Andric Result |= uint64_t(Piece & (Mask - 1)) << NextBit; 2790b57cec5SDimitry Andric 28081ad6265SDimitry Andric if ((Piece & Mask) == 0) 2810b57cec5SDimitry Andric return Result; 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric NextBit += NumBits-1; 28481ad6265SDimitry Andric if (NextBit >= 64) 28581ad6265SDimitry Andric return createStringError(std::errc::illegal_byte_sequence, 28681ad6265SDimitry Andric "Unterminated VBR"); 28781ad6265SDimitry Andric 
2880b57cec5SDimitry Andric MaybeRead = Read(NumBits); 2890b57cec5SDimitry Andric if (!MaybeRead) 2900b57cec5SDimitry Andric return MaybeRead; 2910b57cec5SDimitry Andric Piece = MaybeRead.get(); 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric SkipToFourByteBoundary()2950b57cec5SDimitry Andric void SkipToFourByteBoundary() { 2960b57cec5SDimitry Andric // If word_t is 64-bits and if we've read less than 32 bits, just dump 2970b57cec5SDimitry Andric // the bits we have up to the next 32-bit boundary. 2980b57cec5SDimitry Andric if (sizeof(word_t) > 4 && 2990b57cec5SDimitry Andric BitsInCurWord >= 32) { 3000b57cec5SDimitry Andric CurWord >>= BitsInCurWord-32; 3010b57cec5SDimitry Andric BitsInCurWord = 32; 3020b57cec5SDimitry Andric return; 3030b57cec5SDimitry Andric } 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric BitsInCurWord = 0; 3060b57cec5SDimitry Andric } 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric /// Return the size of the stream in bytes. SizeInBytes()3090b57cec5SDimitry Andric size_t SizeInBytes() const { return BitcodeBytes.size(); } 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric /// Skip to the end of the file. skipToEnd()3120b57cec5SDimitry Andric void skipToEnd() { NextChar = BitcodeBytes.size(); } 31381ad6265SDimitry Andric 31481ad6265SDimitry Andric /// Check whether a reservation of Size elements is plausible. isSizePlausible(size_t Size)31581ad6265SDimitry Andric bool isSizePlausible(size_t Size) const { 31681ad6265SDimitry Andric // Don't allow reserving more elements than the number of bits, assuming 31781ad6265SDimitry Andric // at least one bit is needed to encode an element. 
31881ad6265SDimitry Andric return Size < BitcodeBytes.size() * 8; 31981ad6265SDimitry Andric } 3200b57cec5SDimitry Andric }; 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric /// When advancing through a bitstream cursor, each advance can discover a few 3230b57cec5SDimitry Andric /// different kinds of entries: 3240b57cec5SDimitry Andric struct BitstreamEntry { 3250b57cec5SDimitry Andric enum { 3260b57cec5SDimitry Andric Error, // Malformed bitcode was found. 3270b57cec5SDimitry Andric EndBlock, // We've reached the end of the current block, (or the end of the 3280b57cec5SDimitry Andric // file, which is treated like a series of EndBlock records. 3290b57cec5SDimitry Andric SubBlock, // This is the start of a new subblock of a specific ID. 3300b57cec5SDimitry Andric Record // This is a record with a specific AbbrevID. 3310b57cec5SDimitry Andric } Kind; 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric unsigned ID; 3340b57cec5SDimitry Andric getErrorBitstreamEntry3350b57cec5SDimitry Andric static BitstreamEntry getError() { 3360b57cec5SDimitry Andric BitstreamEntry E; E.Kind = Error; return E; 3370b57cec5SDimitry Andric } 3380b57cec5SDimitry Andric getEndBlockBitstreamEntry3390b57cec5SDimitry Andric static BitstreamEntry getEndBlock() { 3400b57cec5SDimitry Andric BitstreamEntry E; E.Kind = EndBlock; return E; 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric getSubBlockBitstreamEntry3430b57cec5SDimitry Andric static BitstreamEntry getSubBlock(unsigned ID) { 3440b57cec5SDimitry Andric BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; 3450b57cec5SDimitry Andric } 3460b57cec5SDimitry Andric getRecordBitstreamEntry3470b57cec5SDimitry Andric static BitstreamEntry getRecord(unsigned AbbrevID) { 3480b57cec5SDimitry Andric BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric }; 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric /// This represents a position within a bitcode file, 
implemented on top of a 3530b57cec5SDimitry Andric /// SimpleBitstreamCursor. 3540b57cec5SDimitry Andric /// 3550b57cec5SDimitry Andric /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not 3560b57cec5SDimitry Andric /// be passed by value. 3570b57cec5SDimitry Andric class BitstreamCursor : SimpleBitstreamCursor { 3580b57cec5SDimitry Andric // This is the declared size of code values used for the current block, in 3590b57cec5SDimitry Andric // bits. 3600b57cec5SDimitry Andric unsigned CurCodeSize = 2; 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric /// Abbrevs installed at in this block. 3630b57cec5SDimitry Andric std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs; 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric struct Block { 3660b57cec5SDimitry Andric unsigned PrevCodeSize; 3670b57cec5SDimitry Andric std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; 3680b57cec5SDimitry Andric BlockBlock3690b57cec5SDimitry Andric explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} 3700b57cec5SDimitry Andric }; 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric /// This tracks the codesize of parent blocks. 
3730b57cec5SDimitry Andric SmallVector<Block, 8> BlockScope; 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric BitstreamBlockInfo *BlockInfo = nullptr; 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric public: 37881ad6265SDimitry Andric static const size_t MaxChunkSize = 32; 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric BitstreamCursor() = default; BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)3810b57cec5SDimitry Andric explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) 3820b57cec5SDimitry Andric : SimpleBitstreamCursor(BitcodeBytes) {} BitstreamCursor(StringRef BitcodeBytes)3830b57cec5SDimitry Andric explicit BitstreamCursor(StringRef BitcodeBytes) 3840b57cec5SDimitry Andric : SimpleBitstreamCursor(BitcodeBytes) {} BitstreamCursor(MemoryBufferRef BitcodeBytes)3850b57cec5SDimitry Andric explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) 3860b57cec5SDimitry Andric : SimpleBitstreamCursor(BitcodeBytes) {} 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric using SimpleBitstreamCursor::AtEndOfStream; 3890b57cec5SDimitry Andric using SimpleBitstreamCursor::canSkipToPos; 3900b57cec5SDimitry Andric using SimpleBitstreamCursor::fillCurWord; 3910b57cec5SDimitry Andric using SimpleBitstreamCursor::getBitcodeBytes; 3920b57cec5SDimitry Andric using SimpleBitstreamCursor::GetCurrentBitNo; 3930b57cec5SDimitry Andric using SimpleBitstreamCursor::getCurrentByteNo; 3940b57cec5SDimitry Andric using SimpleBitstreamCursor::getPointerToByte; 3950b57cec5SDimitry Andric using SimpleBitstreamCursor::JumpToBit; 3960b57cec5SDimitry Andric using SimpleBitstreamCursor::Read; 3970b57cec5SDimitry Andric using SimpleBitstreamCursor::ReadVBR; 3980b57cec5SDimitry Andric using SimpleBitstreamCursor::ReadVBR64; 3990b57cec5SDimitry Andric using SimpleBitstreamCursor::SizeInBytes; 4008bcb0991SDimitry Andric using SimpleBitstreamCursor::skipToEnd; 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric /// Return the number of bits used to encode an abbrev #. 
getAbbrevIDWidth()4030b57cec5SDimitry Andric unsigned getAbbrevIDWidth() const { return CurCodeSize; } 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric /// Flags that modify the behavior of advance(). 4060b57cec5SDimitry Andric enum { 4070b57cec5SDimitry Andric /// If this flag is used, the advance() method does not automatically pop 4080b57cec5SDimitry Andric /// the block scope when the end of a block is reached. 4090b57cec5SDimitry Andric AF_DontPopBlockAtEnd = 1, 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric /// If this flag is used, abbrev entries are returned just like normal 4120b57cec5SDimitry Andric /// records. 4130b57cec5SDimitry Andric AF_DontAutoprocessAbbrevs = 2 4140b57cec5SDimitry Andric }; 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric /// Advance the current bitstream, returning the next entry in the stream. 4170b57cec5SDimitry Andric Expected<BitstreamEntry> advance(unsigned Flags = 0) { 4180b57cec5SDimitry Andric while (true) { 4190b57cec5SDimitry Andric if (AtEndOfStream()) 4200b57cec5SDimitry Andric return BitstreamEntry::getError(); 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric Expected<unsigned> MaybeCode = ReadCode(); 4230b57cec5SDimitry Andric if (!MaybeCode) 4240b57cec5SDimitry Andric return MaybeCode.takeError(); 4250b57cec5SDimitry Andric unsigned Code = MaybeCode.get(); 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric if (Code == bitc::END_BLOCK) { 4280b57cec5SDimitry Andric // Pop the end of the block unless Flags tells us not to. 
4290b57cec5SDimitry Andric if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) 4300b57cec5SDimitry Andric return BitstreamEntry::getError(); 4310b57cec5SDimitry Andric return BitstreamEntry::getEndBlock(); 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric if (Code == bitc::ENTER_SUBBLOCK) { 4350b57cec5SDimitry Andric if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID()) 4360b57cec5SDimitry Andric return BitstreamEntry::getSubBlock(MaybeSubBlock.get()); 4370b57cec5SDimitry Andric else 4380b57cec5SDimitry Andric return MaybeSubBlock.takeError(); 4390b57cec5SDimitry Andric } 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric if (Code == bitc::DEFINE_ABBREV && 4420b57cec5SDimitry Andric !(Flags & AF_DontAutoprocessAbbrevs)) { 4430b57cec5SDimitry Andric // We read and accumulate abbrev's, the client can't do anything with 4440b57cec5SDimitry Andric // them anyway. 4450b57cec5SDimitry Andric if (Error Err = ReadAbbrevRecord()) 4460b57cec5SDimitry Andric return std::move(Err); 4470b57cec5SDimitry Andric continue; 4480b57cec5SDimitry Andric } 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric return BitstreamEntry::getRecord(Code); 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric } 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric /// This is a convenience function for clients that don't expect any 4550b57cec5SDimitry Andric /// subblocks. This just skips over them automatically. 4560b57cec5SDimitry Andric Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) { 4570b57cec5SDimitry Andric while (true) { 4580b57cec5SDimitry Andric // If we found a normal entry, return it. 
4590b57cec5SDimitry Andric Expected<BitstreamEntry> MaybeEntry = advance(Flags); 4600b57cec5SDimitry Andric if (!MaybeEntry) 4610b57cec5SDimitry Andric return MaybeEntry; 4620b57cec5SDimitry Andric BitstreamEntry Entry = MaybeEntry.get(); 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric if (Entry.Kind != BitstreamEntry::SubBlock) 4650b57cec5SDimitry Andric return Entry; 4660b57cec5SDimitry Andric 4670b57cec5SDimitry Andric // If we found a sub-block, just skip over it and check the next entry. 4680b57cec5SDimitry Andric if (Error Err = SkipBlock()) 4690b57cec5SDimitry Andric return std::move(Err); 4700b57cec5SDimitry Andric } 4710b57cec5SDimitry Andric } 4720b57cec5SDimitry Andric ReadCode()4730b57cec5SDimitry Andric Expected<unsigned> ReadCode() { return Read(CurCodeSize); } 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric // Block header: 4760b57cec5SDimitry Andric // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. ReadSubBlockID()4790b57cec5SDimitry Andric Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); } 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body 4820b57cec5SDimitry Andric /// of this block. SkipBlock()4830b57cec5SDimitry Andric Error SkipBlock() { 4840b57cec5SDimitry Andric // Read and ignore the codelen value. 4850b57cec5SDimitry Andric if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth)) 4860b57cec5SDimitry Andric ; // Since we are skipping this block, we don't care what code widths are 4870b57cec5SDimitry Andric // used inside of it. 
4880b57cec5SDimitry Andric else 4890b57cec5SDimitry Andric return Res.takeError(); 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric SkipToFourByteBoundary(); 4920b57cec5SDimitry Andric Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth); 4930b57cec5SDimitry Andric if (!MaybeNum) 4940b57cec5SDimitry Andric return MaybeNum.takeError(); 4950b57cec5SDimitry Andric size_t NumFourBytes = MaybeNum.get(); 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric // Check that the block wasn't partially defined, and that the offset isn't 4980b57cec5SDimitry Andric // bogus. 4990b57cec5SDimitry Andric size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8; 5000b57cec5SDimitry Andric if (AtEndOfStream()) 5010b57cec5SDimitry Andric return createStringError(std::errc::illegal_byte_sequence, 5020b57cec5SDimitry Andric "can't skip block: already at end of stream"); 5030b57cec5SDimitry Andric if (!canSkipToPos(SkipTo / 8)) 5040b57cec5SDimitry Andric return createStringError(std::errc::illegal_byte_sequence, 5050b57cec5SDimitry Andric "can't skip to bit %zu from %" PRIu64, SkipTo, 5060b57cec5SDimitry Andric GetCurrentBitNo()); 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric if (Error Res = JumpToBit(SkipTo)) 5090b57cec5SDimitry Andric return Res; 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric return Error::success(); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block. 
5150b57cec5SDimitry Andric Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); 5160b57cec5SDimitry Andric ReadBlockEnd()5170b57cec5SDimitry Andric bool ReadBlockEnd() { 5180b57cec5SDimitry Andric if (BlockScope.empty()) return true; 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric // Block tail: 5210b57cec5SDimitry Andric // [END_BLOCK, <align4bytes>] 5220b57cec5SDimitry Andric SkipToFourByteBoundary(); 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric popBlockScope(); 5250b57cec5SDimitry Andric return false; 5260b57cec5SDimitry Andric } 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric private: popBlockScope()5290b57cec5SDimitry Andric void popBlockScope() { 5300b57cec5SDimitry Andric CurCodeSize = BlockScope.back().PrevCodeSize; 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); 5330b57cec5SDimitry Andric BlockScope.pop_back(); 5340b57cec5SDimitry Andric } 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric //===--------------------------------------------------------------------===// 5370b57cec5SDimitry Andric // Record Processing 5380b57cec5SDimitry Andric //===--------------------------------------------------------------------===// 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric public: 5410b57cec5SDimitry Andric /// Return the abbreviation for the specified AbbrevId. 
getAbbrev(unsigned AbbrevID)54281ad6265SDimitry Andric Expected<const BitCodeAbbrev *> getAbbrev(unsigned AbbrevID) { 5430b57cec5SDimitry Andric unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; 5440b57cec5SDimitry Andric if (AbbrevNo >= CurAbbrevs.size()) 54581ad6265SDimitry Andric return createStringError( 54681ad6265SDimitry Andric std::errc::illegal_byte_sequence, "Invalid abbrev number"); 5470b57cec5SDimitry Andric return CurAbbrevs[AbbrevNo].get(); 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric /// Read the current record and discard it, returning the code for the record. 5510b57cec5SDimitry Andric Expected<unsigned> skipRecord(unsigned AbbrevID); 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric Expected<unsigned> readRecord(unsigned AbbrevID, 5540b57cec5SDimitry Andric SmallVectorImpl<uint64_t> &Vals, 5550b57cec5SDimitry Andric StringRef *Blob = nullptr); 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric //===--------------------------------------------------------------------===// 5580b57cec5SDimitry Andric // Abbrev Processing 5590b57cec5SDimitry Andric //===--------------------------------------------------------------------===// 5600b57cec5SDimitry Andric Error ReadAbbrevRecord(); 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric /// Read and return a block info block from the bitstream. If an error was 563bdd1243dSDimitry Andric /// encountered, return std::nullopt. 5640b57cec5SDimitry Andric /// 5650b57cec5SDimitry Andric /// \param ReadBlockInfoNames Whether to read block/record name information in 5660b57cec5SDimitry Andric /// the BlockInfo block. Only llvm-bcanalyzer uses this. 
567bdd1243dSDimitry Andric Expected<std::optional<BitstreamBlockInfo>> 5680b57cec5SDimitry Andric ReadBlockInfoBlock(bool ReadBlockInfoNames = false); 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric /// Set the block info to be used by this BitstreamCursor to interpret 5710b57cec5SDimitry Andric /// abbreviated records. setBlockInfo(BitstreamBlockInfo * BI)5720b57cec5SDimitry Andric void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; } 5730b57cec5SDimitry Andric }; 5740b57cec5SDimitry Andric 5750b57cec5SDimitry Andric } // end llvm namespace 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric #endif // LLVM_BITSTREAM_BITSTREAMREADER_H 578