xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/Bitstream/BitstreamReader.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This header defines the BitstreamReader class.  This class can be used to
100b57cec5SDimitry Andric // read an arbitrary bitstream, regardless of its contents.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
150b57cec5SDimitry Andric #define LLVM_BITSTREAM_BITSTREAMREADER_H
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/Bitstream/BitCodes.h"
200b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
215ffd83dbSDimitry Andric #include "llvm/Support/Error.h"
221fd87a68SDimitry Andric #include "llvm/Support/MemoryBufferRef.h"
230b57cec5SDimitry Andric #include <algorithm>
240b57cec5SDimitry Andric #include <cassert>
250b57cec5SDimitry Andric #include <climits>
260b57cec5SDimitry Andric #include <cstddef>
270b57cec5SDimitry Andric #include <cstdint>
280b57cec5SDimitry Andric #include <memory>
29bdd1243dSDimitry Andric #include <optional>
300b57cec5SDimitry Andric #include <string>
310b57cec5SDimitry Andric #include <utility>
320b57cec5SDimitry Andric #include <vector>
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric namespace llvm {
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric /// This class maintains the abbreviations read from a block info block.
370b57cec5SDimitry Andric class BitstreamBlockInfo {
380b57cec5SDimitry Andric public:
390b57cec5SDimitry Andric   /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
400b57cec5SDimitry Andric   /// describe abbreviations that all blocks of the specified ID inherit.
410b57cec5SDimitry Andric   struct BlockInfo {
42480093f4SDimitry Andric     unsigned BlockID = 0;
430b57cec5SDimitry Andric     std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
440b57cec5SDimitry Andric     std::string Name;
450b57cec5SDimitry Andric     std::vector<std::pair<unsigned, std::string>> RecordNames;
460b57cec5SDimitry Andric   };
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric private:
490b57cec5SDimitry Andric   std::vector<BlockInfo> BlockInfoRecords;
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric public:
520b57cec5SDimitry Andric   /// If there is block info for the specified ID, return it, otherwise return
530b57cec5SDimitry Andric   /// null.
getBlockInfo(unsigned BlockID)540b57cec5SDimitry Andric   const BlockInfo *getBlockInfo(unsigned BlockID) const {
550b57cec5SDimitry Andric     // Common case, the most recent entry matches BlockID.
560b57cec5SDimitry Andric     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
570b57cec5SDimitry Andric       return &BlockInfoRecords.back();
580b57cec5SDimitry Andric 
59bdd1243dSDimitry Andric     for (const BlockInfo &BI : BlockInfoRecords)
60bdd1243dSDimitry Andric       if (BI.BlockID == BlockID)
61bdd1243dSDimitry Andric         return &BI;
620b57cec5SDimitry Andric     return nullptr;
630b57cec5SDimitry Andric   }
640b57cec5SDimitry Andric 
getOrCreateBlockInfo(unsigned BlockID)650b57cec5SDimitry Andric   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
660b57cec5SDimitry Andric     if (const BlockInfo *BI = getBlockInfo(BlockID))
670b57cec5SDimitry Andric       return *const_cast<BlockInfo*>(BI);
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric     // Otherwise, add a new record.
700b57cec5SDimitry Andric     BlockInfoRecords.emplace_back();
710b57cec5SDimitry Andric     BlockInfoRecords.back().BlockID = BlockID;
720b57cec5SDimitry Andric     return BlockInfoRecords.back();
730b57cec5SDimitry Andric   }
740b57cec5SDimitry Andric };
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric /// This represents a position within a bitstream. There may be multiple
770b57cec5SDimitry Andric /// independent cursors reading within one bitstream, each maintaining their
780b57cec5SDimitry Andric /// own local state.
790b57cec5SDimitry Andric class SimpleBitstreamCursor {
800b57cec5SDimitry Andric   ArrayRef<uint8_t> BitcodeBytes;
810b57cec5SDimitry Andric   size_t NextChar = 0;
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric public:
840b57cec5SDimitry Andric   /// This is the current data we have pulled from the stream but have not
850b57cec5SDimitry Andric   /// returned to the client. This is specifically and intentionally defined to
860b57cec5SDimitry Andric   /// follow the word size of the host machine for efficiency. We use word_t in
870b57cec5SDimitry Andric   /// places that are aware of this to make it perfectly explicit what is going
880b57cec5SDimitry Andric   /// on.
890b57cec5SDimitry Andric   using word_t = size_t;
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric private:
920b57cec5SDimitry Andric   word_t CurWord = 0;
930b57cec5SDimitry Andric 
  /// This is the number of bits in CurWord that are valid. It never exceeds
  /// bits_of(word_t): fillCurWord() sets it to the number of bits it loaded
  /// and Read() subtracts the bits it hands out.
960b57cec5SDimitry Andric   unsigned BitsInCurWord = 0;
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric public:
990b57cec5SDimitry Andric   SimpleBitstreamCursor() = default;
SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)1000b57cec5SDimitry Andric   explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
1010b57cec5SDimitry Andric       : BitcodeBytes(BitcodeBytes) {}
SimpleBitstreamCursor(StringRef BitcodeBytes)1020b57cec5SDimitry Andric   explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
1030b57cec5SDimitry Andric       : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)1040b57cec5SDimitry Andric   explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
1050b57cec5SDimitry Andric       : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
1060b57cec5SDimitry Andric 
canSkipToPos(size_t pos)1070b57cec5SDimitry Andric   bool canSkipToPos(size_t pos) const {
1080b57cec5SDimitry Andric     // pos can be skipped to if it is a valid address or one byte past the end.
1090b57cec5SDimitry Andric     return pos <= BitcodeBytes.size();
1100b57cec5SDimitry Andric   }
1110b57cec5SDimitry Andric 
AtEndOfStream()1120b57cec5SDimitry Andric   bool AtEndOfStream() {
1130b57cec5SDimitry Andric     return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
1140b57cec5SDimitry Andric   }
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric   /// Return the bit # of the bit we are reading.
GetCurrentBitNo()1170b57cec5SDimitry Andric   uint64_t GetCurrentBitNo() const {
118*5f757f3fSDimitry Andric     return uint64_t(NextChar)*CHAR_BIT - BitsInCurWord;
1190b57cec5SDimitry Andric   }
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   // Return the byte # of the current bit.
getCurrentByteNo()1220b57cec5SDimitry Andric   uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
1230b57cec5SDimitry Andric 
getBitcodeBytes()1240b57cec5SDimitry Andric   ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric   /// Reset the stream to the specified bit number.
JumpToBit(uint64_t BitNo)1270b57cec5SDimitry Andric   Error JumpToBit(uint64_t BitNo) {
1280b57cec5SDimitry Andric     size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
1290b57cec5SDimitry Andric     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
1300b57cec5SDimitry Andric     assert(canSkipToPos(ByteNo) && "Invalid location");
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric     // Move the cursor to the right word.
1330b57cec5SDimitry Andric     NextChar = ByteNo;
1340b57cec5SDimitry Andric     BitsInCurWord = 0;
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric     // Skip over any bits that are already consumed.
1370b57cec5SDimitry Andric     if (WordBitNo) {
1380b57cec5SDimitry Andric       if (Expected<word_t> Res = Read(WordBitNo))
1390b57cec5SDimitry Andric         return Error::success();
1400b57cec5SDimitry Andric       else
1410b57cec5SDimitry Andric         return Res.takeError();
1420b57cec5SDimitry Andric     }
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric     return Error::success();
1450b57cec5SDimitry Andric   }
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric   /// Get a pointer into the bitstream at the specified byte offset.
getPointerToByte(uint64_t ByteNo,uint64_t NumBytes)1480b57cec5SDimitry Andric   const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
1490b57cec5SDimitry Andric     return BitcodeBytes.data() + ByteNo;
1500b57cec5SDimitry Andric   }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric   /// Get a pointer into the bitstream at the specified bit offset.
1530b57cec5SDimitry Andric   ///
1540b57cec5SDimitry Andric   /// The bit offset must be on a byte boundary.
getPointerToBit(uint64_t BitNo,uint64_t NumBytes)1550b57cec5SDimitry Andric   const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
1560b57cec5SDimitry Andric     assert(!(BitNo % 8) && "Expected bit on byte boundary");
1570b57cec5SDimitry Andric     return getPointerToByte(BitNo / 8, NumBytes);
1580b57cec5SDimitry Andric   }
1590b57cec5SDimitry Andric 
fillCurWord()1600b57cec5SDimitry Andric   Error fillCurWord() {
1610b57cec5SDimitry Andric     if (NextChar >= BitcodeBytes.size())
1620b57cec5SDimitry Andric       return createStringError(std::errc::io_error,
1630b57cec5SDimitry Andric                                "Unexpected end of file reading %u of %u bytes",
1640b57cec5SDimitry Andric                                NextChar, BitcodeBytes.size());
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric     // Read the next word from the stream.
1670b57cec5SDimitry Andric     const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
1680b57cec5SDimitry Andric     unsigned BytesRead;
1690b57cec5SDimitry Andric     if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
1700b57cec5SDimitry Andric       BytesRead = sizeof(word_t);
1710b57cec5SDimitry Andric       CurWord =
172*5f757f3fSDimitry Andric           support::endian::read<word_t, llvm::endianness::little>(NextCharPtr);
1730b57cec5SDimitry Andric     } else {
1740b57cec5SDimitry Andric       // Short read.
1750b57cec5SDimitry Andric       BytesRead = BitcodeBytes.size() - NextChar;
1760b57cec5SDimitry Andric       CurWord = 0;
1770b57cec5SDimitry Andric       for (unsigned B = 0; B != BytesRead; ++B)
1780b57cec5SDimitry Andric         CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
1790b57cec5SDimitry Andric     }
1800b57cec5SDimitry Andric     NextChar += BytesRead;
1810b57cec5SDimitry Andric     BitsInCurWord = BytesRead * 8;
1820b57cec5SDimitry Andric     return Error::success();
1830b57cec5SDimitry Andric   }
1840b57cec5SDimitry Andric 
Read(unsigned NumBits)1850b57cec5SDimitry Andric   Expected<word_t> Read(unsigned NumBits) {
18681ad6265SDimitry Andric     static const unsigned BitsInWord = sizeof(word_t) * 8;
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric     assert(NumBits && NumBits <= BitsInWord &&
1890b57cec5SDimitry Andric            "Cannot return zero or more than BitsInWord bits!");
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric     static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric     // If the field is fully contained by CurWord, return it quickly.
1940b57cec5SDimitry Andric     if (BitsInCurWord >= NumBits) {
1950b57cec5SDimitry Andric       word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric       // Use a mask to avoid undefined behavior.
1980b57cec5SDimitry Andric       CurWord >>= (NumBits & Mask);
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric       BitsInCurWord -= NumBits;
2010b57cec5SDimitry Andric       return R;
2020b57cec5SDimitry Andric     }
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric     word_t R = BitsInCurWord ? CurWord : 0;
2050b57cec5SDimitry Andric     unsigned BitsLeft = NumBits - BitsInCurWord;
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric     if (Error fillResult = fillCurWord())
2080b57cec5SDimitry Andric       return std::move(fillResult);
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric     // If we run out of data, abort.
2110b57cec5SDimitry Andric     if (BitsLeft > BitsInCurWord)
2120b57cec5SDimitry Andric       return createStringError(std::errc::io_error,
2130b57cec5SDimitry Andric                                "Unexpected end of file reading %u of %u bits",
2140b57cec5SDimitry Andric                                BitsInCurWord, BitsLeft);
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric     word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric     // Use a mask to avoid undefined behavior.
2190b57cec5SDimitry Andric     CurWord >>= (BitsLeft & Mask);
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric     BitsInCurWord -= BitsLeft;
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric     R |= R2 << (NumBits - BitsLeft);
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric     return R;
2260b57cec5SDimitry Andric   }
2270b57cec5SDimitry Andric 
ReadVBR(const unsigned NumBits)22881ad6265SDimitry Andric   Expected<uint32_t> ReadVBR(const unsigned NumBits) {
2290b57cec5SDimitry Andric     Expected<unsigned> MaybeRead = Read(NumBits);
2300b57cec5SDimitry Andric     if (!MaybeRead)
2310b57cec5SDimitry Andric       return MaybeRead;
2320b57cec5SDimitry Andric     uint32_t Piece = MaybeRead.get();
2330b57cec5SDimitry Andric 
23481ad6265SDimitry Andric     assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
23581ad6265SDimitry Andric     const uint32_t MaskBitOrder = (NumBits - 1);
23681ad6265SDimitry Andric     const uint32_t Mask = 1UL << MaskBitOrder;
23781ad6265SDimitry Andric 
23881ad6265SDimitry Andric     if ((Piece & Mask) == 0)
2390b57cec5SDimitry Andric       return Piece;
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric     uint32_t Result = 0;
2420b57cec5SDimitry Andric     unsigned NextBit = 0;
2430b57cec5SDimitry Andric     while (true) {
24481ad6265SDimitry Andric       Result |= (Piece & (Mask - 1)) << NextBit;
2450b57cec5SDimitry Andric 
24681ad6265SDimitry Andric       if ((Piece & Mask) == 0)
2470b57cec5SDimitry Andric         return Result;
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric       NextBit += NumBits-1;
25081ad6265SDimitry Andric       if (NextBit >= 32)
25181ad6265SDimitry Andric         return createStringError(std::errc::illegal_byte_sequence,
25281ad6265SDimitry Andric                                  "Unterminated VBR");
25381ad6265SDimitry Andric 
2540b57cec5SDimitry Andric       MaybeRead = Read(NumBits);
2550b57cec5SDimitry Andric       if (!MaybeRead)
2560b57cec5SDimitry Andric         return MaybeRead;
2570b57cec5SDimitry Andric       Piece = MaybeRead.get();
2580b57cec5SDimitry Andric     }
2590b57cec5SDimitry Andric   }
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   // Read a VBR that may have a value up to 64-bits in size. The chunk size of
2620b57cec5SDimitry Andric   // the VBR must still be <= 32 bits though.
ReadVBR64(const unsigned NumBits)26381ad6265SDimitry Andric   Expected<uint64_t> ReadVBR64(const unsigned NumBits) {
2640b57cec5SDimitry Andric     Expected<uint64_t> MaybeRead = Read(NumBits);
2650b57cec5SDimitry Andric     if (!MaybeRead)
2660b57cec5SDimitry Andric       return MaybeRead;
2670b57cec5SDimitry Andric     uint32_t Piece = MaybeRead.get();
26881ad6265SDimitry Andric     assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
26981ad6265SDimitry Andric     const uint32_t MaskBitOrder = (NumBits - 1);
27081ad6265SDimitry Andric     const uint32_t Mask = 1UL << MaskBitOrder;
2710b57cec5SDimitry Andric 
27281ad6265SDimitry Andric     if ((Piece & Mask) == 0)
2730b57cec5SDimitry Andric       return uint64_t(Piece);
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric     uint64_t Result = 0;
2760b57cec5SDimitry Andric     unsigned NextBit = 0;
2770b57cec5SDimitry Andric     while (true) {
27881ad6265SDimitry Andric       Result |= uint64_t(Piece & (Mask - 1)) << NextBit;
2790b57cec5SDimitry Andric 
28081ad6265SDimitry Andric       if ((Piece & Mask) == 0)
2810b57cec5SDimitry Andric         return Result;
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric       NextBit += NumBits-1;
28481ad6265SDimitry Andric       if (NextBit >= 64)
28581ad6265SDimitry Andric         return createStringError(std::errc::illegal_byte_sequence,
28681ad6265SDimitry Andric                                  "Unterminated VBR");
28781ad6265SDimitry Andric 
2880b57cec5SDimitry Andric       MaybeRead = Read(NumBits);
2890b57cec5SDimitry Andric       if (!MaybeRead)
2900b57cec5SDimitry Andric         return MaybeRead;
2910b57cec5SDimitry Andric       Piece = MaybeRead.get();
2920b57cec5SDimitry Andric     }
2930b57cec5SDimitry Andric   }
2940b57cec5SDimitry Andric 
SkipToFourByteBoundary()2950b57cec5SDimitry Andric   void SkipToFourByteBoundary() {
2960b57cec5SDimitry Andric     // If word_t is 64-bits and if we've read less than 32 bits, just dump
2970b57cec5SDimitry Andric     // the bits we have up to the next 32-bit boundary.
2980b57cec5SDimitry Andric     if (sizeof(word_t) > 4 &&
2990b57cec5SDimitry Andric         BitsInCurWord >= 32) {
3000b57cec5SDimitry Andric       CurWord >>= BitsInCurWord-32;
3010b57cec5SDimitry Andric       BitsInCurWord = 32;
3020b57cec5SDimitry Andric       return;
3030b57cec5SDimitry Andric     }
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric     BitsInCurWord = 0;
3060b57cec5SDimitry Andric   }
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric   /// Return the size of the stream in bytes.
SizeInBytes()3090b57cec5SDimitry Andric   size_t SizeInBytes() const { return BitcodeBytes.size(); }
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   /// Skip to the end of the file.
skipToEnd()3120b57cec5SDimitry Andric   void skipToEnd() { NextChar = BitcodeBytes.size(); }
31381ad6265SDimitry Andric 
31481ad6265SDimitry Andric   /// Check whether a reservation of Size elements is plausible.
isSizePlausible(size_t Size)31581ad6265SDimitry Andric   bool isSizePlausible(size_t Size) const {
31681ad6265SDimitry Andric     // Don't allow reserving more elements than the number of bits, assuming
31781ad6265SDimitry Andric     // at least one bit is needed to encode an element.
31881ad6265SDimitry Andric     return Size < BitcodeBytes.size() * 8;
31981ad6265SDimitry Andric   }
3200b57cec5SDimitry Andric };
3210b57cec5SDimitry Andric 
/// When advancing through a bitstream cursor, each advance can discover a few
/// different kinds of entries:
struct BitstreamEntry {
  enum {
    Error,    // Malformed bitcode was found.
    EndBlock, // We've reached the end of the current block (or the end of the
              // file, which is treated like a series of EndBlock records).
    SubBlock, // This is the start of a new subblock of a specific ID.
    Record    // This is a record with a specific AbbrevID.
  } Kind;

  /// The block ID for SubBlock entries and the abbreviation ID for Record
  /// entries. The factory functions set it to 0 for Error and EndBlock.
  unsigned ID;

  /// Build an entry of kind Error.
  static BitstreamEntry getError() {
    BitstreamEntry E;
    E.Kind = Error;
    E.ID = 0; // Never hand out an indeterminate ID.
    return E;
  }

  /// Build an entry of kind EndBlock.
  static BitstreamEntry getEndBlock() {
    BitstreamEntry E;
    E.Kind = EndBlock;
    E.ID = 0; // Never hand out an indeterminate ID.
    return E;
  }

  /// Build an entry of kind SubBlock carrying block ID \p ID.
  static BitstreamEntry getSubBlock(unsigned ID) {
    BitstreamEntry E;
    E.Kind = SubBlock;
    E.ID = ID;
    return E;
  }

  /// Build an entry of kind Record carrying abbreviation ID \p AbbrevID.
  static BitstreamEntry getRecord(unsigned AbbrevID) {
    BitstreamEntry E;
    E.Kind = Record;
    E.ID = AbbrevID;
    return E;
  }
};
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric /// This represents a position within a bitcode file, implemented on top of a
3530b57cec5SDimitry Andric /// SimpleBitstreamCursor.
3540b57cec5SDimitry Andric ///
3550b57cec5SDimitry Andric /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
3560b57cec5SDimitry Andric /// be passed by value.
3570b57cec5SDimitry Andric class BitstreamCursor : SimpleBitstreamCursor {
3580b57cec5SDimitry Andric   // This is the declared size of code values used for the current block, in
3590b57cec5SDimitry Andric   // bits.
3600b57cec5SDimitry Andric   unsigned CurCodeSize = 2;
3610b57cec5SDimitry Andric 
  /// Abbrevs installed in this block.
3630b57cec5SDimitry Andric   std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric   struct Block {
3660b57cec5SDimitry Andric     unsigned PrevCodeSize;
3670b57cec5SDimitry Andric     std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
3680b57cec5SDimitry Andric 
BlockBlock3690b57cec5SDimitry Andric     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
3700b57cec5SDimitry Andric   };
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric   /// This tracks the codesize of parent blocks.
3730b57cec5SDimitry Andric   SmallVector<Block, 8> BlockScope;
3740b57cec5SDimitry Andric 
3750b57cec5SDimitry Andric   BitstreamBlockInfo *BlockInfo = nullptr;
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric public:
37881ad6265SDimitry Andric   static const size_t MaxChunkSize = 32;
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric   BitstreamCursor() = default;
BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)3810b57cec5SDimitry Andric   explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
3820b57cec5SDimitry Andric       : SimpleBitstreamCursor(BitcodeBytes) {}
BitstreamCursor(StringRef BitcodeBytes)3830b57cec5SDimitry Andric   explicit BitstreamCursor(StringRef BitcodeBytes)
3840b57cec5SDimitry Andric       : SimpleBitstreamCursor(BitcodeBytes) {}
BitstreamCursor(MemoryBufferRef BitcodeBytes)3850b57cec5SDimitry Andric   explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
3860b57cec5SDimitry Andric       : SimpleBitstreamCursor(BitcodeBytes) {}
3870b57cec5SDimitry Andric 
3880b57cec5SDimitry Andric   using SimpleBitstreamCursor::AtEndOfStream;
3890b57cec5SDimitry Andric   using SimpleBitstreamCursor::canSkipToPos;
3900b57cec5SDimitry Andric   using SimpleBitstreamCursor::fillCurWord;
3910b57cec5SDimitry Andric   using SimpleBitstreamCursor::getBitcodeBytes;
3920b57cec5SDimitry Andric   using SimpleBitstreamCursor::GetCurrentBitNo;
3930b57cec5SDimitry Andric   using SimpleBitstreamCursor::getCurrentByteNo;
3940b57cec5SDimitry Andric   using SimpleBitstreamCursor::getPointerToByte;
3950b57cec5SDimitry Andric   using SimpleBitstreamCursor::JumpToBit;
3960b57cec5SDimitry Andric   using SimpleBitstreamCursor::Read;
3970b57cec5SDimitry Andric   using SimpleBitstreamCursor::ReadVBR;
3980b57cec5SDimitry Andric   using SimpleBitstreamCursor::ReadVBR64;
3990b57cec5SDimitry Andric   using SimpleBitstreamCursor::SizeInBytes;
4008bcb0991SDimitry Andric   using SimpleBitstreamCursor::skipToEnd;
4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric   /// Return the number of bits used to encode an abbrev #.
getAbbrevIDWidth()4030b57cec5SDimitry Andric   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   /// Flags that modify the behavior of advance().
4060b57cec5SDimitry Andric   enum {
4070b57cec5SDimitry Andric     /// If this flag is used, the advance() method does not automatically pop
4080b57cec5SDimitry Andric     /// the block scope when the end of a block is reached.
4090b57cec5SDimitry Andric     AF_DontPopBlockAtEnd = 1,
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric     /// If this flag is used, abbrev entries are returned just like normal
4120b57cec5SDimitry Andric     /// records.
4130b57cec5SDimitry Andric     AF_DontAutoprocessAbbrevs = 2
4140b57cec5SDimitry Andric   };
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric   /// Advance the current bitstream, returning the next entry in the stream.
4170b57cec5SDimitry Andric   Expected<BitstreamEntry> advance(unsigned Flags = 0) {
4180b57cec5SDimitry Andric     while (true) {
4190b57cec5SDimitry Andric       if (AtEndOfStream())
4200b57cec5SDimitry Andric         return BitstreamEntry::getError();
4210b57cec5SDimitry Andric 
4220b57cec5SDimitry Andric       Expected<unsigned> MaybeCode = ReadCode();
4230b57cec5SDimitry Andric       if (!MaybeCode)
4240b57cec5SDimitry Andric         return MaybeCode.takeError();
4250b57cec5SDimitry Andric       unsigned Code = MaybeCode.get();
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric       if (Code == bitc::END_BLOCK) {
4280b57cec5SDimitry Andric         // Pop the end of the block unless Flags tells us not to.
4290b57cec5SDimitry Andric         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
4300b57cec5SDimitry Andric           return BitstreamEntry::getError();
4310b57cec5SDimitry Andric         return BitstreamEntry::getEndBlock();
4320b57cec5SDimitry Andric       }
4330b57cec5SDimitry Andric 
4340b57cec5SDimitry Andric       if (Code == bitc::ENTER_SUBBLOCK) {
4350b57cec5SDimitry Andric         if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
4360b57cec5SDimitry Andric           return BitstreamEntry::getSubBlock(MaybeSubBlock.get());
4370b57cec5SDimitry Andric         else
4380b57cec5SDimitry Andric           return MaybeSubBlock.takeError();
4390b57cec5SDimitry Andric       }
4400b57cec5SDimitry Andric 
4410b57cec5SDimitry Andric       if (Code == bitc::DEFINE_ABBREV &&
4420b57cec5SDimitry Andric           !(Flags & AF_DontAutoprocessAbbrevs)) {
4430b57cec5SDimitry Andric         // We read and accumulate abbrev's, the client can't do anything with
4440b57cec5SDimitry Andric         // them anyway.
4450b57cec5SDimitry Andric         if (Error Err = ReadAbbrevRecord())
4460b57cec5SDimitry Andric           return std::move(Err);
4470b57cec5SDimitry Andric         continue;
4480b57cec5SDimitry Andric       }
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric       return BitstreamEntry::getRecord(Code);
4510b57cec5SDimitry Andric     }
4520b57cec5SDimitry Andric   }
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   /// This is a convenience function for clients that don't expect any
4550b57cec5SDimitry Andric   /// subblocks. This just skips over them automatically.
4560b57cec5SDimitry Andric   Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
4570b57cec5SDimitry Andric     while (true) {
4580b57cec5SDimitry Andric       // If we found a normal entry, return it.
4590b57cec5SDimitry Andric       Expected<BitstreamEntry> MaybeEntry = advance(Flags);
4600b57cec5SDimitry Andric       if (!MaybeEntry)
4610b57cec5SDimitry Andric         return MaybeEntry;
4620b57cec5SDimitry Andric       BitstreamEntry Entry = MaybeEntry.get();
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric       if (Entry.Kind != BitstreamEntry::SubBlock)
4650b57cec5SDimitry Andric         return Entry;
4660b57cec5SDimitry Andric 
4670b57cec5SDimitry Andric       // If we found a sub-block, just skip over it and check the next entry.
4680b57cec5SDimitry Andric       if (Error Err = SkipBlock())
4690b57cec5SDimitry Andric         return std::move(Err);
4700b57cec5SDimitry Andric     }
4710b57cec5SDimitry Andric   }
4720b57cec5SDimitry Andric 
ReadCode()4730b57cec5SDimitry Andric   Expected<unsigned> ReadCode() { return Read(CurCodeSize); }
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric   // Block header:
4760b57cec5SDimitry Andric   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric   /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
ReadSubBlockID()4790b57cec5SDimitry Andric   Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric   /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
4820b57cec5SDimitry Andric   /// of this block.
SkipBlock()4830b57cec5SDimitry Andric   Error SkipBlock() {
4840b57cec5SDimitry Andric     // Read and ignore the codelen value.
4850b57cec5SDimitry Andric     if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth))
4860b57cec5SDimitry Andric       ; // Since we are skipping this block, we don't care what code widths are
4870b57cec5SDimitry Andric         // used inside of it.
4880b57cec5SDimitry Andric     else
4890b57cec5SDimitry Andric       return Res.takeError();
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric     SkipToFourByteBoundary();
4920b57cec5SDimitry Andric     Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth);
4930b57cec5SDimitry Andric     if (!MaybeNum)
4940b57cec5SDimitry Andric       return MaybeNum.takeError();
4950b57cec5SDimitry Andric     size_t NumFourBytes = MaybeNum.get();
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric     // Check that the block wasn't partially defined, and that the offset isn't
4980b57cec5SDimitry Andric     // bogus.
4990b57cec5SDimitry Andric     size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8;
5000b57cec5SDimitry Andric     if (AtEndOfStream())
5010b57cec5SDimitry Andric       return createStringError(std::errc::illegal_byte_sequence,
5020b57cec5SDimitry Andric                                "can't skip block: already at end of stream");
5030b57cec5SDimitry Andric     if (!canSkipToPos(SkipTo / 8))
5040b57cec5SDimitry Andric       return createStringError(std::errc::illegal_byte_sequence,
5050b57cec5SDimitry Andric                                "can't skip to bit %zu from %" PRIu64, SkipTo,
5060b57cec5SDimitry Andric                                GetCurrentBitNo());
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric     if (Error Res = JumpToBit(SkipTo))
5090b57cec5SDimitry Andric       return Res;
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric     return Error::success();
5120b57cec5SDimitry Andric   }
5130b57cec5SDimitry Andric 
  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
5150b57cec5SDimitry Andric   Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
5160b57cec5SDimitry Andric 
ReadBlockEnd()5170b57cec5SDimitry Andric   bool ReadBlockEnd() {
5180b57cec5SDimitry Andric     if (BlockScope.empty()) return true;
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric     // Block tail:
5210b57cec5SDimitry Andric     //    [END_BLOCK, <align4bytes>]
5220b57cec5SDimitry Andric     SkipToFourByteBoundary();
5230b57cec5SDimitry Andric 
5240b57cec5SDimitry Andric     popBlockScope();
5250b57cec5SDimitry Andric     return false;
5260b57cec5SDimitry Andric   }
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric private:
popBlockScope()5290b57cec5SDimitry Andric   void popBlockScope() {
5300b57cec5SDimitry Andric     CurCodeSize = BlockScope.back().PrevCodeSize;
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric     CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
5330b57cec5SDimitry Andric     BlockScope.pop_back();
5340b57cec5SDimitry Andric   }
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5370b57cec5SDimitry Andric   // Record Processing
5380b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5390b57cec5SDimitry Andric 
5400b57cec5SDimitry Andric public:
5410b57cec5SDimitry Andric   /// Return the abbreviation for the specified AbbrevId.
getAbbrev(unsigned AbbrevID)54281ad6265SDimitry Andric   Expected<const BitCodeAbbrev *> getAbbrev(unsigned AbbrevID) {
5430b57cec5SDimitry Andric     unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
5440b57cec5SDimitry Andric     if (AbbrevNo >= CurAbbrevs.size())
54581ad6265SDimitry Andric       return createStringError(
54681ad6265SDimitry Andric           std::errc::illegal_byte_sequence, "Invalid abbrev number");
5470b57cec5SDimitry Andric     return CurAbbrevs[AbbrevNo].get();
5480b57cec5SDimitry Andric   }
5490b57cec5SDimitry Andric 
5500b57cec5SDimitry Andric   /// Read the current record and discard it, returning the code for the record.
5510b57cec5SDimitry Andric   Expected<unsigned> skipRecord(unsigned AbbrevID);
5520b57cec5SDimitry Andric 
5530b57cec5SDimitry Andric   Expected<unsigned> readRecord(unsigned AbbrevID,
5540b57cec5SDimitry Andric                                 SmallVectorImpl<uint64_t> &Vals,
5550b57cec5SDimitry Andric                                 StringRef *Blob = nullptr);
5560b57cec5SDimitry Andric 
5570b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5580b57cec5SDimitry Andric   // Abbrev Processing
5590b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5600b57cec5SDimitry Andric   Error ReadAbbrevRecord();
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   /// Read and return a block info block from the bitstream. If an error was
563bdd1243dSDimitry Andric   /// encountered, return std::nullopt.
5640b57cec5SDimitry Andric   ///
5650b57cec5SDimitry Andric   /// \param ReadBlockInfoNames Whether to read block/record name information in
5660b57cec5SDimitry Andric   /// the BlockInfo block. Only llvm-bcanalyzer uses this.
567bdd1243dSDimitry Andric   Expected<std::optional<BitstreamBlockInfo>>
5680b57cec5SDimitry Andric   ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric   /// Set the block info to be used by this BitstreamCursor to interpret
5710b57cec5SDimitry Andric   /// abbreviated records.
setBlockInfo(BitstreamBlockInfo * BI)5720b57cec5SDimitry Andric   void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
5730b57cec5SDimitry Andric };
5740b57cec5SDimitry Andric 
5750b57cec5SDimitry Andric } // end llvm namespace
5760b57cec5SDimitry Andric 
5770b57cec5SDimitry Andric #endif // LLVM_BITSTREAM_BITSTREAMREADER_H
578