10b57cec5SDimitry Andric //===- BinaryStreamReader.h - Reads objects from a binary stream *- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #ifndef LLVM_SUPPORT_BINARYSTREAMREADER_H 100b57cec5SDimitry Andric #define LLVM_SUPPORT_BINARYSTREAMREADER_H 110b57cec5SDimitry Andric 120b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 13fe6060f1SDimitry Andric #include "llvm/ADT/StringRef.h" 145ffd83dbSDimitry Andric #include "llvm/Support/Alignment.h" 150b57cec5SDimitry Andric #include "llvm/Support/BinaryStreamArray.h" 160b57cec5SDimitry Andric #include "llvm/Support/BinaryStreamRef.h" 170b57cec5SDimitry Andric #include "llvm/Support/ConvertUTF.h" 180b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 190b57cec5SDimitry Andric #include "llvm/Support/Error.h" 200b57cec5SDimitry Andric #include <type_traits> 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric namespace llvm { 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric /// Provides read only access to a subclass of `BinaryStream`. Provides 250b57cec5SDimitry Andric /// bounds checking and helpers for writing certain common data types such as 260b57cec5SDimitry Andric /// null-terminated strings, integers in various flavors of endianness, etc. 270b57cec5SDimitry Andric /// Can be subclassed to provide reading of custom datatypes, although no 280b57cec5SDimitry Andric /// are overridable. 290b57cec5SDimitry Andric class BinaryStreamReader { 300b57cec5SDimitry Andric public: 310b57cec5SDimitry Andric BinaryStreamReader() = default; 320b57cec5SDimitry Andric explicit BinaryStreamReader(BinaryStreamRef Ref); 330b57cec5SDimitry Andric explicit BinaryStreamReader(BinaryStream &Stream); 34*5f757f3fSDimitry Andric explicit BinaryStreamReader(ArrayRef<uint8_t> Data, llvm::endianness Endian); 35*5f757f3fSDimitry Andric explicit BinaryStreamReader(StringRef Data, llvm::endianness Endian); 360b57cec5SDimitry Andric 371fd87a68SDimitry Andric BinaryStreamReader(const BinaryStreamReader &Other) = default; 380b57cec5SDimitry Andric 391fd87a68SDimitry Andric BinaryStreamReader &operator=(const BinaryStreamReader &Other) = default; 400b57cec5SDimitry Andric 411fd87a68SDimitry Andric virtual ~BinaryStreamReader() = default; 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric /// Read as much as possible from the underlying string at the current offset 440b57cec5SDimitry Andric /// without invoking a copy, and set \p Buffer to the resulting data slice. 450b57cec5SDimitry Andric /// Updates the stream's offset to point after the newly read data. 460b57cec5SDimitry Andric /// 470b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 480b57cec5SDimitry Andric /// returns an appropriate error code. 490b57cec5SDimitry Andric Error readLongestContiguousChunk(ArrayRef<uint8_t> &Buffer); 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric /// Read \p Size bytes from the underlying stream at the current offset and 520b57cec5SDimitry Andric /// and set \p Buffer to the resulting data slice. Whether a copy occurs 530b57cec5SDimitry Andric /// depends on the implementation of the underlying stream. Updates the 540b57cec5SDimitry Andric /// stream's offset to point after the newly read data. 550b57cec5SDimitry Andric /// 560b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 570b57cec5SDimitry Andric /// returns an appropriate error code. 580b57cec5SDimitry Andric Error readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size); 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric /// Read an integer of the specified endianness into \p Dest and update the 610b57cec5SDimitry Andric /// stream's offset. The data is always copied from the stream's underlying 620b57cec5SDimitry Andric /// buffer into \p Dest. Updates the stream's offset to point after the newly 630b57cec5SDimitry Andric /// read data. 640b57cec5SDimitry Andric /// 650b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 660b57cec5SDimitry Andric /// returns an appropriate error code. readInteger(T & Dest)670b57cec5SDimitry Andric template <typename T> Error readInteger(T &Dest) { 68bdd1243dSDimitry Andric static_assert(std::is_integral_v<T>, 690b57cec5SDimitry Andric "Cannot call readInteger with non-integral value!"); 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes; 720b57cec5SDimitry Andric if (auto EC = readBytes(Bytes, sizeof(T))) 730b57cec5SDimitry Andric return EC; 740b57cec5SDimitry Andric 75*5f757f3fSDimitry Andric Dest = llvm::support::endian::read<T>(Bytes.data(), Stream.getEndian()); 760b57cec5SDimitry Andric return Error::success(); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric /// Similar to readInteger. readEnum(T & Dest)800b57cec5SDimitry Andric template <typename T> Error readEnum(T &Dest) { 810b57cec5SDimitry Andric static_assert(std::is_enum<T>::value, 820b57cec5SDimitry Andric "Cannot call readEnum with non-enum value!"); 835ffd83dbSDimitry Andric std::underlying_type_t<T> N; 840b57cec5SDimitry Andric if (auto EC = readInteger(N)) 850b57cec5SDimitry Andric return EC; 860b57cec5SDimitry Andric Dest = static_cast<T>(N); 870b57cec5SDimitry Andric return Error::success(); 880b57cec5SDimitry Andric } 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric /// Read an unsigned LEB128 encoded value. 910b57cec5SDimitry Andric /// 920b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 930b57cec5SDimitry Andric /// returns an appropriate error code. 940b57cec5SDimitry Andric Error readULEB128(uint64_t &Dest); 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric /// Read a signed LEB128 encoded value. 970b57cec5SDimitry Andric /// 980b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 990b57cec5SDimitry Andric /// returns an appropriate error code. 1000b57cec5SDimitry Andric Error readSLEB128(int64_t &Dest); 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric /// Read a null terminated string from \p Dest. Whether a copy occurs depends 1030b57cec5SDimitry Andric /// on the implementation of the underlying stream. Updates the stream's 1040b57cec5SDimitry Andric /// offset to point after the newly read data. 1050b57cec5SDimitry Andric /// 1060b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1070b57cec5SDimitry Andric /// returns an appropriate error code. 1080b57cec5SDimitry Andric Error readCString(StringRef &Dest); 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric /// Similar to readCString, however read a null-terminated UTF16 string 1110b57cec5SDimitry Andric /// instead. 1120b57cec5SDimitry Andric /// 1130b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1140b57cec5SDimitry Andric /// returns an appropriate error code. 1150b57cec5SDimitry Andric Error readWideString(ArrayRef<UTF16> &Dest); 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric /// Read a \p Length byte string into \p Dest. Whether a copy occurs depends 1180b57cec5SDimitry Andric /// on the implementation of the underlying stream. Updates the stream's 1190b57cec5SDimitry Andric /// offset to point after the newly read data. 1200b57cec5SDimitry Andric /// 1210b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1220b57cec5SDimitry Andric /// returns an appropriate error code. 1230b57cec5SDimitry Andric Error readFixedString(StringRef &Dest, uint32_t Length); 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric /// Read the entire remainder of the underlying stream into \p Ref. This is 1260b57cec5SDimitry Andric /// equivalent to calling getUnderlyingStream().slice(Offset). Updates the 1270b57cec5SDimitry Andric /// stream's offset to point to the end of the stream. Never causes a copy. 1280b57cec5SDimitry Andric /// 1290b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1300b57cec5SDimitry Andric /// returns an appropriate error code. 1310b57cec5SDimitry Andric Error readStreamRef(BinaryStreamRef &Ref); 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric /// Read \p Length bytes from the underlying stream into \p Ref. This is 1340b57cec5SDimitry Andric /// equivalent to calling getUnderlyingStream().slice(Offset, Length). 1350b57cec5SDimitry Andric /// Updates the stream's offset to point after the newly read object. Never 1360b57cec5SDimitry Andric /// causes a copy. 1370b57cec5SDimitry Andric /// 1380b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1390b57cec5SDimitry Andric /// returns an appropriate error code. 1400b57cec5SDimitry Andric Error readStreamRef(BinaryStreamRef &Ref, uint32_t Length); 1410b57cec5SDimitry Andric 142480093f4SDimitry Andric /// Read \p Length bytes from the underlying stream into \p Ref. This is 1430b57cec5SDimitry Andric /// equivalent to calling getUnderlyingStream().slice(Offset, Length). 1440b57cec5SDimitry Andric /// Updates the stream's offset to point after the newly read object. Never 1450b57cec5SDimitry Andric /// causes a copy. 1460b57cec5SDimitry Andric /// 1470b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1480b57cec5SDimitry Andric /// returns an appropriate error code. 149480093f4SDimitry Andric Error readSubstream(BinarySubstreamRef &Ref, uint32_t Length); 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric /// Get a pointer to an object of type T from the underlying stream, as if by 1520b57cec5SDimitry Andric /// memcpy, and store the result into \p Dest. It is up to the caller to 1530b57cec5SDimitry Andric /// ensure that objects of type T can be safely treated in this manner. 1540b57cec5SDimitry Andric /// Updates the stream's offset to point after the newly read object. Whether 1550b57cec5SDimitry Andric /// a copy occurs depends upon the implementation of the underlying 1560b57cec5SDimitry Andric /// stream. 1570b57cec5SDimitry Andric /// 1580b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1590b57cec5SDimitry Andric /// returns an appropriate error code. readObject(const T * & Dest)1600b57cec5SDimitry Andric template <typename T> Error readObject(const T *&Dest) { 1610b57cec5SDimitry Andric ArrayRef<uint8_t> Buffer; 1620b57cec5SDimitry Andric if (auto EC = readBytes(Buffer, sizeof(T))) 1630b57cec5SDimitry Andric return EC; 1640b57cec5SDimitry Andric Dest = reinterpret_cast<const T *>(Buffer.data()); 1650b57cec5SDimitry Andric return Error::success(); 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric /// Get a reference to a \p NumElements element array of objects of type T 1690b57cec5SDimitry Andric /// from the underlying stream as if by memcpy, and store the resulting array 1700b57cec5SDimitry Andric /// slice into \p array. It is up to the caller to ensure that objects of 1710b57cec5SDimitry Andric /// type T can be safely treated in this manner. Updates the stream's offset 1720b57cec5SDimitry Andric /// to point after the newly read object. Whether a copy occurs depends upon 1730b57cec5SDimitry Andric /// the implementation of the underlying stream. 1740b57cec5SDimitry Andric /// 1750b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 1760b57cec5SDimitry Andric /// returns an appropriate error code. 1770b57cec5SDimitry Andric template <typename T> readArray(ArrayRef<T> & Array,uint32_t NumElements)1780b57cec5SDimitry Andric Error readArray(ArrayRef<T> &Array, uint32_t NumElements) { 1790b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes; 1800b57cec5SDimitry Andric if (NumElements == 0) { 1810b57cec5SDimitry Andric Array = ArrayRef<T>(); 1820b57cec5SDimitry Andric return Error::success(); 1830b57cec5SDimitry Andric } 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric if (NumElements > UINT32_MAX / sizeof(T)) 1860b57cec5SDimitry Andric return make_error<BinaryStreamError>( 1870b57cec5SDimitry Andric stream_error_code::invalid_array_size); 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric if (auto EC = readBytes(Bytes, NumElements * sizeof(T))) 1900b57cec5SDimitry Andric return EC; 1910b57cec5SDimitry Andric 1928bcb0991SDimitry Andric assert(isAddrAligned(Align::Of<T>(), Bytes.data()) && 1930b57cec5SDimitry Andric "Reading at invalid alignment!"); 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric Array = ArrayRef<T>(reinterpret_cast<const T *>(Bytes.data()), NumElements); 1960b57cec5SDimitry Andric return Error::success(); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /// Read a VarStreamArray of size \p Size bytes and store the result into 2000b57cec5SDimitry Andric /// \p Array. Updates the stream's offset to point after the newly read 2010b57cec5SDimitry Andric /// array. Never causes a copy (although iterating the elements of the 2020b57cec5SDimitry Andric /// VarStreamArray may, depending upon the implementation of the underlying 2030b57cec5SDimitry Andric /// stream). 2040b57cec5SDimitry Andric /// 2050b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 2060b57cec5SDimitry Andric /// returns an appropriate error code. 2070b57cec5SDimitry Andric template <typename T, typename U> 2080b57cec5SDimitry Andric Error readArray(VarStreamArray<T, U> &Array, uint32_t Size, 2090b57cec5SDimitry Andric uint32_t Skew = 0) { 2100b57cec5SDimitry Andric BinaryStreamRef S; 2110b57cec5SDimitry Andric if (auto EC = readStreamRef(S, Size)) 2120b57cec5SDimitry Andric return EC; 2130b57cec5SDimitry Andric Array.setUnderlyingStream(S, Skew); 2140b57cec5SDimitry Andric return Error::success(); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric /// Read a FixedStreamArray of \p NumItems elements and store the result into 2180b57cec5SDimitry Andric /// \p Array. Updates the stream's offset to point after the newly read 2190b57cec5SDimitry Andric /// array. Never causes a copy (although iterating the elements of the 2200b57cec5SDimitry Andric /// FixedStreamArray may, depending upon the implementation of the underlying 2210b57cec5SDimitry Andric /// stream). 2220b57cec5SDimitry Andric /// 2230b57cec5SDimitry Andric /// \returns a success error code if the data was successfully read, otherwise 2240b57cec5SDimitry Andric /// returns an appropriate error code. 2250b57cec5SDimitry Andric template <typename T> readArray(FixedStreamArray<T> & Array,uint32_t NumItems)2260b57cec5SDimitry Andric Error readArray(FixedStreamArray<T> &Array, uint32_t NumItems) { 2270b57cec5SDimitry Andric if (NumItems == 0) { 2280b57cec5SDimitry Andric Array = FixedStreamArray<T>(); 2290b57cec5SDimitry Andric return Error::success(); 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric if (NumItems > UINT32_MAX / sizeof(T)) 2330b57cec5SDimitry Andric return make_error<BinaryStreamError>( 2340b57cec5SDimitry Andric stream_error_code::invalid_array_size); 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric BinaryStreamRef View; 2370b57cec5SDimitry Andric if (auto EC = readStreamRef(View, NumItems * sizeof(T))) 2380b57cec5SDimitry Andric return EC; 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric Array = FixedStreamArray<T>(View); 2410b57cec5SDimitry Andric return Error::success(); 2420b57cec5SDimitry Andric } 2430b57cec5SDimitry Andric empty()2440b57cec5SDimitry Andric bool empty() const { return bytesRemaining() == 0; } setOffset(uint64_t Off)245349cc55cSDimitry Andric void setOffset(uint64_t Off) { Offset = Off; } getOffset()246349cc55cSDimitry Andric uint64_t getOffset() const { return Offset; } getLength()247349cc55cSDimitry Andric uint64_t getLength() const { return Stream.getLength(); } bytesRemaining()248349cc55cSDimitry Andric uint64_t bytesRemaining() const { return getLength() - getOffset(); } 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric /// Advance the stream's offset by \p Amount bytes. 2510b57cec5SDimitry Andric /// 2520b57cec5SDimitry Andric /// \returns a success error code if at least \p Amount bytes remain in the 2530b57cec5SDimitry Andric /// stream, otherwise returns an appropriate error code. 254349cc55cSDimitry Andric Error skip(uint64_t Amount); 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric /// Examine the next byte of the underlying stream without advancing the 2570b57cec5SDimitry Andric /// stream's offset. If the stream is empty the behavior is undefined. 2580b57cec5SDimitry Andric /// 2590b57cec5SDimitry Andric /// \returns the next byte in the stream. 2600b57cec5SDimitry Andric uint8_t peek() const; 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric Error padToAlignment(uint32_t Align); 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric std::pair<BinaryStreamReader, BinaryStreamReader> 265349cc55cSDimitry Andric split(uint64_t Offset) const; 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric private: 2680b57cec5SDimitry Andric BinaryStreamRef Stream; 269349cc55cSDimitry Andric uint64_t Offset = 0; 2700b57cec5SDimitry Andric }; 2710b57cec5SDimitry Andric } // namespace llvm 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric #endif // LLVM_SUPPORT_BINARYSTREAMREADER_H 274