xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/DataExtractor.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
//===-- DataExtractor.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/Support/DataExtractor.h"
1006c3fb27SDimitry Andric #include "llvm/ADT/StringExtras.h"
118bcb0991SDimitry Andric #include "llvm/Support/Errc.h"
120b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
130b57cec5SDimitry Andric #include "llvm/Support/LEB128.h"
148bcb0991SDimitry Andric #include "llvm/Support/SwapByteOrder.h"
158bcb0991SDimitry Andric 
160b57cec5SDimitry Andric using namespace llvm;
170b57cec5SDimitry Andric 
prepareRead(uint64_t Offset,uint64_t Size,Error * E) const185ffd83dbSDimitry Andric bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
195ffd83dbSDimitry Andric                                 Error *E) const {
205ffd83dbSDimitry Andric   if (isValidOffsetForDataOfSize(Offset, Size))
215ffd83dbSDimitry Andric     return true;
225ffd83dbSDimitry Andric   if (E) {
235ffd83dbSDimitry Andric     if (Offset <= Data.size())
245ffd83dbSDimitry Andric       *E = createStringError(
255ffd83dbSDimitry Andric           errc::illegal_byte_sequence,
265ffd83dbSDimitry Andric           "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
275ffd83dbSDimitry Andric           ", 0x%" PRIx64 ")",
285ffd83dbSDimitry Andric           Data.size(), Offset, Offset + Size);
295ffd83dbSDimitry Andric     else
305ffd83dbSDimitry Andric       *E = createStringError(errc::invalid_argument,
315ffd83dbSDimitry Andric                              "offset 0x%" PRIx64
325ffd83dbSDimitry Andric                              " is beyond the end of data at 0x%zx",
335ffd83dbSDimitry Andric                              Offset, Data.size());
345ffd83dbSDimitry Andric   }
355ffd83dbSDimitry Andric   return false;
368bcb0991SDimitry Andric }
378bcb0991SDimitry Andric 
isError(Error * E)388bcb0991SDimitry Andric static bool isError(Error *E) { return E && *E; }
398bcb0991SDimitry Andric 
400b57cec5SDimitry Andric template <typename T>
getU(uint64_t * offset_ptr,Error * Err) const415ffd83dbSDimitry Andric T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
428bcb0991SDimitry Andric   ErrorAsOutParameter ErrAsOut(Err);
430b57cec5SDimitry Andric   T val = 0;
448bcb0991SDimitry Andric   if (isError(Err))
458bcb0991SDimitry Andric     return val;
468bcb0991SDimitry Andric 
478bcb0991SDimitry Andric   uint64_t offset = *offset_ptr;
485ffd83dbSDimitry Andric   if (!prepareRead(offset, sizeof(T), Err))
498bcb0991SDimitry Andric     return val;
505ffd83dbSDimitry Andric   std::memcpy(&val, &Data.data()[offset], sizeof(val));
515ffd83dbSDimitry Andric   if (sys::IsLittleEndianHost != IsLittleEndian)
520b57cec5SDimitry Andric     sys::swapByteOrder(val);
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric   // Advance the offset
550b57cec5SDimitry Andric   *offset_ptr += sizeof(val);
560b57cec5SDimitry Andric   return val;
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric template <typename T>
getUs(uint64_t * offset_ptr,T * dst,uint32_t count,Error * Err) const605ffd83dbSDimitry Andric T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
615ffd83dbSDimitry Andric                         Error *Err) const {
628bcb0991SDimitry Andric   ErrorAsOutParameter ErrAsOut(Err);
638bcb0991SDimitry Andric   if (isError(Err))
648bcb0991SDimitry Andric     return nullptr;
650b57cec5SDimitry Andric 
668bcb0991SDimitry Andric   uint64_t offset = *offset_ptr;
678bcb0991SDimitry Andric 
685ffd83dbSDimitry Andric   if (!prepareRead(offset, sizeof(*dst) * count, Err))
698bcb0991SDimitry Andric     return nullptr;
700b57cec5SDimitry Andric   for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
710b57cec5SDimitry Andric        ++value_ptr, offset += sizeof(*dst))
725ffd83dbSDimitry Andric     *value_ptr = getU<T>(offset_ptr, Err);
730b57cec5SDimitry Andric   // Advance the offset
740b57cec5SDimitry Andric   *offset_ptr = offset;
750b57cec5SDimitry Andric   // Return a non-NULL pointer to the converted data as an indicator of
760b57cec5SDimitry Andric   // success
770b57cec5SDimitry Andric   return dst;
780b57cec5SDimitry Andric }
790b57cec5SDimitry Andric 
getU8(uint64_t * offset_ptr,llvm::Error * Err) const808bcb0991SDimitry Andric uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
815ffd83dbSDimitry Andric   return getU<uint8_t>(offset_ptr, Err);
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric 
getU8(uint64_t * offset_ptr,uint8_t * dst,uint32_t count) const845ffd83dbSDimitry Andric uint8_t *DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst,
855ffd83dbSDimitry Andric                               uint32_t count) const {
865ffd83dbSDimitry Andric   return getUs<uint8_t>(offset_ptr, dst, count, nullptr);
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric 
getU8(Cursor & C,uint8_t * Dst,uint32_t Count) const898bcb0991SDimitry Andric uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
905ffd83dbSDimitry Andric   return getUs<uint8_t>(&C.Offset, Dst, Count, &C.Err);
910b57cec5SDimitry Andric }
920b57cec5SDimitry Andric 
getU16(uint64_t * offset_ptr,llvm::Error * Err) const938bcb0991SDimitry Andric uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
945ffd83dbSDimitry Andric   return getU<uint16_t>(offset_ptr, Err);
958bcb0991SDimitry Andric }
968bcb0991SDimitry Andric 
getU16(uint64_t * offset_ptr,uint16_t * dst,uint32_t count) const978bcb0991SDimitry Andric uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
980b57cec5SDimitry Andric                                 uint32_t count) const {
995ffd83dbSDimitry Andric   return getUs<uint16_t>(offset_ptr, dst, count, nullptr);
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric 
getU24(uint64_t * OffsetPtr,Error * Err) const1025ffd83dbSDimitry Andric uint32_t DataExtractor::getU24(uint64_t *OffsetPtr, Error *Err) const {
1035ffd83dbSDimitry Andric   uint24_t ExtractedVal = getU<uint24_t>(OffsetPtr, Err);
1040b57cec5SDimitry Andric   // The 3 bytes are in the correct byte order for the host.
1050b57cec5SDimitry Andric   return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric 
getU32(uint64_t * offset_ptr,llvm::Error * Err) const1088bcb0991SDimitry Andric uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
1095ffd83dbSDimitry Andric   return getU<uint32_t>(offset_ptr, Err);
1100b57cec5SDimitry Andric }
1110b57cec5SDimitry Andric 
getU32(uint64_t * offset_ptr,uint32_t * dst,uint32_t count) const1128bcb0991SDimitry Andric uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
1130b57cec5SDimitry Andric                                 uint32_t count) const {
1145ffd83dbSDimitry Andric   return getUs<uint32_t>(offset_ptr, dst, count, nullptr);
1150b57cec5SDimitry Andric }
1160b57cec5SDimitry Andric 
getU64(uint64_t * offset_ptr,llvm::Error * Err) const1178bcb0991SDimitry Andric uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
1185ffd83dbSDimitry Andric   return getU<uint64_t>(offset_ptr, Err);
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric 
getU64(uint64_t * offset_ptr,uint64_t * dst,uint32_t count) const1218bcb0991SDimitry Andric uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
1220b57cec5SDimitry Andric                                 uint32_t count) const {
1235ffd83dbSDimitry Andric   return getUs<uint64_t>(offset_ptr, dst, count, nullptr);
1240b57cec5SDimitry Andric }
1250b57cec5SDimitry Andric 
getUnsigned(uint64_t * offset_ptr,uint32_t byte_size,llvm::Error * Err) const1268bcb0991SDimitry Andric uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
1278bcb0991SDimitry Andric                                     llvm::Error *Err) const {
1280b57cec5SDimitry Andric   switch (byte_size) {
1290b57cec5SDimitry Andric   case 1:
1308bcb0991SDimitry Andric     return getU8(offset_ptr, Err);
1310b57cec5SDimitry Andric   case 2:
1328bcb0991SDimitry Andric     return getU16(offset_ptr, Err);
1330b57cec5SDimitry Andric   case 4:
1348bcb0991SDimitry Andric     return getU32(offset_ptr, Err);
1350b57cec5SDimitry Andric   case 8:
1368bcb0991SDimitry Andric     return getU64(offset_ptr, Err);
1370b57cec5SDimitry Andric   }
1380b57cec5SDimitry Andric   llvm_unreachable("getUnsigned unhandled case!");
1390b57cec5SDimitry Andric }
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric int64_t
getSigned(uint64_t * offset_ptr,uint32_t byte_size) const1428bcb0991SDimitry Andric DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
1430b57cec5SDimitry Andric   switch (byte_size) {
1440b57cec5SDimitry Andric   case 1:
1450b57cec5SDimitry Andric     return (int8_t)getU8(offset_ptr);
1460b57cec5SDimitry Andric   case 2:
1470b57cec5SDimitry Andric     return (int16_t)getU16(offset_ptr);
1480b57cec5SDimitry Andric   case 4:
1490b57cec5SDimitry Andric     return (int32_t)getU32(offset_ptr);
1500b57cec5SDimitry Andric   case 8:
1510b57cec5SDimitry Andric     return (int64_t)getU64(offset_ptr);
1520b57cec5SDimitry Andric   }
1530b57cec5SDimitry Andric   llvm_unreachable("getSigned unhandled case!");
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric 
getCStrRef(uint64_t * OffsetPtr,Error * Err) const1565ffd83dbSDimitry Andric StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
1575ffd83dbSDimitry Andric   ErrorAsOutParameter ErrAsOut(Err);
1585ffd83dbSDimitry Andric   if (isError(Err))
1595ffd83dbSDimitry Andric     return StringRef();
1600b57cec5SDimitry Andric 
1615ffd83dbSDimitry Andric   uint64_t Start = *OffsetPtr;
1620b57cec5SDimitry Andric   StringRef::size_type Pos = Data.find('\0', Start);
1630b57cec5SDimitry Andric   if (Pos != StringRef::npos) {
1645ffd83dbSDimitry Andric     *OffsetPtr = Pos + 1;
1650b57cec5SDimitry Andric     return StringRef(Data.data() + Start, Pos - Start);
1660b57cec5SDimitry Andric   }
1675ffd83dbSDimitry Andric   if (Err)
1685ffd83dbSDimitry Andric     *Err = createStringError(errc::illegal_byte_sequence,
1695ffd83dbSDimitry Andric                              "no null terminated string at offset 0x%" PRIx64,
1705ffd83dbSDimitry Andric                              Start);
1710b57cec5SDimitry Andric   return StringRef();
1720b57cec5SDimitry Andric }
1730b57cec5SDimitry Andric 
getFixedLengthString(uint64_t * OffsetPtr,uint64_t Length,StringRef TrimChars) const1745ffd83dbSDimitry Andric StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
1755ffd83dbSDimitry Andric                                               uint64_t Length,
1765ffd83dbSDimitry Andric                                               StringRef TrimChars) const {
1775ffd83dbSDimitry Andric   StringRef Bytes(getBytes(OffsetPtr, Length));
1785ffd83dbSDimitry Andric   return Bytes.trim(TrimChars);
1795ffd83dbSDimitry Andric }
1805ffd83dbSDimitry Andric 
getBytes(uint64_t * OffsetPtr,uint64_t Length,Error * Err) const1815ffd83dbSDimitry Andric StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length,
1825ffd83dbSDimitry Andric                                   Error *Err) const {
1838bcb0991SDimitry Andric   ErrorAsOutParameter ErrAsOut(Err);
1848bcb0991SDimitry Andric   if (isError(Err))
1855ffd83dbSDimitry Andric     return StringRef();
1865ffd83dbSDimitry Andric 
1875ffd83dbSDimitry Andric   if (!prepareRead(*OffsetPtr, Length, Err))
1885ffd83dbSDimitry Andric     return StringRef();
1895ffd83dbSDimitry Andric 
1905ffd83dbSDimitry Andric   StringRef Result = Data.substr(*OffsetPtr, Length);
1915ffd83dbSDimitry Andric   *OffsetPtr += Length;
1925ffd83dbSDimitry Andric   return Result;
1935ffd83dbSDimitry Andric }
1945ffd83dbSDimitry Andric 
1955ffd83dbSDimitry Andric template <typename T>
getLEB128(StringRef Data,uint64_t * OffsetPtr,Error * Err,T (& Decoder)(const uint8_t * p,unsigned * n,const uint8_t * end,const char ** error))1965ffd83dbSDimitry Andric static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
1975ffd83dbSDimitry Andric                    T (&Decoder)(const uint8_t *p, unsigned *n,
1985ffd83dbSDimitry Andric                                 const uint8_t *end, const char **error)) {
1995ffd83dbSDimitry Andric   ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Data);
2005ffd83dbSDimitry Andric   assert(*OffsetPtr <= Bytes.size());
2015ffd83dbSDimitry Andric   ErrorAsOutParameter ErrAsOut(Err);
2025ffd83dbSDimitry Andric   if (isError(Err))
2035ffd83dbSDimitry Andric     return T();
2040b57cec5SDimitry Andric 
205*5f757f3fSDimitry Andric   const char *error = nullptr;
2060b57cec5SDimitry Andric   unsigned bytes_read;
2075ffd83dbSDimitry Andric   T result =
2085ffd83dbSDimitry Andric       Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
2098bcb0991SDimitry Andric   if (error) {
2108bcb0991SDimitry Andric     if (Err)
2115ffd83dbSDimitry Andric       *Err = createStringError(errc::illegal_byte_sequence,
2125ffd83dbSDimitry Andric                                "unable to decode LEB128 at offset 0x%8.8" PRIx64
2135ffd83dbSDimitry Andric                                ": %s",
2145ffd83dbSDimitry Andric                                *OffsetPtr, error);
2155ffd83dbSDimitry Andric     return T();
2168bcb0991SDimitry Andric   }
2175ffd83dbSDimitry Andric   *OffsetPtr += bytes_read;
2180b57cec5SDimitry Andric   return result;
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric 
getULEB128(uint64_t * offset_ptr,Error * Err) const2215ffd83dbSDimitry Andric uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, Error *Err) const {
2225ffd83dbSDimitry Andric   return getLEB128(Data, offset_ptr, Err, decodeULEB128);
2235ffd83dbSDimitry Andric }
2240b57cec5SDimitry Andric 
getSLEB128(uint64_t * offset_ptr,Error * Err) const2255ffd83dbSDimitry Andric int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
2265ffd83dbSDimitry Andric   return getLEB128(Data, offset_ptr, Err, decodeSLEB128);
2270b57cec5SDimitry Andric }
2288bcb0991SDimitry Andric 
skip(Cursor & C,uint64_t Length) const2298bcb0991SDimitry Andric void DataExtractor::skip(Cursor &C, uint64_t Length) const {
2308bcb0991SDimitry Andric   ErrorAsOutParameter ErrAsOut(&C.Err);
2318bcb0991SDimitry Andric   if (isError(&C.Err))
2328bcb0991SDimitry Andric     return;
2338bcb0991SDimitry Andric 
2345ffd83dbSDimitry Andric   if (prepareRead(C.Offset, Length, &C.Err))
2358bcb0991SDimitry Andric     C.Offset += Length;
2368bcb0991SDimitry Andric }
237