// xref: /openbsd-src/gnu/llvm/llvm/lib/Support/DataExtractor.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
//===-- DataExtractor.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
809467b48Spatrick 
909467b48Spatrick #include "llvm/Support/DataExtractor.h"
1009467b48Spatrick #include "llvm/Support/Errc.h"
1109467b48Spatrick #include "llvm/Support/ErrorHandling.h"
1209467b48Spatrick #include "llvm/Support/LEB128.h"
1309467b48Spatrick #include "llvm/Support/SwapByteOrder.h"
1409467b48Spatrick 
1509467b48Spatrick using namespace llvm;
1609467b48Spatrick 
prepareRead(uint64_t Offset,uint64_t Size,Error * E) const17*097a140dSpatrick bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
18*097a140dSpatrick                                 Error *E) const {
19*097a140dSpatrick   if (isValidOffsetForDataOfSize(Offset, Size))
20*097a140dSpatrick     return true;
21*097a140dSpatrick   if (E) {
22*097a140dSpatrick     if (Offset <= Data.size())
23*097a140dSpatrick       *E = createStringError(
24*097a140dSpatrick           errc::illegal_byte_sequence,
25*097a140dSpatrick           "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
26*097a140dSpatrick           ", 0x%" PRIx64 ")",
27*097a140dSpatrick           Data.size(), Offset, Offset + Size);
28*097a140dSpatrick     else
29*097a140dSpatrick       *E = createStringError(errc::invalid_argument,
30*097a140dSpatrick                              "offset 0x%" PRIx64
31*097a140dSpatrick                              " is beyond the end of data at 0x%zx",
32*097a140dSpatrick                              Offset, Data.size());
33*097a140dSpatrick   }
34*097a140dSpatrick   return false;
3509467b48Spatrick }
3609467b48Spatrick 
isError(Error * E)3709467b48Spatrick static bool isError(Error *E) { return E && *E; }
3809467b48Spatrick 
3909467b48Spatrick template <typename T>
getU(uint64_t * offset_ptr,Error * Err) const40*097a140dSpatrick T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
4109467b48Spatrick   ErrorAsOutParameter ErrAsOut(Err);
4209467b48Spatrick   T val = 0;
4309467b48Spatrick   if (isError(Err))
4409467b48Spatrick     return val;
4509467b48Spatrick 
4609467b48Spatrick   uint64_t offset = *offset_ptr;
47*097a140dSpatrick   if (!prepareRead(offset, sizeof(T), Err))
4809467b48Spatrick     return val;
49*097a140dSpatrick   std::memcpy(&val, &Data.data()[offset], sizeof(val));
50*097a140dSpatrick   if (sys::IsLittleEndianHost != IsLittleEndian)
5109467b48Spatrick     sys::swapByteOrder(val);
5209467b48Spatrick 
5309467b48Spatrick   // Advance the offset
5409467b48Spatrick   *offset_ptr += sizeof(val);
5509467b48Spatrick   return val;
5609467b48Spatrick }
5709467b48Spatrick 
5809467b48Spatrick template <typename T>
getUs(uint64_t * offset_ptr,T * dst,uint32_t count,Error * Err) const59*097a140dSpatrick T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
60*097a140dSpatrick                         Error *Err) const {
6109467b48Spatrick   ErrorAsOutParameter ErrAsOut(Err);
6209467b48Spatrick   if (isError(Err))
6309467b48Spatrick     return nullptr;
6409467b48Spatrick 
6509467b48Spatrick   uint64_t offset = *offset_ptr;
6609467b48Spatrick 
67*097a140dSpatrick   if (!prepareRead(offset, sizeof(*dst) * count, Err))
6809467b48Spatrick     return nullptr;
6909467b48Spatrick   for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
7009467b48Spatrick        ++value_ptr, offset += sizeof(*dst))
71*097a140dSpatrick     *value_ptr = getU<T>(offset_ptr, Err);
7209467b48Spatrick   // Advance the offset
7309467b48Spatrick   *offset_ptr = offset;
7409467b48Spatrick   // Return a non-NULL pointer to the converted data as an indicator of
7509467b48Spatrick   // success
7609467b48Spatrick   return dst;
7709467b48Spatrick }
7809467b48Spatrick 
getU8(uint64_t * offset_ptr,llvm::Error * Err) const7909467b48Spatrick uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
80*097a140dSpatrick   return getU<uint8_t>(offset_ptr, Err);
8109467b48Spatrick }
8209467b48Spatrick 
getU8(uint64_t * offset_ptr,uint8_t * dst,uint32_t count) const83*097a140dSpatrick uint8_t *DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst,
84*097a140dSpatrick                               uint32_t count) const {
85*097a140dSpatrick   return getUs<uint8_t>(offset_ptr, dst, count, nullptr);
8609467b48Spatrick }
8709467b48Spatrick 
getU8(Cursor & C,uint8_t * Dst,uint32_t Count) const8809467b48Spatrick uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
89*097a140dSpatrick   return getUs<uint8_t>(&C.Offset, Dst, Count, &C.Err);
9009467b48Spatrick }
9109467b48Spatrick 
getU16(uint64_t * offset_ptr,llvm::Error * Err) const9209467b48Spatrick uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
93*097a140dSpatrick   return getU<uint16_t>(offset_ptr, Err);
9409467b48Spatrick }
9509467b48Spatrick 
getU16(uint64_t * offset_ptr,uint16_t * dst,uint32_t count) const9609467b48Spatrick uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
9709467b48Spatrick                                 uint32_t count) const {
98*097a140dSpatrick   return getUs<uint16_t>(offset_ptr, dst, count, nullptr);
9909467b48Spatrick }
10009467b48Spatrick 
getU24(uint64_t * OffsetPtr,Error * Err) const101*097a140dSpatrick uint32_t DataExtractor::getU24(uint64_t *OffsetPtr, Error *Err) const {
102*097a140dSpatrick   uint24_t ExtractedVal = getU<uint24_t>(OffsetPtr, Err);
10309467b48Spatrick   // The 3 bytes are in the correct byte order for the host.
10409467b48Spatrick   return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
10509467b48Spatrick }
10609467b48Spatrick 
getU32(uint64_t * offset_ptr,llvm::Error * Err) const10709467b48Spatrick uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
108*097a140dSpatrick   return getU<uint32_t>(offset_ptr, Err);
10909467b48Spatrick }
11009467b48Spatrick 
getU32(uint64_t * offset_ptr,uint32_t * dst,uint32_t count) const11109467b48Spatrick uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
11209467b48Spatrick                                 uint32_t count) const {
113*097a140dSpatrick   return getUs<uint32_t>(offset_ptr, dst, count, nullptr);
11409467b48Spatrick }
11509467b48Spatrick 
getU64(uint64_t * offset_ptr,llvm::Error * Err) const11609467b48Spatrick uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
117*097a140dSpatrick   return getU<uint64_t>(offset_ptr, Err);
11809467b48Spatrick }
11909467b48Spatrick 
getU64(uint64_t * offset_ptr,uint64_t * dst,uint32_t count) const12009467b48Spatrick uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
12109467b48Spatrick                                 uint32_t count) const {
122*097a140dSpatrick   return getUs<uint64_t>(offset_ptr, dst, count, nullptr);
12309467b48Spatrick }
12409467b48Spatrick 
getUnsigned(uint64_t * offset_ptr,uint32_t byte_size,llvm::Error * Err) const12509467b48Spatrick uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
12609467b48Spatrick                                     llvm::Error *Err) const {
12709467b48Spatrick   switch (byte_size) {
12809467b48Spatrick   case 1:
12909467b48Spatrick     return getU8(offset_ptr, Err);
13009467b48Spatrick   case 2:
13109467b48Spatrick     return getU16(offset_ptr, Err);
13209467b48Spatrick   case 4:
13309467b48Spatrick     return getU32(offset_ptr, Err);
13409467b48Spatrick   case 8:
13509467b48Spatrick     return getU64(offset_ptr, Err);
13609467b48Spatrick   }
13709467b48Spatrick   llvm_unreachable("getUnsigned unhandled case!");
13809467b48Spatrick }
13909467b48Spatrick 
14009467b48Spatrick int64_t
getSigned(uint64_t * offset_ptr,uint32_t byte_size) const14109467b48Spatrick DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
14209467b48Spatrick   switch (byte_size) {
14309467b48Spatrick   case 1:
14409467b48Spatrick     return (int8_t)getU8(offset_ptr);
14509467b48Spatrick   case 2:
14609467b48Spatrick     return (int16_t)getU16(offset_ptr);
14709467b48Spatrick   case 4:
14809467b48Spatrick     return (int32_t)getU32(offset_ptr);
14909467b48Spatrick   case 8:
15009467b48Spatrick     return (int64_t)getU64(offset_ptr);
15109467b48Spatrick   }
15209467b48Spatrick   llvm_unreachable("getSigned unhandled case!");
15309467b48Spatrick }
15409467b48Spatrick 
getCStrRef(uint64_t * OffsetPtr,Error * Err) const155*097a140dSpatrick StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
156*097a140dSpatrick   ErrorAsOutParameter ErrAsOut(Err);
157*097a140dSpatrick   if (isError(Err))
158*097a140dSpatrick     return StringRef();
15909467b48Spatrick 
160*097a140dSpatrick   uint64_t Start = *OffsetPtr;
16109467b48Spatrick   StringRef::size_type Pos = Data.find('\0', Start);
16209467b48Spatrick   if (Pos != StringRef::npos) {
163*097a140dSpatrick     *OffsetPtr = Pos + 1;
16409467b48Spatrick     return StringRef(Data.data() + Start, Pos - Start);
16509467b48Spatrick   }
166*097a140dSpatrick   if (Err)
167*097a140dSpatrick     *Err = createStringError(errc::illegal_byte_sequence,
168*097a140dSpatrick                              "no null terminated string at offset 0x%" PRIx64,
169*097a140dSpatrick                              Start);
17009467b48Spatrick   return StringRef();
17109467b48Spatrick }
17209467b48Spatrick 
getFixedLengthString(uint64_t * OffsetPtr,uint64_t Length,StringRef TrimChars) const173*097a140dSpatrick StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
174*097a140dSpatrick                                               uint64_t Length,
175*097a140dSpatrick                                               StringRef TrimChars) const {
176*097a140dSpatrick   StringRef Bytes(getBytes(OffsetPtr, Length));
177*097a140dSpatrick   return Bytes.trim(TrimChars);
178*097a140dSpatrick }
179*097a140dSpatrick 
getBytes(uint64_t * OffsetPtr,uint64_t Length,Error * Err) const180*097a140dSpatrick StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length,
181*097a140dSpatrick                                   Error *Err) const {
18209467b48Spatrick   ErrorAsOutParameter ErrAsOut(Err);
18309467b48Spatrick   if (isError(Err))
184*097a140dSpatrick     return StringRef();
185*097a140dSpatrick 
186*097a140dSpatrick   if (!prepareRead(*OffsetPtr, Length, Err))
187*097a140dSpatrick     return StringRef();
188*097a140dSpatrick 
189*097a140dSpatrick   StringRef Result = Data.substr(*OffsetPtr, Length);
190*097a140dSpatrick   *OffsetPtr += Length;
191*097a140dSpatrick   return Result;
192*097a140dSpatrick }
193*097a140dSpatrick 
194*097a140dSpatrick template <typename T>
getLEB128(StringRef Data,uint64_t * OffsetPtr,Error * Err,T (& Decoder)(const uint8_t * p,unsigned * n,const uint8_t * end,const char ** error))195*097a140dSpatrick static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
196*097a140dSpatrick                    T (&Decoder)(const uint8_t *p, unsigned *n,
197*097a140dSpatrick                                 const uint8_t *end, const char **error)) {
198*097a140dSpatrick   ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Data);
199*097a140dSpatrick   assert(*OffsetPtr <= Bytes.size());
200*097a140dSpatrick   ErrorAsOutParameter ErrAsOut(Err);
201*097a140dSpatrick   if (isError(Err))
202*097a140dSpatrick     return T();
20309467b48Spatrick 
20409467b48Spatrick   const char *error;
20509467b48Spatrick   unsigned bytes_read;
206*097a140dSpatrick   T result =
207*097a140dSpatrick       Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
20809467b48Spatrick   if (error) {
20909467b48Spatrick     if (Err)
210*097a140dSpatrick       *Err = createStringError(errc::illegal_byte_sequence,
211*097a140dSpatrick                                "unable to decode LEB128 at offset 0x%8.8" PRIx64
212*097a140dSpatrick                                ": %s",
213*097a140dSpatrick                                *OffsetPtr, error);
214*097a140dSpatrick     return T();
21509467b48Spatrick   }
216*097a140dSpatrick   *OffsetPtr += bytes_read;
21709467b48Spatrick   return result;
21809467b48Spatrick }
21909467b48Spatrick 
getULEB128(uint64_t * offset_ptr,Error * Err) const220*097a140dSpatrick uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, Error *Err) const {
221*097a140dSpatrick   return getLEB128(Data, offset_ptr, Err, decodeULEB128);
222*097a140dSpatrick }
22309467b48Spatrick 
getSLEB128(uint64_t * offset_ptr,Error * Err) const224*097a140dSpatrick int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
225*097a140dSpatrick   return getLEB128(Data, offset_ptr, Err, decodeSLEB128);
22609467b48Spatrick }
22709467b48Spatrick 
skip(Cursor & C,uint64_t Length) const22809467b48Spatrick void DataExtractor::skip(Cursor &C, uint64_t Length) const {
22909467b48Spatrick   ErrorAsOutParameter ErrAsOut(&C.Err);
23009467b48Spatrick   if (isError(&C.Err))
23109467b48Spatrick     return;
23209467b48Spatrick 
233*097a140dSpatrick   if (prepareRead(C.Offset, Length, &C.Err))
23409467b48Spatrick     C.Offset += Length;
23509467b48Spatrick }
236