//===-- DataExtractor.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
809467b48Spatrick
909467b48Spatrick #include "llvm/Support/DataExtractor.h"
1009467b48Spatrick #include "llvm/Support/Errc.h"
1109467b48Spatrick #include "llvm/Support/ErrorHandling.h"
1209467b48Spatrick #include "llvm/Support/LEB128.h"
1309467b48Spatrick #include "llvm/Support/SwapByteOrder.h"
1409467b48Spatrick
1509467b48Spatrick using namespace llvm;
1609467b48Spatrick
prepareRead(uint64_t Offset,uint64_t Size,Error * E) const17*097a140dSpatrick bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
18*097a140dSpatrick Error *E) const {
19*097a140dSpatrick if (isValidOffsetForDataOfSize(Offset, Size))
20*097a140dSpatrick return true;
21*097a140dSpatrick if (E) {
22*097a140dSpatrick if (Offset <= Data.size())
23*097a140dSpatrick *E = createStringError(
24*097a140dSpatrick errc::illegal_byte_sequence,
25*097a140dSpatrick "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
26*097a140dSpatrick ", 0x%" PRIx64 ")",
27*097a140dSpatrick Data.size(), Offset, Offset + Size);
28*097a140dSpatrick else
29*097a140dSpatrick *E = createStringError(errc::invalid_argument,
30*097a140dSpatrick "offset 0x%" PRIx64
31*097a140dSpatrick " is beyond the end of data at 0x%zx",
32*097a140dSpatrick Offset, Data.size());
33*097a140dSpatrick }
34*097a140dSpatrick return false;
3509467b48Spatrick }
3609467b48Spatrick
isError(Error * E)3709467b48Spatrick static bool isError(Error *E) { return E && *E; }
3809467b48Spatrick
3909467b48Spatrick template <typename T>
getU(uint64_t * offset_ptr,Error * Err) const40*097a140dSpatrick T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
4109467b48Spatrick ErrorAsOutParameter ErrAsOut(Err);
4209467b48Spatrick T val = 0;
4309467b48Spatrick if (isError(Err))
4409467b48Spatrick return val;
4509467b48Spatrick
4609467b48Spatrick uint64_t offset = *offset_ptr;
47*097a140dSpatrick if (!prepareRead(offset, sizeof(T), Err))
4809467b48Spatrick return val;
49*097a140dSpatrick std::memcpy(&val, &Data.data()[offset], sizeof(val));
50*097a140dSpatrick if (sys::IsLittleEndianHost != IsLittleEndian)
5109467b48Spatrick sys::swapByteOrder(val);
5209467b48Spatrick
5309467b48Spatrick // Advance the offset
5409467b48Spatrick *offset_ptr += sizeof(val);
5509467b48Spatrick return val;
5609467b48Spatrick }
5709467b48Spatrick
5809467b48Spatrick template <typename T>
getUs(uint64_t * offset_ptr,T * dst,uint32_t count,Error * Err) const59*097a140dSpatrick T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
60*097a140dSpatrick Error *Err) const {
6109467b48Spatrick ErrorAsOutParameter ErrAsOut(Err);
6209467b48Spatrick if (isError(Err))
6309467b48Spatrick return nullptr;
6409467b48Spatrick
6509467b48Spatrick uint64_t offset = *offset_ptr;
6609467b48Spatrick
67*097a140dSpatrick if (!prepareRead(offset, sizeof(*dst) * count, Err))
6809467b48Spatrick return nullptr;
6909467b48Spatrick for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
7009467b48Spatrick ++value_ptr, offset += sizeof(*dst))
71*097a140dSpatrick *value_ptr = getU<T>(offset_ptr, Err);
7209467b48Spatrick // Advance the offset
7309467b48Spatrick *offset_ptr = offset;
7409467b48Spatrick // Return a non-NULL pointer to the converted data as an indicator of
7509467b48Spatrick // success
7609467b48Spatrick return dst;
7709467b48Spatrick }
7809467b48Spatrick
getU8(uint64_t * offset_ptr,llvm::Error * Err) const7909467b48Spatrick uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
80*097a140dSpatrick return getU<uint8_t>(offset_ptr, Err);
8109467b48Spatrick }
8209467b48Spatrick
getU8(uint64_t * offset_ptr,uint8_t * dst,uint32_t count) const83*097a140dSpatrick uint8_t *DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst,
84*097a140dSpatrick uint32_t count) const {
85*097a140dSpatrick return getUs<uint8_t>(offset_ptr, dst, count, nullptr);
8609467b48Spatrick }
8709467b48Spatrick
getU8(Cursor & C,uint8_t * Dst,uint32_t Count) const8809467b48Spatrick uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
89*097a140dSpatrick return getUs<uint8_t>(&C.Offset, Dst, Count, &C.Err);
9009467b48Spatrick }
9109467b48Spatrick
getU16(uint64_t * offset_ptr,llvm::Error * Err) const9209467b48Spatrick uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
93*097a140dSpatrick return getU<uint16_t>(offset_ptr, Err);
9409467b48Spatrick }
9509467b48Spatrick
getU16(uint64_t * offset_ptr,uint16_t * dst,uint32_t count) const9609467b48Spatrick uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
9709467b48Spatrick uint32_t count) const {
98*097a140dSpatrick return getUs<uint16_t>(offset_ptr, dst, count, nullptr);
9909467b48Spatrick }
10009467b48Spatrick
getU24(uint64_t * OffsetPtr,Error * Err) const101*097a140dSpatrick uint32_t DataExtractor::getU24(uint64_t *OffsetPtr, Error *Err) const {
102*097a140dSpatrick uint24_t ExtractedVal = getU<uint24_t>(OffsetPtr, Err);
10309467b48Spatrick // The 3 bytes are in the correct byte order for the host.
10409467b48Spatrick return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
10509467b48Spatrick }
10609467b48Spatrick
getU32(uint64_t * offset_ptr,llvm::Error * Err) const10709467b48Spatrick uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
108*097a140dSpatrick return getU<uint32_t>(offset_ptr, Err);
10909467b48Spatrick }
11009467b48Spatrick
getU32(uint64_t * offset_ptr,uint32_t * dst,uint32_t count) const11109467b48Spatrick uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
11209467b48Spatrick uint32_t count) const {
113*097a140dSpatrick return getUs<uint32_t>(offset_ptr, dst, count, nullptr);
11409467b48Spatrick }
11509467b48Spatrick
getU64(uint64_t * offset_ptr,llvm::Error * Err) const11609467b48Spatrick uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
117*097a140dSpatrick return getU<uint64_t>(offset_ptr, Err);
11809467b48Spatrick }
11909467b48Spatrick
getU64(uint64_t * offset_ptr,uint64_t * dst,uint32_t count) const12009467b48Spatrick uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
12109467b48Spatrick uint32_t count) const {
122*097a140dSpatrick return getUs<uint64_t>(offset_ptr, dst, count, nullptr);
12309467b48Spatrick }
12409467b48Spatrick
getUnsigned(uint64_t * offset_ptr,uint32_t byte_size,llvm::Error * Err) const12509467b48Spatrick uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
12609467b48Spatrick llvm::Error *Err) const {
12709467b48Spatrick switch (byte_size) {
12809467b48Spatrick case 1:
12909467b48Spatrick return getU8(offset_ptr, Err);
13009467b48Spatrick case 2:
13109467b48Spatrick return getU16(offset_ptr, Err);
13209467b48Spatrick case 4:
13309467b48Spatrick return getU32(offset_ptr, Err);
13409467b48Spatrick case 8:
13509467b48Spatrick return getU64(offset_ptr, Err);
13609467b48Spatrick }
13709467b48Spatrick llvm_unreachable("getUnsigned unhandled case!");
13809467b48Spatrick }
13909467b48Spatrick
14009467b48Spatrick int64_t
getSigned(uint64_t * offset_ptr,uint32_t byte_size) const14109467b48Spatrick DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
14209467b48Spatrick switch (byte_size) {
14309467b48Spatrick case 1:
14409467b48Spatrick return (int8_t)getU8(offset_ptr);
14509467b48Spatrick case 2:
14609467b48Spatrick return (int16_t)getU16(offset_ptr);
14709467b48Spatrick case 4:
14809467b48Spatrick return (int32_t)getU32(offset_ptr);
14909467b48Spatrick case 8:
15009467b48Spatrick return (int64_t)getU64(offset_ptr);
15109467b48Spatrick }
15209467b48Spatrick llvm_unreachable("getSigned unhandled case!");
15309467b48Spatrick }
15409467b48Spatrick
getCStrRef(uint64_t * OffsetPtr,Error * Err) const155*097a140dSpatrick StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
156*097a140dSpatrick ErrorAsOutParameter ErrAsOut(Err);
157*097a140dSpatrick if (isError(Err))
158*097a140dSpatrick return StringRef();
15909467b48Spatrick
160*097a140dSpatrick uint64_t Start = *OffsetPtr;
16109467b48Spatrick StringRef::size_type Pos = Data.find('\0', Start);
16209467b48Spatrick if (Pos != StringRef::npos) {
163*097a140dSpatrick *OffsetPtr = Pos + 1;
16409467b48Spatrick return StringRef(Data.data() + Start, Pos - Start);
16509467b48Spatrick }
166*097a140dSpatrick if (Err)
167*097a140dSpatrick *Err = createStringError(errc::illegal_byte_sequence,
168*097a140dSpatrick "no null terminated string at offset 0x%" PRIx64,
169*097a140dSpatrick Start);
17009467b48Spatrick return StringRef();
17109467b48Spatrick }
17209467b48Spatrick
getFixedLengthString(uint64_t * OffsetPtr,uint64_t Length,StringRef TrimChars) const173*097a140dSpatrick StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
174*097a140dSpatrick uint64_t Length,
175*097a140dSpatrick StringRef TrimChars) const {
176*097a140dSpatrick StringRef Bytes(getBytes(OffsetPtr, Length));
177*097a140dSpatrick return Bytes.trim(TrimChars);
178*097a140dSpatrick }
179*097a140dSpatrick
getBytes(uint64_t * OffsetPtr,uint64_t Length,Error * Err) const180*097a140dSpatrick StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length,
181*097a140dSpatrick Error *Err) const {
18209467b48Spatrick ErrorAsOutParameter ErrAsOut(Err);
18309467b48Spatrick if (isError(Err))
184*097a140dSpatrick return StringRef();
185*097a140dSpatrick
186*097a140dSpatrick if (!prepareRead(*OffsetPtr, Length, Err))
187*097a140dSpatrick return StringRef();
188*097a140dSpatrick
189*097a140dSpatrick StringRef Result = Data.substr(*OffsetPtr, Length);
190*097a140dSpatrick *OffsetPtr += Length;
191*097a140dSpatrick return Result;
192*097a140dSpatrick }
193*097a140dSpatrick
194*097a140dSpatrick template <typename T>
getLEB128(StringRef Data,uint64_t * OffsetPtr,Error * Err,T (& Decoder)(const uint8_t * p,unsigned * n,const uint8_t * end,const char ** error))195*097a140dSpatrick static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
196*097a140dSpatrick T (&Decoder)(const uint8_t *p, unsigned *n,
197*097a140dSpatrick const uint8_t *end, const char **error)) {
198*097a140dSpatrick ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Data);
199*097a140dSpatrick assert(*OffsetPtr <= Bytes.size());
200*097a140dSpatrick ErrorAsOutParameter ErrAsOut(Err);
201*097a140dSpatrick if (isError(Err))
202*097a140dSpatrick return T();
20309467b48Spatrick
20409467b48Spatrick const char *error;
20509467b48Spatrick unsigned bytes_read;
206*097a140dSpatrick T result =
207*097a140dSpatrick Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
20809467b48Spatrick if (error) {
20909467b48Spatrick if (Err)
210*097a140dSpatrick *Err = createStringError(errc::illegal_byte_sequence,
211*097a140dSpatrick "unable to decode LEB128 at offset 0x%8.8" PRIx64
212*097a140dSpatrick ": %s",
213*097a140dSpatrick *OffsetPtr, error);
214*097a140dSpatrick return T();
21509467b48Spatrick }
216*097a140dSpatrick *OffsetPtr += bytes_read;
21709467b48Spatrick return result;
21809467b48Spatrick }
21909467b48Spatrick
getULEB128(uint64_t * offset_ptr,Error * Err) const220*097a140dSpatrick uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, Error *Err) const {
221*097a140dSpatrick return getLEB128(Data, offset_ptr, Err, decodeULEB128);
222*097a140dSpatrick }
22309467b48Spatrick
getSLEB128(uint64_t * offset_ptr,Error * Err) const224*097a140dSpatrick int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
225*097a140dSpatrick return getLEB128(Data, offset_ptr, Err, decodeSLEB128);
22609467b48Spatrick }
22709467b48Spatrick
skip(Cursor & C,uint64_t Length) const22809467b48Spatrick void DataExtractor::skip(Cursor &C, uint64_t Length) const {
22909467b48Spatrick ErrorAsOutParameter ErrAsOut(&C.Err);
23009467b48Spatrick if (isError(&C.Err))
23109467b48Spatrick return;
23209467b48Spatrick
233*097a140dSpatrick if (prepareRead(C.Offset, Length, &C.Err))
23409467b48Spatrick C.Offset += Length;
23509467b48Spatrick }
236