xref: /openbsd-src/gnu/llvm/lldb/source/Utility/StringExtractor.cpp (revision f6aab3d83b51b91c24247ad2c2573574de475a82)
1dda28197Spatrick //===-- StringExtractor.cpp -----------------------------------------------===//
2061da546Spatrick //
3061da546Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4061da546Spatrick // See https://llvm.org/LICENSE.txt for license information.
5061da546Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6061da546Spatrick //
7061da546Spatrick //===----------------------------------------------------------------------===//
8061da546Spatrick 
9061da546Spatrick #include "lldb/Utility/StringExtractor.h"
10dda28197Spatrick #include "llvm/ADT/StringExtras.h"
11061da546Spatrick 
12061da546Spatrick #include <tuple>
13061da546Spatrick 
14be691f3bSpatrick #include <cctype>
15be691f3bSpatrick #include <cstdlib>
16be691f3bSpatrick #include <cstring>
17061da546Spatrick 
// Converts one ASCII hexadecimal digit to its numeric value (0-15).
//
// Returns -1 when `ch` is not one of 0-9, a-f or A-F.
static inline int xdigit_to_sint(char ch) {
  const bool is_decimal = ('0' <= ch) && (ch <= '9');
  if (is_decimal)
    return ch - '0';

  const bool is_lower_hex = ('a' <= ch) && (ch <= 'f');
  if (is_lower_hex)
    return (ch - 'a') + 10;

  const bool is_upper_hex = ('A' <= ch) && (ch <= 'F');
  if (is_upper_hex)
    return (ch - 'A') + 10;

  return -1;
}
27061da546Spatrick 
28061da546Spatrick // StringExtractor constructor
StringExtractor()29be691f3bSpatrick StringExtractor::StringExtractor() : m_packet() {}
30061da546Spatrick 
StringExtractor(llvm::StringRef packet_str)31*f6aab3d8Srobert StringExtractor::StringExtractor(llvm::StringRef packet_str) : m_packet() {
32061da546Spatrick   m_packet.assign(packet_str.begin(), packet_str.end());
33061da546Spatrick }
34061da546Spatrick 
StringExtractor(const char * packet_cstr)35*f6aab3d8Srobert StringExtractor::StringExtractor(const char *packet_cstr) : m_packet() {
36061da546Spatrick   if (packet_cstr)
37061da546Spatrick     m_packet.assign(packet_cstr);
38061da546Spatrick }
39061da546Spatrick 
40061da546Spatrick // Destructor
41be691f3bSpatrick StringExtractor::~StringExtractor() = default;
42061da546Spatrick 
GetChar(char fail_value)43061da546Spatrick char StringExtractor::GetChar(char fail_value) {
44061da546Spatrick   if (m_index < m_packet.size()) {
45061da546Spatrick     char ch = m_packet[m_index];
46061da546Spatrick     ++m_index;
47061da546Spatrick     return ch;
48061da546Spatrick   }
49061da546Spatrick   m_index = UINT64_MAX;
50061da546Spatrick   return fail_value;
51061da546Spatrick }
52061da546Spatrick 
53061da546Spatrick // If a pair of valid hex digits exist at the head of the StringExtractor they
54061da546Spatrick // are decoded into an unsigned byte and returned by this function
55061da546Spatrick //
56061da546Spatrick // If there is not a pair of valid hex digits at the head of the
57061da546Spatrick // StringExtractor, it is left unchanged and -1 is returned
DecodeHexU8()58061da546Spatrick int StringExtractor::DecodeHexU8() {
59061da546Spatrick   SkipSpaces();
60061da546Spatrick   if (GetBytesLeft() < 2) {
61061da546Spatrick     return -1;
62061da546Spatrick   }
63061da546Spatrick   const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
64061da546Spatrick   const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
65061da546Spatrick   if (hi_nibble == -1 || lo_nibble == -1) {
66061da546Spatrick     return -1;
67061da546Spatrick   }
68061da546Spatrick   m_index += 2;
69061da546Spatrick   return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble);
70061da546Spatrick }
71061da546Spatrick 
72061da546Spatrick // Extract an unsigned character from two hex ASCII chars in the packet string,
73061da546Spatrick // or return fail_value on failure
GetHexU8(uint8_t fail_value,bool set_eof_on_fail)74061da546Spatrick uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
75061da546Spatrick   // On success, fail_value will be overwritten with the next character in the
76061da546Spatrick   // stream
77061da546Spatrick   GetHexU8Ex(fail_value, set_eof_on_fail);
78061da546Spatrick   return fail_value;
79061da546Spatrick }
80061da546Spatrick 
GetHexU8Ex(uint8_t & ch,bool set_eof_on_fail)81061da546Spatrick bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
82061da546Spatrick   int byte = DecodeHexU8();
83061da546Spatrick   if (byte == -1) {
84061da546Spatrick     if (set_eof_on_fail || m_index >= m_packet.size())
85061da546Spatrick       m_index = UINT64_MAX;
86061da546Spatrick     // ch should not be changed in case of failure
87061da546Spatrick     return false;
88061da546Spatrick   }
89061da546Spatrick   ch = static_cast<uint8_t>(byte);
90061da546Spatrick   return true;
91061da546Spatrick }
92061da546Spatrick 
GetU32(uint32_t fail_value,int base)93061da546Spatrick uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
94061da546Spatrick   if (m_index < m_packet.size()) {
95061da546Spatrick     char *end = nullptr;
96061da546Spatrick     const char *start = m_packet.c_str();
97061da546Spatrick     const char *cstr = start + m_index;
98061da546Spatrick     uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
99061da546Spatrick 
100061da546Spatrick     if (end && end != cstr) {
101061da546Spatrick       m_index = end - start;
102061da546Spatrick       return result;
103061da546Spatrick     }
104061da546Spatrick   }
105061da546Spatrick   return fail_value;
106061da546Spatrick }
107061da546Spatrick 
GetS32(int32_t fail_value,int base)108061da546Spatrick int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
109061da546Spatrick   if (m_index < m_packet.size()) {
110061da546Spatrick     char *end = nullptr;
111061da546Spatrick     const char *start = m_packet.c_str();
112061da546Spatrick     const char *cstr = start + m_index;
113061da546Spatrick     int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
114061da546Spatrick 
115061da546Spatrick     if (end && end != cstr) {
116061da546Spatrick       m_index = end - start;
117061da546Spatrick       return result;
118061da546Spatrick     }
119061da546Spatrick   }
120061da546Spatrick   return fail_value;
121061da546Spatrick }
122061da546Spatrick 
GetU64(uint64_t fail_value,int base)123061da546Spatrick uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
124061da546Spatrick   if (m_index < m_packet.size()) {
125061da546Spatrick     char *end = nullptr;
126061da546Spatrick     const char *start = m_packet.c_str();
127061da546Spatrick     const char *cstr = start + m_index;
128061da546Spatrick     uint64_t result = ::strtoull(cstr, &end, base);
129061da546Spatrick 
130061da546Spatrick     if (end && end != cstr) {
131061da546Spatrick       m_index = end - start;
132061da546Spatrick       return result;
133061da546Spatrick     }
134061da546Spatrick   }
135061da546Spatrick   return fail_value;
136061da546Spatrick }
137061da546Spatrick 
GetS64(int64_t fail_value,int base)138061da546Spatrick int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
139061da546Spatrick   if (m_index < m_packet.size()) {
140061da546Spatrick     char *end = nullptr;
141061da546Spatrick     const char *start = m_packet.c_str();
142061da546Spatrick     const char *cstr = start + m_index;
143061da546Spatrick     int64_t result = ::strtoll(cstr, &end, base);
144061da546Spatrick 
145061da546Spatrick     if (end && end != cstr) {
146061da546Spatrick       m_index = end - start;
147061da546Spatrick       return result;
148061da546Spatrick     }
149061da546Spatrick   }
150061da546Spatrick   return fail_value;
151061da546Spatrick }
152061da546Spatrick 
GetHexMaxU32(bool little_endian,uint32_t fail_value)153061da546Spatrick uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
154061da546Spatrick                                        uint32_t fail_value) {
155061da546Spatrick   uint32_t result = 0;
156061da546Spatrick   uint32_t nibble_count = 0;
157061da546Spatrick 
158061da546Spatrick   SkipSpaces();
159061da546Spatrick   if (little_endian) {
160061da546Spatrick     uint32_t shift_amount = 0;
161061da546Spatrick     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
162061da546Spatrick       // Make sure we don't exceed the size of a uint32_t...
163061da546Spatrick       if (nibble_count >= (sizeof(uint32_t) * 2)) {
164061da546Spatrick         m_index = UINT64_MAX;
165061da546Spatrick         return fail_value;
166061da546Spatrick       }
167061da546Spatrick 
168061da546Spatrick       uint8_t nibble_lo;
169061da546Spatrick       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
170061da546Spatrick       ++m_index;
171061da546Spatrick       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
172061da546Spatrick         nibble_lo = xdigit_to_sint(m_packet[m_index]);
173061da546Spatrick         ++m_index;
174061da546Spatrick         result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4));
175061da546Spatrick         result |= (static_cast<uint32_t>(nibble_lo) << shift_amount);
176061da546Spatrick         nibble_count += 2;
177061da546Spatrick         shift_amount += 8;
178061da546Spatrick       } else {
179061da546Spatrick         result |= (static_cast<uint32_t>(nibble_hi) << shift_amount);
180061da546Spatrick         nibble_count += 1;
181061da546Spatrick         shift_amount += 4;
182061da546Spatrick       }
183061da546Spatrick     }
184061da546Spatrick   } else {
185061da546Spatrick     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
186061da546Spatrick       // Make sure we don't exceed the size of a uint32_t...
187061da546Spatrick       if (nibble_count >= (sizeof(uint32_t) * 2)) {
188061da546Spatrick         m_index = UINT64_MAX;
189061da546Spatrick         return fail_value;
190061da546Spatrick       }
191061da546Spatrick 
192061da546Spatrick       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
193061da546Spatrick       // Big Endian
194061da546Spatrick       result <<= 4;
195061da546Spatrick       result |= nibble;
196061da546Spatrick 
197061da546Spatrick       ++m_index;
198061da546Spatrick       ++nibble_count;
199061da546Spatrick     }
200061da546Spatrick   }
201061da546Spatrick   return result;
202061da546Spatrick }
203061da546Spatrick 
GetHexMaxU64(bool little_endian,uint64_t fail_value)204061da546Spatrick uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
205061da546Spatrick                                        uint64_t fail_value) {
206061da546Spatrick   uint64_t result = 0;
207061da546Spatrick   uint32_t nibble_count = 0;
208061da546Spatrick 
209061da546Spatrick   SkipSpaces();
210061da546Spatrick   if (little_endian) {
211061da546Spatrick     uint32_t shift_amount = 0;
212061da546Spatrick     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
213061da546Spatrick       // Make sure we don't exceed the size of a uint64_t...
214061da546Spatrick       if (nibble_count >= (sizeof(uint64_t) * 2)) {
215061da546Spatrick         m_index = UINT64_MAX;
216061da546Spatrick         return fail_value;
217061da546Spatrick       }
218061da546Spatrick 
219061da546Spatrick       uint8_t nibble_lo;
220061da546Spatrick       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
221061da546Spatrick       ++m_index;
222061da546Spatrick       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
223061da546Spatrick         nibble_lo = xdigit_to_sint(m_packet[m_index]);
224061da546Spatrick         ++m_index;
225061da546Spatrick         result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4));
226061da546Spatrick         result |= (static_cast<uint64_t>(nibble_lo) << shift_amount);
227061da546Spatrick         nibble_count += 2;
228061da546Spatrick         shift_amount += 8;
229061da546Spatrick       } else {
230061da546Spatrick         result |= (static_cast<uint64_t>(nibble_hi) << shift_amount);
231061da546Spatrick         nibble_count += 1;
232061da546Spatrick         shift_amount += 4;
233061da546Spatrick       }
234061da546Spatrick     }
235061da546Spatrick   } else {
236061da546Spatrick     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
237061da546Spatrick       // Make sure we don't exceed the size of a uint64_t...
238061da546Spatrick       if (nibble_count >= (sizeof(uint64_t) * 2)) {
239061da546Spatrick         m_index = UINT64_MAX;
240061da546Spatrick         return fail_value;
241061da546Spatrick       }
242061da546Spatrick 
243061da546Spatrick       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
244061da546Spatrick       // Big Endian
245061da546Spatrick       result <<= 4;
246061da546Spatrick       result |= nibble;
247061da546Spatrick 
248061da546Spatrick       ++m_index;
249061da546Spatrick       ++nibble_count;
250061da546Spatrick     }
251061da546Spatrick   }
252061da546Spatrick   return result;
253061da546Spatrick }
254061da546Spatrick 
ConsumeFront(const llvm::StringRef & str)255061da546Spatrick bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
256061da546Spatrick   llvm::StringRef S = GetStringRef();
257061da546Spatrick   if (!S.startswith(str))
258061da546Spatrick     return false;
259061da546Spatrick   else
260061da546Spatrick     m_index += str.size();
261061da546Spatrick   return true;
262061da546Spatrick }
263061da546Spatrick 
GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,uint8_t fail_fill_value)264061da546Spatrick size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
265061da546Spatrick                                     uint8_t fail_fill_value) {
266061da546Spatrick   size_t bytes_extracted = 0;
267061da546Spatrick   while (!dest.empty() && GetBytesLeft() > 0) {
268061da546Spatrick     dest[0] = GetHexU8(fail_fill_value);
269061da546Spatrick     if (!IsGood())
270061da546Spatrick       break;
271061da546Spatrick     ++bytes_extracted;
272061da546Spatrick     dest = dest.drop_front();
273061da546Spatrick   }
274061da546Spatrick 
275061da546Spatrick   if (!dest.empty())
276061da546Spatrick     ::memset(dest.data(), fail_fill_value, dest.size());
277061da546Spatrick 
278061da546Spatrick   return bytes_extracted;
279061da546Spatrick }
280061da546Spatrick 
281061da546Spatrick // Decodes all valid hex encoded bytes at the head of the StringExtractor,
282061da546Spatrick // limited by dst_len.
283061da546Spatrick //
284061da546Spatrick // Returns the number of bytes successfully decoded
GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest)285061da546Spatrick size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
286061da546Spatrick   size_t bytes_extracted = 0;
287061da546Spatrick   while (!dest.empty()) {
288061da546Spatrick     int decode = DecodeHexU8();
289061da546Spatrick     if (decode == -1)
290061da546Spatrick       break;
291061da546Spatrick     dest[0] = static_cast<uint8_t>(decode);
292061da546Spatrick     dest = dest.drop_front();
293061da546Spatrick     ++bytes_extracted;
294061da546Spatrick   }
295061da546Spatrick   return bytes_extracted;
296061da546Spatrick }
297061da546Spatrick 
GetHexByteString(std::string & str)298061da546Spatrick size_t StringExtractor::GetHexByteString(std::string &str) {
299061da546Spatrick   str.clear();
300061da546Spatrick   str.reserve(GetBytesLeft() / 2);
301061da546Spatrick   char ch;
302061da546Spatrick   while ((ch = GetHexU8()) != '\0')
303061da546Spatrick     str.append(1, ch);
304061da546Spatrick   return str.size();
305061da546Spatrick }
306061da546Spatrick 
GetHexByteStringFixedLength(std::string & str,uint32_t nibble_length)307061da546Spatrick size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
308061da546Spatrick                                                     uint32_t nibble_length) {
309061da546Spatrick   str.clear();
310061da546Spatrick 
311061da546Spatrick   uint32_t nibble_count = 0;
312061da546Spatrick   for (const char *pch = Peek();
313061da546Spatrick        (nibble_count < nibble_length) && (pch != nullptr);
314061da546Spatrick        str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
315061da546Spatrick   }
316061da546Spatrick 
317061da546Spatrick   return str.size();
318061da546Spatrick }
319061da546Spatrick 
GetHexByteStringTerminatedBy(std::string & str,char terminator)320061da546Spatrick size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
321061da546Spatrick                                                      char terminator) {
322061da546Spatrick   str.clear();
323061da546Spatrick   char ch;
324061da546Spatrick   while ((ch = GetHexU8(0, false)) != '\0')
325061da546Spatrick     str.append(1, ch);
326061da546Spatrick   if (Peek() && *Peek() == terminator)
327061da546Spatrick     return str.size();
328061da546Spatrick 
329061da546Spatrick   str.clear();
330061da546Spatrick   return str.size();
331061da546Spatrick }
332061da546Spatrick 
GetNameColonValue(llvm::StringRef & name,llvm::StringRef & value)333061da546Spatrick bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
334061da546Spatrick                                         llvm::StringRef &value) {
335061da546Spatrick   // Read something in the form of NNNN:VVVV; where NNNN is any character that
336061da546Spatrick   // is not a colon, followed by a ':' character, then a value (one or more ';'
337061da546Spatrick   // chars), followed by a ';'
338061da546Spatrick   if (m_index >= m_packet.size())
339061da546Spatrick     return fail();
340061da546Spatrick 
341061da546Spatrick   llvm::StringRef view(m_packet);
342061da546Spatrick   if (view.empty())
343061da546Spatrick     return fail();
344061da546Spatrick 
345061da546Spatrick   llvm::StringRef a, b, c, d;
346061da546Spatrick   view = view.substr(m_index);
347061da546Spatrick   std::tie(a, b) = view.split(':');
348061da546Spatrick   if (a.empty() || b.empty())
349061da546Spatrick     return fail();
350061da546Spatrick   std::tie(c, d) = b.split(';');
351061da546Spatrick   if (b == c && d.empty())
352061da546Spatrick     return fail();
353061da546Spatrick 
354061da546Spatrick   name = a;
355061da546Spatrick   value = c;
356061da546Spatrick   if (d.empty())
357061da546Spatrick     m_index = m_packet.size();
358061da546Spatrick   else {
359061da546Spatrick     size_t bytes_consumed = d.data() - view.data();
360061da546Spatrick     m_index += bytes_consumed;
361061da546Spatrick   }
362061da546Spatrick   return true;
363061da546Spatrick }
364061da546Spatrick 
SkipSpaces()365061da546Spatrick void StringExtractor::SkipSpaces() {
366061da546Spatrick   const size_t n = m_packet.size();
367dda28197Spatrick   while (m_index < n && llvm::isSpace(m_packet[m_index]))
368061da546Spatrick     ++m_index;
369061da546Spatrick }
370