xref: /llvm-project/lldb/source/Utility/StringExtractor.cpp (revision 9cbc8db016e1d7e09fe87c370c0bc68414cbdb29)
1 //===-- StringExtractor.cpp -------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "lldb/Utility/StringExtractor.h"
11 
12 // C Includes
13 #include <stdlib.h>
14 
15 // C++ Includes
16 #include <tuple>
17 // Other libraries and framework includes
18 // Project includes
19 #include "llvm/Support/Endian.h"
20 
// Convert a single ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to
// its numeric value in the range [0, 15].  Any other character yields -1.
static inline int
xdigit_to_sint (char ch)
{
    const bool is_decimal = (ch >= '0' && ch <= '9');
    if (is_decimal)
        return ch - '0';

    const bool is_lower_hex = (ch >= 'a' && ch <= 'f');
    if (is_lower_hex)
        return (ch - 'a') + 10;

    const bool is_upper_hex = (ch >= 'A' && ch <= 'F');
    return is_upper_hex ? (ch - 'A') + 10 : -1;
}
32 
33 //----------------------------------------------------------------------
34 // StringExtractor constructor
35 //----------------------------------------------------------------------
36 StringExtractor::StringExtractor() :
37     m_packet(),
38     m_index (0)
39 {
40 }
41 
42 StringExtractor::StringExtractor(llvm::StringRef packet_str) : m_packet(), m_index(0)
43 {
44     m_packet.assign(packet_str.begin(), packet_str.end());
45 }
46 
47 StringExtractor::StringExtractor(const char *packet_cstr) :
48     m_packet(),
49     m_index (0)
50 {
51     if (packet_cstr)
52         m_packet.assign (packet_cstr);
53 }
54 
55 
56 //----------------------------------------------------------------------
57 // StringExtractor copy constructor
58 //----------------------------------------------------------------------
59 StringExtractor::StringExtractor(const StringExtractor& rhs) :
60     m_packet (rhs.m_packet),
61     m_index (rhs.m_index)
62 {
63 
64 }
65 
66 //----------------------------------------------------------------------
67 // StringExtractor assignment operator
68 //----------------------------------------------------------------------
69 const StringExtractor&
70 StringExtractor::operator=(const StringExtractor& rhs)
71 {
72     if (this != &rhs)
73     {
74         m_packet = rhs.m_packet;
75         m_index = rhs.m_index;
76 
77     }
78     return *this;
79 }
80 
81 //----------------------------------------------------------------------
82 // Destructor
83 //----------------------------------------------------------------------
84 StringExtractor::~StringExtractor()
85 {
86 }
87 
88 
89 char
90 StringExtractor::GetChar (char fail_value)
91 {
92     if (m_index < m_packet.size())
93     {
94         char ch = m_packet[m_index];
95         ++m_index;
96         return ch;
97     }
98     m_index = UINT64_MAX;
99     return fail_value;
100 }
101 
102 //----------------------------------------------------------------------
103 // If a pair of valid hex digits exist at the head of the
104 // StringExtractor they are decoded into an unsigned byte and returned
105 // by this function
106 //
107 // If there is not a pair of valid hex digits at the head of the
108 // StringExtractor, it is left unchanged and -1 is returned
109 //----------------------------------------------------------------------
110 int
111 StringExtractor::DecodeHexU8()
112 {
113     SkipSpaces();
114     if (GetBytesLeft() < 2)
115     {
116         return -1;
117     }
118     const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
119     const int lo_nibble = xdigit_to_sint(m_packet[m_index+1]);
120     if (hi_nibble == -1 || lo_nibble == -1)
121     {
122         return -1;
123     }
124     m_index += 2;
125     return (uint8_t)((hi_nibble << 4) + lo_nibble);
126 }
127 
128 //----------------------------------------------------------------------
129 // Extract an unsigned character from two hex ASCII chars in the packet
130 // string, or return fail_value on failure
131 //----------------------------------------------------------------------
132 uint8_t
133 StringExtractor::GetHexU8 (uint8_t fail_value, bool set_eof_on_fail)
134 {
135     // On success, fail_value will be overwritten with the next
136     // character in the stream
137     GetHexU8Ex(fail_value, set_eof_on_fail);
138     return fail_value;
139 }
140 
141 bool
142 StringExtractor::GetHexU8Ex (uint8_t& ch, bool set_eof_on_fail)
143 {
144     int byte = DecodeHexU8();
145     if (byte == -1)
146     {
147         if (set_eof_on_fail || m_index >= m_packet.size())
148             m_index = UINT64_MAX;
149         // ch should not be changed in case of failure
150         return false;
151     }
152     ch = (uint8_t)byte;
153     return true;
154 }
155 
156 uint32_t
157 StringExtractor::GetU32 (uint32_t fail_value, int base)
158 {
159     if (m_index < m_packet.size())
160     {
161         char *end = nullptr;
162         const char *start = m_packet.c_str();
163         const char *cstr = start + m_index;
164         uint32_t result = static_cast<uint32_t>(::strtoul (cstr, &end, base));
165 
166         if (end && end != cstr)
167         {
168             m_index = end - start;
169             return result;
170         }
171     }
172     return fail_value;
173 }
174 
175 int32_t
176 StringExtractor::GetS32 (int32_t fail_value, int base)
177 {
178     if (m_index < m_packet.size())
179     {
180         char *end = nullptr;
181         const char *start = m_packet.c_str();
182         const char *cstr = start + m_index;
183         int32_t result = static_cast<int32_t>(::strtol (cstr, &end, base));
184 
185         if (end && end != cstr)
186         {
187             m_index = end - start;
188             return result;
189         }
190     }
191     return fail_value;
192 }
193 
194 
195 uint64_t
196 StringExtractor::GetU64 (uint64_t fail_value, int base)
197 {
198     if (m_index < m_packet.size())
199     {
200         char *end = nullptr;
201         const char *start = m_packet.c_str();
202         const char *cstr = start + m_index;
203         uint64_t result = ::strtoull (cstr, &end, base);
204 
205         if (end && end != cstr)
206         {
207             m_index = end - start;
208             return result;
209         }
210     }
211     return fail_value;
212 }
213 
214 int64_t
215 StringExtractor::GetS64 (int64_t fail_value, int base)
216 {
217     if (m_index < m_packet.size())
218     {
219         char *end = nullptr;
220         const char *start = m_packet.c_str();
221         const char *cstr = start + m_index;
222         int64_t result = ::strtoll (cstr, &end, base);
223 
224         if (end && end != cstr)
225         {
226             m_index = end - start;
227             return result;
228         }
229     }
230     return fail_value;
231 }
232 
233 uint32_t
234 StringExtractor::GetHexMaxU32 (bool little_endian, uint32_t fail_value)
235 {
236     SkipSpaces();
237 
238     // Allocate enough space for 2 uint32's.  In big endian, if the user writes
239     // "AB" then this should be treated as 0xAB, not 0xAB000000.  In order to
240     // do this, we decode into the second half of the array, and then shift the
241     // starting point of the big endian translation left by however many bytes
242     // of a uint32 were missing from the input.  We're essentially padding left
243     // with 0's.
244     uint8_t bytes[2 * sizeof(uint32_t) - 1] = {0};
245     auto byte_array = llvm::MutableArrayRef<uint8_t>(bytes);
246     auto decode_loc = byte_array.drop_front(sizeof(uint32_t) - 1);
247     uint32_t bytes_decoded = GetHexBytesAvail(decode_loc);
248     if (bytes_decoded == sizeof(uint32_t) && ::isxdigit(PeekChar()))
249         return fail();
250 
251     using namespace llvm::support;
252     if (little_endian)
253         return endian::read<uint32_t, endianness::little>(decode_loc.data());
254     else
255     {
256         decode_loc = byte_array.drop_front(bytes_decoded - 1).take_front(sizeof(uint32_t));
257         return endian::read<uint32_t, endianness::big>(decode_loc.data());
258     }
259 }
260 
261 uint64_t
262 StringExtractor::GetHexMaxU64 (bool little_endian, uint64_t fail_value)
263 {
264     SkipSpaces();
265 
266     // Allocate enough space for 2 uint32's.  In big endian, if the user writes
267     // "AB" then this should be treated as 0xAB, not 0xAB000000.  In order to
268     // do this, we decode into the second half of the array, and then shift the
269     // starting point of the big endian translation left by however many bytes
270     // of a uint32 were missing from the input.  We're essentially padding left
271     // with 0's.
272     uint8_t bytes[2 * sizeof(uint64_t) - 1] = {0};
273     auto byte_array = llvm::MutableArrayRef<uint8_t>(bytes);
274     auto decode_loc = byte_array.drop_front(sizeof(uint64_t) - 1);
275     uint32_t bytes_decoded = GetHexBytesAvail(decode_loc);
276     if (bytes_decoded == sizeof(uint64_t) && ::isxdigit(PeekChar()))
277         return fail();
278 
279     using namespace llvm::support;
280     if (little_endian)
281         return endian::read<uint64_t, endianness::little>(decode_loc.data());
282     else
283     {
284         decode_loc = byte_array.drop_front(bytes_decoded - 1).take_front(sizeof(uint64_t));
285         return endian::read<uint64_t, endianness::big>(decode_loc.data());
286     }
287 }
288 
289 size_t
290 StringExtractor::GetHexBytes (llvm::MutableArrayRef<uint8_t> dest, uint8_t fail_fill_value)
291 {
292     size_t bytes_extracted = 0;
293     while (!dest.empty() && GetBytesLeft() > 0)
294     {
295         dest[0] = GetHexU8 (fail_fill_value);
296         if (!IsGood())
297             break;
298         ++bytes_extracted;
299         dest = dest.drop_front();
300     }
301 
302     if (!dest.empty())
303         ::memset(dest.data(), fail_fill_value, dest.size());
304 
305     return bytes_extracted;
306 }
307 
308 //----------------------------------------------------------------------
309 // Decodes all valid hex encoded bytes at the head of the
310 // StringExtractor, limited by dst_len.
311 //
312 // Returns the number of bytes successfully decoded
313 //----------------------------------------------------------------------
314 size_t
315 StringExtractor::GetHexBytesAvail (llvm::MutableArrayRef<uint8_t> dest)
316 {
317     size_t bytes_extracted = 0;
318     while (!dest.empty())
319     {
320         int decode = DecodeHexU8();
321         if (decode == -1)
322             break;
323         dest[0] = (uint8_t)decode;
324         dest = dest.drop_front();
325         ++bytes_extracted;
326     }
327     return bytes_extracted;
328 }
329 
330 // Consume ASCII hex nibble character pairs until we have decoded byte_size
331 // bytes of data.
332 
333 uint64_t
334 StringExtractor::GetHexWithFixedSize (uint32_t byte_size, bool little_endian, uint64_t fail_value)
335 {
336     if (byte_size <= 8 && GetBytesLeft() >= byte_size * 2)
337     {
338         uint64_t result = 0;
339         uint32_t i;
340         if (little_endian)
341         {
342             // Little Endian
343             uint32_t shift_amount;
344             for (i = 0, shift_amount = 0;
345                  i < byte_size && IsGood();
346                  ++i, shift_amount += 8)
347             {
348                 result |= ((uint64_t)GetHexU8() << shift_amount);
349             }
350         }
351         else
352         {
353             // Big Endian
354             for (i = 0; i < byte_size && IsGood(); ++i)
355             {
356                 result <<= 8;
357                 result |= GetHexU8();
358             }
359         }
360     }
361     m_index = UINT64_MAX;
362     return fail_value;
363 }
364 
365 size_t
366 StringExtractor::GetHexByteString (std::string &str)
367 {
368     str.clear();
369     str.reserve(GetBytesLeft() / 2);
370     char ch;
371     while ((ch = GetHexU8()) != '\0')
372         str.append(1, ch);
373     return str.size();
374 }
375 
376 size_t
377 StringExtractor::GetHexByteStringFixedLength (std::string &str, uint32_t nibble_length)
378 {
379     str.clear();
380 
381     uint32_t nibble_count = 0;
382     for (const char *pch = Peek(); (nibble_count < nibble_length) && (pch != nullptr); str.append(1, GetHexU8(0, false)), pch = Peek (), nibble_count += 2)
383     {}
384 
385     return str.size();
386 }
387 
388 size_t
389 StringExtractor::GetHexByteStringTerminatedBy (std::string &str,
390                                                char terminator)
391 {
392     str.clear();
393     char ch;
394     while ((ch = GetHexU8(0,false)) != '\0')
395         str.append(1, ch);
396     if (Peek() && *Peek() == terminator)
397         return str.size();
398 
399     str.clear();
400     return str.size();
401 }
402 
403 bool
404 StringExtractor::GetNameColonValue(llvm::StringRef &name, llvm::StringRef &value)
405 {
406     // Read something in the form of NNNN:VVVV; where NNNN is any character
407     // that is not a colon, followed by a ':' character, then a value (one or
408     // more ';' chars), followed by a ';'
409     if (m_index >= m_packet.size())
410         return fail();
411 
412     llvm::StringRef view(m_packet);
413     if (view.empty())
414         return fail();
415 
416     llvm::StringRef a, b, c, d;
417     view = view.substr(m_index);
418     std::tie(a, b) = view.split(':');
419     if (a.empty() || b.empty())
420         return fail();
421     std::tie(c, d) = b.split(';');
422     if (b == c && d.empty())
423         return fail();
424 
425     name = a;
426     value = c;
427     if (d.empty())
428         m_index = m_packet.size();
429     else
430     {
431         size_t bytes_consumed = d.data() - view.data();
432         m_index += bytes_consumed;
433     }
434     return true;
435 }
436 
437 void
438 StringExtractor::SkipSpaces ()
439 {
440     const size_t n = m_packet.size();
441     while (m_index < n && isspace(m_packet[m_index]))
442         ++m_index;
443 }
444 
445