xref: /llvm-project/lldb/source/Utility/StringExtractor.cpp (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //===-- StringExtractor.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/StringExtractor.h"
10 
11 #include <tuple>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
// Convert one ASCII hexadecimal digit to its numeric value (0-15).
// Returns -1 when ch is not one of [0-9a-fA-F].
static inline int xdigit_to_sint(char ch) {
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('a' <= ch && ch <= 'f')
    return ch - 'a' + 10;
  if ('A' <= ch && ch <= 'F')
    return ch - 'A' + 10;
  return -1;
}
26 
27 //----------------------------------------------------------------------
28 // StringExtractor constructor
29 //----------------------------------------------------------------------
30 StringExtractor::StringExtractor() : m_packet(), m_index(0) {}
31 
32 StringExtractor::StringExtractor(llvm::StringRef packet_str)
33     : m_packet(), m_index(0) {
34   m_packet.assign(packet_str.begin(), packet_str.end());
35 }
36 
37 StringExtractor::StringExtractor(const char *packet_cstr)
38     : m_packet(), m_index(0) {
39   if (packet_cstr)
40     m_packet.assign(packet_cstr);
41 }
42 
43 //----------------------------------------------------------------------
44 // StringExtractor copy constructor
45 //----------------------------------------------------------------------
46 StringExtractor::StringExtractor(const StringExtractor &rhs)
47     : m_packet(rhs.m_packet), m_index(rhs.m_index) {}
48 
49 //----------------------------------------------------------------------
50 // StringExtractor assignment operator
51 //----------------------------------------------------------------------
52 const StringExtractor &StringExtractor::operator=(const StringExtractor &rhs) {
53   if (this != &rhs) {
54     m_packet = rhs.m_packet;
55     m_index = rhs.m_index;
56   }
57   return *this;
58 }
59 
60 //----------------------------------------------------------------------
61 // Destructor
62 //----------------------------------------------------------------------
63 StringExtractor::~StringExtractor() {}
64 
65 char StringExtractor::GetChar(char fail_value) {
66   if (m_index < m_packet.size()) {
67     char ch = m_packet[m_index];
68     ++m_index;
69     return ch;
70   }
71   m_index = UINT64_MAX;
72   return fail_value;
73 }
74 
75 //----------------------------------------------------------------------
76 // If a pair of valid hex digits exist at the head of the StringExtractor they
77 // are decoded into an unsigned byte and returned by this function
78 //
79 // If there is not a pair of valid hex digits at the head of the
80 // StringExtractor, it is left unchanged and -1 is returned
81 //----------------------------------------------------------------------
82 int StringExtractor::DecodeHexU8() {
83   SkipSpaces();
84   if (GetBytesLeft() < 2) {
85     return -1;
86   }
87   const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
88   const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
89   if (hi_nibble == -1 || lo_nibble == -1) {
90     return -1;
91   }
92   m_index += 2;
93   return (uint8_t)((hi_nibble << 4) + lo_nibble);
94 }
95 
96 //----------------------------------------------------------------------
97 // Extract an unsigned character from two hex ASCII chars in the packet string,
98 // or return fail_value on failure
99 //----------------------------------------------------------------------
100 uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
101   // On success, fail_value will be overwritten with the next character in the
102   // stream
103   GetHexU8Ex(fail_value, set_eof_on_fail);
104   return fail_value;
105 }
106 
107 bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
108   int byte = DecodeHexU8();
109   if (byte == -1) {
110     if (set_eof_on_fail || m_index >= m_packet.size())
111       m_index = UINT64_MAX;
112     // ch should not be changed in case of failure
113     return false;
114   }
115   ch = (uint8_t)byte;
116   return true;
117 }
118 
119 uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
120   if (m_index < m_packet.size()) {
121     char *end = nullptr;
122     const char *start = m_packet.c_str();
123     const char *cstr = start + m_index;
124     uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
125 
126     if (end && end != cstr) {
127       m_index = end - start;
128       return result;
129     }
130   }
131   return fail_value;
132 }
133 
134 int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
135   if (m_index < m_packet.size()) {
136     char *end = nullptr;
137     const char *start = m_packet.c_str();
138     const char *cstr = start + m_index;
139     int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
140 
141     if (end && end != cstr) {
142       m_index = end - start;
143       return result;
144     }
145   }
146   return fail_value;
147 }
148 
149 uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
150   if (m_index < m_packet.size()) {
151     char *end = nullptr;
152     const char *start = m_packet.c_str();
153     const char *cstr = start + m_index;
154     uint64_t result = ::strtoull(cstr, &end, base);
155 
156     if (end && end != cstr) {
157       m_index = end - start;
158       return result;
159     }
160   }
161   return fail_value;
162 }
163 
164 int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
165   if (m_index < m_packet.size()) {
166     char *end = nullptr;
167     const char *start = m_packet.c_str();
168     const char *cstr = start + m_index;
169     int64_t result = ::strtoll(cstr, &end, base);
170 
171     if (end && end != cstr) {
172       m_index = end - start;
173       return result;
174     }
175   }
176   return fail_value;
177 }
178 
179 uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
180                                        uint32_t fail_value) {
181   uint32_t result = 0;
182   uint32_t nibble_count = 0;
183 
184   SkipSpaces();
185   if (little_endian) {
186     uint32_t shift_amount = 0;
187     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
188       // Make sure we don't exceed the size of a uint32_t...
189       if (nibble_count >= (sizeof(uint32_t) * 2)) {
190         m_index = UINT64_MAX;
191         return fail_value;
192       }
193 
194       uint8_t nibble_lo;
195       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
196       ++m_index;
197       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
198         nibble_lo = xdigit_to_sint(m_packet[m_index]);
199         ++m_index;
200         result |= ((uint32_t)nibble_hi << (shift_amount + 4));
201         result |= ((uint32_t)nibble_lo << shift_amount);
202         nibble_count += 2;
203         shift_amount += 8;
204       } else {
205         result |= ((uint32_t)nibble_hi << shift_amount);
206         nibble_count += 1;
207         shift_amount += 4;
208       }
209     }
210   } else {
211     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
212       // Make sure we don't exceed the size of a uint32_t...
213       if (nibble_count >= (sizeof(uint32_t) * 2)) {
214         m_index = UINT64_MAX;
215         return fail_value;
216       }
217 
218       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
219       // Big Endian
220       result <<= 4;
221       result |= nibble;
222 
223       ++m_index;
224       ++nibble_count;
225     }
226   }
227   return result;
228 }
229 
230 uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
231                                        uint64_t fail_value) {
232   uint64_t result = 0;
233   uint32_t nibble_count = 0;
234 
235   SkipSpaces();
236   if (little_endian) {
237     uint32_t shift_amount = 0;
238     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
239       // Make sure we don't exceed the size of a uint64_t...
240       if (nibble_count >= (sizeof(uint64_t) * 2)) {
241         m_index = UINT64_MAX;
242         return fail_value;
243       }
244 
245       uint8_t nibble_lo;
246       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
247       ++m_index;
248       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
249         nibble_lo = xdigit_to_sint(m_packet[m_index]);
250         ++m_index;
251         result |= ((uint64_t)nibble_hi << (shift_amount + 4));
252         result |= ((uint64_t)nibble_lo << shift_amount);
253         nibble_count += 2;
254         shift_amount += 8;
255       } else {
256         result |= ((uint64_t)nibble_hi << shift_amount);
257         nibble_count += 1;
258         shift_amount += 4;
259       }
260     }
261   } else {
262     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
263       // Make sure we don't exceed the size of a uint64_t...
264       if (nibble_count >= (sizeof(uint64_t) * 2)) {
265         m_index = UINT64_MAX;
266         return fail_value;
267       }
268 
269       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
270       // Big Endian
271       result <<= 4;
272       result |= nibble;
273 
274       ++m_index;
275       ++nibble_count;
276     }
277   }
278   return result;
279 }
280 
281 bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
282   llvm::StringRef S = GetStringRef();
283   if (!S.startswith(str))
284     return false;
285   else
286     m_index += str.size();
287   return true;
288 }
289 
290 size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
291                                     uint8_t fail_fill_value) {
292   size_t bytes_extracted = 0;
293   while (!dest.empty() && GetBytesLeft() > 0) {
294     dest[0] = GetHexU8(fail_fill_value);
295     if (!IsGood())
296       break;
297     ++bytes_extracted;
298     dest = dest.drop_front();
299   }
300 
301   if (!dest.empty())
302     ::memset(dest.data(), fail_fill_value, dest.size());
303 
304   return bytes_extracted;
305 }
306 
307 //----------------------------------------------------------------------
308 // Decodes all valid hex encoded bytes at the head of the StringExtractor,
309 // limited by dst_len.
310 //
311 // Returns the number of bytes successfully decoded
312 //----------------------------------------------------------------------
313 size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
314   size_t bytes_extracted = 0;
315   while (!dest.empty()) {
316     int decode = DecodeHexU8();
317     if (decode == -1)
318       break;
319     dest[0] = (uint8_t)decode;
320     dest = dest.drop_front();
321     ++bytes_extracted;
322   }
323   return bytes_extracted;
324 }
325 
326 // Consume ASCII hex nibble character pairs until we have decoded byte_size
327 // bytes of data.
328 
329 uint64_t StringExtractor::GetHexWithFixedSize(uint32_t byte_size,
330                                               bool little_endian,
331                                               uint64_t fail_value) {
332   if (byte_size <= 8 && GetBytesLeft() >= byte_size * 2) {
333     uint64_t result = 0;
334     uint32_t i;
335     if (little_endian) {
336       // Little Endian
337       uint32_t shift_amount;
338       for (i = 0, shift_amount = 0; i < byte_size && IsGood();
339            ++i, shift_amount += 8) {
340         result |= ((uint64_t)GetHexU8() << shift_amount);
341       }
342     } else {
343       // Big Endian
344       for (i = 0; i < byte_size && IsGood(); ++i) {
345         result <<= 8;
346         result |= GetHexU8();
347       }
348     }
349   }
350   m_index = UINT64_MAX;
351   return fail_value;
352 }
353 
354 size_t StringExtractor::GetHexByteString(std::string &str) {
355   str.clear();
356   str.reserve(GetBytesLeft() / 2);
357   char ch;
358   while ((ch = GetHexU8()) != '\0')
359     str.append(1, ch);
360   return str.size();
361 }
362 
363 size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
364                                                     uint32_t nibble_length) {
365   str.clear();
366 
367   uint32_t nibble_count = 0;
368   for (const char *pch = Peek();
369        (nibble_count < nibble_length) && (pch != nullptr);
370        str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
371   }
372 
373   return str.size();
374 }
375 
376 size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
377                                                      char terminator) {
378   str.clear();
379   char ch;
380   while ((ch = GetHexU8(0, false)) != '\0')
381     str.append(1, ch);
382   if (Peek() && *Peek() == terminator)
383     return str.size();
384 
385   str.clear();
386   return str.size();
387 }
388 
389 bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
390                                         llvm::StringRef &value) {
391   // Read something in the form of NNNN:VVVV; where NNNN is any character that
392   // is not a colon, followed by a ':' character, then a value (one or more ';'
393   // chars), followed by a ';'
394   if (m_index >= m_packet.size())
395     return fail();
396 
397   llvm::StringRef view(m_packet);
398   if (view.empty())
399     return fail();
400 
401   llvm::StringRef a, b, c, d;
402   view = view.substr(m_index);
403   std::tie(a, b) = view.split(':');
404   if (a.empty() || b.empty())
405     return fail();
406   std::tie(c, d) = b.split(';');
407   if (b == c && d.empty())
408     return fail();
409 
410   name = a;
411   value = c;
412   if (d.empty())
413     m_index = m_packet.size();
414   else {
415     size_t bytes_consumed = d.data() - view.data();
416     m_index += bytes_consumed;
417   }
418   return true;
419 }
420 
421 void StringExtractor::SkipSpaces() {
422   const size_t n = m_packet.size();
423   while (m_index < n && isspace(m_packet[m_index]))
424     ++m_index;
425 }
426