1 //===-- StringExtractor.cpp -------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "lldb/Utility/StringExtractor.h" 11 12 // C Includes 13 #include <stdlib.h> 14 15 // C++ Includes 16 #include <tuple> 17 // Other libraries and framework includes 18 // Project includes 19 #include "llvm/Support/Endian.h" 20 21 static inline int 22 xdigit_to_sint (char ch) 23 { 24 if (ch >= 'a' && ch <= 'f') 25 return 10 + ch - 'a'; 26 if (ch >= 'A' && ch <= 'F') 27 return 10 + ch - 'A'; 28 if (ch >= '0' && ch <= '9') 29 return ch - '0'; 30 return -1; 31 } 32 33 //---------------------------------------------------------------------- 34 // StringExtractor constructor 35 //---------------------------------------------------------------------- 36 StringExtractor::StringExtractor() : 37 m_packet(), 38 m_index (0) 39 { 40 } 41 42 StringExtractor::StringExtractor(llvm::StringRef packet_str) : m_packet(), m_index(0) 43 { 44 m_packet.assign(packet_str.begin(), packet_str.end()); 45 } 46 47 StringExtractor::StringExtractor(const char *packet_cstr) : 48 m_packet(), 49 m_index (0) 50 { 51 if (packet_cstr) 52 m_packet.assign (packet_cstr); 53 } 54 55 56 //---------------------------------------------------------------------- 57 // StringExtractor copy constructor 58 //---------------------------------------------------------------------- 59 StringExtractor::StringExtractor(const StringExtractor& rhs) : 60 m_packet (rhs.m_packet), 61 m_index (rhs.m_index) 62 { 63 64 } 65 66 //---------------------------------------------------------------------- 67 // StringExtractor assignment operator 68 //---------------------------------------------------------------------- 69 const StringExtractor& 70 StringExtractor::operator=(const StringExtractor& rhs) 71 { 72 if (this != &rhs) 73 { 74 m_packet = rhs.m_packet; 75 m_index = rhs.m_index; 76 77 } 78 return *this; 79 } 80 81 //---------------------------------------------------------------------- 82 // Destructor 83 //---------------------------------------------------------------------- 84 StringExtractor::~StringExtractor() 85 { 86 } 87 88 89 char 90 StringExtractor::GetChar (char fail_value) 91 { 92 if (m_index < m_packet.size()) 93 { 94 char ch = m_packet[m_index]; 95 ++m_index; 96 return ch; 97 } 98 m_index = UINT64_MAX; 99 return fail_value; 100 } 101 102 //---------------------------------------------------------------------- 103 // If a pair of valid hex digits exist at the head of the 104 // StringExtractor they are decoded into an unsigned byte and returned 105 // by this function 106 // 107 // If there is not a pair of valid hex digits at the head of the 108 // StringExtractor, it is left unchanged and -1 is returned 109 //---------------------------------------------------------------------- 110 int 111 StringExtractor::DecodeHexU8() 112 { 113 SkipSpaces(); 114 if (GetBytesLeft() < 2) 115 { 116 return -1; 117 } 118 const int hi_nibble = xdigit_to_sint(m_packet[m_index]); 119 const int lo_nibble = xdigit_to_sint(m_packet[m_index+1]); 120 if (hi_nibble == -1 || lo_nibble == -1) 121 { 122 return -1; 123 } 124 m_index += 2; 125 return (uint8_t)((hi_nibble << 4) + lo_nibble); 126 } 127 128 //---------------------------------------------------------------------- 129 // Extract an unsigned character from two hex ASCII chars in the packet 130 // string, or return fail_value on failure 131 //---------------------------------------------------------------------- 132 uint8_t 133 StringExtractor::GetHexU8 (uint8_t fail_value, bool set_eof_on_fail) 134 { 135 // On success, fail_value will be overwritten with the next 136 // character in the stream 137 GetHexU8Ex(fail_value, set_eof_on_fail); 138 return fail_value; 139 } 140 141 bool 142 StringExtractor::GetHexU8Ex (uint8_t& ch, bool set_eof_on_fail) 143 { 144 int byte = DecodeHexU8(); 145 if (byte == -1) 146 { 147 if (set_eof_on_fail || m_index >= m_packet.size()) 148 m_index = UINT64_MAX; 149 // ch should not be changed in case of failure 150 return false; 151 } 152 ch = (uint8_t)byte; 153 return true; 154 } 155 156 uint32_t 157 StringExtractor::GetU32 (uint32_t fail_value, int base) 158 { 159 if (m_index < m_packet.size()) 160 { 161 char *end = nullptr; 162 const char *start = m_packet.c_str(); 163 const char *cstr = start + m_index; 164 uint32_t result = static_cast<uint32_t>(::strtoul (cstr, &end, base)); 165 166 if (end && end != cstr) 167 { 168 m_index = end - start; 169 return result; 170 } 171 } 172 return fail_value; 173 } 174 175 int32_t 176 StringExtractor::GetS32 (int32_t fail_value, int base) 177 { 178 if (m_index < m_packet.size()) 179 { 180 char *end = nullptr; 181 const char *start = m_packet.c_str(); 182 const char *cstr = start + m_index; 183 int32_t result = static_cast<int32_t>(::strtol (cstr, &end, base)); 184 185 if (end && end != cstr) 186 { 187 m_index = end - start; 188 return result; 189 } 190 } 191 return fail_value; 192 } 193 194 195 uint64_t 196 StringExtractor::GetU64 (uint64_t fail_value, int base) 197 { 198 if (m_index < m_packet.size()) 199 { 200 char *end = nullptr; 201 const char *start = m_packet.c_str(); 202 const char *cstr = start + m_index; 203 uint64_t result = ::strtoull (cstr, &end, base); 204 205 if (end && end != cstr) 206 { 207 m_index = end - start; 208 return result; 209 } 210 } 211 return fail_value; 212 } 213 214 int64_t 215 StringExtractor::GetS64 (int64_t fail_value, int base) 216 { 217 if (m_index < m_packet.size()) 218 { 219 char *end = nullptr; 220 const char *start = m_packet.c_str(); 221 const char *cstr = start + m_index; 222 int64_t result = ::strtoll (cstr, &end, base); 223 224 if (end && end != cstr) 225 { 226 m_index = end - start; 227 return result; 228 } 229 } 230 return fail_value; 231 } 232 233 uint32_t 234 StringExtractor::GetHexMaxU32 (bool little_endian, uint32_t fail_value) 235 { 236 SkipSpaces(); 237 238 // Allocate enough space for 2 uint32's. In big endian, if the user writes 239 // "AB" then this should be treated as 0xAB, not 0xAB000000. In order to 240 // do this, we decode into the second half of the array, and then shift the 241 // starting point of the big endian translation left by however many bytes 242 // of a uint32 were missing from the input. We're essentially padding left 243 // with 0's. 244 uint8_t bytes[2 * sizeof(uint32_t) - 1] = {0}; 245 auto byte_array = llvm::MutableArrayRef<uint8_t>(bytes); 246 auto decode_loc = byte_array.drop_front(sizeof(uint32_t) - 1); 247 uint32_t bytes_decoded = GetHexBytesAvail(decode_loc); 248 if (bytes_decoded == sizeof(uint32_t) && ::isxdigit(PeekChar())) 249 return fail(); 250 251 using namespace llvm::support; 252 if (little_endian) 253 return endian::read<uint32_t, endianness::little>(decode_loc.data()); 254 else 255 { 256 decode_loc = byte_array.drop_front(bytes_decoded - 1).take_front(sizeof(uint32_t)); 257 return endian::read<uint32_t, endianness::big>(decode_loc.data()); 258 } 259 } 260 261 uint64_t 262 StringExtractor::GetHexMaxU64 (bool little_endian, uint64_t fail_value) 263 { 264 SkipSpaces(); 265 266 // Allocate enough space for 2 uint32's. In big endian, if the user writes 267 // "AB" then this should be treated as 0xAB, not 0xAB000000. In order to 268 // do this, we decode into the second half of the array, and then shift the 269 // starting point of the big endian translation left by however many bytes 270 // of a uint32 were missing from the input. We're essentially padding left 271 // with 0's. 272 uint8_t bytes[2 * sizeof(uint64_t) - 1] = {0}; 273 auto byte_array = llvm::MutableArrayRef<uint8_t>(bytes); 274 auto decode_loc = byte_array.drop_front(sizeof(uint64_t) - 1); 275 uint32_t bytes_decoded = GetHexBytesAvail(decode_loc); 276 if (bytes_decoded == sizeof(uint64_t) && ::isxdigit(PeekChar())) 277 return fail(); 278 279 using namespace llvm::support; 280 if (little_endian) 281 return endian::read<uint64_t, endianness::little>(decode_loc.data()); 282 else 283 { 284 decode_loc = byte_array.drop_front(bytes_decoded - 1).take_front(sizeof(uint64_t)); 285 return endian::read<uint64_t, endianness::big>(decode_loc.data()); 286 } 287 } 288 289 size_t 290 StringExtractor::GetHexBytes (llvm::MutableArrayRef<uint8_t> dest, uint8_t fail_fill_value) 291 { 292 size_t bytes_extracted = 0; 293 while (!dest.empty() && GetBytesLeft() > 0) 294 { 295 dest[0] = GetHexU8 (fail_fill_value); 296 if (!IsGood()) 297 break; 298 ++bytes_extracted; 299 dest = dest.drop_front(); 300 } 301 302 if (!dest.empty()) 303 ::memset(dest.data(), fail_fill_value, dest.size()); 304 305 return bytes_extracted; 306 } 307 308 //---------------------------------------------------------------------- 309 // Decodes all valid hex encoded bytes at the head of the 310 // StringExtractor, limited by dst_len. 311 // 312 // Returns the number of bytes successfully decoded 313 //---------------------------------------------------------------------- 314 size_t 315 StringExtractor::GetHexBytesAvail (llvm::MutableArrayRef<uint8_t> dest) 316 { 317 size_t bytes_extracted = 0; 318 while (!dest.empty()) 319 { 320 int decode = DecodeHexU8(); 321 if (decode == -1) 322 break; 323 dest[0] = (uint8_t)decode; 324 dest = dest.drop_front(); 325 ++bytes_extracted; 326 } 327 return bytes_extracted; 328 } 329 330 // Consume ASCII hex nibble character pairs until we have decoded byte_size 331 // bytes of data. 332 333 uint64_t 334 StringExtractor::GetHexWithFixedSize (uint32_t byte_size, bool little_endian, uint64_t fail_value) 335 { 336 if (byte_size <= 8 && GetBytesLeft() >= byte_size * 2) 337 { 338 uint64_t result = 0; 339 uint32_t i; 340 if (little_endian) 341 { 342 // Little Endian 343 uint32_t shift_amount; 344 for (i = 0, shift_amount = 0; 345 i < byte_size && IsGood(); 346 ++i, shift_amount += 8) 347 { 348 result |= ((uint64_t)GetHexU8() << shift_amount); 349 } 350 } 351 else 352 { 353 // Big Endian 354 for (i = 0; i < byte_size && IsGood(); ++i) 355 { 356 result <<= 8; 357 result |= GetHexU8(); 358 } 359 } 360 } 361 m_index = UINT64_MAX; 362 return fail_value; 363 } 364 365 size_t 366 StringExtractor::GetHexByteString (std::string &str) 367 { 368 str.clear(); 369 str.reserve(GetBytesLeft() / 2); 370 char ch; 371 while ((ch = GetHexU8()) != '\0') 372 str.append(1, ch); 373 return str.size(); 374 } 375 376 size_t 377 StringExtractor::GetHexByteStringFixedLength (std::string &str, uint32_t nibble_length) 378 { 379 str.clear(); 380 381 uint32_t nibble_count = 0; 382 for (const char *pch = Peek(); (nibble_count < nibble_length) && (pch != nullptr); str.append(1, GetHexU8(0, false)), pch = Peek (), nibble_count += 2) 383 {} 384 385 return str.size(); 386 } 387 388 size_t 389 StringExtractor::GetHexByteStringTerminatedBy (std::string &str, 390 char terminator) 391 { 392 str.clear(); 393 char ch; 394 while ((ch = GetHexU8(0,false)) != '\0') 395 str.append(1, ch); 396 if (Peek() && *Peek() == terminator) 397 return str.size(); 398 399 str.clear(); 400 return str.size(); 401 } 402 403 bool 404 StringExtractor::GetNameColonValue(llvm::StringRef &name, llvm::StringRef &value) 405 { 406 // Read something in the form of NNNN:VVVV; where NNNN is any character 407 // that is not a colon, followed by a ':' character, then a value (one or 408 // more ';' chars), followed by a ';' 409 if (m_index >= m_packet.size()) 410 return fail(); 411 412 llvm::StringRef view(m_packet); 413 if (view.empty()) 414 return fail(); 415 416 llvm::StringRef a, b, c, d; 417 view = view.substr(m_index); 418 std::tie(a, b) = view.split(':'); 419 if (a.empty() || b.empty()) 420 return fail(); 421 std::tie(c, d) = b.split(';'); 422 if (b == c && d.empty()) 423 return fail(); 424 425 name = a; 426 value = c; 427 if (d.empty()) 428 m_index = m_packet.size(); 429 else 430 { 431 size_t bytes_consumed = d.data() - view.data(); 432 m_index += bytes_consumed; 433 } 434 return true; 435 } 436 437 void 438 StringExtractor::SkipSpaces () 439 { 440 const size_t n = m_packet.size(); 441 while (m_index < n && isspace(m_packet[m_index])) 442 ++m_index; 443 } 444 445