1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "json_internal.h" 35 36 static int 37 hex_value(uint8_t c) 38 { 39 #define V(x, y) [x] = y + 1 40 static const int8_t val[256] = { 41 V('0', 0), V('1', 1), V('2', 2), V('3', 3), V('4', 4), 42 V('5', 5), V('6', 6), V('7', 7), V('8', 8), V('9', 9), 43 V('A', 0xA), V('B', 0xB), V('C', 0xC), V('D', 0xD), V('E', 0xE), V('F', 0xF), 44 V('a', 0xA), V('b', 0xB), V('c', 0xC), V('d', 0xD), V('e', 0xE), V('f', 0xF), 45 }; 46 #undef V 47 48 return val[c] - 1; 49 } 50 51 static int 52 json_decode_string_escape_unicode(uint8_t **strp, uint8_t *buf_end, uint8_t *out) 53 { 54 uint8_t *str = *strp; 55 int v0, v1, v2, v3; 56 uint32_t val; 57 uint32_t surrogate_high = 0; 58 int rc; 59 decode: 60 /* \uXXXX */ 61 assert(buf_end > str); 62 63 if (*str++ != '\\') return SPDK_JSON_PARSE_INVALID; 64 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 65 66 if (*str++ != 'u') return SPDK_JSON_PARSE_INVALID; 67 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 68 69 if ((v3 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID; 70 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 71 72 if ((v2 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID; 73 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 74 75 if ((v1 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID; 76 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 77 78 if ((v0 = hex_value(*str++)) < 0) return SPDK_JSON_PARSE_INVALID; 79 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 80 81 val = v0 | (v1 << 4) | (v2 << 8) | (v3 << 12); 82 83 if (surrogate_high) { 84 /* We already parsed the high surrogate, so this should be the low part. */ 85 if (!utf16_valid_surrogate_low(val)) { 86 return SPDK_JSON_PARSE_INVALID; 87 } 88 89 /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */ 90 val = utf16_decode_surrogate_pair(surrogate_high, val); 91 } else if (utf16_valid_surrogate_high(val)) { 92 surrogate_high = val; 93 94 /* 95 * We parsed a \uXXXX sequence that decoded to the first half of a 96 * UTF-16 surrogate pair, so it must be immediately followed by another 97 * \uXXXX escape. 98 * 99 * Loop around to get the low half of the surrogate pair. 100 */ 101 if (buf_end == str) return SPDK_JSON_PARSE_INCOMPLETE; 102 goto decode; 103 } else if (utf16_valid_surrogate_low(val)) { 104 /* 105 * We found the second half of surrogate pair without the first half; 106 * this is an invalid encoding. 107 */ 108 return SPDK_JSON_PARSE_INVALID; 109 } 110 111 /* 112 * Convert Unicode escape (or surrogate pair) to UTF-8 in place. 113 * 114 * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes 115 * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a 116 * single codepoint is 4 bytes. 117 */ 118 if (out) { 119 rc = utf8_encode_unsafe(out, val); 120 } else { 121 rc = utf8_codepoint_len(val); 122 } 123 if (rc < 0) { 124 return SPDK_JSON_PARSE_INVALID; 125 } 126 127 *strp = str; /* update input pointer */ 128 return rc; /* return number of bytes decoded */ 129 } 130 131 static int 132 json_decode_string_escape_twochar(uint8_t **strp, uint8_t *buf_end, uint8_t *out) 133 { 134 static const uint8_t escapes[256] = { 135 ['b'] = '\b', 136 ['f'] = '\f', 137 ['n'] = '\n', 138 ['r'] = '\r', 139 ['t'] = '\t', 140 ['/'] = '/', 141 ['"'] = '"', 142 ['\\'] = '\\', 143 }; 144 uint8_t *str = *strp; 145 uint8_t c; 146 147 assert(buf_end > str); 148 if (buf_end - str < 2) { 149 return SPDK_JSON_PARSE_INCOMPLETE; 150 } 151 152 assert(str[0] == '\\'); 153 154 c = escapes[str[1]]; 155 if (c) { 156 if (out) { 157 *out = c; 158 } 159 *strp += 2; /* consumed two bytes */ 160 return 1; /* produced one byte */ 161 } 162 163 return SPDK_JSON_PARSE_INVALID; 164 } 165 166 /* 167 * Decode JSON string backslash escape. 168 * \param strp pointer to pointer to first character of escape (the backslash). 169 * *strp is also advanced to indicate how much input was consumed. 170 * 171 * \return Number of bytes appended to out 172 */ 173 static int 174 json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out) 175 { 176 int rc; 177 178 rc = json_decode_string_escape_twochar(strp, buf_end, out); 179 if (rc > 0) { 180 return rc; 181 } 182 183 return json_decode_string_escape_unicode(strp, buf_end, out); 184 } 185 186 /* 187 * Decode JSON string in place. 188 * 189 * \param str_start Pointer to the beginning of the string (the opening " character). 190 * 191 * \return Number of bytes in decoded string (beginning from start). 192 */ 193 static int 194 json_decode_string(uint8_t *str_start, uint8_t *buf_end, uint8_t **str_end, uint32_t flags) 195 { 196 uint8_t *str = str_start; 197 uint8_t *out = str_start + 1; /* Decode string in place (skip the initial quote) */ 198 int rc; 199 200 if (buf_end - str_start < 2) { 201 /* 202 * Shortest valid string (the empty string) is two bytes (""), 203 * so this can't possibly be valid 204 */ 205 return SPDK_JSON_PARSE_INCOMPLETE; 206 } 207 208 if (*str++ != '"') { 209 return SPDK_JSON_PARSE_INVALID; 210 } 211 212 while (str < buf_end) { 213 if (str[0] == '"') { 214 /* 215 * End of string. 216 * Update str_end to point at next input byte and return output length. 217 */ 218 *str_end = str + 1; 219 return out - str_start - 1; 220 } else if (str[0] == '\\') { 221 rc = json_decode_string_escape(&str, buf_end, 222 flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE ? out : NULL); 223 assert(rc != 0); 224 if (rc < 0) { 225 return rc; 226 } 227 out += rc; 228 } else if (str[0] <= 0x1f) { 229 /* control characters must be escaped */ 230 return SPDK_JSON_PARSE_INVALID; 231 } else { 232 rc = utf8_valid(str, buf_end); 233 if (rc == 0) { 234 return SPDK_JSON_PARSE_INCOMPLETE; 235 } else if (rc < 0) { 236 return SPDK_JSON_PARSE_INVALID; 237 } 238 239 if (out && out != str && (flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE)) { 240 memmove(out, str, rc); 241 } 242 out += rc; 243 str += rc; 244 } 245 } 246 247 /* If execution gets here, we ran out of buffer. */ 248 return SPDK_JSON_PARSE_INCOMPLETE; 249 } 250 251 static int 252 json_valid_number(uint8_t *start, uint8_t *buf_end) 253 { 254 uint8_t *p = start; 255 enum { 256 NUM_STATE_START, 257 NUM_STATE_INT_FIRST_DIGIT, 258 NUM_STATE_INT_DIGITS, 259 NUM_STATE_FRAC_OR_EXP, 260 NUM_STATE_FRAC_FIRST_DIGIT, 261 NUM_STATE_FRAC_DIGITS, 262 NUM_STATE_EXP_SIGN, 263 NUM_STATE_EXP_FIRST_DIGIT, 264 NUM_STATE_EXP_DIGITS, 265 } state = NUM_STATE_START; 266 267 if (p >= buf_end) return -1; 268 269 while (p != buf_end) { 270 uint8_t c = *p++; 271 272 switch (c) { 273 case '0': 274 if (state == NUM_STATE_START || state == NUM_STATE_INT_FIRST_DIGIT) { 275 /* 276 * If the very first digit is 0, 277 * it must be the last digit of the integer part 278 * (no leading zeroes allowed). 279 */ 280 state = NUM_STATE_FRAC_OR_EXP; 281 break; 282 } 283 /* fallthrough */ 284 case '1': 285 case '2': 286 case '3': 287 case '4': 288 case '5': 289 case '6': 290 case '7': 291 case '8': 292 case '9': 293 switch (state) { 294 case NUM_STATE_START: 295 case NUM_STATE_INT_FIRST_DIGIT: 296 state = NUM_STATE_INT_DIGITS; 297 break; 298 299 case NUM_STATE_FRAC_FIRST_DIGIT: 300 state = NUM_STATE_FRAC_DIGITS; 301 break; 302 303 case NUM_STATE_EXP_SIGN: 304 case NUM_STATE_EXP_FIRST_DIGIT: 305 state = NUM_STATE_EXP_DIGITS; 306 break; 307 308 case NUM_STATE_INT_DIGITS: 309 case NUM_STATE_FRAC_DIGITS: 310 case NUM_STATE_EXP_DIGITS: 311 /* stay in same state */ 312 break; 313 314 default: 315 return SPDK_JSON_PARSE_INVALID; 316 } 317 break; 318 319 case '.': 320 if (state != NUM_STATE_INT_DIGITS && state != NUM_STATE_FRAC_OR_EXP) { 321 return SPDK_JSON_PARSE_INVALID; 322 } 323 state = NUM_STATE_FRAC_FIRST_DIGIT; 324 break; 325 326 case 'e': 327 case 'E': 328 switch (state) { 329 case NUM_STATE_INT_DIGITS: 330 case NUM_STATE_FRAC_OR_EXP: 331 case NUM_STATE_FRAC_DIGITS: 332 state = NUM_STATE_EXP_SIGN; 333 break; 334 default: 335 return SPDK_JSON_PARSE_INVALID; 336 } 337 break; 338 339 case '-': 340 if (state == NUM_STATE_START) { 341 state = NUM_STATE_INT_FIRST_DIGIT; 342 break; 343 } 344 /* fallthrough */ 345 case '+': 346 if (state == NUM_STATE_EXP_SIGN) { 347 state = NUM_STATE_EXP_FIRST_DIGIT; 348 } else { 349 return SPDK_JSON_PARSE_INVALID; 350 } 351 break; 352 default: 353 /* 354 * Got an unexpected character - back up and stop parsing number. 355 * The top-level parsing code will handle invalid trailing characters. 356 */ 357 p--; 358 goto done; 359 } 360 } 361 362 done: 363 switch (state) { 364 case NUM_STATE_INT_DIGITS: 365 case NUM_STATE_FRAC_OR_EXP: 366 case NUM_STATE_FRAC_DIGITS: 367 case NUM_STATE_EXP_DIGITS: 368 /* Valid end state */ 369 return p - start; 370 371 default: 372 return SPDK_JSON_PARSE_INCOMPLETE; 373 } 374 } 375 376 struct json_literal { 377 enum spdk_json_val_type type; 378 uint32_t len; 379 uint8_t str[8]; 380 }; 381 382 /* 383 * JSON only defines 3 possible literals; they can be uniquely identified by bits 384 * 3 and 4 of the first character: 385 * 'f' = 0b11[00]110 386 * 'n' = 0b11[01]110 387 * 't' = 0b11[10]100 388 * These two bits can be used as an index into the g_json_literals array. 389 */ 390 static const struct json_literal g_json_literals[] = { 391 {SPDK_JSON_VAL_FALSE, 5, "false"}, 392 {SPDK_JSON_VAL_NULL, 4, "null"}, 393 {SPDK_JSON_VAL_TRUE, 4, "true"}, 394 {} 395 }; 396 397 static int 398 match_literal(const uint8_t *start, const uint8_t *end, const uint8_t *literal, size_t len) 399 { 400 assert(end >= start); 401 if ((size_t)(end - start) < len) { 402 return SPDK_JSON_PARSE_INCOMPLETE; 403 } 404 405 if (memcmp(start, literal, len) != 0) { 406 return SPDK_JSON_PARSE_INVALID; 407 } 408 409 return len; 410 } 411 412 ssize_t 413 spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values, 414 void **end, uint32_t flags) 415 { 416 uint8_t *json_end = json + size; 417 enum spdk_json_val_type containers[SPDK_JSON_MAX_NESTING_DEPTH]; 418 size_t con_value[SPDK_JSON_MAX_NESTING_DEPTH]; 419 enum spdk_json_val_type con_type = SPDK_JSON_VAL_INVALID; 420 bool trailing_comma = false; 421 size_t depth = 0; /* index into containers */ 422 size_t cur_value = 0; /* index into values */ 423 size_t con_start_value; 424 uint8_t *data = json; 425 uint8_t *new_data; 426 int rc; 427 const struct json_literal *lit; 428 enum { 429 STATE_VALUE, /* initial state */ 430 STATE_VALUE_SEPARATOR, /* value separator (comma) */ 431 STATE_NAME, /* "name": value */ 432 STATE_NAME_SEPARATOR, /* colon */ 433 STATE_END, /* parsed the complete value, so only whitespace is valid */ 434 } state = STATE_VALUE; 435 436 #define ADD_VALUE(t, val_start_ptr, val_end_ptr) \ 437 if (values && cur_value < num_values) { \ 438 values[cur_value].type = t; \ 439 values[cur_value].start = val_start_ptr; \ 440 values[cur_value].len = val_end_ptr - val_start_ptr; \ 441 } \ 442 cur_value++ 443 444 while (data < json_end) { 445 uint8_t c = *data; 446 447 switch (c) { 448 case ' ': 449 case '\t': 450 case '\r': 451 case '\n': 452 /* Whitespace is allowed between any tokens. */ 453 data++; 454 break; 455 456 case 't': 457 case 'f': 458 case 'n': 459 /* true, false, or null */ 460 if (state != STATE_VALUE) return SPDK_JSON_PARSE_INVALID; 461 lit = &g_json_literals[(c >> 3) & 3]; /* See comment above g_json_literals[] */ 462 assert(lit->str[0] == c); 463 rc = match_literal(data, json_end, lit->str, lit->len); 464 if (rc < 0) return rc; 465 ADD_VALUE(lit->type, data, data + rc); 466 data += rc; 467 state = depth ? STATE_VALUE_SEPARATOR : STATE_END; 468 trailing_comma = false; 469 break; 470 471 case '"': 472 if (state != STATE_VALUE && state != STATE_NAME) return SPDK_JSON_PARSE_INVALID; 473 rc = json_decode_string(data, json_end, &new_data, flags); 474 if (rc < 0) return rc; 475 /* 476 * Start is data + 1 to skip initial quote. 477 * Length is data + rc - 1 to skip both quotes. 478 */ 479 ADD_VALUE(state == STATE_VALUE ? SPDK_JSON_VAL_STRING : SPDK_JSON_VAL_NAME, 480 data + 1, data + rc - 1); 481 data = new_data; 482 if (state == STATE_NAME) { 483 state = STATE_NAME_SEPARATOR; 484 } else { 485 state = depth ? STATE_VALUE_SEPARATOR : STATE_END; 486 } 487 trailing_comma = false; 488 break; 489 490 case '-': 491 case '0': 492 case '1': 493 case '2': 494 case '3': 495 case '4': 496 case '5': 497 case '6': 498 case '7': 499 case '8': 500 case '9': 501 if (state != STATE_VALUE) return SPDK_JSON_PARSE_INVALID; 502 rc = json_valid_number(data, json_end); 503 if (rc < 0) return rc; 504 ADD_VALUE(SPDK_JSON_VAL_NUMBER, data, data + rc); 505 data += rc; 506 state = depth ? STATE_VALUE_SEPARATOR : STATE_END; 507 trailing_comma = false; 508 break; 509 510 case '{': 511 case '[': 512 if (state != STATE_VALUE) return SPDK_JSON_PARSE_INVALID; 513 if (depth == SPDK_JSON_MAX_NESTING_DEPTH) { 514 return SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED; 515 } 516 if (c == '{') { 517 con_type = SPDK_JSON_VAL_OBJECT_BEGIN; 518 state = STATE_NAME; 519 } else { 520 con_type = SPDK_JSON_VAL_ARRAY_BEGIN; 521 state = STATE_VALUE; 522 } 523 con_value[depth] = cur_value; 524 containers[depth++] = con_type; 525 ADD_VALUE(con_type, data, data + 1); 526 data++; 527 trailing_comma = false; 528 break; 529 530 case '}': 531 case ']': 532 if (trailing_comma) return SPDK_JSON_PARSE_INVALID; 533 if (depth == 0) return SPDK_JSON_PARSE_INVALID; 534 con_type = containers[--depth]; 535 con_start_value = con_value[depth]; 536 if (values && con_start_value < num_values) { 537 values[con_start_value].len = cur_value - con_start_value - 1; 538 } 539 if (c == '}') { 540 if (state != STATE_NAME && state != STATE_VALUE_SEPARATOR) { 541 return SPDK_JSON_PARSE_INVALID; 542 } 543 if (con_type != SPDK_JSON_VAL_OBJECT_BEGIN) { 544 return SPDK_JSON_PARSE_INVALID; 545 } 546 ADD_VALUE(SPDK_JSON_VAL_OBJECT_END, data, data + 1); 547 } else { 548 if (state != STATE_VALUE && state != STATE_VALUE_SEPARATOR) { 549 return SPDK_JSON_PARSE_INVALID; 550 } 551 if (con_type != SPDK_JSON_VAL_ARRAY_BEGIN) { 552 return SPDK_JSON_PARSE_INVALID; 553 } 554 ADD_VALUE(SPDK_JSON_VAL_ARRAY_END, data, data + 1); 555 } 556 con_type = depth == 0 ? SPDK_JSON_VAL_INVALID : containers[depth - 1]; 557 data++; 558 state = depth ? STATE_VALUE_SEPARATOR : STATE_END; 559 trailing_comma = false; 560 break; 561 562 case ',': 563 if (state != STATE_VALUE_SEPARATOR) return SPDK_JSON_PARSE_INVALID; 564 data++; 565 assert(con_type == SPDK_JSON_VAL_ARRAY_BEGIN || 566 con_type == SPDK_JSON_VAL_OBJECT_BEGIN); 567 state = con_type == SPDK_JSON_VAL_ARRAY_BEGIN ? STATE_VALUE : STATE_NAME; 568 trailing_comma = true; 569 break; 570 571 case ':': 572 if (state != STATE_NAME_SEPARATOR) return SPDK_JSON_PARSE_INVALID; 573 data++; 574 state = STATE_VALUE; 575 break; 576 577 default: 578 return SPDK_JSON_PARSE_INVALID; 579 } 580 581 if (state == STATE_END) { 582 break; 583 } 584 } 585 586 if (state == STATE_END) { 587 /* Skip trailing whitespace */ 588 while (data < json_end) { 589 uint8_t c = *data; 590 591 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { 592 data++; 593 } else { 594 break; 595 } 596 } 597 598 /* 599 * These asserts are just for sanity checking - they are guaranteed by the allowed 600 * state transitions. 601 */ 602 assert(depth == 0); 603 assert(trailing_comma == false); 604 assert(data <= json_end); 605 if (end) { 606 *end = data; 607 } 608 return cur_value; 609 } 610 611 /* Invalid end state - ran out of data */ 612 if (end) { 613 *end = data; 614 } 615 return SPDK_JSON_PARSE_INCOMPLETE; 616 } 617