1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (C) 2016 Intel Corporation.
3 * All rights reserved.
4 */
5
6 #include "spdk/json.h"
7
8 #include "spdk_internal/utf.h"
9
/*
 * Maximum number of simultaneously open containers (objects/arrays) the parser tracks.
 * spdk_json_parse() sizes its container stacks with this value and fails with
 * SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED when the input tries to nest deeper.
 */
#define SPDK_JSON_MAX_NESTING_DEPTH 64
11
/*
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * \param c Input byte.
 *
 * \return 0-15 for '0'-'9', 'A'-'F' and 'a'-'f'; -1 for any other byte.
 */
static int
hex_value(uint8_t c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	}

	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 0xA;
	}

	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 0xA;
	}

	/* Not a hex digit */
	return -1;
}
26
27 static int
json_decode_string_escape_unicode(uint8_t ** strp,uint8_t * buf_end,uint8_t * out)28 json_decode_string_escape_unicode(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
29 {
30 uint8_t *str = *strp;
31 int v0, v1, v2, v3;
32 uint32_t val;
33 uint32_t surrogate_high = 0;
34 int rc;
35 decode:
36 /* \uXXXX */
37 assert(buf_end > str);
38
39 if (*str++ != '\\') { return SPDK_JSON_PARSE_INVALID; }
40 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
41
42 if (*str++ != 'u') { return SPDK_JSON_PARSE_INVALID; }
43 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
44
45 if ((v3 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
46 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
47
48 if ((v2 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
49 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
50
51 if ((v1 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
52 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
53
54 if ((v0 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
55 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
56
57 val = v0 | (v1 << 4) | (v2 << 8) | (v3 << 12);
58
59 if (surrogate_high) {
60 /* We already parsed the high surrogate, so this should be the low part. */
61 if (!utf16_valid_surrogate_low(val)) {
62 return SPDK_JSON_PARSE_INVALID;
63 }
64
65 /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */
66 val = utf16_decode_surrogate_pair(surrogate_high, val);
67 } else if (utf16_valid_surrogate_high(val)) {
68 surrogate_high = val;
69
70 /*
71 * We parsed a \uXXXX sequence that decoded to the first half of a
72 * UTF-16 surrogate pair, so it must be immediately followed by another
73 * \uXXXX escape.
74 *
75 * Loop around to get the low half of the surrogate pair.
76 */
77 if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
78 goto decode;
79 } else if (utf16_valid_surrogate_low(val)) {
80 /*
81 * We found the second half of surrogate pair without the first half;
82 * this is an invalid encoding.
83 */
84 return SPDK_JSON_PARSE_INVALID;
85 }
86
87 /*
88 * Convert Unicode escape (or surrogate pair) to UTF-8 in place.
89 *
90 * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes
91 * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a
92 * single codepoint is 4 bytes.
93 */
94 if (out) {
95 rc = utf8_encode_unsafe(out, val);
96 } else {
97 rc = utf8_codepoint_len(val);
98 }
99 if (rc < 0) {
100 return SPDK_JSON_PARSE_INVALID;
101 }
102
103 *strp = str; /* update input pointer */
104 return rc; /* return number of bytes decoded */
105 }
106
107 static int
json_decode_string_escape_twochar(uint8_t ** strp,uint8_t * buf_end,uint8_t * out)108 json_decode_string_escape_twochar(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
109 {
110 static const uint8_t escapes[256] = {
111 ['b'] = '\b',
112 ['f'] = '\f',
113 ['n'] = '\n',
114 ['r'] = '\r',
115 ['t'] = '\t',
116 ['/'] = '/',
117 ['"'] = '"',
118 ['\\'] = '\\',
119 };
120 uint8_t *str = *strp;
121 uint8_t c;
122
123 assert(buf_end > str);
124 if (buf_end - str < 2) {
125 return SPDK_JSON_PARSE_INCOMPLETE;
126 }
127
128 assert(str[0] == '\\');
129
130 c = escapes[str[1]];
131 if (c) {
132 if (out) {
133 *out = c;
134 }
135 *strp += 2; /* consumed two bytes */
136 return 1; /* produced one byte */
137 }
138
139 return SPDK_JSON_PARSE_INVALID;
140 }
141
/*
 * Decode one JSON string backslash escape of either form.
 *
 * The two-character escapes are tried first; anything they do not accept is
 * handed to the \uXXXX decoder, whose result is returned as-is.
 *
 * \param strp pointer to pointer to first character of escape (the backslash).
 * *strp is also advanced to indicate how much input was consumed.
 * \param buf_end End of the input buffer.
 * \param out Destination for the decoded bytes, or NULL.
 *
 * \return Number of bytes appended to out, or a negative SPDK_JSON_PARSE_* code.
 */
static int
json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
{
	int produced = json_decode_string_escape_twochar(strp, buf_end, out);

	if (produced <= 0) {
		produced = json_decode_string_escape_unicode(strp, buf_end, out);
	}

	return produced;
}
161
162 /*
163 * Decode JSON string in place.
164 *
165 * \param str_start Pointer to the beginning of the string (the opening " character).
166 *
167 * \return Number of bytes in decoded string (beginning from start).
168 */
169 static int
json_decode_string(uint8_t * str_start,uint8_t * buf_end,uint8_t ** str_end,uint32_t flags)170 json_decode_string(uint8_t *str_start, uint8_t *buf_end, uint8_t **str_end, uint32_t flags)
171 {
172 uint8_t *str = str_start;
173 uint8_t *out = str_start + 1; /* Decode string in place (skip the initial quote) */
174 int rc;
175
176 if (buf_end - str_start < 2) {
177 /*
178 * Shortest valid string (the empty string) is two bytes (""),
179 * so this can't possibly be valid
180 */
181 *str_end = str;
182 return SPDK_JSON_PARSE_INCOMPLETE;
183 }
184
185 if (*str++ != '"') {
186 *str_end = str;
187 return SPDK_JSON_PARSE_INVALID;
188 }
189
190 while (str < buf_end) {
191 if (str[0] == '"') {
192 /*
193 * End of string.
194 * Update str_end to point at next input byte and return output length.
195 */
196 *str_end = str + 1;
197 return out - str_start - 1;
198 } else if (str[0] == '\\') {
199 rc = json_decode_string_escape(&str, buf_end,
200 flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE ? out : NULL);
201 assert(rc != 0);
202 if (rc < 0) {
203 *str_end = str;
204 return rc;
205 }
206 out += rc;
207 } else if (str[0] <= 0x1f) {
208 /* control characters must be escaped */
209 *str_end = str;
210 return SPDK_JSON_PARSE_INVALID;
211 } else {
212 rc = utf8_valid(str, buf_end);
213 if (rc == 0) {
214 *str_end = str;
215 return SPDK_JSON_PARSE_INCOMPLETE;
216 } else if (rc < 0) {
217 *str_end = str;
218 return SPDK_JSON_PARSE_INVALID;
219 }
220
221 if (out && out != str && (flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE)) {
222 memmove(out, str, rc);
223 }
224 out += rc;
225 str += rc;
226 }
227 }
228
229 /* If execution gets here, we ran out of buffer. */
230 *str_end = str;
231 return SPDK_JSON_PARSE_INCOMPLETE;
232 }
233
234 static int
json_valid_number(uint8_t * start,uint8_t * buf_end)235 json_valid_number(uint8_t *start, uint8_t *buf_end)
236 {
237 uint8_t *p = start;
238 uint8_t c;
239
240 if (p >= buf_end) { return -1; }
241
242 c = *p++;
243 if (c >= '1' && c <= '9') { goto num_int_digits; }
244 if (c == '0') { goto num_frac_or_exp; }
245 if (c == '-') { goto num_int_first_digit; }
246 p--;
247 goto done_invalid;
248
249 num_int_first_digit:
250 if (spdk_likely(p != buf_end)) {
251 c = *p++;
252 if (c == '0') { goto num_frac_or_exp; }
253 if (c >= '1' && c <= '9') { goto num_int_digits; }
254 p--;
255 }
256 goto done_invalid;
257
258 num_int_digits:
259 if (spdk_likely(p != buf_end)) {
260 c = *p++;
261 if (c >= '0' && c <= '9') { goto num_int_digits; }
262 if (c == '.') { goto num_frac_first_digit; }
263 if (c == 'e' || c == 'E') { goto num_exp_sign; }
264 p--;
265 }
266 goto done_valid;
267
268 num_frac_or_exp:
269 if (spdk_likely(p != buf_end)) {
270 c = *p++;
271 if (c == '.') { goto num_frac_first_digit; }
272 if (c == 'e' || c == 'E') { goto num_exp_sign; }
273 p--;
274 }
275 goto done_valid;
276
277 num_frac_first_digit:
278 if (spdk_likely(p != buf_end)) {
279 c = *p++;
280 if (c >= '0' && c <= '9') { goto num_frac_digits; }
281 p--;
282 }
283 goto done_invalid;
284
285 num_frac_digits:
286 if (spdk_likely(p != buf_end)) {
287 c = *p++;
288 if (c >= '0' && c <= '9') { goto num_frac_digits; }
289 if (c == 'e' || c == 'E') { goto num_exp_sign; }
290 p--;
291 }
292 goto done_valid;
293
294 num_exp_sign:
295 if (spdk_likely(p != buf_end)) {
296 c = *p++;
297 if (c >= '0' && c <= '9') { goto num_exp_digits; }
298 if (c == '-' || c == '+') { goto num_exp_first_digit; }
299 p--;
300 }
301 goto done_invalid;
302
303 num_exp_first_digit:
304 if (spdk_likely(p != buf_end)) {
305 c = *p++;
306 if (c >= '0' && c <= '9') { goto num_exp_digits; }
307 p--;
308 }
309 goto done_invalid;
310
311 num_exp_digits:
312 if (spdk_likely(p != buf_end)) {
313 c = *p++;
314 if (c >= '0' && c <= '9') { goto num_exp_digits; }
315 p--;
316 }
317 goto done_valid;
318
319 done_valid:
320 /* Valid end state */
321 return p - start;
322
323 done_invalid:
324 /* Invalid end state */
325 if (p == buf_end) {
326 /* Hit the end of the buffer - the stream is incomplete. */
327 return SPDK_JSON_PARSE_INCOMPLETE;
328 }
329
330 /* Found an invalid character in an invalid end state */
331 return SPDK_JSON_PARSE_INVALID;
332 }
333
334 static int
json_valid_comment(const uint8_t * start,const uint8_t * buf_end)335 json_valid_comment(const uint8_t *start, const uint8_t *buf_end)
336 {
337 const uint8_t *p = start;
338 bool multiline;
339
340 assert(buf_end > p);
341 if (buf_end - p < 2) {
342 return SPDK_JSON_PARSE_INCOMPLETE;
343 }
344
345 if (p[0] != '/') {
346 return SPDK_JSON_PARSE_INVALID;
347 }
348 if (p[1] == '*') {
349 multiline = true;
350 } else if (p[1] == '/') {
351 multiline = false;
352 } else {
353 return SPDK_JSON_PARSE_INVALID;
354 }
355 p += 2;
356
357 if (multiline) {
358 while (p != buf_end - 1) {
359 if (p[0] == '*' && p[1] == '/') {
360 /* Include the terminating star and slash in the comment */
361 return p - start + 2;
362 }
363 p++;
364 }
365 } else {
366 while (p != buf_end) {
367 if (*p == '\r' || *p == '\n') {
368 /* Do not include the line terminator in the comment */
369 return p - start;
370 }
371 p++;
372 }
373 }
374
375 return SPDK_JSON_PARSE_INCOMPLETE;
376 }
377
378 struct json_literal {
379 enum spdk_json_val_type type;
380 uint32_t len;
381 uint8_t str[8];
382 };
383
384 /*
385 * JSON only defines 3 possible literals; they can be uniquely identified by bits
386 * 3 and 4 of the first character:
387 * 'f' = 0b11[00]110
388 * 'n' = 0b11[01]110
389 * 't' = 0b11[10]100
390 * These two bits can be used as an index into the g_json_literals array.
391 */
392 static const struct json_literal g_json_literals[] = {
393 {SPDK_JSON_VAL_FALSE, 5, "false"},
394 {SPDK_JSON_VAL_NULL, 4, "null"},
395 {SPDK_JSON_VAL_TRUE, 4, "true"},
396 {}
397 };
398
399 static int
match_literal(const uint8_t * start,const uint8_t * end,const uint8_t * literal,size_t len)400 match_literal(const uint8_t *start, const uint8_t *end, const uint8_t *literal, size_t len)
401 {
402 assert(end >= start);
403 if ((size_t)(end - start) < len) {
404 return SPDK_JSON_PARSE_INCOMPLETE;
405 }
406
407 if (memcmp(start, literal, len) != 0) {
408 return SPDK_JSON_PARSE_INVALID;
409 }
410
411 return len;
412 }
413
414 ssize_t
spdk_json_parse(void * json,size_t size,struct spdk_json_val * values,size_t num_values,void ** end,uint32_t flags)415 spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values,
416 void **end, uint32_t flags)
417 {
418 uint8_t *json_end = json + size;
419 enum spdk_json_val_type containers[SPDK_JSON_MAX_NESTING_DEPTH];
420 size_t con_value[SPDK_JSON_MAX_NESTING_DEPTH];
421 enum spdk_json_val_type con_type = SPDK_JSON_VAL_INVALID;
422 bool trailing_comma = false;
423 size_t depth = 0; /* index into containers */
424 size_t cur_value = 0; /* index into values */
425 size_t con_start_value;
426 uint8_t *data = json;
427 uint8_t *new_data;
428 int rc = 0;
429 const struct json_literal *lit;
430 enum {
431 STATE_VALUE, /* initial state */
432 STATE_VALUE_SEPARATOR, /* value separator (comma) */
433 STATE_NAME, /* "name": value */
434 STATE_NAME_SEPARATOR, /* colon */
435 STATE_END, /* parsed the complete value, so only whitespace is valid */
436 } state = STATE_VALUE;
437
438 #define ADD_VALUE(t, val_start_ptr, val_end_ptr) \
439 if (values && cur_value < num_values) { \
440 values[cur_value].type = t; \
441 values[cur_value].start = val_start_ptr; \
442 values[cur_value].len = val_end_ptr - val_start_ptr; \
443 } \
444 cur_value++
445
446 while (data < json_end) {
447 uint8_t c = *data;
448
449 switch (c) {
450 case ' ':
451 case '\t':
452 case '\r':
453 case '\n':
454 /* Whitespace is allowed between any tokens. */
455 data++;
456 break;
457
458 case 't':
459 case 'f':
460 case 'n':
461 /* true, false, or null */
462 if (state != STATE_VALUE) { goto done_invalid; }
463 lit = &g_json_literals[(c >> 3) & 3]; /* See comment above g_json_literals[] */
464 assert(lit->str[0] == c);
465 rc = match_literal(data, json_end, lit->str, lit->len);
466 if (rc < 0) { goto done_rc; }
467 ADD_VALUE(lit->type, data, data + rc);
468 data += rc;
469 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
470 trailing_comma = false;
471 break;
472
473 case '"':
474 if (state != STATE_VALUE && state != STATE_NAME) { goto done_invalid; }
475 rc = json_decode_string(data, json_end, &new_data, flags);
476 if (rc < 0) {
477 data = new_data;
478 goto done_rc;
479 }
480 /*
481 * Start is data + 1 to skip initial quote.
482 * Length is data + rc - 1 to skip both quotes.
483 */
484 ADD_VALUE(state == STATE_VALUE ? SPDK_JSON_VAL_STRING : SPDK_JSON_VAL_NAME,
485 data + 1, data + rc - 1);
486 data = new_data;
487 if (state == STATE_NAME) {
488 state = STATE_NAME_SEPARATOR;
489 } else {
490 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
491 }
492 trailing_comma = false;
493 break;
494
495 case '-':
496 case '0':
497 case '1':
498 case '2':
499 case '3':
500 case '4':
501 case '5':
502 case '6':
503 case '7':
504 case '8':
505 case '9':
506 if (state != STATE_VALUE) { goto done_invalid; }
507 rc = json_valid_number(data, json_end);
508 if (rc < 0) { goto done_rc; }
509 ADD_VALUE(SPDK_JSON_VAL_NUMBER, data, data + rc);
510 data += rc;
511 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
512 trailing_comma = false;
513 break;
514
515 case '{':
516 case '[':
517 if (state != STATE_VALUE) { goto done_invalid; }
518 if (depth == SPDK_JSON_MAX_NESTING_DEPTH) {
519 rc = SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED;
520 goto done_rc;
521 }
522 if (c == '{') {
523 con_type = SPDK_JSON_VAL_OBJECT_BEGIN;
524 state = STATE_NAME;
525 } else {
526 con_type = SPDK_JSON_VAL_ARRAY_BEGIN;
527 state = STATE_VALUE;
528 }
529 con_value[depth] = cur_value;
530 containers[depth++] = con_type;
531 ADD_VALUE(con_type, data, data + 1);
532 data++;
533 trailing_comma = false;
534 break;
535
536 case '}':
537 case ']':
538 if (trailing_comma) { goto done_invalid; }
539 if (depth == 0) { goto done_invalid; }
540 con_type = containers[--depth];
541 con_start_value = con_value[depth];
542 if (values && con_start_value < num_values) {
543 values[con_start_value].len = cur_value - con_start_value - 1;
544 }
545 if (c == '}') {
546 if (state != STATE_NAME && state != STATE_VALUE_SEPARATOR) {
547 goto done_invalid;
548 }
549 if (con_type != SPDK_JSON_VAL_OBJECT_BEGIN) {
550 goto done_invalid;
551 }
552 ADD_VALUE(SPDK_JSON_VAL_OBJECT_END, data, data + 1);
553 } else {
554 if (state != STATE_VALUE && state != STATE_VALUE_SEPARATOR) {
555 goto done_invalid;
556 }
557 if (con_type != SPDK_JSON_VAL_ARRAY_BEGIN) {
558 goto done_invalid;
559 }
560 ADD_VALUE(SPDK_JSON_VAL_ARRAY_END, data, data + 1);
561 }
562 con_type = depth == 0 ? SPDK_JSON_VAL_INVALID : containers[depth - 1];
563 data++;
564 state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
565 trailing_comma = false;
566 break;
567
568 case ',':
569 if (state != STATE_VALUE_SEPARATOR) { goto done_invalid; }
570 data++;
571 assert(con_type == SPDK_JSON_VAL_ARRAY_BEGIN ||
572 con_type == SPDK_JSON_VAL_OBJECT_BEGIN);
573 state = con_type == SPDK_JSON_VAL_ARRAY_BEGIN ? STATE_VALUE : STATE_NAME;
574 trailing_comma = true;
575 break;
576
577 case ':':
578 if (state != STATE_NAME_SEPARATOR) { goto done_invalid; }
579 data++;
580 state = STATE_VALUE;
581 break;
582
583 case '/':
584 if (!(flags & SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS)) {
585 goto done_invalid;
586 }
587 rc = json_valid_comment(data, json_end);
588 if (rc < 0) { goto done_rc; }
589 /* Skip over comment */
590 data += rc;
591 break;
592
593 default:
594 goto done_invalid;
595 }
596
597 if (state == STATE_END) {
598 break;
599 }
600 }
601
602 if (state == STATE_END) {
603 /* Skip trailing whitespace */
604 while (data < json_end) {
605 uint8_t c = *data;
606
607 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
608 data++;
609 } else {
610 break;
611 }
612 }
613
614 /*
615 * These asserts are just for sanity checking - they are guaranteed by the allowed
616 * state transitions.
617 */
618 assert(depth == 0);
619 assert(trailing_comma == false);
620 assert(data <= json_end);
621 if (end) {
622 *end = data;
623 }
624 return cur_value;
625 }
626
627 /* Invalid end state - ran out of data */
628 rc = SPDK_JSON_PARSE_INCOMPLETE;
629
630 done_rc:
631 assert(rc < 0);
632 if (end) {
633 *end = data;
634 }
635 return rc;
636
637 done_invalid:
638 rc = SPDK_JSON_PARSE_INVALID;
639 goto done_rc;
640 }
641