/* $NetBSD: is_json.c,v 1.5 2022/09/24 20:21:46 christos Exp $ */

/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */

#ifndef TEST
#include "file.h"

#ifndef lint
#if 0
FILE_RCSID("@(#)$File: is_json.c,v 1.27 2022/09/16 14:15:29 christos Exp $")
#else
__RCSID("$NetBSD: is_json.c,v 1.5 2022/09/24 20:21:46 christos Exp $");
#endif
#endif

#include "magic.h"
#else
#include <stdio.h>
#include <stddef.h>
#endif
#include <string.h>

/*
 * DPRINTF(msg, cur, start): parser trace, compiled in only with DEBUG.
 * The expansion reads a variable named `lvl' from the enclosing scope
 * (used as the indentation width), prints the byte at `cur', and then
 * the text between `start' and `cur'.
 *
 * __file_debugused marks parameters that are referenced only by DPRINTF,
 * so they are tagged unused when DEBUG is off.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	(int)((b) - (c)), (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#define __file_debugused __attribute__((__unused__))
#endif

/* Indices into the size_t counter array (`st') updated by the parser. */
#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5
#define JSON_MAX	6	/* number of counters, i.e. array size */

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

static int json_parse(const unsigned char **, const unsigned char *, size_t *,
    size_t);

/*
 * Return 1 if uc is one of the four whitespace bytes handled by the
 * parser (space, LF, CR, TAB), 0 otherwise.  Locale independent.
 */
static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if uc is an ASCII decimal digit '0'..'9', 0 otherwise.
 */
static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if uc is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}

static const unsigned char * 124 json_skip_space(const unsigned char *uc, const unsigned char *ue) 125 { 126 while (uc < ue && json_isspace(*uc)) 127 uc++; 128 return uc; 129 } 130 131 /*ARGSUSED*/ 132 static int 133 json_parse_string(const unsigned char **ucp, const unsigned char *ue, 134 size_t lvl __file_debugused) 135 { 136 const unsigned char *uc = *ucp; 137 size_t i; 138 139 DPRINTF("Parse string: ", uc, *ucp); 140 while (uc < ue) { 141 switch (*uc++) { 142 case '\0': 143 goto out; 144 case '\\': 145 if (uc == ue) 146 goto out; 147 switch (*uc++) { 148 case '\0': 149 goto out; 150 case '"': 151 case '\\': 152 case '/': 153 case 'b': 154 case 'f': 155 case 'n': 156 case 'r': 157 case 't': 158 continue; 159 case 'u': 160 if (ue - uc < 4) { 161 uc = ue; 162 goto out; 163 } 164 for (i = 0; i < 4; i++) 165 if (!json_isxdigit(*uc++)) 166 goto out; 167 continue; 168 default: 169 goto out; 170 } 171 case '"': 172 DPRINTF("Good string: ", uc, *ucp); 173 *ucp = uc; 174 return 1; 175 default: 176 continue; 177 } 178 } 179 out: 180 DPRINTF("Bad string: ", uc, *ucp); 181 *ucp = uc; 182 return 0; 183 } 184 185 static int 186 json_parse_array(const unsigned char **ucp, const unsigned char *ue, 187 size_t *st, size_t lvl) 188 { 189 const unsigned char *uc = *ucp; 190 191 DPRINTF("Parse array: ", uc, *ucp); 192 while (uc < ue) { 193 uc = json_skip_space(uc, ue); 194 if (uc == ue) 195 goto out; 196 if (*uc == ']') 197 goto done; 198 if (!json_parse(&uc, ue, st, lvl + 1)) 199 goto out; 200 if (uc == ue) 201 goto out; 202 switch (*uc) { 203 case ',': 204 uc++; 205 continue; 206 case ']': 207 done: 208 st[JSON_ARRAYN]++; 209 DPRINTF("Good array: ", uc, *ucp); 210 *ucp = uc + 1; 211 return 1; 212 default: 213 goto out; 214 } 215 } 216 out: 217 DPRINTF("Bad array: ", uc, *ucp); 218 *ucp = uc; 219 return 0; 220 } 221 222 static int 223 json_parse_object(const unsigned char **ucp, const unsigned char *ue, 224 size_t *st, size_t lvl) 225 { 226 const unsigned char *uc = *ucp; 227 
DPRINTF("Parse object: ", uc, *ucp); 228 while (uc < ue) { 229 uc = json_skip_space(uc, ue); 230 if (uc == ue) 231 goto out; 232 if (*uc == '}') { 233 uc++; 234 goto done; 235 } 236 if (*uc++ != '"') { 237 DPRINTF("not string", uc, *ucp); 238 goto out; 239 } 240 DPRINTF("next field", uc, *ucp); 241 if (!json_parse_string(&uc, ue, lvl)) { 242 DPRINTF("not string", uc, *ucp); 243 goto out; 244 } 245 uc = json_skip_space(uc, ue); 246 if (uc == ue) 247 goto out; 248 if (*uc++ != ':') { 249 DPRINTF("not colon", uc, *ucp); 250 goto out; 251 } 252 if (!json_parse(&uc, ue, st, lvl + 1)) { 253 DPRINTF("not json", uc, *ucp); 254 goto out; 255 } 256 if (uc == ue) 257 goto out; 258 switch (*uc++) { 259 case ',': 260 continue; 261 case '}': /* { */ 262 done: 263 DPRINTF("Good object: ", uc, *ucp); 264 *ucp = uc; 265 return 1; 266 default: 267 DPRINTF("not more", uc, *ucp); 268 *ucp = uc - 1; 269 goto out; 270 } 271 } 272 out: 273 DPRINTF("Bad object: ", uc, *ucp); 274 *ucp = uc; 275 return 0; 276 } 277 278 /*ARGSUSED*/ 279 static int 280 json_parse_number(const unsigned char **ucp, const unsigned char *ue, 281 size_t lvl __file_debugused) 282 { 283 const unsigned char *uc = *ucp; 284 int got = 0; 285 286 DPRINTF("Parse number: ", uc, *ucp); 287 if (uc == ue) 288 return 0; 289 if (*uc == '-') 290 uc++; 291 292 for (; uc < ue; uc++) { 293 if (!json_isdigit(*uc)) 294 break; 295 got = 1; 296 } 297 if (uc == ue) 298 goto out; 299 if (*uc == '.') 300 uc++; 301 for (; uc < ue; uc++) { 302 if (!json_isdigit(*uc)) 303 break; 304 got = 1; 305 } 306 if (uc == ue) 307 goto out; 308 if (got && (*uc == 'e' || *uc == 'E')) { 309 uc++; 310 got = 0; 311 if (uc == ue) 312 goto out; 313 if (*uc == '+' || *uc == '-') 314 uc++; 315 for (; uc < ue; uc++) { 316 if (!json_isdigit(*uc)) 317 break; 318 got = 1; 319 } 320 } 321 out: 322 if (!got) 323 DPRINTF("Bad number: ", uc, *ucp); 324 else 325 DPRINTF("Good number: ", uc, *ucp); 326 *ucp = uc; 327 return got; 328 } 329 330 /*ARGSUSED*/ 331 static int 
332 json_parse_const(const unsigned char **ucp, const unsigned char *ue, 333 const char *str, size_t len, size_t lvl __file_debugused) 334 { 335 const unsigned char *uc = *ucp; 336 337 DPRINTF("Parse const: ", uc, *ucp); 338 *ucp += --len - 1; 339 if (*ucp > ue) 340 *ucp = ue; 341 for (; uc < ue && --len;) { 342 if (*uc++ != *++str) { 343 DPRINTF("Bad const: ", uc, *ucp); 344 return 0; 345 } 346 } 347 DPRINTF("Good const: ", uc, *ucp); 348 return 1; 349 } 350 351 static int 352 json_parse(const unsigned char **ucp, const unsigned char *ue, 353 size_t *st, size_t lvl) 354 { 355 const unsigned char *uc, *ouc; 356 int rv = 0; 357 int t; 358 359 ouc = uc = json_skip_space(*ucp, ue); 360 if (uc == ue) 361 goto out; 362 363 // Avoid recursion 364 if (lvl > 500) { 365 DPRINTF("Too many levels", uc, *ucp); 366 return 0; 367 } 368 #if JSON_COUNT 369 /* bail quickly if not counting */ 370 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 371 return 1; 372 #endif 373 374 DPRINTF("Parse general: ", uc, *ucp); 375 switch (*uc++) { 376 case '"': 377 rv = json_parse_string(&uc, ue, lvl + 1); 378 t = JSON_STRING; 379 break; 380 case '[': 381 rv = json_parse_array(&uc, ue, st, lvl + 1); 382 t = JSON_ARRAY; 383 break; 384 case '{': /* '}' */ 385 rv = json_parse_object(&uc, ue, st, lvl + 1); 386 t = JSON_OBJECT; 387 break; 388 case 't': 389 rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1); 390 t = JSON_CONSTANT; 391 break; 392 case 'f': 393 rv = json_parse_const(&uc, ue, "false", sizeof("false"), 394 lvl + 1); 395 t = JSON_CONSTANT; 396 break; 397 case 'n': 398 rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1); 399 t = JSON_CONSTANT; 400 break; 401 default: 402 --uc; 403 rv = json_parse_number(&uc, ue, lvl + 1); 404 t = JSON_NUMBER; 405 break; 406 } 407 if (rv) 408 st[t]++; 409 uc = json_skip_space(uc, ue); 410 out: 411 DPRINTF("End general: ", uc, *ucp); 412 *ucp = uc; 413 if (lvl == 0) { 414 if (!rv) 415 return 0; 416 if (uc == ue) 417 return 
(st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0; 418 if (*ouc == *uc && json_parse(&uc, ue, st, 1)) 419 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0; 420 else 421 return 0; 422 } 423 return rv; 424 } 425 426 #ifndef TEST 427 int 428 file_is_json(struct magic_set *ms, const struct buffer *b) 429 { 430 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 431 const unsigned char *ue = uc + b->flen; 432 size_t st[JSON_MAX]; 433 int mime = ms->flags & MAGIC_MIME; 434 int jt; 435 436 437 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 438 return 0; 439 440 memset(st, 0, sizeof(st)); 441 442 if ((jt = json_parse(&uc, ue, st, 0)) == 0) 443 return 0; 444 445 if (mime == MAGIC_MIME_ENCODING) 446 return 1; 447 if (mime) { 448 if (file_printf(ms, "application/%s", 449 jt == 1 ? "json" : "x-ndjason") == -1) 450 return -1; 451 return 1; 452 } 453 if (file_printf(ms, "%sJSON text data", 454 jt == 1 ? "" : "New Line Delimited ") == -1) 455 return -1; 456 #if JSON_COUNT 457 #define P(n) st[n], st[n] > 1 ? 
"s" : "" 458 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 459 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 460 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 461 "u >1array%s)", 462 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 463 P(JSON_NUMBER), P(JSON_ARRAYN)) 464 == -1) 465 return -1; 466 #endif 467 return 1; 468 } 469 470 #else 471 472 #include <sys/types.h> 473 #include <sys/stat.h> 474 #include <stdio.h> 475 #include <fcntl.h> 476 #include <unistd.h> 477 #include <stdlib.h> 478 #include <stdint.h> 479 #include <err.h> 480 481 int 482 main(int argc, char *argv[]) 483 { 484 int fd; 485 struct stat st; 486 unsigned char *p; 487 size_t stats[JSON_MAX]; 488 489 if ((fd = open(argv[1], O_RDONLY)) == -1) 490 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 491 492 if (fstat(fd, &st) == -1) 493 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 494 495 if ((p = CAST(char *, malloc(st.st_size))) == NULL) 496 err(EXIT_FAILURE, "Can't allocate %jd bytes", 497 (intmax_t)st.st_size); 498 if (read(fd, p, st.st_size) != st.st_size) 499 err(EXIT_FAILURE, "Can't read %jd bytes", 500 (intmax_t)st.st_size); 501 memset(stats, 0, sizeof(stats)); 502 printf("is json %d\n", json_parse((const unsigned char **)&p, 503 p + st.st_size, stats, 0)); 504 return 0; 505 } 506 #endif 507