/* $NetBSD: is_json.c,v 1.4 2020/06/15 00:37:24 christos Exp $ */

/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */

#ifndef TEST
#include "file.h"

#ifndef lint
#if 0
FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $")
#else
__RCSID("$NetBSD: is_json.c,v 1.4 2020/06/15 00:37:24 christos Exp $");
#endif
#endif

#include <string.h>
#include "magic.h"
#else
/*
 * Standalone TEST build: "file.h" is not included, so pull in the
 * declarations of size_t and memset() that the code below relies on.
 */
#include <stddef.h>
#include <string.h>
#endif

/*
 * DPRINTF(msg, cur, start): with DEBUG defined, print msg, the byte at cur
 * (as hex and as a character) and up to 20 bytes starting at start.
 * Without DEBUG it expands to an empty statement and evaluates nothing.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
	printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif

/* Indices into the statistics array handed to json_parse(). */
#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5
#define JSON_MAX	6	/* number of slots in the statistics array */

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

/* Forward declaration: the array and object parsers recurse through it. */
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
    size_t);

/*
 * Return 1 if uc is one of the four JSON whitespace bytes
 * (space, newline, carriage return, tab), 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if uc is an ASCII decimal digit, 0 otherwise.
 * Hand-rolled so the answer does not depend on the current locale.
 */
static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}

/*
 * Return 1 if uc is an ASCII hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	switch (uc) {
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}

/*
 * Advance uc past any JSON whitespace without going beyond ue.
 * Returns the first non-whitespace position, or ue if only whitespace
 * remained.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}
124 125 static int 126 json_parse_string(const unsigned char **ucp, const unsigned char *ue) 127 { 128 const unsigned char *uc = *ucp; 129 size_t i; 130 131 DPRINTF("Parse string: ", uc, *ucp); 132 while (uc < ue) { 133 switch (*uc++) { 134 case '\0': 135 goto out; 136 case '\\': 137 if (uc == ue) 138 goto out; 139 switch (*uc++) { 140 case '\0': 141 goto out; 142 case '"': 143 case '\\': 144 case '/': 145 case 'b': 146 case 'f': 147 case 'n': 148 case 'r': 149 case 't': 150 continue; 151 case 'u': 152 if (ue - uc < 4) { 153 uc = ue; 154 goto out; 155 } 156 for (i = 0; i < 4; i++) 157 if (!json_isxdigit(*uc++)) 158 goto out; 159 continue; 160 default: 161 goto out; 162 } 163 case '"': 164 *ucp = uc; 165 DPRINTF("Good string: ", uc, *ucp); 166 return 1; 167 default: 168 continue; 169 } 170 } 171 out: 172 DPRINTF("Bad string: ", uc, *ucp); 173 *ucp = uc; 174 return 0; 175 } 176 177 static int 178 json_parse_array(const unsigned char **ucp, const unsigned char *ue, 179 size_t *st, size_t lvl) 180 { 181 const unsigned char *uc = *ucp; 182 183 DPRINTF("Parse array: ", uc, *ucp); 184 while (uc < ue) { 185 if (*uc == ']') 186 goto done; 187 if (!json_parse(&uc, ue, st, lvl + 1)) 188 goto out; 189 if (uc == ue) 190 goto out; 191 switch (*uc) { 192 case ',': 193 uc++; 194 continue; 195 case ']': 196 done: 197 st[JSON_ARRAYN]++; 198 *ucp = uc + 1; 199 DPRINTF("Good array: ", uc, *ucp); 200 return 1; 201 default: 202 goto out; 203 } 204 } 205 out: 206 DPRINTF("Bad array: ", uc, *ucp); 207 *ucp = uc; 208 return 0; 209 } 210 211 static int 212 json_parse_object(const unsigned char **ucp, const unsigned char *ue, 213 size_t *st, size_t lvl) 214 { 215 const unsigned char *uc = *ucp; 216 DPRINTF("Parse object: ", uc, *ucp); 217 while (uc < ue) { 218 uc = json_skip_space(uc, ue); 219 if (uc == ue) 220 goto out; 221 if (*uc == '}') { 222 uc++; 223 goto done; 224 } 225 if (*uc++ != '"') { 226 DPRINTF("not string", uc, *ucp); 227 goto out; 228 } 229 DPRINTF("next field", uc, *ucp); 
230 if (!json_parse_string(&uc, ue)) { 231 DPRINTF("not string", uc, *ucp); 232 goto out; 233 } 234 uc = json_skip_space(uc, ue); 235 if (uc == ue) 236 goto out; 237 if (*uc++ != ':') { 238 DPRINTF("not colon", uc, *ucp); 239 goto out; 240 } 241 if (!json_parse(&uc, ue, st, lvl + 1)) { 242 DPRINTF("not json", uc, *ucp); 243 goto out; 244 } 245 if (uc == ue) 246 goto out; 247 switch (*uc++) { 248 case ',': 249 continue; 250 case '}': /* { */ 251 done: 252 *ucp = uc; 253 DPRINTF("Good object: ", uc, *ucp); 254 return 1; 255 default: 256 *ucp = uc - 1; 257 DPRINTF("not more", uc, *ucp); 258 goto out; 259 } 260 } 261 out: 262 DPRINTF("Bad object: ", uc, *ucp); 263 *ucp = uc; 264 return 0; 265 } 266 267 static int 268 json_parse_number(const unsigned char **ucp, const unsigned char *ue) 269 { 270 const unsigned char *uc = *ucp; 271 int got = 0; 272 273 DPRINTF("Parse number: ", uc, *ucp); 274 if (uc == ue) 275 return 0; 276 if (*uc == '-') 277 uc++; 278 279 for (; uc < ue; uc++) { 280 if (!json_isdigit(*uc)) 281 break; 282 got = 1; 283 } 284 if (uc == ue) 285 goto out; 286 if (*uc == '.') 287 uc++; 288 for (; uc < ue; uc++) { 289 if (!json_isdigit(*uc)) 290 break; 291 got = 1; 292 } 293 if (uc == ue) 294 goto out; 295 if (got && (*uc == 'e' || *uc == 'E')) { 296 uc++; 297 got = 0; 298 if (uc == ue) 299 goto out; 300 if (*uc == '+' || *uc == '-') 301 uc++; 302 for (; uc < ue; uc++) { 303 if (!json_isdigit(*uc)) 304 break; 305 got = 1; 306 } 307 } 308 out: 309 if (!got) 310 DPRINTF("Bad number: ", uc, *ucp); 311 else 312 DPRINTF("Good number: ", uc, *ucp); 313 *ucp = uc; 314 return got; 315 } 316 317 static int 318 json_parse_const(const unsigned char **ucp, const unsigned char *ue, 319 const char *str, size_t len) 320 { 321 const unsigned char *uc = *ucp; 322 323 DPRINTF("Parse const: ", uc, *ucp); 324 for (len--; uc < ue && --len;) { 325 if (*uc++ == *++str) 326 continue; 327 } 328 if (len) 329 DPRINTF("Bad const: ", uc, *ucp); 330 *ucp = uc; 331 return len == 0; 332 } 
333 334 static int 335 json_parse(const unsigned char **ucp, const unsigned char *ue, 336 size_t *st, size_t lvl) 337 { 338 const unsigned char *uc; 339 int rv = 0; 340 int t; 341 342 uc = json_skip_space(*ucp, ue); 343 if (uc == ue) 344 goto out; 345 346 // Avoid recursion 347 if (lvl > 20) 348 return 0; 349 #if JSON_COUNT 350 /* bail quickly if not counting */ 351 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN])) 352 return 1; 353 #endif 354 355 DPRINTF("Parse general: ", uc, *ucp); 356 switch (*uc++) { 357 case '"': 358 rv = json_parse_string(&uc, ue); 359 t = JSON_STRING; 360 break; 361 case '[': 362 rv = json_parse_array(&uc, ue, st, lvl + 1); 363 t = JSON_ARRAY; 364 break; 365 case '{': /* '}' */ 366 rv = json_parse_object(&uc, ue, st, lvl + 1); 367 t = JSON_OBJECT; 368 break; 369 case 't': 370 rv = json_parse_const(&uc, ue, "true", sizeof("true")); 371 t = JSON_CONSTANT; 372 break; 373 case 'f': 374 rv = json_parse_const(&uc, ue, "false", sizeof("false")); 375 t = JSON_CONSTANT; 376 break; 377 case 'n': 378 rv = json_parse_const(&uc, ue, "null", sizeof("null")); 379 t = JSON_CONSTANT; 380 break; 381 default: 382 --uc; 383 rv = json_parse_number(&uc, ue); 384 t = JSON_NUMBER; 385 break; 386 } 387 if (rv) 388 st[t]++; 389 uc = json_skip_space(uc, ue); 390 out: 391 *ucp = uc; 392 DPRINTF("End general: ", uc, *ucp); 393 if (lvl == 0) 394 return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]); 395 return rv; 396 } 397 398 #ifndef TEST 399 int 400 file_is_json(struct magic_set *ms, const struct buffer *b) 401 { 402 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 403 const unsigned char *ue = uc + b->flen; 404 size_t st[JSON_MAX]; 405 int mime = ms->flags & MAGIC_MIME; 406 407 408 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 409 return 0; 410 411 memset(st, 0, sizeof(st)); 412 413 if (!json_parse(&uc, ue, st, 0)) 414 return 0; 415 416 if (mime == MAGIC_MIME_ENCODING) 417 return 1; 418 if (mime) { 419 if (file_printf(ms, "application/json") == 
-1) 420 return -1; 421 return 1; 422 } 423 if (file_printf(ms, "JSON data") == -1) 424 return -1; 425 #if JSON_COUNT 426 #define P(n) st[n], st[n] > 1 ? "s" : "" 427 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 428 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 429 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT 430 "u >1array%s)", 431 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), 432 P(JSON_NUMBER), P(JSON_ARRAYN)) 433 == -1) 434 return -1; 435 #endif 436 return 1; 437 } 438 439 #else 440 441 #include <sys/types.h> 442 #include <sys/stat.h> 443 #include <stdio.h> 444 #include <fcntl.h> 445 #include <unistd.h> 446 #include <stdlib.h> 447 #include <stdint.h> 448 #include <err.h> 449 450 int 451 main(int argc, char *argv[]) 452 { 453 int fd, rv; 454 struct stat st; 455 unsigned char *p; 456 size_t stats[JSON_MAX]; 457 458 if ((fd = open(argv[1], O_RDONLY)) == -1) 459 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 460 461 if (fstat(fd, &st) == -1) 462 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 463 464 if ((p = malloc(st.st_size)) == NULL) 465 err(EXIT_FAILURE, "Can't allocate %jd bytes", 466 (intmax_t)st.st_size); 467 if (read(fd, p, st.st_size) != st.st_size) 468 err(EXIT_FAILURE, "Can't read %jd bytes", 469 (intmax_t)st.st_size); 470 memset(stats, 0, sizeof(stats)); 471 printf("is json %d\n", json_parse((const unsigned char **)&p, 472 p + st.st_size, stats, 0)); 473 return 0; 474 } 475 #endif 476