1 /* $NetBSD: is_json.c,v 1.2 2018/10/19 00:24:57 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2018 Christos Zoulas 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Parse JSON object serialization format (RFC-7159) 31 */ 32 33 #ifndef TEST 34 #include "file.h" 35 36 #ifndef lint 37 #if 0 38 FILE_RCSID("@(#)$File: is_json.c,v 1.11 2018/10/15 16:29:16 christos Exp $") 39 #else 40 __RCSID("$NetBSD: is_json.c,v 1.2 2018/10/19 00:24:57 christos Exp $"); 41 #endif 42 #endif 43 44 #include <string.h> 45 #include "magic.h" 46 #endif 47 48 #ifdef DEBUG 49 #include <stdio.h> 50 #define DPRINTF(a, b, c) \ 51 printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c)) 52 #else 53 #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0) 54 #endif 55 56 #define JSON_ARRAY 0 57 #define JSON_CONSTANT 1 58 #define JSON_NUMBER 2 59 #define JSON_OBJECT 3 60 #define JSON_STRING 4 61 #define JSON_MAX 5 62 63 /* 64 * if JSON_COUNT != 0: 65 * count all the objects, require that we have the whole data file 66 * otherwise: 67 * stop if we find an object or an array 68 */ 69 #ifndef JSON_COUNT 70 #define JSON_COUNT 0 71 #endif 72 73 static int json_parse(const unsigned char **, const unsigned char *, size_t *, 74 size_t); 75 76 static int 77 json_isspace(const unsigned char uc) 78 { 79 switch (uc) { 80 case ' ': 81 case '\n': 82 case '\r': 83 case '\t': 84 return 1; 85 default: 86 return 0; 87 } 88 } 89 90 static int 91 json_isdigit(unsigned char uc) 92 { 93 switch (uc) { 94 case '0': case '1': case '2': case '3': case '4': 95 case '5': case '6': case '7': case '8': case '9': 96 return 1; 97 default: 98 return 0; 99 } 100 } 101 102 static int 103 json_isxdigit(unsigned char uc) 104 { 105 if (json_isdigit(uc)) 106 return 1; 107 switch (uc) { 108 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 109 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 110 return 1; 111 default: 112 return 0; 113 } 114 } 115 116 static const unsigned char * 117 json_skip_space(const unsigned char *uc, const unsigned char *ue) 118 { 119 while (uc < ue && json_isspace(*uc)) 120 uc++; 121 return uc; 122 } 123 124 static int 125 json_parse_string(const unsigned char **ucp, const unsigned char *ue) 126 { 127 const unsigned char *uc = *ucp; 128 size_t i; 129 130 DPRINTF("Parse string: ", uc, *ucp); 131 while (uc < ue) { 132 switch (*uc++) { 133 case '\0': 134 goto out; 135 case '\\': 136 if (uc == ue) 137 goto out; 138 switch (*uc++) { 139 case '\0': 140 goto out; 141 case '"': 142 case '\\': 143 case '/': 144 case 'b': 145 case 'f': 146 case 'n': 147 case 'r': 148 case 't': 149 continue; 150 case 'u': 151 if (ue - uc < 4) { 152 uc = ue; 153 goto out; 154 } 155 for (i = 0; i < 4; i++) 156 if (!json_isxdigit(*uc++)) 157 goto out; 158 continue; 159 default: 160 goto out; 161 } 162 case '"': 163 *ucp = uc; 164 return 1; 165 default: 166 continue; 167 } 168 } 169 out: 170 DPRINTF("Bad string: ", uc, *ucp); 171 *ucp = uc; 172 return 0; 173 } 174 175 static int 176 json_parse_array(const unsigned char **ucp, const unsigned char *ue, 177 size_t *st, size_t lvl) 178 { 179 const unsigned char *uc = *ucp; 180 181 DPRINTF("Parse array: ", uc, *ucp); 182 while (uc < ue) { 183 if (!json_parse(&uc, ue, st, lvl + 1)) 184 goto out; 185 if (uc == ue) 186 goto out; 187 switch (*uc) { 188 case ',': 189 uc++; 190 continue; 191 case ']': 192 *ucp = uc + 1; 193 return 1; 194 default: 195 goto out; 196 } 197 } 198 out: 199 DPRINTF("Bad array: ", uc, *ucp); 200 *ucp = uc; 201 return 0; 202 } 203 204 static int 205 json_parse_object(const unsigned char **ucp, const unsigned char *ue, 206 size_t *st, size_t lvl) 207 { 208 const unsigned char *uc = *ucp; 209 DPRINTF("Parse object: ", uc, *ucp); 210 while (uc < ue) { 211 uc = json_skip_space(uc, ue); 212 if (uc == ue) 213 goto out; 214 if (*uc++ != '"') { 215 DPRINTF("not string", uc, *ucp); 216 goto out; 217 } 218 DPRINTF("next field", uc, *ucp); 219 if (!json_parse_string(&uc, ue)) { 220 DPRINTF("not string", uc, *ucp); 221 goto out; 222 } 223 uc = json_skip_space(uc, ue); 224 if (uc == ue) 225 goto out; 226 if (*uc++ != ':') { 227 DPRINTF("not colon", uc, *ucp); 228 goto out; 229 } 230 if (!json_parse(&uc, ue, st, lvl + 1)) { 231 DPRINTF("not json", uc, *ucp); 232 goto out; 233 } 234 if (uc == ue) 235 goto out; 236 switch (*uc++) { 237 case ',': 238 continue; 239 case '}': /* { */ 240 *ucp = uc; 241 DPRINTF("Good object: ", uc, *ucp); 242 return 1; 243 default: 244 *ucp = uc - 1; 245 DPRINTF("not more", uc, *ucp); 246 goto out; 247 } 248 } 249 out: 250 DPRINTF("Bad object: ", uc, *ucp); 251 *ucp = uc; 252 return 0; 253 } 254 255 static int 256 json_parse_number(const unsigned char **ucp, const unsigned char *ue) 257 { 258 const unsigned char *uc = *ucp; 259 int got = 0; 260 261 DPRINTF("Parse number: ", uc, *ucp); 262 if (uc == ue) 263 return 0; 264 if (*uc == '-') 265 uc++; 266 267 for (; uc < ue; uc++) { 268 if (!json_isdigit(*uc)) 269 break; 270 got = 1; 271 } 272 if (uc == ue) 273 goto out; 274 if (*uc == '.') 275 uc++; 276 for (; uc < ue; uc++) { 277 if (!json_isdigit(*uc)) 278 break; 279 got = 1; 280 } 281 if (uc == ue) 282 goto out; 283 if (got && (*uc == 'e' || *uc == 'E')) { 284 uc++; 285 got = 0; 286 if (uc == ue) 287 goto out; 288 if (*uc == '+' || *uc == '-') 289 uc++; 290 for (; uc < ue; uc++) { 291 if (!json_isdigit(*uc)) 292 break; 293 got = 1; 294 } 295 } 296 out: 297 if (!got) 298 DPRINTF("Bad number: ", uc, *ucp); 299 else 300 DPRINTF("Good number: ", uc, *ucp); 301 *ucp = uc; 302 return got; 303 } 304 305 static int 306 json_parse_const(const unsigned char **ucp, const unsigned char *ue, 307 const char *str, size_t len) 308 { 309 const unsigned char *uc = *ucp; 310 311 DPRINTF("Parse const: ", uc, *ucp); 312 for (len--; uc < ue && --len;) { 313 if (*uc++ == *++str) 314 continue; 315 } 316 if (len) 317 DPRINTF("Bad const: ", uc, *ucp); 318 *ucp = uc; 319 return len == 0; 320 } 321 322 static int 323 json_parse(const unsigned char **ucp, const unsigned char *ue, 324 size_t *st, size_t lvl) 325 { 326 const unsigned char *uc; 327 int rv = 0; 328 int t; 329 330 uc = json_skip_space(*ucp, ue); 331 if (uc == ue) 332 goto out; 333 334 // Avoid recursion 335 if (lvl > 20) 336 return 0; 337 #if JSON_COUNT 338 /* bail quickly if not counting */ 339 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAY])) 340 return 1; 341 #endif 342 343 DPRINTF("Parse general: ", uc, *ucp); 344 switch (*uc++) { 345 case '"': 346 rv = json_parse_string(&uc, ue); 347 t = JSON_STRING; 348 break; 349 case '[': 350 rv = json_parse_array(&uc, ue, st, lvl + 1); 351 t = JSON_ARRAY; 352 break; 353 case '{': /* '}' */ 354 rv = json_parse_object(&uc, ue, st, lvl + 1); 355 t = JSON_OBJECT; 356 break; 357 case 't': 358 rv = json_parse_const(&uc, ue, "true", sizeof("true")); 359 t = JSON_CONSTANT; 360 break; 361 case 'f': 362 rv = json_parse_const(&uc, ue, "false", sizeof("false")); 363 t = JSON_CONSTANT; 364 break; 365 case 'n': 366 rv = json_parse_const(&uc, ue, "null", sizeof("null")); 367 t = JSON_CONSTANT; 368 break; 369 default: 370 --uc; 371 rv = json_parse_number(&uc, ue); 372 t = JSON_NUMBER; 373 break; 374 } 375 if (rv) 376 st[t]++; 377 uc = json_skip_space(uc, ue); 378 out: 379 *ucp = uc; 380 DPRINTF("End general: ", uc, *ucp); 381 if (lvl == 0) 382 return rv && (st[JSON_ARRAY] || st[JSON_OBJECT]); 383 return rv; 384 } 385 386 #ifndef TEST 387 int 388 file_is_json(struct magic_set *ms, const struct buffer *b) 389 { 390 const unsigned char *uc = CAST(const unsigned char *, b->fbuf); 391 const unsigned char *ue = uc + b->flen; 392 size_t st[JSON_MAX]; 393 int mime = ms->flags & MAGIC_MIME; 394 395 396 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0) 397 return 0; 398 399 memset(st, 0, sizeof(st)); 400 401 if (!json_parse(&uc, ue, st, 0)) 402 return 0; 403 404 if (mime == MAGIC_MIME_ENCODING) 405 return 1; 406 if (mime) { 407 if (file_printf(ms, "application/json") == -1) 408 return -1; 409 return 1; 410 } 411 if (file_printf(ms, "JSON data") == -1) 412 return -1; 413 #if JSON_COUNT 414 #define P(n) st[n], st[n] > 1 ? "s" : "" 415 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT 416 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT 417 "u constant%s, %" SIZE_T_FORMAT "u number%s)", P(JSON_OBJECT), 418 P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), P(JSON_NUMBER)) 419 == -1) 420 return -1; 421 #endif 422 return 1; 423 } 424 425 #else 426 427 #include <sys/types.h> 428 #include <sys/stat.h> 429 #include <stdio.h> 430 #include <fcntl.h> 431 #include <unistd.h> 432 #include <stdlib.h> 433 #include <stdint.h> 434 #include <err.h> 435 436 int 437 main(int argc, char *argv[]) 438 { 439 int fd, rv; 440 struct stat st; 441 unsigned char *p; 442 size_t stats[JSON_MAX]; 443 444 if ((fd = open(argv[1], O_RDONLY)) == -1) 445 err(EXIT_FAILURE, "Can't open `%s'", argv[1]); 446 447 if (fstat(fd, &st) == -1) 448 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]); 449 450 if ((p = malloc(st.st_size)) == NULL) 451 err(EXIT_FAILURE, "Can't allocate %jd bytes", 452 (intmax_t)st.st_size); 453 if (read(fd, p, st.st_size) != st.st_size) 454 err(EXIT_FAILURE, "Can't read %jd bytes", 455 (intmax_t)st.st_size); 456 memset(stats, 0, sizeof(stats)); 457 printf("is json %d\n", json_parse((const unsigned char **)&p, 458 p + st.st_size, stats, 0)); 459 return 0; 460 } 461 #endif 462