1 /* $NetBSD: is_json.c,v 1.6 2023/08/18 19:00:11 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2018 Christos Zoulas
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Parse JSON object serialization format (RFC-7159)
31 */
32
33 #ifndef TEST
34 #include "file.h"
35
36 #ifndef lint
37 #if 0
38 FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
39 #else
40 __RCSID("$NetBSD: is_json.c,v 1.6 2023/08/18 19:00:11 christos Exp $");
41 #endif
42 #endif
43
44 #include "magic.h"
45 #else
46 #include <stdio.h>
47 #include <stddef.h>
48 #endif
49 #include <string.h>
50
/*
 * Debug tracing.
 *
 * DPRINTF(a, b, c) takes a label `a', the current input position `b' and
 * the position `c' at which the current construct started.  When DEBUG is
 * defined it prints the label, the byte at `b' (in hex and as a character)
 * and the (b - c) bytes starting at `c', indented by the variable `lvl',
 * which must be in scope wherever the macro is used.  Without DEBUG it
 * expands to an empty statement.
 *
 * Both variants are a single do/while statement and every argument is
 * parenthesised, so the macro behaves the same in either configuration
 * and is safe with arbitrary pointer expressions.
 *
 * __file_debugused is attached to parameters that are only referenced
 * through DPRINTF, so they do not trigger unused warnings without DEBUG.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	do { \
	printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	    (int)((b) - (c)), (const char *)(c)); \
} while (/*CONSTCOND*/0)
#define __file_debugused
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#define __file_debugused	__attribute__((__unused__))
#endif
61
/*
 * Indices into the per-type counter array (size_t st[JSON_MAX]) that is
 * threaded through the parser.
 */
enum {
	JSON_ARRAY	= 0,	/* arrays accepted by json_parse() */
	JSON_CONSTANT	= 1,	/* true / false / null */
	JSON_NUMBER	= 2,	/* numbers */
	JSON_OBJECT	= 3,	/* objects */
	JSON_STRING	= 4,	/* strings parsed as values */
	JSON_ARRAYN	= 5,	/* bumped by json_parse_array() on each ']' */
	JSON_MAX	= 6	/* number of counters */
};
69
70 /*
71 * if JSON_COUNT != 0:
72 * count all the objects, require that we have the whole data file
73 * otherwise:
74 * stop if we find an object or an array
75 */
76 #ifndef JSON_COUNT
77 #define JSON_COUNT 0
78 #endif
79
80 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
81 size_t);
82
/*
 * Return 1 if uc is one of the four characters the parser treats as
 * insignificant whitespace (space, line feed, carriage return, tab),
 * 0 otherwise.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
96
/*
 * Return 1 if uc is a decimal digit '0'..'9', 0 otherwise.
 * The C standard guarantees that the digit characters are contiguous,
 * so a range comparison is equivalent to enumerating them.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
108
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	static const char hex_letters[] = "abcdefABCDEF";

	if (json_isdigit(uc))
		return 1;
	/* Search only the letters, not the terminating NUL. */
	return memchr(hex_letters, uc, sizeof(hex_letters) - 1) != NULL;
}
122
/*
 * Advance uc past any run of JSON whitespace, never going beyond ue.
 * Returns the first position that is not whitespace, or ue if the rest
 * of the input is blank.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
130
131 /*ARGSUSED*/
132 static int
json_parse_string(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)133 json_parse_string(const unsigned char **ucp, const unsigned char *ue,
134 size_t lvl __file_debugused)
135 {
136 const unsigned char *uc = *ucp;
137 size_t i;
138
139 DPRINTF("Parse string: ", uc, *ucp);
140 while (uc < ue) {
141 switch (*uc++) {
142 case '\0':
143 goto out;
144 case '\\':
145 if (uc == ue)
146 goto out;
147 switch (*uc++) {
148 case '\0':
149 goto out;
150 case '"':
151 case '\\':
152 case '/':
153 case 'b':
154 case 'f':
155 case 'n':
156 case 'r':
157 case 't':
158 continue;
159 case 'u':
160 if (ue - uc < 4) {
161 uc = ue;
162 goto out;
163 }
164 for (i = 0; i < 4; i++)
165 if (!json_isxdigit(*uc++))
166 goto out;
167 continue;
168 default:
169 goto out;
170 }
171 case '"':
172 DPRINTF("Good string: ", uc, *ucp);
173 *ucp = uc;
174 return 1;
175 default:
176 continue;
177 }
178 }
179 out:
180 DPRINTF("Bad string: ", uc, *ucp);
181 *ucp = uc;
182 return 0;
183 }
184
185 static int
json_parse_array(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)186 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
187 size_t *st, size_t lvl)
188 {
189 const unsigned char *uc = *ucp;
190
191 DPRINTF("Parse array: ", uc, *ucp);
192 while (uc < ue) {
193 uc = json_skip_space(uc, ue);
194 if (uc == ue)
195 goto out;
196 if (*uc == ']')
197 goto done;
198 if (!json_parse(&uc, ue, st, lvl + 1))
199 goto out;
200 if (uc == ue)
201 goto out;
202 switch (*uc) {
203 case ',':
204 uc++;
205 continue;
206 case ']':
207 done:
208 st[JSON_ARRAYN]++;
209 DPRINTF("Good array: ", uc, *ucp);
210 *ucp = uc + 1;
211 return 1;
212 default:
213 goto out;
214 }
215 }
216 out:
217 DPRINTF("Bad array: ", uc, *ucp);
218 *ucp = uc;
219 return 0;
220 }
221
/*
 * Parse the remainder of a JSON object; *ucp points just past the '{'.
 *
 * Each member is a string key, optional whitespace, ':' and a value
 * parsed by json_parse(); members are separated by ','.  A '}' is
 * accepted wherever a key could start, so "{}" and a trailing comma are
 * both taken as valid.
 *
 * Returns 1 with *ucp just past the '}' on success, otherwise 0 with
 * *ucp set to where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;

	DPRINTF("Parse object: ", cur, *ucp);
	for (;;) {
		unsigned char ch;

		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;

		/* Either the end of the object or the start of a key. */
		ch = *cur++;
		if (ch == '}')
			goto good;
		if (ch != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue, lvl)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		/* After a value: another member, or the end of the object. */
		ch = *cur++;
		if (ch == ',')
			continue;
		if (ch == '}')
			goto good;
		DPRINTF("not more", cur, *ucp);
		/* Only changes what the debug trace below prints. */
		*ucp = cur - 1;
		break;
	}
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;

good:
	DPRINTF("Good object: ", cur, *ucp);
	*ucp = cur;
	return 1;
}
277
278 /*ARGSUSED*/
279 static int
json_parse_number(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)280 json_parse_number(const unsigned char **ucp, const unsigned char *ue,
281 size_t lvl __file_debugused)
282 {
283 const unsigned char *uc = *ucp;
284 int got = 0;
285
286 DPRINTF("Parse number: ", uc, *ucp);
287 if (uc == ue)
288 return 0;
289 if (*uc == '-')
290 uc++;
291
292 for (; uc < ue; uc++) {
293 if (!json_isdigit(*uc))
294 break;
295 got = 1;
296 }
297 if (uc == ue)
298 goto out;
299 if (*uc == '.')
300 uc++;
301 for (; uc < ue; uc++) {
302 if (!json_isdigit(*uc))
303 break;
304 got = 1;
305 }
306 if (uc == ue)
307 goto out;
308 if (got && (*uc == 'e' || *uc == 'E')) {
309 uc++;
310 got = 0;
311 if (uc == ue)
312 goto out;
313 if (*uc == '+' || *uc == '-')
314 uc++;
315 for (; uc < ue; uc++) {
316 if (!json_isdigit(*uc))
317 break;
318 got = 1;
319 }
320 }
321 out:
322 if (!got)
323 DPRINTF("Bad number: ", uc, *ucp);
324 else
325 DPRINTF("Good number: ", uc, *ucp);
326 *ucp = uc;
327 return got;
328 }
329
330 /*ARGSUSED*/
331 static int
json_parse_const(const unsigned char ** ucp,const unsigned char * ue,const char * str,size_t len,size_t lvl __file_debugused)332 json_parse_const(const unsigned char **ucp, const unsigned char *ue,
333 const char *str, size_t len, size_t lvl __file_debugused)
334 {
335 const unsigned char *uc = *ucp;
336
337 DPRINTF("Parse const: ", uc, *ucp);
338 *ucp += --len - 1;
339 if (*ucp > ue)
340 *ucp = ue;
341 for (; uc < ue && --len;) {
342 if (*uc++ != *++str) {
343 DPRINTF("Bad const: ", uc, *ucp);
344 return 0;
345 }
346 }
347 DPRINTF("Good const: ", uc, *ucp);
348 return 1;
349 }
350
351 static int
json_parse(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)352 json_parse(const unsigned char **ucp, const unsigned char *ue,
353 size_t *st, size_t lvl)
354 {
355 const unsigned char *uc, *ouc;
356 int rv = 0;
357 int t;
358
359 ouc = uc = json_skip_space(*ucp, ue);
360 if (uc == ue)
361 goto out;
362
363 // Avoid recursion
364 if (lvl > 500) {
365 DPRINTF("Too many levels", uc, *ucp);
366 return 0;
367 }
368 #if JSON_COUNT
369 /* bail quickly if not counting */
370 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
371 return 1;
372 #endif
373
374 DPRINTF("Parse general: ", uc, *ucp);
375 switch (*uc++) {
376 case '"':
377 rv = json_parse_string(&uc, ue, lvl + 1);
378 t = JSON_STRING;
379 break;
380 case '[':
381 rv = json_parse_array(&uc, ue, st, lvl + 1);
382 t = JSON_ARRAY;
383 break;
384 case '{': /* '}' */
385 rv = json_parse_object(&uc, ue, st, lvl + 1);
386 t = JSON_OBJECT;
387 break;
388 case 't':
389 rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
390 t = JSON_CONSTANT;
391 break;
392 case 'f':
393 rv = json_parse_const(&uc, ue, "false", sizeof("false"),
394 lvl + 1);
395 t = JSON_CONSTANT;
396 break;
397 case 'n':
398 rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
399 t = JSON_CONSTANT;
400 break;
401 default:
402 --uc;
403 rv = json_parse_number(&uc, ue, lvl + 1);
404 t = JSON_NUMBER;
405 break;
406 }
407 if (rv)
408 st[t]++;
409 uc = json_skip_space(uc, ue);
410 out:
411 DPRINTF("End general: ", uc, *ucp);
412 *ucp = uc;
413 if (lvl == 0) {
414 if (!rv)
415 return 0;
416 if (uc == ue)
417 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
418 if (*ouc == *uc && json_parse(&uc, ue, st, 1))
419 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
420 else
421 return 0;
422 }
423 return rv;
424 }
425
426 #ifndef TEST
427 int
file_is_json(struct magic_set * ms,const struct buffer * b)428 file_is_json(struct magic_set *ms, const struct buffer *b)
429 {
430 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
431 const unsigned char *ue = uc + b->flen;
432 size_t st[JSON_MAX];
433 int mime = ms->flags & MAGIC_MIME;
434 int jt;
435
436
437 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
438 return 0;
439
440 memset(st, 0, sizeof(st));
441
442 if ((jt = json_parse(&uc, ue, st, 0)) == 0)
443 return 0;
444
445 if (mime == MAGIC_MIME_ENCODING)
446 return 1;
447 if (mime) {
448 if (file_printf(ms, "application/%s",
449 jt == 1 ? "json" : "x-ndjson") == -1)
450 return -1;
451 return 1;
452 }
453 if (file_printf(ms, "%sJSON text data",
454 jt == 1 ? "" : "New Line Delimited ") == -1)
455 return -1;
456 #if JSON_COUNT
457 #define P(n) st[n], st[n] > 1 ? "s" : ""
458 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
459 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
460 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
461 "u >1array%s)",
462 P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
463 P(JSON_NUMBER), P(JSON_ARRAYN))
464 == -1)
465 return -1;
466 #endif
467 return 1;
468 }
469
470 #else
471
472 #include <sys/types.h>
473 #include <sys/stat.h>
474 #include <stdio.h>
475 #include <fcntl.h>
476 #include <unistd.h>
477 #include <stdlib.h>
478 #include <stdint.h>
479 #include <err.h>
480
481 int
main(int argc,char * argv[])482 main(int argc, char *argv[])
483 {
484 int fd;
485 struct stat st;
486 unsigned char *p;
487 size_t stats[JSON_MAX];
488
489 if ((fd = open(argv[1], O_RDONLY)) == -1)
490 err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
491
492 if (fstat(fd, &st) == -1)
493 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
494
495 if ((p = CAST(char *, malloc(st.st_size))) == NULL)
496 err(EXIT_FAILURE, "Can't allocate %jd bytes",
497 (intmax_t)st.st_size);
498 if (read(fd, p, st.st_size) != st.st_size)
499 err(EXIT_FAILURE, "Can't read %jd bytes",
500 (intmax_t)st.st_size);
501 memset(stats, 0, sizeof(stats));
502 printf("is json %d\n", json_parse((const unsigned char **)&p,
503 p + st.st_size, stats, 0));
504 return 0;
505 }
506 #endif
507