/*	$NetBSD: is_json.c,v 1.6 2023/08/18 19:00:11 christos Exp $	*/

/*-
 * Copyright (c) 2018 Christos Zoulas
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Parse JSON object serialization format (RFC-7159)
 */
32 
33 #ifndef TEST
34 #include "file.h"
35 
36 #ifndef lint
37 #if 0
38 FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
39 #else
40 __RCSID("$NetBSD: is_json.c,v 1.6 2023/08/18 19:00:11 christos Exp $");
41 #endif
42 #endif
43 
44 #include "magic.h"
45 #else
46 #include <stdio.h>
47 #include <stddef.h>
48 #endif
49 #include <string.h>
50 
/*
 * Debug tracing.
 *
 * DPRINTF(a, b, c): a is a label string, b the current scan position and
 * c the position the current parse step started from.  With DEBUG defined
 * it prints the label, the byte at b (hex and as a character) and the
 * bytes between c and b, indented by the `lvl' variable that must be in
 * scope at the call site.  Without DEBUG it expands to an empty statement.
 *
 * __file_debugused marks parameters (in practice `lvl') that are only read
 * by DPRINTF, so that non-debug builds do not warn about them.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	(int)((b) - (c)), (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#define __file_debugused __attribute__((__unused__))
#endif
61 
/*
 * Indices into the statistics array (size_t st[JSON_MAX]) that is threaded
 * through the parser.  json_parse() bumps the slot matching the kind of
 * value it has just parsed successfully; JSON_ARRAYN is bumped separately
 * by json_parse_array() each time it reaches a closing ']'.
 */
#define JSON_ARRAY	0
#define JSON_CONSTANT	1
#define JSON_NUMBER	2
#define JSON_OBJECT	3
#define JSON_STRING	4
#define JSON_ARRAYN	5
#define JSON_MAX	6	/* number of slots, not a value kind */
69 
/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 *
 * The value may be supplied on the compiler command line; it defaults to 0.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif

/*
 * Forward declaration: the array and object parsers recurse into
 * json_parse() for their elements.
 */
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
	size_t);
82 
/*
 * Return 1 if uc is one of the four whitespace bytes recognised between
 * JSON tokens (space, newline, carriage return, tab), 0 otherwise.
 * Unlike isspace(3) this does not accept '\v' or '\f' and does not depend
 * on the locale.
 */
static int
json_isspace(const unsigned char uc)
{
	switch (uc) {
	case ' ':
	case '\n':
	case '\r':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
96 
/*
 * Return 1 if uc is an ASCII decimal digit ('0'..'9'), 0 otherwise.
 * Locale independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return 1;
	default:
		return 0;
	}
}
108 
/*
 * Return 1 if uc is an ASCII hexadecimal digit (0-9, a-f, A-F), 0
 * otherwise.  Used to validate the four digits of a \uXXXX escape.
 */
static int
json_isxdigit(unsigned char uc)
{
	switch (uc) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 1;
	default:
		return 0;
	}
}
122 
/*
 * Skip JSON whitespace.
 *
 * Returns the first position in [uc, ue) whose byte is not whitespace as
 * defined by json_isspace(), or ue if only whitespace remains.  Never
 * reads at or beyond ue.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	while (uc < ue && json_isspace(*uc))
		uc++;
	return uc;
}
130 
131 /*ARGSUSED*/
132 static int
json_parse_string(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)133 json_parse_string(const unsigned char **ucp, const unsigned char *ue,
134     size_t lvl __file_debugused)
135 {
136 	const unsigned char *uc = *ucp;
137 	size_t i;
138 
139 	DPRINTF("Parse string: ", uc, *ucp);
140 	while (uc < ue) {
141 		switch (*uc++) {
142 		case '\0':
143 			goto out;
144 		case '\\':
145 			if (uc == ue)
146 				goto out;
147 			switch (*uc++) {
148 			case '\0':
149 				goto out;
150 			case '"':
151 			case '\\':
152 			case '/':
153 			case 'b':
154 			case 'f':
155 			case 'n':
156 			case 'r':
157 			case 't':
158 				continue;
159 			case 'u':
160 				if (ue - uc < 4) {
161 					uc = ue;
162 					goto out;
163 				}
164 				for (i = 0; i < 4; i++)
165 					if (!json_isxdigit(*uc++))
166 						goto out;
167 				continue;
168 			default:
169 				goto out;
170 			}
171 		case '"':
172 			DPRINTF("Good string: ", uc, *ucp);
173 			*ucp = uc;
174 			return 1;
175 		default:
176 			continue;
177 		}
178 	}
179 out:
180 	DPRINTF("Bad string: ", uc, *ucp);
181 	*ucp = uc;
182 	return 0;
183 }
184 
185 static int
json_parse_array(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)186 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
187 	size_t *st, size_t lvl)
188 {
189 	const unsigned char *uc = *ucp;
190 
191 	DPRINTF("Parse array: ", uc, *ucp);
192 	while (uc < ue) {
193 		uc = json_skip_space(uc, ue);
194 		if (uc == ue)
195 			goto out;
196 		if (*uc == ']')
197 			goto done;
198 		if (!json_parse(&uc, ue, st, lvl + 1))
199 			goto out;
200 		if (uc == ue)
201 			goto out;
202 		switch (*uc) {
203 		case ',':
204 			uc++;
205 			continue;
206 		case ']':
207 		done:
208 			st[JSON_ARRAYN]++;
209 			DPRINTF("Good array: ", uc, *ucp);
210 			*ucp = uc + 1;
211 			return 1;
212 		default:
213 			goto out;
214 		}
215 	}
216 out:
217 	DPRINTF("Bad array: ", uc,  *ucp);
218 	*ucp = uc;
219 	return 0;
220 }
221 
/*
 * Parse the members of a JSON object.
 *
 * On entry *ucp points just past the opening '{' (consumed by
 * json_parse()).  Each member is a string key, optional whitespace, ':'
 * and a value parsed with json_parse(), followed by ',' or '}'.  A '}' is
 * also accepted wherever a key could start, so both "{}" and a trailing
 * comma are tolerated.
 *
 * Returns 1 with *ucp just past the closing '}' on success; returns 0
 * with *ucp at the position where parsing stopped otherwise (including
 * when the buffer ends before the closing brace).
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	DPRINTF("Parse object: ", uc, *ucp);
	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc == '}') {
			uc++;
			goto done;
		}
		/* a member must start with a quoted key */
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue, lvl)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		/* json_parse() skips whitespace on both sides of the value */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
		done:
			DPRINTF("Good object: ", uc, *ucp);
			*ucp = uc;
			return 1;
		default:
			DPRINTF("not more", uc, *ucp);
			/*
			 * Only changes what the "Bad object" trace below
			 * prints; the store at `out' overwrites it.
			 */
			*ucp = uc - 1;
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
277 
278 /*ARGSUSED*/
279 static int
json_parse_number(const unsigned char ** ucp,const unsigned char * ue,size_t lvl __file_debugused)280 json_parse_number(const unsigned char **ucp, const unsigned char *ue,
281     size_t lvl __file_debugused)
282 {
283 	const unsigned char *uc = *ucp;
284 	int got = 0;
285 
286 	DPRINTF("Parse number: ", uc, *ucp);
287 	if (uc == ue)
288 		return 0;
289 	if (*uc == '-')
290 		uc++;
291 
292 	for (; uc < ue; uc++) {
293 		if (!json_isdigit(*uc))
294 			break;
295 		got = 1;
296 	}
297 	if (uc == ue)
298 		goto out;
299 	if (*uc == '.')
300 		uc++;
301 	for (; uc < ue; uc++) {
302 		if (!json_isdigit(*uc))
303 			break;
304 		got = 1;
305 	}
306 	if (uc == ue)
307 		goto out;
308 	if (got && (*uc == 'e' || *uc == 'E')) {
309 		uc++;
310 		got = 0;
311 		if (uc == ue)
312 			goto out;
313 		if (*uc == '+' || *uc == '-')
314 			uc++;
315 		for (; uc < ue; uc++) {
316 			if (!json_isdigit(*uc))
317 				break;
318 			got = 1;
319 		}
320 	}
321 out:
322 	if (!got)
323 		DPRINTF("Bad number: ", uc, *ucp);
324 	else
325 		DPRINTF("Good number: ", uc, *ucp);
326 	*ucp = uc;
327 	return got;
328 }
329 
330 /*ARGSUSED*/
331 static int
json_parse_const(const unsigned char ** ucp,const unsigned char * ue,const char * str,size_t len,size_t lvl __file_debugused)332 json_parse_const(const unsigned char **ucp, const unsigned char *ue,
333     const char *str, size_t len, size_t lvl __file_debugused)
334 {
335 	const unsigned char *uc = *ucp;
336 
337 	DPRINTF("Parse const: ", uc, *ucp);
338 	*ucp += --len - 1;
339 	if (*ucp > ue)
340 		*ucp = ue;
341 	for (; uc < ue && --len;) {
342 		if (*uc++ != *++str) {
343 			DPRINTF("Bad const: ", uc, *ucp);
344 			return 0;
345 		}
346 	}
347 	DPRINTF("Good const: ", uc, *ucp);
348 	return 1;
349 }
350 
351 static int
json_parse(const unsigned char ** ucp,const unsigned char * ue,size_t * st,size_t lvl)352 json_parse(const unsigned char **ucp, const unsigned char *ue,
353     size_t *st, size_t lvl)
354 {
355 	const unsigned char *uc, *ouc;
356 	int rv = 0;
357 	int t;
358 
359 	ouc = uc = json_skip_space(*ucp, ue);
360 	if (uc == ue)
361 		goto out;
362 
363 	// Avoid recursion
364 	if (lvl > 500) {
365 		DPRINTF("Too many levels", uc, *ucp);
366 		return 0;
367 	}
368 #if JSON_COUNT
369 	/* bail quickly if not counting */
370 	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
371 		return 1;
372 #endif
373 
374 	DPRINTF("Parse general: ", uc, *ucp);
375 	switch (*uc++) {
376 	case '"':
377 		rv = json_parse_string(&uc, ue, lvl + 1);
378 		t = JSON_STRING;
379 		break;
380 	case '[':
381 		rv = json_parse_array(&uc, ue, st, lvl + 1);
382 		t = JSON_ARRAY;
383 		break;
384 	case '{': /* '}' */
385 		rv = json_parse_object(&uc, ue, st, lvl + 1);
386 		t = JSON_OBJECT;
387 		break;
388 	case 't':
389 		rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
390 		t = JSON_CONSTANT;
391 		break;
392 	case 'f':
393 		rv = json_parse_const(&uc, ue, "false", sizeof("false"),
394 		    lvl + 1);
395 		t = JSON_CONSTANT;
396 		break;
397 	case 'n':
398 		rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
399 		t = JSON_CONSTANT;
400 		break;
401 	default:
402 		--uc;
403 		rv = json_parse_number(&uc, ue, lvl + 1);
404 		t = JSON_NUMBER;
405 		break;
406 	}
407 	if (rv)
408 		st[t]++;
409 	uc = json_skip_space(uc, ue);
410 out:
411 	DPRINTF("End general: ", uc, *ucp);
412 	*ucp = uc;
413 	if (lvl == 0) {
414 		if (!rv)
415 			return 0;
416 		if (uc == ue)
417 			return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
418 		if (*ouc == *uc && json_parse(&uc, ue, st, 1))
419 			return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
420 		else
421 			return 0;
422 	}
423 	return rv;
424 }
425 
426 #ifndef TEST
427 int
file_is_json(struct magic_set * ms,const struct buffer * b)428 file_is_json(struct magic_set *ms, const struct buffer *b)
429 {
430 	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
431 	const unsigned char *ue = uc + b->flen;
432 	size_t st[JSON_MAX];
433 	int mime = ms->flags & MAGIC_MIME;
434 	int jt;
435 
436 
437 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
438 		return 0;
439 
440 	memset(st, 0, sizeof(st));
441 
442 	if ((jt = json_parse(&uc, ue, st, 0)) == 0)
443 		return 0;
444 
445 	if (mime == MAGIC_MIME_ENCODING)
446 		return 1;
447 	if (mime) {
448 		if (file_printf(ms, "application/%s",
449 		    jt == 1 ? "json" : "x-ndjson") == -1)
450 			return -1;
451 		return 1;
452 	}
453 	if (file_printf(ms, "%sJSON text data",
454 	    jt == 1 ? "" : "New Line Delimited ") == -1)
455 		return -1;
456 #if JSON_COUNT
457 #define P(n) st[n], st[n] > 1 ? "s" : ""
458 	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
459 	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
460 	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
461 	    "u >1array%s)",
462 	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
463 	    P(JSON_NUMBER), P(JSON_ARRAYN))
464 	    == -1)
465 		return -1;
466 #endif
467 	return 1;
468 }
469 
470 #else
471 
472 #include <sys/types.h>
473 #include <sys/stat.h>
474 #include <stdio.h>
475 #include <fcntl.h>
476 #include <unistd.h>
477 #include <stdlib.h>
478 #include <stdint.h>
479 #include <err.h>
480 
481 int
main(int argc,char * argv[])482 main(int argc, char *argv[])
483 {
484 	int fd;
485 	struct stat st;
486 	unsigned char *p;
487 	size_t stats[JSON_MAX];
488 
489 	if ((fd = open(argv[1], O_RDONLY)) == -1)
490 		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
491 
492 	if (fstat(fd, &st) == -1)
493 		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
494 
495 	if ((p = CAST(char *, malloc(st.st_size))) == NULL)
496 		err(EXIT_FAILURE, "Can't allocate %jd bytes",
497 		    (intmax_t)st.st_size);
498 	if (read(fd, p, st.st_size) != st.st_size)
499 		err(EXIT_FAILURE, "Can't read %jd bytes",
500 		    (intmax_t)st.st_size);
501 	memset(stats, 0, sizeof(stats));
502 	printf("is json %d\n", json_parse((const unsigned char **)&p,
503 	    p + st.st_size, stats, 0));
504 	return 0;
505 }
506 #endif
507