xref: /netbsd-src/external/bsd/file/dist/src/is_json.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: is_json.c,v 1.4 2020/06/15 00:37:24 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 Christos Zoulas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Parse JSON object serialization format (RFC-7159)
31  */
32 
33 #ifndef TEST
34 #include "file.h"
35 
36 #ifndef lint
37 #if 0
38 FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $")
39 #else
40 __RCSID("$NetBSD: is_json.c,v 1.4 2020/06/15 00:37:24 christos Exp $");
41 #endif
42 #endif
43 
44 #include <string.h>
45 #include "magic.h"
46 #endif
47 
/*
 * Debug tracing helper.
 *
 * With DEBUG defined, DPRINTF(a, b, c) prints the label string a, the
 * byte that b points at (both as two hex digits and as a character) and
 * at most the first 20 characters of c.  Note that b is dereferenced, so
 * it must point at a readable byte.  Without DEBUG the macro expands to
 * an empty statement and none of its arguments are evaluated.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif
55 
/*
 * Indices into the statistics array (size_t st[JSON_MAX]) that the
 * parser updates while it walks the input.
 */
#define JSON_ARRAY	0	/* incremented by json_parse() per array value */
#define JSON_CONSTANT	1	/* incremented by json_parse() per true/false/null */
#define JSON_NUMBER	2	/* incremented by json_parse() per number */
#define JSON_OBJECT	3	/* incremented by json_parse() per object */
#define JSON_STRING	4	/* incremented by json_parse() per string value */
#define JSON_ARRAYN	5	/* incremented by json_parse_array() per closed array */
#define JSON_MAX	6	/* number of slots in the statistics array */
63 
64 /*
65  * if JSON_COUNT != 0:
66  *	count all the objects, require that we have the whole data file
67  * otherwise:
68  *	stop if we find an object or an array
69  */
70 #ifndef JSON_COUNT
71 #define JSON_COUNT 0
72 #endif
73 
74 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75 	size_t);
76 
/*
 * Return 1 if uc is one of the four whitespace characters recognised
 * here (space, newline, carriage return, horizontal tab), else 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90 
/*
 * Return 1 if uc is a decimal digit '0'..'9', else 0.
 * (The C standard guarantees the digit characters are contiguous.)
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
102 
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), else 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	return uc >= 'A' && uc <= 'F';
}
116 
/*
 * Advance uc past any run of whitespace (as defined by json_isspace())
 * without going beyond ue.  Returns the first non-space position, or ue
 * if only whitespace remained.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
124 
/*
 * Scan the remainder of a string; the opening quote has already been
 * consumed by the caller.
 *
 * Backslash escapes are validated: the single-character escapes
 * \" \\ \/ \b \f \n \r \t and \u followed by four hex digits are
 * accepted.  A NUL byte, any other escape, or running out of input
 * before the closing quote makes the string invalid.
 *
 * *ucp is updated to the scan position: just past the closing quote on
 * success (return 1), or wherever scanning stopped on failure
 * (return 0).
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char ch;
	size_t n;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		ch = *p++;
		if (ch == '"') {
			*ucp = p;
			DPRINTF("Good string: ", p, *ucp);
			return 1;
		}
		if (ch == '\0')
			goto bad;
		if (ch != '\\')
			continue;

		/* Escape sequence: at least one more byte is required. */
		if (p == ue)
			goto bad;
		ch = *p++;
		switch (ch) {
		case '"': case '\\': case '/':
		case 'b': case 'f': case 'n': case 'r': case 't':
			break;
		case 'u':
			/* Not enough room for four hex digits: give up at end. */
			if (ue - p < 4) {
				p = ue;
				goto bad;
			}
			for (n = 0; n < 4; n++) {
				if (!json_isxdigit(*p++))
					goto bad;
			}
			break;
		default:
			/* Unknown escape, including a NUL after the backslash. */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
176 
177 static int
178 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
179 	size_t *st, size_t lvl)
180 {
181 	const unsigned char *uc = *ucp;
182 
183 	DPRINTF("Parse array: ", uc, *ucp);
184 	while (uc < ue) {
185 		if (*uc == ']')
186 			goto done;
187 		if (!json_parse(&uc, ue, st, lvl + 1))
188 			goto out;
189 		if (uc == ue)
190 			goto out;
191 		switch (*uc) {
192 		case ',':
193 			uc++;
194 			continue;
195 		case ']':
196 		done:
197 			st[JSON_ARRAYN]++;
198 			*ucp = uc + 1;
199 			DPRINTF("Good array: ", uc, *ucp);
200 			return 1;
201 		default:
202 			goto out;
203 		}
204 	}
205 out:
206 	DPRINTF("Bad array: ", uc,  *ucp);
207 	*ucp = uc;
208 	return 0;
209 }
210 
/*
 * Parse the members of an object; the opening brace has already been
 * consumed by the caller.
 *
 * Each member must be a quoted string, a ':' and a value (parsed with
 * json_parse() at nesting level lvl + 1), followed by ',' or the
 * closing brace.  Whitespace before the member name, before the colon
 * and (via json_parse()) around the value is skipped.
 *
 * Returns 1 with *ucp just past the closing brace on success, or 0 with
 * *ucp at the position where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char sep;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		/* Empty object, or closing brace after a ','. */
		if (*cur == '}') {
			cur++;
			goto good;
		}

		/* Member name. */
		if (*cur++ != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		/* Name/value separator. */
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		/* Member value. */
		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		sep = *cur++;
		if (sep == ',')
			continue;
		if (sep == '}') /* { */
			goto good;
		/* Only affects the trace output; overwritten below. */
		*ucp = cur - 1;
		DPRINTF("not more", cur, *ucp);
		break;
	}
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;
good:
	*ucp = cur;
	DPRINTF("Good object: ", cur, *ucp);
	return 1;
}
266 
/*
 * Advance p over a run of decimal digits, stopping at ue.  Sets *seen to
 * 1 if at least one digit was consumed (it is left untouched otherwise)
 * and returns the new position.
 */
static const unsigned char *
json_number_digits(const unsigned char *p, const unsigned char *ue, int *seen)
{
	while (p < ue && json_isdigit(*p)) {
		*seen = 1;
		p++;
	}
	return p;
}

/*
 * Parse a number: optional '-', digits, optional '.', digits, and an
 * optional exponent ('e' or 'E', optional sign, digits).
 *
 * The mantissa needs at least one digit on either side of the optional
 * '.'; an exponent, when present, needs at least one digit of its own.
 *
 * Returns 1 if a number was recognised, 0 otherwise.  *ucp is advanced
 * to where scanning stopped, except when the input is already empty on
 * entry, in which case 0 is returned and *ucp is left alone.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int ok = 0;

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;

	/* Sign and integer part. */
	if (*p == '-')
		p++;
	p = json_number_digits(p, ue, &ok);
	if (p == ue)
		goto finish;

	/* Fraction. */
	if (*p == '.')
		p++;
	p = json_number_digits(p, ue, &ok);
	if (p == ue)
		goto finish;

	/* Exponent, only considered after a valid mantissa. */
	if (!ok || (*p != 'e' && *p != 'E'))
		goto finish;
	p++;
	ok = 0;
	if (p == ue)
		goto finish;
	if (*p == '+' || *p == '-')
		p++;
	p = json_number_digits(p, ue, &ok);
finish:
	if (ok)
		DPRINTF("Good number: ", p, *ucp);
	else
		DPRINTF("Bad number: ", p, *ucp);
	*ucp = p;
	return ok;
}
316 
/*
 * Match the remainder of a literal name ("true", "false" or "null").
 *
 * The caller has already consumed the first character of the literal
 * from the input, so matching starts at str[1].  len is the sizeof() of
 * the literal and therefore includes its terminating NUL.
 *
 * Every remaining character must match exactly; a mismatch or running
 * out of input before the literal is complete is a failure.  A literal
 * that ends exactly at ue is accepted.
 *
 * Returns 1 with *ucp just past the literal on success, or 0 with *ucp
 * at the first byte that did not match (or at ue if input ran out).
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	/* str[0] is already matched and str[len - 1] is the NUL. */
	for (i = 1; i + 1 < len; i++, uc++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
	}
	DPRINTF("Good const: ", uc, *ucp);
	*ucp = uc;
	return 1;
}
333 
334 static int
335 json_parse(const unsigned char **ucp, const unsigned char *ue,
336     size_t *st, size_t lvl)
337 {
338 	const unsigned char *uc;
339 	int rv = 0;
340 	int t;
341 
342 	uc = json_skip_space(*ucp, ue);
343 	if (uc == ue)
344 		goto out;
345 
346 	// Avoid recursion
347 	if (lvl > 20)
348 		return 0;
349 #if JSON_COUNT
350 	/* bail quickly if not counting */
351 	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
352 		return 1;
353 #endif
354 
355 	DPRINTF("Parse general: ", uc, *ucp);
356 	switch (*uc++) {
357 	case '"':
358 		rv = json_parse_string(&uc, ue);
359 		t = JSON_STRING;
360 		break;
361 	case '[':
362 		rv = json_parse_array(&uc, ue, st, lvl + 1);
363 		t = JSON_ARRAY;
364 		break;
365 	case '{': /* '}' */
366 		rv = json_parse_object(&uc, ue, st, lvl + 1);
367 		t = JSON_OBJECT;
368 		break;
369 	case 't':
370 		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
371 		t = JSON_CONSTANT;
372 		break;
373 	case 'f':
374 		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
375 		t = JSON_CONSTANT;
376 		break;
377 	case 'n':
378 		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
379 		t = JSON_CONSTANT;
380 		break;
381 	default:
382 		--uc;
383 		rv = json_parse_number(&uc, ue);
384 		t = JSON_NUMBER;
385 		break;
386 	}
387 	if (rv)
388 		st[t]++;
389 	uc = json_skip_space(uc, ue);
390 out:
391 	*ucp = uc;
392 	DPRINTF("End general: ", uc, *ucp);
393 	if (lvl == 0)
394 		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
395 	return rv;
396 }
397 
398 #ifndef TEST
399 int
400 file_is_json(struct magic_set *ms, const struct buffer *b)
401 {
402 	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
403 	const unsigned char *ue = uc + b->flen;
404 	size_t st[JSON_MAX];
405 	int mime = ms->flags & MAGIC_MIME;
406 
407 
408 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
409 		return 0;
410 
411 	memset(st, 0, sizeof(st));
412 
413 	if (!json_parse(&uc, ue, st, 0))
414 		return 0;
415 
416 	if (mime == MAGIC_MIME_ENCODING)
417 		return 1;
418 	if (mime) {
419 		if (file_printf(ms, "application/json") == -1)
420 			return -1;
421 		return 1;
422 	}
423 	if (file_printf(ms, "JSON data") == -1)
424 		return -1;
425 #if JSON_COUNT
426 #define P(n) st[n], st[n] > 1 ? "s" : ""
427 	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
428 	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
429 	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
430 	    "u >1array%s)",
431 	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
432 	    P(JSON_NUMBER), P(JSON_ARRAYN))
433 	    == -1)
434 		return -1;
435 #endif
436 	return 1;
437 }
438 
439 #else
440 
441 #include <sys/types.h>
442 #include <sys/stat.h>
443 #include <stdio.h>
444 #include <fcntl.h>
445 #include <unistd.h>
446 #include <stdlib.h>
447 #include <stdint.h>
448 #include <err.h>
449 
450 int
451 main(int argc, char *argv[])
452 {
453 	int fd, rv;
454 	struct stat st;
455 	unsigned char *p;
456 	size_t stats[JSON_MAX];
457 
458 	if ((fd = open(argv[1], O_RDONLY)) == -1)
459 		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
460 
461 	if (fstat(fd, &st) == -1)
462 		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
463 
464 	if ((p = malloc(st.st_size)) == NULL)
465 		err(EXIT_FAILURE, "Can't allocate %jd bytes",
466 		    (intmax_t)st.st_size);
467 	if (read(fd, p, st.st_size) != st.st_size)
468 		err(EXIT_FAILURE, "Can't read %jd bytes",
469 		    (intmax_t)st.st_size);
470 	memset(stats, 0, sizeof(stats));
471 	printf("is json %d\n", json_parse((const unsigned char **)&p,
472 	    p + st.st_size, stats, 0));
473 	return 0;
474 }
475 #endif
476