xref: /netbsd-src/external/bsd/file/dist/src/is_json.c (revision deb6f0161a9109e7de9b519dc8dfb9478668dcdd)
1 /*	$NetBSD: is_json.c,v 1.2 2018/10/19 00:24:57 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 Christos Zoulas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Parse JSON object serialization format (RFC-7159)
31  */
32 
33 #ifndef TEST
34 #include "file.h"
35 
36 #ifndef lint
37 #if 0
38 FILE_RCSID("@(#)$File: is_json.c,v 1.11 2018/10/15 16:29:16 christos Exp $")
39 #else
40 __RCSID("$NetBSD: is_json.c,v 1.2 2018/10/19 00:24:57 christos Exp $");
41 #endif
42 #endif
43 
44 #include <string.h>
45 #include "magic.h"
46 #endif
47 
/*
 * DPRINTF(tag, cur, ctx): parser trace hook.
 *
 * Without DEBUG it expands to an empty statement.  With DEBUG it prints
 * the tag, the byte at `cur' (in hex and as a character) and up to 20
 * bytes of the text at `ctx'.  Note that the DEBUG variant dereferences
 * `cur' unconditionally.
 */
#ifndef DEBUG
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#else
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#endif
55 
/*
 * Indices into the per-type counter array that json_parse() updates;
 * JSON_MAX is the number of counters.
 */
enum {
	JSON_ARRAY	= 0,
	JSON_CONSTANT	= 1,
	JSON_NUMBER	= 2,
	JSON_OBJECT	= 3,
	JSON_STRING	= 4,
	JSON_MAX	= 5
};
62 
63 /*
64  * if JSON_COUNT != 0:
65  *	count all the objects, require that we have the whole data file
66  * otherwise:
67  *	stop if we find an object or an array
68  */
69 #ifndef JSON_COUNT
70 #define JSON_COUNT 0
71 #endif
72 
73 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
74 	size_t);
75 
/*
 * Return 1 if uc is one of the four whitespace bytes recognised here
 * (space, newline, carriage return, horizontal tab), otherwise 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
89 
/*
 * Return 1 if uc is a decimal digit '0'..'9', otherwise 0.
 * Locale independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	/* The C standard guarantees that '0'..'9' are contiguous. */
	return uc >= '0' && uc <= '9';
}
101 
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	switch (uc) {
	case 'a': case 'A':
	case 'b': case 'B':
	case 'c': case 'C':
	case 'd': case 'D':
	case 'e': case 'E':
	case 'f': case 'F':
		return 1;
	default:
		/* Anything else is a hex digit only if it is a decimal one. */
		return json_isdigit(uc) ? 1 : 0;
	}
}
115 
/*
 * Advance past any run of JSON whitespace starting at uc, never moving
 * beyond ue.  Returns the first position that is either ue or holds a
 * non-whitespace byte.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
123 
/*
 * Scan the body of a JSON string.  On entry *ucp points just past the
 * opening quote; ue is the end of the buffer.
 *
 * Returns 1 with *ucp just past the closing quote on success.  Returns 0
 * with *ucp at the point where scanning stopped if the buffer ends
 * before the closing quote, a NUL byte is seen, or a backslash is
 * followed by anything other than one of " \ / b f n r t or a `u' with
 * four hexadecimal digits.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *cur = *ucp;
	size_t left;

	DPRINTF("Parse string: ", cur, *ucp);
	while (cur < ue) {
		unsigned char ch = *cur++;

		if (ch == '"') {
			*ucp = cur;
			return 1;
		}
		if (ch == '\0')
			goto bad;
		if (ch != '\\')
			continue;

		/* Escape sequence: need at least one more byte. */
		if (cur == ue)
			goto bad;
		ch = *cur++;
		switch (ch) {
		case '"': case '\\': case '/':
		case 'b': case 'f': case 'n': case 'r': case 't':
			break;
		case 'u':
			if (ue - cur < 4) {
				/* Truncated \uXXXX: report end of buffer. */
				cur = ue;
				goto bad;
			}
			for (left = 4; left > 0; left--) {
				if (!json_isxdigit(*cur++))
					goto bad;
			}
			break;
		default:
			/* Unknown escape, including an embedded NUL. */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", cur, *ucp);
	*ucp = cur;
	return 0;
}
174 
/*
 * Parse the remainder of a JSON array.  On entry *ucp points just past
 * the opening '['; ue is the end of the buffer.  Each element is parsed
 * by json_parse() with the counters in st and a nesting level of lvl + 1.
 *
 * Returns 1 with *ucp just past the closing ']' on success, or 0 with
 * *ucp at the point where parsing stopped.
 */
static int
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse array: ", uc, *ucp);

	/*
	 * An empty array has no element for json_parse() to consume, so
	 * recognise "[]" (with optional inner whitespace) up front.  This
	 * is only done before the first element, so a trailing comma such
	 * as "[1,]" is still rejected.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == ']') {
		*ucp = uc + 1;
		return 1;
	}

	while (uc < ue) {
		if (!json_parse(&uc, ue, st, lvl + 1))
			goto out;
		/* json_parse() has already skipped trailing whitespace. */
		if (uc == ue)
			goto out;
		switch (*uc) {
		case ',':
			uc++;
			continue;
		case ']':
			*ucp = uc + 1;
			return 1;
		default:
			goto out;
		}
	}
out:
	DPRINTF("Bad array: ", uc,  *ucp);
	*ucp = uc;
	return 0;
}
203 
/*
 * Parse the remainder of a JSON object.  On entry *ucp points just past
 * the opening brace; ue is the end of the buffer.  Each member must be a
 * string name, a ':' and a value; values are parsed by json_parse() with
 * the counters in st and a nesting level of lvl + 1.
 *
 * Returns 1 with *ucp just past the closing brace on success, or 0 with
 * *ucp at the point where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;

	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * An empty object has no member name to parse, so recognise it
	 * (with optional inner whitespace) up front.  This is only done
	 * before the first member, so a trailing comma is still rejected.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') { /* { */
		DPRINTF("Good object: ", uc, *ucp);
		*ucp = uc + 1;
		return 1;
	}

	while (uc < ue) {
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		/* json_parse() has already skipped trailing whitespace. */
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			/*
			 * This store only affects the context shown by the
			 * trace below; the `out' path overwrites *ucp.
			 */
			*ucp = uc - 1;
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
254 
/*
 * Scan a number starting at *ucp: an optional '-', digits, an optional
 * '.', more digits, and an optional exponent ('e' or 'E', optional sign,
 * digits).  The exponent is only considered once at least one digit has
 * been seen, and it must itself contain at least one digit.
 *
 * Returns 1 if the required digits were found and 0 otherwise; except
 * when the buffer is already empty on entry, *ucp is updated to where
 * scanning stopped.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *cur = *ucp;
	int seen = 0;

	DPRINTF("Parse number: ", cur, *ucp);
	if (cur == ue)
		return 0;
	if (*cur == '-')
		cur++;

	/* Integer part. */
	while (cur < ue && json_isdigit(*cur)) {
		seen = 1;
		cur++;
	}
	if (cur == ue)
		goto done;

	/* Fraction. */
	if (*cur == '.')
		cur++;
	while (cur < ue && json_isdigit(*cur)) {
		seen = 1;
		cur++;
	}
	if (cur == ue)
		goto done;

	/* Exponent: the digit requirement starts over. */
	if (seen && (*cur == 'e' || *cur == 'E')) {
		cur++;
		seen = 0;
		if (cur == ue)
			goto done;
		if (*cur == '+' || *cur == '-')
			cur++;
		while (cur < ue && json_isdigit(*cur)) {
			seen = 1;
			cur++;
		}
	}
done:
	if (seen)
		DPRINTF("Good number: ", cur, *ucp);
	else
		DPRINTF("Bad number: ", cur, *ucp);
	*ucp = cur;
	return seen;
}
304 
/*
 * Match the remainder of a literal name (true, false, null).
 *
 * The caller has already consumed the first character, so *ucp points at
 * what should be the second one.  str is the full literal and len is its
 * size including the terminating NUL (i.e. sizeof("true")).
 *
 * Returns 1 with *ucp just past the literal when every remaining
 * character matches.  Returns 0 if a character differs or the buffer
 * ends first; *ucp is then left at the offending position.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	/* str[0] is already consumed and str[len - 1] is the NUL. */
	for (i = 1; i + 1 < len; i++) {
		/*
		 * Reject on the first difference; previously mismatches
		 * were ignored, so any byte sequence of the right length
		 * was accepted.
		 */
		if (uc == ue || *uc != (unsigned char)str[i])
			goto bad;
		uc++;
	}
	*ucp = uc;
	return 1;
bad:
	DPRINTF("Bad const: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
321 
322 static int
323 json_parse(const unsigned char **ucp, const unsigned char *ue,
324     size_t *st, size_t lvl)
325 {
326 	const unsigned char *uc;
327 	int rv = 0;
328 	int t;
329 
330 	uc = json_skip_space(*ucp, ue);
331 	if (uc == ue)
332 		goto out;
333 
334 	// Avoid recursion
335 	if (lvl > 20)
336 		return 0;
337 #if JSON_COUNT
338 	/* bail quickly if not counting */
339 	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAY]))
340 		return 1;
341 #endif
342 
343 	DPRINTF("Parse general: ", uc, *ucp);
344 	switch (*uc++) {
345 	case '"':
346 		rv = json_parse_string(&uc, ue);
347 		t = JSON_STRING;
348 		break;
349 	case '[':
350 		rv = json_parse_array(&uc, ue, st, lvl + 1);
351 		t = JSON_ARRAY;
352 		break;
353 	case '{': /* '}' */
354 		rv = json_parse_object(&uc, ue, st, lvl + 1);
355 		t = JSON_OBJECT;
356 		break;
357 	case 't':
358 		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
359 		t = JSON_CONSTANT;
360 		break;
361 	case 'f':
362 		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
363 		t = JSON_CONSTANT;
364 		break;
365 	case 'n':
366 		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
367 		t = JSON_CONSTANT;
368 		break;
369 	default:
370 		--uc;
371 		rv = json_parse_number(&uc, ue);
372 		t = JSON_NUMBER;
373 		break;
374 	}
375 	if (rv)
376 		st[t]++;
377 	uc = json_skip_space(uc, ue);
378 out:
379 	*ucp = uc;
380 	DPRINTF("End general: ", uc, *ucp);
381 	if (lvl == 0)
382 		return rv && (st[JSON_ARRAY] || st[JSON_OBJECT]);
383 	return rv;
384 }
385 
386 #ifndef TEST
387 int
388 file_is_json(struct magic_set *ms, const struct buffer *b)
389 {
390 	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
391 	const unsigned char *ue = uc + b->flen;
392 	size_t st[JSON_MAX];
393 	int mime = ms->flags & MAGIC_MIME;
394 
395 
396 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
397 		return 0;
398 
399 	memset(st, 0, sizeof(st));
400 
401 	if (!json_parse(&uc, ue, st, 0))
402 		return 0;
403 
404 	if (mime == MAGIC_MIME_ENCODING)
405 		return 1;
406 	if (mime) {
407 		if (file_printf(ms, "application/json") == -1)
408 			return -1;
409 		return 1;
410 	}
411 	if (file_printf(ms, "JSON data") == -1)
412 		return -1;
413 #if JSON_COUNT
414 #define P(n) st[n], st[n] > 1 ? "s" : ""
415 	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
416 	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
417 	    "u constant%s, %" SIZE_T_FORMAT "u number%s)", P(JSON_OBJECT),
418 	    P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), P(JSON_NUMBER))
419 	    == -1)
420 		return -1;
421 #endif
422 	return 1;
423 }
424 
425 #else
426 
427 #include <sys/types.h>
428 #include <sys/stat.h>
429 #include <stdio.h>
430 #include <fcntl.h>
431 #include <unistd.h>
432 #include <stdlib.h>
433 #include <stdint.h>
434 #include <err.h>
435 
436 int
437 main(int argc, char *argv[])
438 {
439 	int fd, rv;
440 	struct stat st;
441 	unsigned char *p;
442 	size_t stats[JSON_MAX];
443 
444 	if ((fd = open(argv[1], O_RDONLY)) == -1)
445 		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
446 
447 	if (fstat(fd, &st) == -1)
448 		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
449 
450 	if ((p = malloc(st.st_size)) == NULL)
451 		err(EXIT_FAILURE, "Can't allocate %jd bytes",
452 		    (intmax_t)st.st_size);
453 	if (read(fd, p, st.st_size) != st.st_size)
454 		err(EXIT_FAILURE, "Can't read %jd bytes",
455 		    (intmax_t)st.st_size);
456 	memset(stats, 0, sizeof(stats));
457 	printf("is json %d\n", json_parse((const unsigned char **)&p,
458 	    p + st.st_size, stats, 0));
459 	return 0;
460 }
461 #endif
462