xref: /netbsd-src/external/bsd/file/dist/src/is_json.c (revision ed75d7a867996c84cfa88e3b8906816277e957f7)
1 /*	$NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 Christos Zoulas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Parse JSON object serialization format (RFC-7159)
31  */
32 
33 #ifndef TEST
34 #include "file.h"
35 
36 #ifndef lint
37 #if 0
38 FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
39 #else
40 __RCSID("$NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $");
41 #endif
42 #endif
43 
44 #include <string.h>
45 #include "magic.h"
46 #endif
47 
/*
 * Debug tracing helper.
 *
 * With DEBUG defined, DPRINTF(a, b, c) prints one line made of the label
 * string a, the byte that b points at (once as two hex digits and once as
 * a character) and at most 20 characters of the text starting at c.
 * Note that b is dereferenced, so it is evaluated twice.
 *
 * Without DEBUG the macro expands to an empty statement and none of its
 * arguments are evaluated.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#endif
55 
/*
 * Indices into the size_t statistics array that is passed through the
 * parser.  json_parse() increments the slot matching the kind of value it
 * has just parsed successfully; JSON_ARRAYN is bumped separately by the
 * array parser.
 */
#define JSON_ARRAY	0	/* arrays */
#define JSON_CONSTANT	1	/* true, false and null */
#define JSON_NUMBER	2	/* numbers */
#define JSON_OBJECT	3	/* objects */
#define JSON_STRING	4	/* strings */
#define JSON_ARRAYN	5	/* arrays holding more than one element */
#define JSON_MAX	6	/* number of slots in the statistics array */
63 
64 /*
65  * if JSON_COUNT != 0:
66  *	count all the objects, require that we have the whole data file
67  * otherwise:
68  *	stop if we find an object or an array
69  */
70 #ifndef JSON_COUNT
71 #define JSON_COUNT 0
72 #endif
73 
74 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75 	size_t);
76 
/*
 * Return 1 if uc is one of the four characters treated as whitespace
 * by this parser (space, newline, carriage return, horizontal tab),
 * 0 otherwise.  Deliberately narrower than isspace(3): vertical tab and
 * form feed are not accepted.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90 
/*
 * Return 1 if uc is a decimal digit '0'..'9', 0 otherwise.
 * Locale independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that the ten decimal digits are contiguous */
	return uc >= '0' && uc <= '9';
}
102 
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	if (uc >= 'A' && uc <= 'F')
		return 1;
	return 0;
}
116 
/*
 * Advance past any run of whitespace starting at uc, never moving
 * beyond ue.  Returns a pointer to the first non-whitespace byte, or ue
 * if only whitespace remained.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
124 
/*
 * Parse the body of a JSON string.  On entry *ucp points just past the
 * opening double quote; ue is the end of the buffer.
 *
 * Returns 1 and leaves *ucp just past the closing quote when a complete
 * string was found.  Returns 0 when a NUL byte, an unknown or malformed
 * escape sequence, or the end of the buffer is hit first; *ucp is then
 * left at the point where scanning stopped.
 */
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	unsigned char ch;
	int ndigits;

	DPRINTF("Parse string: ", p, *ucp);
	while (p < ue) {
		ch = *p++;
		if (ch == '"') {
			/* Closing quote: the string is complete */
			*ucp = p;
			return 1;
		}
		if (ch == '\0')
			goto bad;
		if (ch != '\\')
			continue;

		/* A backslash must be followed by an escape character */
		if (p == ue)
			goto bad;
		ch = *p++;
		switch (ch) {
		case '"': case '\\': case '/': case 'b':
		case 'f': case 'n': case 'r': case 't':
			break;
		case 'u':
			/* \u needs exactly four hex digits after it */
			if (ue - p < 4) {
				p = ue;
				goto bad;
			}
			for (ndigits = 4; ndigits > 0; ndigits--)
				if (!json_isxdigit(*p++))
					goto bad;
			break;
		default:
			/* Unknown escape; this also covers a NUL byte */
			goto bad;
		}
	}
bad:
	DPRINTF("Bad string: ", p, *ucp);
	*ucp = p;
	return 0;
}
175 
176 static int
177 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
178 	size_t *st, size_t lvl)
179 {
180 	const unsigned char *uc = *ucp;
181 	int more = 0;	/* Array has more than 1 element */
182 
183 	DPRINTF("Parse array: ", uc, *ucp);
184 	while (uc < ue) {
185 		if (!json_parse(&uc, ue, st, lvl + 1))
186 			goto out;
187 		if (uc == ue)
188 			goto out;
189 		switch (*uc) {
190 		case ',':
191 			more++;
192 			uc++;
193 			continue;
194 		case ']':
195 			if (more)
196 				st[JSON_ARRAYN]++;
197 			*ucp = uc + 1;
198 			return 1;
199 		default:
200 			goto out;
201 		}
202 	}
203 out:
204 	DPRINTF("Bad array: ", uc,  *ucp);
205 	*ucp = uc;
206 	return 0;
207 }
208 
/*
 * Parse the contents of a JSON object.  On entry *ucp points just past
 * the opening brace; ue is the end of the buffer, st the statistics
 * array and lvl the current nesting level (passed on, incremented, to
 * json_parse() for every member value).
 *
 * Each member must be a string, a ':' and a value; members are separated
 * by ','.  An object without members is accepted as well.
 *
 * Returns 1 and leaves *ucp just past the closing brace on success,
 * 0 on any syntax error or when the buffer ends before the object is
 * closed.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
	size_t *st, size_t lvl)
{
	const unsigned char *uc = *ucp;
	DPRINTF("Parse object: ", uc, *ucp);

	/*
	 * The grammar allows an object with no members.  Handle it before
	 * the member loop, which insists on a '"' starting a member name.
	 * Doing it only here (and not after a ',') keeps a trailing comma
	 * invalid.
	 */
	uc = json_skip_space(uc, ue);
	if (uc < ue && *uc == '}') { /* { */
		uc++;
		*ucp = uc;
		DPRINTF("Good object: ", uc, *ucp);
		return 1;
	}

	while (uc < ue) {
		/* Member name: a quoted string */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != '"') {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		DPRINTF("next field", uc, *ucp);
		if (!json_parse_string(&uc, ue)) {
			DPRINTF("not string", uc, *ucp);
			goto out;
		}
		/* Name/value separator */
		uc = json_skip_space(uc, ue);
		if (uc == ue)
			goto out;
		if (*uc++ != ':') {
			DPRINTF("not colon", uc, *ucp);
			goto out;
		}
		/* Member value; json_parse() also eats trailing whitespace */
		if (!json_parse(&uc, ue, st, lvl + 1)) {
			DPRINTF("not json", uc, *ucp);
			goto out;
		}
		if (uc == ue)
			goto out;
		switch (*uc++) {
		case ',':
			continue;
		case '}': /* { */
			*ucp = uc;
			DPRINTF("Good object: ", uc, *ucp);
			return 1;
		default:
			/* Only affects what the debug traces below print */
			*ucp = uc - 1;
			DPRINTF("not more", uc, *ucp);
			goto out;
		}
	}
out:
	DPRINTF("Bad object: ", uc, *ucp);
	*ucp = uc;
	return 0;
}
259 
/*
 * Scan a number starting at *ucp: an optional '-', a run of digits, an
 * optional '.' followed by a run of digits and, if any digit was seen so
 * far, an optional exponent ('e' or 'E', optional sign, digits).
 *
 * Returns 1 if at least one digit was consumed (and, when an exponent
 * marker is present, at least one exponent digit), otherwise 0.  Unless
 * the buffer is empty on entry, *ucp is updated to where scanning
 * stopped.
 */
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
	const unsigned char *p = *ucp;
	int seen = 0;	/* at least one digit in the current part */

	DPRINTF("Parse number: ", p, *ucp);
	if (p == ue)
		return 0;

	if (*p == '-')
		p++;

	/* Integer part */
	while (p < ue && json_isdigit(*p)) {
		seen = 1;
		p++;
	}

	if (p < ue) {
		/* Fraction */
		if (*p == '.')
			p++;
		while (p < ue && json_isdigit(*p)) {
			seen = 1;
			p++;
		}

		/* Exponent; only valid after some digits */
		if (p < ue && seen && (*p == 'e' || *p == 'E')) {
			p++;
			seen = 0;
			if (p < ue) {
				if (*p == '+' || *p == '-')
					p++;
				while (p < ue && json_isdigit(*p)) {
					seen = 1;
					p++;
				}
			}
		}
	}

	if (seen)
		DPRINTF("Good number: ", p, *ucp);
	else
		DPRINTF("Bad number: ", p, *ucp);
	*ucp = p;
	return seen;
}
309 
/*
 * Match the remainder of a literal name (true, false or null).
 *
 * On entry *ucp points just past the first character of the literal,
 * which the caller has already matched.  str is the full literal and
 * len is sizeof(literal), i.e. it includes the terminating NUL.
 *
 * Returns 1 and leaves *ucp just past the literal when every remaining
 * character matches.  Returns 0 on the first mismatch or when the
 * buffer ends too early; *ucp is then left at the offending position.
 */
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len)
{
	const unsigned char *uc = *ucp;
	size_t i;

	DPRINTF("Parse const: ", uc, *ucp);
	/*
	 * str[0] was consumed by the caller and str[len - 1] is the NUL,
	 * so the characters left to verify are str[1] .. str[len - 2].
	 * Every one of them has to match exactly; anything else (for
	 * example "tXYZ") is not a literal.
	 */
	for (i = 1; i + 1 < len; i++) {
		if (uc == ue || *uc != (unsigned char)str[i]) {
			DPRINTF("Bad const: ", uc, *ucp);
			*ucp = uc;
			return 0;
		}
		uc++;
	}
	*ucp = uc;
	return 1;
}
326 
327 static int
328 json_parse(const unsigned char **ucp, const unsigned char *ue,
329     size_t *st, size_t lvl)
330 {
331 	const unsigned char *uc;
332 	int rv = 0;
333 	int t;
334 
335 	uc = json_skip_space(*ucp, ue);
336 	if (uc == ue)
337 		goto out;
338 
339 	// Avoid recursion
340 	if (lvl > 20)
341 		return 0;
342 #if JSON_COUNT
343 	/* bail quickly if not counting */
344 	if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
345 		return 1;
346 #endif
347 
348 	DPRINTF("Parse general: ", uc, *ucp);
349 	switch (*uc++) {
350 	case '"':
351 		rv = json_parse_string(&uc, ue);
352 		t = JSON_STRING;
353 		break;
354 	case '[':
355 		rv = json_parse_array(&uc, ue, st, lvl + 1);
356 		t = JSON_ARRAY;
357 		break;
358 	case '{': /* '}' */
359 		rv = json_parse_object(&uc, ue, st, lvl + 1);
360 		t = JSON_OBJECT;
361 		break;
362 	case 't':
363 		rv = json_parse_const(&uc, ue, "true", sizeof("true"));
364 		t = JSON_CONSTANT;
365 		break;
366 	case 'f':
367 		rv = json_parse_const(&uc, ue, "false", sizeof("false"));
368 		t = JSON_CONSTANT;
369 		break;
370 	case 'n':
371 		rv = json_parse_const(&uc, ue, "null", sizeof("null"));
372 		t = JSON_CONSTANT;
373 		break;
374 	default:
375 		--uc;
376 		rv = json_parse_number(&uc, ue);
377 		t = JSON_NUMBER;
378 		break;
379 	}
380 	if (rv)
381 		st[t]++;
382 	uc = json_skip_space(uc, ue);
383 out:
384 	*ucp = uc;
385 	DPRINTF("End general: ", uc, *ucp);
386 	if (lvl == 0)
387 		return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
388 	return rv;
389 }
390 
391 #ifndef TEST
392 int
393 file_is_json(struct magic_set *ms, const struct buffer *b)
394 {
395 	const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
396 	const unsigned char *ue = uc + b->flen;
397 	size_t st[JSON_MAX];
398 	int mime = ms->flags & MAGIC_MIME;
399 
400 
401 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
402 		return 0;
403 
404 	memset(st, 0, sizeof(st));
405 
406 	if (!json_parse(&uc, ue, st, 0))
407 		return 0;
408 
409 	if (mime == MAGIC_MIME_ENCODING)
410 		return 1;
411 	if (mime) {
412 		if (file_printf(ms, "application/json") == -1)
413 			return -1;
414 		return 1;
415 	}
416 	if (file_printf(ms, "JSON data") == -1)
417 		return -1;
418 #if JSON_COUNT
419 #define P(n) st[n], st[n] > 1 ? "s" : ""
420 	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
421 	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
422 	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
423 	    "u >1array%s)",
424 	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
425 	    P(JSON_NUMBER), P(JSON_ARRAYN))
426 	    == -1)
427 		return -1;
428 #endif
429 	return 1;
430 }
431 
432 #else
433 
434 #include <sys/types.h>
435 #include <sys/stat.h>
436 #include <stdio.h>
437 #include <fcntl.h>
438 #include <unistd.h>
439 #include <stdlib.h>
440 #include <stdint.h>
441 #include <err.h>
442 
443 int
444 main(int argc, char *argv[])
445 {
446 	int fd, rv;
447 	struct stat st;
448 	unsigned char *p;
449 	size_t stats[JSON_MAX];
450 
451 	if ((fd = open(argv[1], O_RDONLY)) == -1)
452 		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
453 
454 	if (fstat(fd, &st) == -1)
455 		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
456 
457 	if ((p = malloc(st.st_size)) == NULL)
458 		err(EXIT_FAILURE, "Can't allocate %jd bytes",
459 		    (intmax_t)st.st_size);
460 	if (read(fd, p, st.st_size) != st.st_size)
461 		err(EXIT_FAILURE, "Can't read %jd bytes",
462 		    (intmax_t)st.st_size);
463 	memset(stats, 0, sizeof(stats));
464 	printf("is json %d\n", json_parse((const unsigned char **)&p,
465 	    p + st.st_size, stats, 0));
466 	return 0;
467 }
468 #endif
469