xref: /minix3/external/bsd/file/dist/src/file.h (revision cdfb5ab81f82cdcb0f58d139385d626df29f4069)
1 /*	$NetBSD: file.h,v 1.5 2011/09/16 21:06:26 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * file.h - definitions for file(1) program
32  * @(#)$File: file.h,v 1.133 2011/05/13 22:15:40 christos Exp $
33  */
34 
35 #ifndef __file_h__
36 #define __file_h__
37 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 
42 #ifdef WIN32	/* Windows C runtime: "I64" length modifier instead of C99 "z"/"ll" */
43   #ifdef _WIN64
44     #define SIZE_T_FORMAT "I64"	/* size_t is 64 bits wide on Win64 */
45   #else
46     #define SIZE_T_FORMAT ""	/* empty: no length modifier on 32-bit Windows */
47   #endif
48   #define INT64_T_FORMAT "I64"
49 #else
50   #define SIZE_T_FORMAT "z"	/* C99 length modifier for size_t */
51   #define INT64_T_FORMAT "ll"	/* assumes a 64-bit value is passed as long long */
52 #endif
53 
54 #include <stdio.h>	/* Include that here, to make sure __P gets defined */
55 #include <errno.h>
56 #include <fcntl.h>	/* For open and flags */
57 #ifdef HAVE_STDINT_H
58 #ifndef __STDC_LIMIT_MACROS
59 #define __STDC_LIMIT_MACROS
60 #endif
61 #include <stdint.h>
62 #endif
63 #ifdef HAVE_INTTYPES_H
64 #include <inttypes.h>
65 #endif
66 #include <regex.h>
67 #include <sys/types.h>
68 #include <sys/param.h>
69 /* Do this here and now, because struct stat gets re-defined on solaris */
70 #include <sys/stat.h>
71 #include <stdarg.h>
72 
73 #define ENABLE_CONDITIONALS	/* enables magic.cond, COND_* and level_info.last_match/last_cond */
74 
75 #ifndef MAGIC
76 #define MAGIC "/etc/magic"	/* default used only when MAGIC is not already defined */
77 #endif
78 
79 #if defined(__EMX__) || defined (WIN32)
80 #define PATHSEP	';'	/* OS/2 (EMX) and Windows; presumably separates entries in a path list */
81 #else
82 #define PATHSEP	':'
83 #endif
84 
85 #define private static	/* file-local linkage */
86 #ifndef protected
87 #define protected	/* expands to nothing: annotation only */
88 #endif
89 #define public	/* expands to nothing: annotation only */
90 
91 #ifndef __GNUC_PREREQ__	/* supply a fallback when no earlier header defined it */
92 #ifdef __GNUC__
93 #define	__GNUC_PREREQ__(x, y)						\
94 	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
95 	 (__GNUC__ > (x)))
96 #else
97 #define	__GNUC_PREREQ__(x, y)	0	/* not GCC: the version test is never satisfied */
98 #endif
99 #endif
100 
101 #ifndef __GNUC__
102 #ifndef __attribute__
103 #define __attribute__(a)	/* non-GCC compilers: attributes expand to nothing */
104 #endif
105 #endif
106 
107 #ifndef MIN	/* <sys/param.h> (included above) may already provide MIN/MAX */
108 #define	MIN(a,b)	(((a) < (b)) ? (a) : (b))	/* NB: the selected argument is evaluated twice */
109 #endif
110 
111 #ifndef MAX
112 #define	MAX(a,b)	(((a) > (b)) ? (a) : (b))	/* NB: the selected argument is evaluated twice */
113 #endif
114 
115 #ifndef HOWMANY
116 # define HOWMANY (256 * 1024)	/* how much of the file to look at */
117 #endif
118 #define MAXMAGIS 8192		/* max entries in any one magic file
119 				   or directory */
120 #define MAXDESC	64		/* max length of text description/MIME type */
121 #define MAXstring 64		/* max length of "string" types */
122 
123 #define MAGICNO		0xF11E041C	/* presumably the signature of a compiled magic file — confirm */
124 #define VERSIONNO	8	/* presumably the compiled-format version — confirm */
125 #define FILE_MAGICSIZE	232	/* field sizes of struct magic summed: 32 + 64 + 64 + 64 + 8 bytes */
126 
127 #define	FILE_LOAD	0	/* FILE_LOAD..FILE_LIST: action codes; presumably for file_apprentice() — confirm */
128 #define FILE_CHECK	1
129 #define FILE_COMPILE	2
130 #define FILE_LIST	3
131 
132 union VALUETYPE {	/* one value, viewed as whichever comparison type applies */
133 	uint8_t b;	/* 8-bit value */
134 	uint16_t h;	/* 16-bit value in host representation */
135 	uint32_t l;	/* 32-bit value in host representation */
136 	uint64_t q;	/* 64-bit value in host representation */
137 	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
138 	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
139 	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
140 	char s[MAXstring];	/* the search string or regex pattern */
141 	unsigned char us[MAXstring];	/* same storage as s, viewed as unsigned bytes */
142 	float f;
143 	double d;
144 };
145 
146 struct magic {	/* one magic entry; "Word" below means 4 bytes, 58 words = FILE_MAGICSIZE */
147 	/* Word 1 */
148 	uint16_t cont_level;	/* level of ">" */
149 	uint8_t flag;	/* bit set built from the flag values that follow */
150 #define INDIR		0x01	/* if '(...)' appears */
151 #define OFFADD		0x02	/* if '>&' or '>...(&' appears */
152 #define INDIROFFADD	0x04	/* if '>&(' appears */
153 #define UNSIGNED	0x08	/* comparison is unsigned */
154 #define NOSPACE		0x10	/* suppress space character before output */
155 #define BINTEST		0x20	/* test is for a binary type (set only
156 				   for top-level tests) */
157 #define TEXTTEST	0x40	/* for passing to file_softmagic */
158 
159 	uint8_t factor;	/* presumably the operand applied through factor_op — confirm */
160 
161 	/* Word 2 */
162 	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
163 	uint8_t vallen;		/* length of string value, if any */
164 	uint8_t type;		/* comparison type (FILE_*) */
165 	uint8_t in_type;	/* type of indirection */
166 #define 			FILE_INVALID	0
167 #define 			FILE_BYTE	1
168 #define				FILE_SHORT	2
169 #define				FILE_DEFAULT	3
170 #define				FILE_LONG	4
171 #define				FILE_STRING	5
172 #define				FILE_DATE	6
173 #define				FILE_BESHORT	7
174 #define				FILE_BELONG	8
175 #define				FILE_BEDATE	9
176 #define				FILE_LESHORT	10
177 #define				FILE_LELONG	11
178 #define				FILE_LEDATE	12
179 #define				FILE_PSTRING	13
180 #define				FILE_LDATE	14
181 #define				FILE_BELDATE	15
182 #define				FILE_LELDATE	16
183 #define				FILE_REGEX	17
184 #define				FILE_BESTRING16	18
185 #define				FILE_LESTRING16	19
186 #define				FILE_SEARCH	20
187 #define				FILE_MEDATE	21
188 #define				FILE_MELDATE	22
189 #define				FILE_MELONG	23
190 #define				FILE_QUAD	24
191 #define				FILE_LEQUAD	25
192 #define				FILE_BEQUAD	26
193 #define				FILE_QDATE	27
194 #define				FILE_LEQDATE	28
195 #define				FILE_BEQDATE	29
196 #define				FILE_QLDATE	30
197 #define				FILE_LEQLDATE	31
198 #define				FILE_BEQLDATE	32
199 #define				FILE_FLOAT	33
200 #define				FILE_BEFLOAT	34
201 #define				FILE_LEFLOAT	35
202 #define				FILE_DOUBLE	36
203 #define				FILE_BEDOUBLE	37
204 #define				FILE_LEDOUBLE	38
205 #define				FILE_BEID3	39
206 #define				FILE_LEID3	40
207 #define				FILE_INDIRECT	41
208 #define				FILE_NAMES_SIZE	42/* size of array to contain all names */
209 
210 #define IS_STRING(t) \
211 	((t) == FILE_STRING || \
212 	 (t) == FILE_PSTRING || \
213 	 (t) == FILE_BESTRING16 || \
214 	 (t) == FILE_LESTRING16 || \
215 	 (t) == FILE_REGEX || \
216 	 (t) == FILE_SEARCH || \
217 	 (t) == FILE_DEFAULT)
218 
219 #define FILE_FMT_NONE 0
220 #define FILE_FMT_NUM  1 /* "cduxXi" */
221 #define FILE_FMT_STR  2 /* "s" */
222 #define FILE_FMT_QUAD 3 /* "ll" */
223 #define FILE_FMT_FLOAT 4 /* "eEfFgG" */
224 #define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
225 
226 	/* Word 3 */
227 	uint8_t in_op;		/* operator for indirection */
228 	uint8_t mask_op;	/* operator for mask */
229 #ifdef ENABLE_CONDITIONALS
230 	uint8_t cond;		/* conditional type */
231 #else
232 	uint8_t dummy;	/* keeps the layout identical when conditionals are disabled */
233 #endif
234 	uint8_t factor_op;	/* presumably one of the FILE_FACTOR_OP_* characters — confirm */
235 #define		FILE_FACTOR_OP_PLUS	'+'
236 #define		FILE_FACTOR_OP_MINUS	'-'
237 #define		FILE_FACTOR_OP_TIMES	'*'
238 #define		FILE_FACTOR_OP_DIV	'/'
239 #define		FILE_FACTOR_OP_NONE	'\0'
240 
241 #define				FILE_OPS	"&|^+-*/%"	/* character i corresponds to operator code i below */
242 #define				FILE_OPAND	0
243 #define				FILE_OPOR	1
244 #define				FILE_OPXOR	2
245 #define				FILE_OPADD	3
246 #define				FILE_OPMINUS	4
247 #define				FILE_OPMULTIPLY	5
248 #define				FILE_OPDIVIDE	6
249 #define				FILE_OPMODULO	7
250 #define				FILE_OPS_MASK	0x07 /* mask for above ops */
251 #define				FILE_UNUSED_1	0x08
252 #define				FILE_UNUSED_2	0x10
253 #define				FILE_UNUSED_3	0x20
254 #define				FILE_OPINVERSE	0x40
255 #define				FILE_OPINDIRECT	0x80
256 
257 #ifdef ENABLE_CONDITIONALS
258 #define				COND_NONE	0
259 #define				COND_IF		1
260 #define				COND_ELIF	2
261 #define				COND_ELSE	3
262 #endif /* ENABLE_CONDITIONALS */
263 
264 	/* Word 4 */
265 	uint32_t offset;	/* offset to magic number */
266 	/* Word 5 */
267 	int32_t in_offset;	/* offset from indirection */
268 	/* Word 6 */
269 	uint32_t lineno;	/* line number in magic file */
270 	/* Word 7,8 */
271 	union {
272 		uint64_t _mask;	/* for use with numeric and date types */
273 		struct {
274 			uint32_t _count;	/* repeat/line count */
275 			uint32_t _flags;	/* modifier flags */
276 		} _s;		/* for use with string types */
277 	} _u;
278 #define num_mask _u._mask
279 #define str_range _u._s._count
280 #define str_flags _u._s._flags
281 	/* Words 9-24 (64 bytes) */
282 	union VALUETYPE value;	/* either number or string */
283 	/* Words 25-40 (MAXDESC bytes) */
284 	char desc[MAXDESC];	/* description */
285 	/* Words 41-56 (MAXDESC bytes) */
286 	char mimetype[MAXDESC]; /* MIME type */
287 	/* Words 57-58 */
288 	char apple[8];	/* presumably an Apple creator/type code — confirm */
289 };
290 
291 #define BIT(A)   (1 << (A))	/* int-typed single-bit mask; values below presumably go in str_flags */
292 #define STRING_COMPACT_WHITESPACE		BIT(0)
293 #define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
294 #define STRING_IGNORE_LOWERCASE			BIT(2)
295 #define STRING_IGNORE_UPPERCASE			BIT(3)
296 #define REGEX_OFFSET_START			BIT(4)
297 #define STRING_TEXTTEST				BIT(5)
298 #define STRING_BINTEST				BIT(6)
299 #define PSTRING_1_BE				BIT(7)
300 #define PSTRING_1_LE				BIT(7)	/* same bit as PSTRING_1_BE: one byte has no byte order */
301 #define PSTRING_2_BE				BIT(8)
302 #define PSTRING_2_LE				BIT(9)
303 #define PSTRING_4_BE				BIT(10)
304 #define PSTRING_4_LE				BIT(11)
305 #define PSTRING_LEN	\
306     (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
307 #define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
308 #define CHAR_COMPACT_WHITESPACE			'W'	/* CHAR_*: one letter per flag bit above */
309 #define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
310 #define CHAR_IGNORE_LOWERCASE			'c'
311 #define CHAR_IGNORE_UPPERCASE			'C'
312 #define CHAR_REGEX_OFFSET_START			's'
313 #define CHAR_TEXTTEST				't'
314 #define CHAR_BINTEST				'b'
315 #define CHAR_PSTRING_1_BE			'B'
316 #define CHAR_PSTRING_1_LE			'B'	/* same letter as CHAR_PSTRING_1_BE, mirroring the shared bit */
317 #define CHAR_PSTRING_2_BE			'H'
318 #define CHAR_PSTRING_2_LE			'h'
319 #define CHAR_PSTRING_4_BE			'L'
320 #define CHAR_PSTRING_4_LE			'l'
321 #define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'
322 #define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
323 #define STRING_DEFAULT_RANGE		100
324 
325 
326 /* list of magic entries */
327 struct mlist {	/* node of a doubly linked list; each node holds one array of entries */
328 	struct magic *magic;		/* array of magic entries */
329 	uint32_t nmagic;			/* number of entries in array */
330 	int mapped;  /* allocation type: 0 => apprentice_file
331 		      *                  1 => apprentice_map + malloc
332 		      *                  2 => apprentice_map + mmap */
333 	struct mlist *next, *prev;	/* neighbouring nodes */
334 };
335 
336 #ifdef __cplusplus	/* C++ builds use the named casts; C uses a plain cast for both */
337 #define CAST(T, b)	static_cast<T>(b)
338 #define RCAST(T, b)	reinterpret_cast<T>(b)
339 #else
340 #define CAST(T, b)	(T)(b)
341 #define RCAST(T, b)	(T)(b)
342 #endif
343 
344 struct level_info {	/* pointed to by magic_set.c.li; presumably one per continuation level — confirm */
345 	int32_t off;	/* NOTE(review): presumably the offset recorded for this level — confirm */
346 	int got_match;
347 #ifdef ENABLE_CONDITIONALS
348 	int last_match;
349 	int last_cond;	/* used for error checking by parse() */
350 #endif
351 };
352 struct magic_set {	/* state handed to every file_*() routine declared in this header */
353 	struct mlist *mlist;	/* list of magic entry arrays */
354 	struct cont {
355 		size_t len;	/* presumably the number of level_info slots behind li — confirm */
356 		struct level_info *li;
357 	} c;
358 	struct out {
359 		char *buf;		/* Accumulation buffer */
360 		char *pbuf;		/* Printable buffer */
361 	} o;
362 	uint32_t offset;
363 	int error;
364 	int flags;			/* Control magic tests. */
365 	int event_flags;		/* Note things that happened. */
366 #define 		EVENT_HAD_ERR		0x01
367 	const char *file;	/* presumably the magic file being read (see `line') — confirm */
368 	size_t line;			/* current magic line number */
369 
370 	/* data for searches */
371 	struct {
372 		const char *s;		/* start of search in original source */
373 		size_t s_len;		/* length of search region */
374 		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
375 		size_t rm_len;		/* match length */
376 	} search;
377 
378 	/* FIXME: Make the string dynamically allocated so that e.g.
379 	   strings matched in files can be longer than MAXstring */
380 	union VALUETYPE ms_value;	/* either number or string */
381 };
382 
383 /* Type for Unicode characters (unsigned long is at least 32 bits wide) */
384 typedef unsigned long unichar;
385 
386 struct stat;	/* forward declaration for the file_fsmagic() prototype */
387 protected const char *file_fmttime(uint32_t, int);
388 protected int file_buffer(struct magic_set *, int, const char *, const void *,
389     size_t);
390 protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
391 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
392 protected int file_vprintf(struct magic_set *, const char *, va_list);
393 protected size_t file_printedlen(const struct magic_set *);
394 protected int file_replace(struct magic_set *, const char *, const char *);
395 protected int file_printf(struct magic_set *, const char *, ...)
396     __attribute__((__format__(__printf__, 2, 3)));	/* arg 2 is the format, varargs start at 3 */
397 protected int file_reset(struct magic_set *);
398 protected int file_tryelf(struct magic_set *, int, const unsigned char *,
399     size_t);
400 protected int file_trycdf(struct magic_set *, int, const unsigned char *,
401     size_t);
402 #if HAVE_FORK	/* NB: #if, not #ifdef — HAVE_FORK must be defined to a non-zero value */
403 protected int file_zmagic(struct magic_set *, int, const char *,
404     const unsigned char *, size_t);
405 #endif
406 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
407 protected int file_ascmagic_with_encoding(struct magic_set *,
408     const unsigned char *, size_t, unichar *, size_t, const char *,
409     const char *);
410 protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
411     unichar **, size_t *, const char **, const char **, const char **);
412 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
413 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
414     int);
415 protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
416 protected uint64_t file_signextend(struct magic_set *, struct magic *,
417     uint64_t);
418 protected void file_delmagic(struct magic *, int type, size_t entries);
419 protected void file_badread(struct magic_set *);
420 protected void file_badseek(struct magic_set *);
421 protected void file_oomem(struct magic_set *, size_t);
422 protected void file_error(struct magic_set *, int, const char *, ...)
423     __attribute__((__format__(__printf__, 3, 4)));	/* arg 3 is the format, varargs start at 4 */
424 protected void file_magerror(struct magic_set *, const char *, ...)
425     __attribute__((__format__(__printf__, 2, 3)));
426 protected void file_magwarn(struct magic_set *, const char *, ...)
427     __attribute__((__format__(__printf__, 2, 3)));
428 protected void file_mdump(struct magic *);
429 protected void file_showstr(FILE *, const char *, size_t);
430 protected size_t file_mbswidth(const char *);
431 protected const char *file_getbuffer(struct magic_set *);
432 protected ssize_t sread(int, void *, size_t, int);
433 protected int file_check_mem(struct magic_set *, unsigned int);
434 protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
435     size_t *);
436 protected size_t file_pstring_length_size(const struct magic *);
437 protected size_t file_pstring_get_length(const struct magic *, const char *);
438 #ifdef __EMX__	/* OS/2-only helper */
439 protected int file_os2_apptype(struct magic_set *, const char *, const void *,
440     size_t);
441 #endif /* __EMX__ */
442 
443 
444 #ifndef COMPILE_ONLY
445 extern const char *file_names[];	/* presumably indexed by FILE_* type code — confirm */
446 extern const size_t file_nnames;
447 #endif
448 
449 #ifndef HAVE_STRERROR	/* fallback: index sys_errlist, "Unknown error" when out of range */
450 extern int sys_nerr;
451 extern char *sys_errlist[];
452 #define strerror(e) \
453 	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
454 #endif
455 
456 #ifndef HAVE_STRTOUL
457 #define strtoul(a, b, c)	strtol(a, b, c)	/* NOTE(review): strtol is signed; values above LONG_MAX are clamped */
458 #endif
459 
460 #ifndef HAVE_VASPRINTF	/* prototypes for replacement functions; definitions are not in this header */
461 int vasprintf(char **, const char *, va_list);
462 #endif
463 #ifndef HAVE_ASPRINTF
464 int asprintf(char **ptr, const char *format_string, ...);
465 #endif
466 
467 #ifndef HAVE_STRLCPY
468 size_t strlcpy(char *dst, const char *src, size_t siz);
469 #endif
470 #ifndef HAVE_STRLCAT
471 size_t strlcat(char *dst, const char *src, size_t siz);
472 #endif
473 #ifndef HAVE_GETLINE	/* this one test guards both getline and getdelim */
474 ssize_t getline(char **dst, size_t *len, FILE *fp);
475 ssize_t getdelim(char **dst, size_t *len, int delimiter, FILE *fp);
476 #endif
477 
478 #if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
479 #define QUICK	/* set automatically when both mmap feature macros are defined */
480 #endif
481 
482 #ifndef O_BINARY
483 #define O_BINARY	0	/* no such open() flag here: OR-ing in 0 changes nothing */
484 #endif
485 
486 #ifndef __cplusplus
487 #if defined(__GNUC__) && (__GNUC__ >= 3)	/* GCC 3+: __used__ keeps the unreferenced id string */
488 #define FILE_RCSID(id) \
489 static const char rcsid[] __attribute__((__used__)) = id;
490 #else	/* otherwise: a self-referencing static function, presumably to avoid an unused warning */
491 #define FILE_RCSID(id) \
492 static const char *rcsid(const char *p) { \
493 	return rcsid(p = id); \
494 }
495 #endif
496 #else	/* C++: FILE_RCSID expands to nothing */
497 #define FILE_RCSID(id)
498 #endif
499 #ifndef __RCSID
500 #define __RCSID(a)	/* no-op when the system headers do not provide __RCSID */
501 #endif
502 
503 #endif /* __file_h__ */
504