xref: /openbsd-src/usr.bin/file/file.h (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: file.h,v 1.20 2009/04/24 18:54:34 chl Exp $ */
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * file.h - definitions for file(1) program
31  * @(#)$Id: file.h,v 1.20 2009/04/24 18:54:34 chl Exp $
32  */
33 
34 #ifndef __file_h__
35 #define __file_h__
36 
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40 
41 #include <stdio.h>	/* Include that here, to make sure __P gets defined */
42 #include <errno.h>
43 #include <fcntl.h>	/* For open and flags */
44 #ifdef HAVE_STDINT_H
45 #include <stdint.h>
46 #endif
47 #ifdef HAVE_INTTYPES_H
48 #include <inttypes.h>
49 #endif
50 #include <regex.h>
51 #include <sys/types.h>
52 /* Do this here and now, because struct stat gets re-defined on solaris */
53 #include <sys/stat.h>
54 #include <stdarg.h>
55 
/* Selects the conditional members and COND_* constants in this header. */
#define ENABLE_CONDITIONALS

/* Default magic file path; a MAGIC defined before this point is kept. */
#if !defined(MAGIC)
#define MAGIC "/etc/magic"
#endif

/* Path list separator: ';' when __EMX__ is defined, ':' otherwise. */
#if !defined(__EMX__)
#define PATHSEP	':'
#else
#define PATHSEP	';'
#endif

/*
 * Linkage markers: "private" expands to static, while "protected" and
 * "public" expand to nothing.  An existing "protected" macro is kept.
 */
#define private static
#if !defined(protected)
#define protected
#endif
#define public
73 
/*
 * __GNUC_PREREQ__(x, y): non-zero when compiling with a GNU-compatible
 * compiler whose version is at least x.y; always 0 when __GNUC__ is not
 * defined.  A definition that already exists is left alone.
 */
#if !defined(__GNUC_PREREQ__)
# if defined(__GNUC__)
#  define __GNUC_PREREQ__(x, y)						\
	((__GNUC__ > (x)) ||						\
	 (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
# else
#  define __GNUC_PREREQ__(x, y)	0
# endif
#endif
83 
/*
 * Smaller / larger of two values.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects.  Definitions that already
 * exist are kept.
 */
#if !defined(MIN)
#define	MIN(a,b)	((a) < (b) ? (a) : (b))
#endif

#if !defined(MAX)
#define	MAX(a,b)	((a) > (b) ? (a) : (b))
#endif
91 
/* How much of the file to look at; a HOWMANY defined earlier is kept. */
#if !defined(HOWMANY)
# define HOWMANY (256 * 1024)
#endif

/* Size limits. */
#define MAXMAGIS 8192	/* max entries in any one magic file or directory */
#define MAXDESC	64	/* max length of text description/MIME type */
#define MAXstring 32	/* max length of "string" types */

#define MAGICNO		0xF11E041C
#define VERSIONNO	5
#define FILE_MAGICSIZE	(32 * 6)

/* Action codes. */
#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
107 
108 struct magic {
109 	/* Word 1 */
110 	uint16_t cont_level;	/* level of ">" */
111 	uint8_t flag;
112 #define INDIR		0x01	/* if '(...)' appears */
113 #define OFFADD		0x02	/* if '>&' or '>...(&' appears */
114 #define INDIROFFADD	0x04	/* if '>&(' appears */
115 #define UNSIGNED	0x08	/* comparison is unsigned */
116 #define NOSPACE		0x10	/* suppress space character before output */
117 #define BINTEST		0x20	/* test is for a binary type (set only
118                                    for top-level tests) */
119 #define TEXTTEST	0	/* for passing to file_softmagic */
120 
121 	uint8_t dummy1;
122 
123 	/* Word 2 */
124 	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
125 	uint8_t vallen;		/* length of string value, if any */
126 	uint8_t type;		/* comparison type (FILE_*) */
127 	uint8_t in_type;	/* type of indirection */
128 #define 			FILE_INVALID	0
129 #define 			FILE_BYTE	1
130 #define				FILE_SHORT	2
131 #define				FILE_DEFAULT	3
132 #define				FILE_LONG	4
133 #define				FILE_STRING	5
134 #define				FILE_DATE	6
135 #define				FILE_BESHORT	7
136 #define				FILE_BELONG	8
137 #define				FILE_BEDATE	9
138 #define				FILE_LESHORT	10
139 #define				FILE_LELONG	11
140 #define				FILE_LEDATE	12
141 #define				FILE_PSTRING	13
142 #define				FILE_LDATE	14
143 #define				FILE_BELDATE	15
144 #define				FILE_LELDATE	16
145 #define				FILE_REGEX	17
146 #define				FILE_BESTRING16	18
147 #define				FILE_LESTRING16	19
148 #define				FILE_SEARCH	20
149 #define				FILE_MEDATE	21
150 #define				FILE_MELDATE	22
151 #define				FILE_MELONG	23
152 #define				FILE_QUAD	24
153 #define				FILE_LEQUAD	25
154 #define				FILE_BEQUAD	26
155 #define				FILE_QDATE	27
156 #define				FILE_LEQDATE	28
157 #define				FILE_BEQDATE	29
158 #define				FILE_QLDATE	30
159 #define				FILE_LEQLDATE	31
160 #define				FILE_BEQLDATE	32
161 #define				FILE_FLOAT	33
162 #define				FILE_BEFLOAT	34
163 #define				FILE_LEFLOAT	35
164 #define				FILE_DOUBLE	36
165 #define				FILE_BEDOUBLE	37
166 #define				FILE_LEDOUBLE	38
167 #define				FILE_NAMES_SIZE	39/* size of array to contain all names */
168 
169 #define IS_STRING(t) \
170 	((t) == FILE_STRING || \
171 	 (t) == FILE_PSTRING || \
172 	 (t) == FILE_BESTRING16 || \
173 	 (t) == FILE_LESTRING16 || \
174 	 (t) == FILE_REGEX || \
175 	 (t) == FILE_SEARCH || \
176 	 (t) == FILE_DEFAULT)
177 
178 #define FILE_FMT_NONE 0
179 #define FILE_FMT_NUM  1 /* "cduxXi" */
180 #define FILE_FMT_STR  2 /* "s" */
181 #define FILE_FMT_QUAD 3 /* "ll" */
182 #define FILE_FMT_FLOAT 4 /* "eEfFgG" */
183 #define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
184 
185 	/* Word 3 */
186 	uint8_t in_op;		/* operator for indirection */
187 	uint8_t mask_op;	/* operator for mask */
188 #ifdef ENABLE_CONDITIONALS
189 	uint8_t cond;		/* conditional type */
190 	uint8_t dummy2;
191 #else
192 	uint8_t dummy2;
193 	uint8_t dummy3;
194 #endif
195 
196 #define				FILE_OPS	"&|^+-*/%"
197 #define				FILE_OPAND	0
198 #define				FILE_OPOR	1
199 #define				FILE_OPXOR	2
200 #define				FILE_OPADD	3
201 #define				FILE_OPMINUS	4
202 #define				FILE_OPMULTIPLY	5
203 #define				FILE_OPDIVIDE	6
204 #define				FILE_OPMODULO	7
205 #define				FILE_OPS_MASK	0x07 /* mask for above ops */
206 #define				FILE_UNUSED_1	0x08
207 #define				FILE_UNUSED_2	0x10
208 #define				FILE_UNUSED_3	0x20
209 #define				FILE_OPINVERSE	0x40
210 #define				FILE_OPINDIRECT	0x80
211 
212 #ifdef ENABLE_CONDITIONALS
213 #define				COND_NONE	0
214 #define				COND_IF		1
215 #define				COND_ELIF	2
216 #define				COND_ELSE	3
217 #endif /* ENABLE_CONDITIONALS */
218 
219 	/* Word 4 */
220 	uint32_t offset;	/* offset to magic number */
221 	/* Word 5 */
222 	int32_t in_offset;	/* offset from indirection */
223 	/* Word 6 */
224 	uint32_t lineno;	/* line number in magic file */
225 	/* Word 7,8 */
226 	union {
227 		uint64_t _mask;	/* for use with numeric and date types */
228 		struct {
229 			uint32_t _count;	/* repeat/line count */
230 			uint32_t _flags;	/* modifier flags */
231 		} _s;		/* for use with string types */
232 	} _u;
233 #define num_mask _u._mask
234 #define str_range _u._s._count
235 #define str_flags _u._s._flags
236 
237 	/* Words 9-16 */
238 	union VALUETYPE {
239 		uint8_t b;
240 		uint16_t h;
241 		uint32_t l;
242 		uint64_t q;
243 		uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
244 		uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
245 		uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
246 		char s[MAXstring];	/* the search string or regex pattern */
247 		float f;
248 		double d;
249 	} value;		/* either number or string */
250 	/* Words 17..31 */
251 	char desc[MAXDESC];	/* description */
252 	/* Words 32..47 */
253 	char mimetype[MAXDESC]; /* MIME type */
254 };
255 
/* BIT(A) is an int with only bit number A set. */
#define BIT(A)   (1 << (A))

/*
 * String modifier flag bits.  Each flag is listed together with the
 * CHAR_* constant of the matching name.
 */
#define STRING_COMPACT_BLANK		BIT(0)
#define CHAR_COMPACT_BLANK		'B'

#define STRING_COMPACT_OPTIONAL_BLANK	BIT(1)
#define CHAR_COMPACT_OPTIONAL_BLANK	'b'

#define STRING_IGNORE_LOWERCASE		BIT(2)
#define CHAR_IGNORE_LOWERCASE		'c'

#define STRING_IGNORE_UPPERCASE		BIT(3)
#define CHAR_IGNORE_UPPERCASE		'C'

#define REGEX_OFFSET_START		BIT(4)
#define CHAR_REGEX_OFFSET_START		's'

/* Both case-insensitivity bits combined. */
#define STRING_IGNORE_CASE \
	(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)

#define STRING_DEFAULT_RANGE		100
269 
270 
/*
 * Node of a doubly linked list; each node refers to one array of magic
 * entries.
 */
struct mlist {
	struct magic *magic;	/* array of magic entries */
	uint32_t nmagic;	/* number of entries in array */
	/*
	 * Allocation type:
	 *   0 => apprentice_file
	 *   1 => apprentice_map + malloc
	 *   2 => apprentice_map + mmap
	 */
	int mapped;
	struct mlist *next;
	struct mlist *prev;
};
280 
281 struct magic_set {
282 	struct mlist *mlist;
283 	struct cont {
284 		size_t len;
285 		struct level_info {
286 			int32_t off;
287 			int got_match;
288 #ifdef ENABLE_CONDITIONALS
289 			int last_match;
290 			int last_cond;	/* used for error checking by parse() */
291 #endif
292 		} *li;
293 	} c;
294 	struct out {
295 		char *buf;		/* Accumulation buffer */
296 		char *pbuf;		/* Printable buffer */
297 	} o;
298 	uint32_t offset;
299 	int error;
300 	int flags;
301 	int haderr;
302 	const char *file;
303 	size_t line;			/* current magic line number */
304 
305 	/* data for searches */
306 	struct {
307 		const char *s;		/* start of search in original source */
308 		size_t s_len;		/* length of search region */
309 		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
310 		size_t rm_len;		/* match length */
311 	} search;
312 
313 	/* FIXME: Make the string dynamically allocated so that e.g.
314 	   strings matched in files can be longer than MAXstring */
315 	union VALUETYPE ms_value;	/* either number or string */
316 };
317 
/* Integer type used to hold one Unicode character. */
typedef unsigned long int unichar;
320 
321 struct stat;
322 protected const char *file_fmttime(uint32_t, int);
323 protected int file_buffer(struct magic_set *, int, const char *, const void *,
324     size_t);
325 protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
326 protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
327 protected int file_printf(struct magic_set *, const char *, ...);
328 protected int file_reset(struct magic_set *);
329 protected int file_tryelf(struct magic_set *, int, const unsigned char *,
330     size_t);
331 protected int file_zmagic(struct magic_set *, int, const char *,
332     const unsigned char *, size_t);
333 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
334 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
335 protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
336 protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
337 protected uint64_t file_signextend(struct magic_set *, struct magic *,
338     uint64_t);
339 protected void file_delmagic(struct magic *, int type, size_t entries);
340 protected void file_badread(struct magic_set *);
341 protected void file_badseek(struct magic_set *);
342 protected void file_oomem(struct magic_set *, size_t);
343 protected void file_error(struct magic_set *, int, const char *, ...);
344 protected void file_magerror(struct magic_set *, const char *, ...);
345 protected void file_magwarn(struct magic_set *, const char *, ...);
346 protected void file_mdump(struct magic *);
347 protected void file_showstr(FILE *, const char *, size_t);
348 protected size_t file_mbswidth(const char *);
349 protected const char *file_getbuffer(struct magic_set *);
350 protected ssize_t sread(int, void *, size_t, int);
351 protected int file_check_mem(struct magic_set *, unsigned int);
352 protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
353 
/* Declared only when COMPILE_ONLY is not defined. */
#if !defined(COMPILE_ONLY)
extern const char *file_names[];	/* array of strings */
extern const size_t file_nnames;
#endif
358 
/*
 * Replacement for strerror() when HAVE_STRERROR is not defined: index
 * sys_errlist[] with the error number, or yield "Unknown error" when the
 * number is negative or not below sys_nerr.
 */
#if !defined(HAVE_STRERROR)
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) < 0 || (e) >= sys_nerr) ? "Unknown error" : sys_errlist[(e)])
#endif
365 
/* When HAVE_STRTOUL is not defined, strtoul() calls are routed to strtol(). */
#if !defined(HAVE_STRTOUL)
#define strtoul(str, endp, base)	strtol(str, endp, base)
#endif
369 
/*
 * Prototypes for vasprintf() and asprintf(), each supplied only when the
 * corresponding HAVE_* macro is not defined.
 */
#if !defined(HAVE_VASPRINTF)
int vasprintf(char **ptr, const char *format_string, va_list vargs);
#endif

#if !defined(HAVE_ASPRINTF)
int asprintf(char **ptr, const char *format_string, ...);
#endif
376 
/*
 * Define QUICK when both HAVE_MMAP and HAVE_SYS_MMAN_H are defined,
 * unless QUICK has been defined already.
 */
#if !defined(QUICK)
# if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H)
#  define QUICK
# endif
#endif

/* Give O_BINARY a value of 0 where it is not already defined. */
#if !defined(O_BINARY)
#define O_BINARY	0
#endif
384 
/*
 * FILE_RCSID(id) expands to a static function named rcsid() whose body
 * refers to the string id.  With a GNU-compatible compiler the function
 * is first declared with the "used" attribute.
 */
#if defined(__GNUC__)
static const char *rcsid(const char *) __attribute__((__used__));
#endif
#define FILE_RCSID(id)					\
static const char *rcsid(const char *p) {		\
	return rcsid(p = id);				\
}
392 
393 #endif /* __file_h__ */
394