xref: /csrg-svn/usr.bin/file/file.c (revision 12673)
1 #ifndef lint
2 static	char sccsid[] = "@(#)file.c	4.6 (Berkeley) 4.6";
3 #endif
4 /*
5  * file - determine type of file
6  */
7 
8 #include <sys/types.h>
9 #include <stat.h>
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <a.out.h>
/*
 * Error state owned by the C library.  These are declarations only:
 * without `extern' the incomplete array would be a tentative definition
 * local to this file instead of a reference to the library's table.
 */
extern int	errno;
extern char	*sys_errlist[];

/*
 * Scan state shared by type() and the helper routines below.
 */
int in;			/* byte count returned by the read() that filled buf */
int i  = 0;		/* current scan offset into buf */
char buf[BUFSIZ];	/* leading bytes of the file being classified */

/*
 * Keyword tables handed to lookup(); every table ends with a 0 pointer.
 */
char *troff[] = {	/* new troff intermediate lang */
	"x","T","res","init","font","202","V0","p1",0};
char *fort[] = {	/* fortran keywords */
	"function","subroutine","common","dimension","block","integer",
	"real","data","double",0};
char *asc[] = {		/* assembler opcodes */
	"chmk","mov","tst","clr","jmp",0};
char *c[] = {		/* C keywords */
	"int","char","float","double","struct","extern",0};
char *as[] = {		/* assembler directives, looked up after a leading '.' */
	"globl","byte","align","text","data","comm",0};
int	ifile;		/* descriptor opened by type(); -1 when nothing is open */
30 
31 main(argc, argv)
32 char **argv;
33 {
34 	FILE *fl;
35 	register char *p;
36 	char ap[128];
37 	extern char _sobuf[];
38 
39 	if (argc>1 && argv[1][0]=='-' && argv[1][1]=='f') {
40 		if ((fl = fopen(argv[2], "r")) == NULL) {
41 			perror(argv[2]);
42 			exit(2);
43 		}
44 		while ((p = fgets(ap, 128, fl)) != NULL) {
45 			int l = strlen(p);
46 			if (l>0)
47 				p[l-1] = '\0';
48 			printf("%s:	", p);
49 			type(p);
50 			if (ifile>=0)
51 				close(ifile);
52 		}
53 		exit(1);
54 	}
55 	while(argc > 1) {
56 		printf("%s:	", argv[1]);
57 		type(argv[1]);
58 		fflush(stdout);
59 		argc--;
60 		argv++;
61 		if (ifile >= 0)
62 			close(ifile);
63 	}
64 }
65 
66 type(file)
67 char *file;
68 {
69 	int j,nl;
70 	char ch;
71 	struct stat mbuf;
72 
73 	ifile = -1;
74 	if (lstat(file, &mbuf) < 0) {
75 		printf("%s\n", sys_errlist[errno]);
76 		return;
77 	}
78 	switch (mbuf.st_mode & S_IFMT) {
79 
80 	case S_IFCHR:
81 		printf("character");
82 		goto spcl;
83 
84 	case S_IFLNK:
85 		printf("symbolic link\n");
86 		return;
87 
88 	case S_IFDIR:
89 		printf("directory\n");
90 		return;
91 
92 	case S_IFBLK:
93 		printf("block");
94 
95 spcl:
96 		printf(" special (%d/%d)\n", major(mbuf.st_rdev), minor(mbuf.st_rdev));
97 		return;
98 	}
99 
100 	ifile = open(file, 0);
101 	if(ifile < 0) {
102 		printf("cannot open\n");
103 		return;
104 	}
105 	in = read(ifile, buf, BUFSIZ);
106 	if(in == 0){
107 		printf("empty\n");
108 		return;
109 	}
110 	switch(*(int *)buf) {
111 
112 	case 0413:
113 		printf("demand paged ");
114 
115 	case 0410:
116 		printf("pure ");
117 		goto exec;
118 
119 	case 0411:
120 		printf("jfr or pdp-11 unix 411 executable\n");
121 		return;
122 
123 	case 0407:
124 exec:
125 		printf("executable");
126 		if(((int *)buf)[4] != 0) {
127 			printf(" not stripped");
128 			if(oldo(buf))
129 				printf(" (old format symbol table)");
130 		}
131 		printf("\n");
132 		goto out;
133 
134 	case 0177555:
135 		printf("very old archive\n");
136 		goto out;
137 
138 	case 0177545:
139 		printf("old archive\n");
140 		goto out;
141 
142 	case 070707:
143 		printf("cpio data\n");
144 		goto out;
145 	}
146 
147 	if(strncmp(buf, "!<arch>\n__.SYMDEF", 17) == 0 ) {
148 		printf("archive random library\n");
149 		goto out;
150 	}
151 	if (strncmp(buf, "!<arch>\n", 8)==0) {
152 		printf("archive\n");
153 		goto out;
154 	}
155 	if (mbuf.st_size % 512 == 0) {	/* it may be a PRESS file */
156 		lseek(ifile, -512L, 2);	/* last block */
157 		if (read(ifile, buf, BUFSIZ) > 0
158 		 && *(short int *)buf == 12138) {
159 			printf("PRESS file\n");
160 			goto out;
161 		}
162 	}
163 	i = 0;
164 	if(ccom() == 0)goto notc;
165 	while(buf[i] == '#'){
166 		j = i;
167 		while(buf[i++] != '\n'){
168 			if(i - j > 255){
169 				printf("data\n");
170 				goto out;
171 			}
172 			if(i >= in)goto notc;
173 		}
174 		if(ccom() == 0)goto notc;
175 	}
176 check:
177 	if(lookup(c) == 1){
178 		while((ch = buf[i++]) != ';' && ch != '{')if(i >= in)goto notc;
179 		printf("c program text");
180 		goto outa;
181 	}
182 	nl = 0;
183 	while(buf[i] != '('){
184 		if(buf[i] <= 0)
185 			goto notas;
186 		if(buf[i] == ';'){
187 			i++;
188 			goto check;
189 		}
190 		if(buf[i++] == '\n')
191 			if(nl++ > 6)goto notc;
192 		if(i >= in)goto notc;
193 	}
194 	while(buf[i] != ')'){
195 		if(buf[i++] == '\n')
196 			if(nl++ > 6)goto notc;
197 		if(i >= in)goto notc;
198 	}
199 	while(buf[i] != '{'){
200 		if(buf[i++] == '\n')
201 			if(nl++ > 6)goto notc;
202 		if(i >= in)goto notc;
203 	}
204 	printf("c program text");
205 	goto outa;
206 notc:
207 	i = 0;
208 	while(buf[i] == 'c' || buf[i] == '#'){
209 		while(buf[i++] != '\n')if(i >= in)goto notfort;
210 	}
211 	if(lookup(fort) == 1){
212 		printf("fortran program text");
213 		goto outa;
214 	}
215 notfort:
216 	i=0;
217 	if(ascom() == 0)goto notas;
218 	j = i-1;
219 	if(buf[i] == '.'){
220 		i++;
221 		if(lookup(as) == 1){
222 			printf("assembler program text");
223 			goto outa;
224 		}
225 		else if(buf[j] == '\n' && isalpha(buf[j+2])){
226 			printf("roff, nroff, or eqn input text");
227 			goto outa;
228 		}
229 	}
230 	while(lookup(asc) == 0){
231 		if(ascom() == 0)goto notas;
232 		while(buf[i] != '\n' && buf[i++] != ':')
233 			if(i >= in)goto notas;
234 		while(buf[i] == '\n' || buf[i] == ' ' || buf[i] == '\t')if(i++ >= in)goto notas;
235 		j = i-1;
236 		if(buf[i] == '.'){
237 			i++;
238 			if(lookup(as) == 1){
239 				printf("assembler program text");
240 				goto outa;
241 			}
242 			else if(buf[j] == '\n' && isalpha(buf[j+2])){
243 				printf("roff, nroff, or eqn input text");
244 				goto outa;
245 			}
246 		}
247 	}
248 	printf("assembler program text");
249 	goto outa;
250 notas:
251 	for(i=0; i < in; i++)if(buf[i]&0200){
252 		if (buf[0]=='\100' && buf[1]=='\357') {
253 			printf("troff (CAT) output\n");
254 			goto out;
255 		}
256 		printf("data\n");
257 		goto out;
258 	}
259 	if (mbuf.st_mode&((S_IEXEC)|(S_IEXEC>>3)|(S_IEXEC>>6)))
260 		printf("commands text");
261 	else if (troffint(buf, in))
262 		printf("troff intermediate output text");
263 	else if (english(buf, in))
264 		printf("English text");
265 	else
266 		printf("ascii text");
267 outa:
268 	while(i < in)
269 		if((buf[i++]&0377) > 127){
270 			printf(" with garbage\n");
271 			goto out;
272 		}
273 	/* if next few lines in then read whole file looking for nulls ...
274 		while((in = read(ifile,buf,BUFSIZ)) > 0)
275 			for(i = 0; i < in; i++)
276 				if((buf[i]&0377) > 127){
277 					printf(" with garbage\n");
278 					goto out;
279 				}
280 		/*.... */
281 	printf("\n");
282 out:;
283 }
284 
285 oldo(cp)
286 char *cp;
287 {
288 	struct exec ex;
289 	struct stat stb;
290 
291 	ex = *(struct exec *)cp;
292 	if (fstat(ifile, &stb) < 0)
293 		return(0);
294 	if (N_STROFF(ex)+sizeof(off_t) > stb.st_size)
295 		return (1);
296 	return (0);
297 }
298 
299 
300 
301 troffint(bp, n)
302 char *bp;
303 int n;
304 {
305 	int k;
306 
307 	i = 0;
308 	for (k = 0; k < 6; k++) {
309 		if (lookup(troff) == 0)
310 			return(0);
311 		if (lookup(troff) == 0)
312 			return(0);
313 		while (i < n && buf[i] != '\n')
314 			i++;
315 		if (i++ >= n)
316 			return(0);
317 	}
318 	return(1);
319 }
320 lookup(tab)
321 char *tab[];
322 {
323 	char r;
324 	int k,j,l;
325 	while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n')i++;
326 	for(j=0; tab[j] != 0; j++){
327 		l=0;
328 		for(k=i; ((r=tab[j][l++]) == buf[k] && r != '\0');k++);
329 		if(r == '\0')
330 			if(buf[k] == ' ' || buf[k] == '\n' || buf[k] == '\t'
331 			    || buf[k] == '{' || buf[k] == '/'){
332 				i=k;
333 				return(1);
334 			}
335 	}
336 	return(0);
337 }
338 ccom(){
339 	char cc;
340 	while((cc = buf[i]) == ' ' || cc == '\t' || cc == '\n')if(i++ >= in)return(0);
341 	if(buf[i] == '/' && buf[i+1] == '*'){
342 		i += 2;
343 		while(buf[i] != '*' || buf[i+1] != '/'){
344 			if(buf[i] == '\\')i += 2;
345 			else i++;
346 			if(i >= in)return(0);
347 		}
348 		if((i += 2) >= in)return(0);
349 	}
350 	if(buf[i] == '\n')if(ccom() == 0)return(0);
351 	return(1);
352 }
353 ascom(){
354 	while(buf[i] == '/'){
355 		i++;
356 		while(buf[i++] != '\n')if(i >= in)return(0);
357 		while(buf[i] == '\n')if(i++ >= in)return(0);
358 	}
359 	return(1);
360 }
361 
/*
 * english - guess whether a buffer holds English prose.
 *
 * bp points at n bytes of text.  Returns non-zero when the letter and
 * punctuation statistics look like English, 0 otherwise:
 *   - fewer than 50 bytes is too small a sample and yields 0;
 *   - more than a fifth of the punctuation marks being followed by
 *     something other than a blank or newline yields 0;
 *   - many semicolons, or many of '>', '<' and '/', relative to the
 *     letter 'e' yields 0;
 *   - otherwise vowels must make up at least a fifth of the non-blank
 *     bytes and the common letters (etaion) must outnumber the rare
 *     ones (vjkqxz) ten to one.
 *
 * Bytes are counted with bit 040 forced on, which folds upper case
 * onto lower case.  Bytes of 0200 and above are not counted at all;
 * they are taken as unsigned so that a signed char can never produce
 * a negative index into the count table.
 */
int
english (bp, n)
char *bp;
int n;
{
# define NASC 128
	int ct[NASC], j, ch, vow, freq, rare;
	int badpun = 0, punct = 0;
	if (n<50) return(0); /* no point in statistics on squibs */
	for(j=0; j<NASC; j++)
		ct[j]=0;
	for(j=0; j<n; j++)
	{
		ch = bp[j] & 0377;
		if (ch<NASC)
			ct[ch|040]++;
		switch (ch)
		{
		case '.':
		case ',':
		case ')':
		case '%':
		case ';':
		case ':':
		case '?':
			punct++;
			/* prose puts a blank or newline after punctuation */
			if ( j < n-1 &&
			    bp[j+1] != ' ' &&
			    bp[j+1] != '\n')
				badpun++;
			break;
		default:
			break;
		}
	}
	if (badpun*5 > punct)
		return(0);
	vow = ct['a'] + ct['e'] + ct['i'] + ct['o'] + ct['u'];
	freq = ct['e'] + ct['t'] + ct['a'] + ct['i'] + ct['o'] + ct['n'];
	rare = ct['v'] + ct['j'] + ct['k'] + ct['q'] + ct['x'] + ct['z'];
	if (2*ct[';'] > ct['e']) return(0);
	if ( (ct['>']+ct['<']+ct['/'])>ct['e']) return(0); /* shell file test */
	return (vow*5 >= n-ct[' '] && freq >= 10*rare);
}
400