xref: /csrg-svn/usr.bin/wc/wc.c (revision 30103)
121588Sdist /*
221588Sdist  * Copyright (c) 1980 Regents of the University of California.
321588Sdist  * All rights reserved.  The Berkeley software License Agreement
421588Sdist  * specifies the terms and conditions for redistribution.
521588Sdist  */
621588Sdist 
/*
 * Identification strings embedded in the binary.  They are left out
 * when the file is run through lint (which defines "lint").
 */
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 Regents of the University of California.\n\
 All rights reserved.\n";
#endif /* not lint */

#ifndef lint
static char sccsid[] = "@(#)wc.c	5.2 (Berkeley) 11/18/86";
#endif /* not lint */
1621588Sdist 
17*30103Sbostic /* wc line, word and char count */
181161Sbill 
19*30103Sbostic #include <sys/param.h>
20*30103Sbostic #include <sys/stat.h>
21*30103Sbostic #include <sys/file.h>
221161Sbill #include <stdio.h>
231161Sbill 
/* Character codes, in octal, that the counting code tests for. */
#define TAB	011			/* horizontal tab */
#define NL	012			/* newline */
#define SPACE	040			/* blank */
#define DEL	0177			/* delete */

/* Truth values stored in the do* flags. */
#define NO	0			/* false */
#define YES	1			/* true */

/* Process exit statuses. */
#define OK	0			/* success */
#define ERR	1			/* failure */

/* Totals accumulated over every input that is counted. */
static long	tlinect;		/* lines */
static long	twordct;		/* words */
static long	tcharct;		/* characters */

/* Which counts were asked for; each is YES or NO. */
static short	doline;			/* report line counts */
static short	doword;			/* report word counts */
static short	dochar;			/* report character counts */
39*30103Sbostic 
40*30103Sbostic main(argc,argv)
41*30103Sbostic int	argc;
42*30103Sbostic char	**argv;
431161Sbill {
44*30103Sbostic 	extern char	*optarg;	/* getopt arguments */
45*30103Sbostic 	extern int	optind;
46*30103Sbostic 	register int	ch;		/* getopt character */
471161Sbill 
48*30103Sbostic 	/*
49*30103Sbostic 	 * wc is unusual in that its flags are on by default, so,
50*30103Sbostic 	 * if you don't get any arguments, you have to turn them
51*30103Sbostic 	 * all on.
52*30103Sbostic 	 */
53*30103Sbostic 	if (argc > 1 && argv[1][0] == '-' && argv[1][1]) {
54*30103Sbostic 		while ((ch = getopt(argc,argv,"lwc")) != EOF)
55*30103Sbostic 			switch((char)ch) {
56*30103Sbostic 				case 'l':
57*30103Sbostic 					doline = YES;
58*30103Sbostic 					break;
59*30103Sbostic 				case 'w':
60*30103Sbostic 					doword = YES;
61*30103Sbostic 					break;
62*30103Sbostic 				case 'c':
63*30103Sbostic 					dochar = YES;
64*30103Sbostic 					break;
65*30103Sbostic 				case '?':
66*30103Sbostic 				default:
67*30103Sbostic 					fputs("Usage: wc [-lwc] [files]\n",stderr);
68*30103Sbostic 					exit(ERR);
69*30103Sbostic 			}
70*30103Sbostic 		argv += optind;
71*30103Sbostic 		argc -= optind;
72*30103Sbostic 	}
73*30103Sbostic 	else {
74*30103Sbostic 		++argv;
75*30103Sbostic 		--argc;
76*30103Sbostic 		doline = doword = dochar = YES;
77*30103Sbostic 	}
78*30103Sbostic 
79*30103Sbostic 	/* should print "stdin" as the file name, here */
80*30103Sbostic 	if (argc <= 1) {
81*30103Sbostic 		if (!*argv || !strcmp(*argv,"-")) {
82*30103Sbostic 			cnt((char *)NULL);
83*30103Sbostic 			putchar('\n');
841161Sbill 		}
85*30103Sbostic 		else {
86*30103Sbostic 			cnt(*argv);
87*30103Sbostic 			printf(" %s\n",*argv);
88*30103Sbostic 		}
89*30103Sbostic 		exit(OK);
901161Sbill 	}
911161Sbill 
92*30103Sbostic 	/*
93*30103Sbostic 	 * cat allows "-" as stdin anywhere in the arg list,
94*30103Sbostic 	 * might as well here, too.  Again, should use "stdin"
95*30103Sbostic 	 * as the file name.
96*30103Sbostic 	 */
971161Sbill 	do {
98*30103Sbostic 		if (!strcmp(*argv,"-")) {
99*30103Sbostic 			cnt((char *)NULL);
100*30103Sbostic 			putchar('\n');
1011161Sbill 		}
102*30103Sbostic 		else {
103*30103Sbostic 			cnt(*argv);
104*30103Sbostic 			printf(" %s\n",*argv);
105*30103Sbostic 		}
106*30103Sbostic 	} while(*++argv);
107*30103Sbostic 
108*30103Sbostic 	if (doline)
109*30103Sbostic 		printf(" %7ld",tlinect);
110*30103Sbostic 	if (doword)
111*30103Sbostic 		printf(" %7ld",twordct);
112*30103Sbostic 	if (dochar)
113*30103Sbostic 		printf(" %7ld",tcharct);
114*30103Sbostic 	puts(" total");
115*30103Sbostic 	exit(OK);
116*30103Sbostic }
117*30103Sbostic 
118*30103Sbostic static
119*30103Sbostic cnt(file)
120*30103Sbostic char	*file;
121*30103Sbostic {
122*30103Sbostic 	register u_char	*C;		/* traveling pointer */
123*30103Sbostic 	register short	gotsp;		/* space toggle */
124*30103Sbostic 	register int	len;		/* length of read */
125*30103Sbostic 	register long	linect,		/* line count */
126*30103Sbostic 			wordct,		/* word count */
127*30103Sbostic 			charct;		/* character count */
128*30103Sbostic 	struct stat	sbuf;		/* stat buffer */
129*30103Sbostic 	int	fd;			/* file descriptor */
130*30103Sbostic 	u_char	buf[MAXBSIZE];		/* read buffer */
131*30103Sbostic 
132*30103Sbostic 	linect = wordct = charct = 0;
133*30103Sbostic 	if (file) {
134*30103Sbostic 		if ((fd = open(file,O_RDONLY)) < 0) {
135*30103Sbostic 			perror(file);
136*30103Sbostic 			exit(ERR);
137*30103Sbostic 		}
138*30103Sbostic 		if (!doword) {
139*30103Sbostic 			/*
140*30103Sbostic 			 * line counting is split out because it's a lot
141*30103Sbostic 			 * faster to get lines than to get words, since
142*30103Sbostic 			 * the word count requires some logic.
143*30103Sbostic 			 */
144*30103Sbostic 			if (doline) {
145*30103Sbostic 				while(len = read(fd,buf,MAXBSIZE)) {
146*30103Sbostic 					if (len == -1) {
147*30103Sbostic 						perror(file);
148*30103Sbostic 						exit(ERR);
149*30103Sbostic 					}
150*30103Sbostic 					charct += len;
151*30103Sbostic 					for (C = buf;len--;++C)
152*30103Sbostic 						if (*C == '\n')
153*30103Sbostic 							++linect;
1541161Sbill 				}
155*30103Sbostic 				tlinect += linect;
156*30103Sbostic 				printf(" %7ld",linect);
157*30103Sbostic 				if (dochar) {
158*30103Sbostic 					tcharct += charct;
159*30103Sbostic 					printf(" %7ld",sbuf.st_size);
160*30103Sbostic 				}
161*30103Sbostic 				close(fd);
162*30103Sbostic 				return;
1631161Sbill 			}
164*30103Sbostic 			/*
165*30103Sbostic 			 * if all we need is the number of characters and
166*30103Sbostic 			 * it's a directory or a regular or linked file, just
167*30103Sbostic 			 * stat the puppy.  We avoid testing for it not being
168*30103Sbostic 			 * a special device in case someone adds a new type
169*30103Sbostic 			 * of inode.
170*30103Sbostic 			 */
171*30103Sbostic 			if (dochar) {
172*30103Sbostic 				if (fstat(fd,&sbuf)) {
173*30103Sbostic 					perror(file);
174*30103Sbostic 					exit(ERR);
175*30103Sbostic 				}
176*30103Sbostic 				if (sbuf.st_mode & (S_IFREG | S_IFLNK | S_IFDIR)) {
177*30103Sbostic 					printf(" %7ld",sbuf.st_size);
178*30103Sbostic 					tcharct += sbuf.st_size;
179*30103Sbostic 					close(fd);
180*30103Sbostic 					return;
181*30103Sbostic 				}
1821161Sbill 			}
1831161Sbill 		}
184*30103Sbostic 	}
185*30103Sbostic 	else
186*30103Sbostic 		fd = 0;
187*30103Sbostic 	/* do it the hard way... */
188*30103Sbostic 	for (gotsp = YES;len = read(fd,buf,MAXBSIZE);) {
189*30103Sbostic 		if (len == -1) {
190*30103Sbostic 			perror(file);
191*30103Sbostic 			exit(ERR);
192*30103Sbostic 		}
193*30103Sbostic 		charct += len;
194*30103Sbostic 		for (C = buf;len--;++C)
195*30103Sbostic 			switch(*C) {
196*30103Sbostic 				case NL:
197*30103Sbostic 					++linect;
198*30103Sbostic 				case TAB:
199*30103Sbostic 				case SPACE:
200*30103Sbostic 					gotsp = YES;
201*30103Sbostic 					continue;
202*30103Sbostic 				default:
203*30103Sbostic #ifdef NOT_DEFINED
204*30103Sbostic 					/*
205*30103Sbostic 					 * This line of code implements the
206*30103Sbostic 					 * original V7 wc algorithm, i.e.
207*30103Sbostic 					 * a non-printing character doesn't
208*30103Sbostic 					 * toggle the "word" count, so that
209*30103Sbostic 					 * "  ^D^F  " counts as 6 spaces,
210*30103Sbostic 					 * while "foo^D^Fbar" counts as 8
211*30103Sbostic 					 * characters.
212*30103Sbostic 					 *
213*30103Sbostic 					 * test order is important -- gotsp
214*30103Sbostic 					 * will normally be NO, so test it
215*30103Sbostic 					 * first
216*30103Sbostic 					 */
217*30103Sbostic 					if (gotsp && *C > SPACE && *C < DEL) {
218*30103Sbostic #endif NOT_DEFINED
219*30103Sbostic 					/*
220*30103Sbostic 					 * This line implements the manual
221*30103Sbostic 					 * page, i.e. a word is a "maximal
222*30103Sbostic 					 * string of characters delimited by
223*30103Sbostic 					 * spaces, tabs or newlines."  Notice
224*30103Sbostic 					 * nothing was said about a character
225*30103Sbostic 					 * being printing or non-printing.
226*30103Sbostic 					 */
227*30103Sbostic 					if (gotsp) {
228*30103Sbostic 						gotsp = NO;
229*30103Sbostic 						++wordct;
230*30103Sbostic 					}
231*30103Sbostic 			}
232*30103Sbostic 	}
233*30103Sbostic 	if (doline) {
2341161Sbill 		tlinect += linect;
235*30103Sbostic 		printf(" %7ld",linect);
236*30103Sbostic 	}
237*30103Sbostic 	if (doword) {
2381161Sbill 		twordct += wordct;
239*30103Sbostic 		printf(" %7ld",wordct);
240*30103Sbostic 	}
241*30103Sbostic 	if (dochar) {
2421161Sbill 		tcharct += charct;
243*30103Sbostic 		printf(" %7ld",charct);
2441161Sbill 	}
245*30103Sbostic 	close(fd);
2461161Sbill }
247