xref: /netbsd-src/usr.bin/wc/wc.c (revision e9d867ef5010fbab8d48045c13025636f5cd7479)
1 /*
2  * Copyright (c) 1980, 1987 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
#ifndef lint
/*
 * Identification strings compiled into the object unless `lint' is
 * defined: the Berkeley copyright notice and the RCS revision id.
 */
char copyright[] =
    "@(#) Copyright (c) 1980, 1987 Regents of the University of California.\n"
    " All rights reserved.\n";

/* The original Berkeley SCCS id is retained below, commented out. */
/*static char sccsid[] = "from: @(#)wc.c	5.7 (Berkeley) 3/2/91";*/
static char rcsid[] = "$Id: wc.c,v 1.3 1993/08/01 18:03:04 mycroft Exp $";
#endif /* not lint */
44 
45 /* wc line, word and char count */
46 
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/file.h>

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
55 
/* ASCII character codes (octal), listed in ascending order. */
#define TAB	011			/* horizontal tab */
#define NL	012			/* newline */
#define SPACE	040			/* space */
#define DEL	0177			/* delete */

/* Running totals over every input processed so far. */
static long	tlinect;		/* lines */
static long	twordct;		/* words */
static long	tcharct;		/* characters */

/* Output selectors: nonzero when the matching count is to be printed. */
static int	doline;
static int	doword;
static int	dochar;
63 
64 main(argc, argv)
65 	int argc;
66 	char **argv;
67 {
68 	extern int optind;
69 	register int ch;
70 
71 	while ((ch = getopt(argc, argv, "lwc")) != EOF)
72 		switch((char)ch) {
73 		case 'l':
74 			doline = 1;
75 			break;
76 		case 'w':
77 			doword = 1;
78 			break;
79 		case 'c':
80 			dochar = 1;
81 			break;
82 		case '?':
83 		default:
84 			fputs("usage: wc [-lwc] [files]\n", stderr);
85 			exit(1);
86 		}
87 	argv += optind;
88 	argc -= optind;
89 
90 	/*
91 	 * wc is unusual in that its flags are on by default, so,
92 	 * if you don't get any arguments, you have to turn them
93 	 * all on.
94 	 */
95 	if (!doline && !doword && !dochar) {
96 		doline = doword = dochar = 1;
97 	}
98 
99 	if (!*argv) {
100 		cnt((char *)NULL);
101 		putchar('\n');
102 	} else {
103 		int dototal = (argc > 1);
104 
105 		do {
106 			cnt(*argv);
107 			printf(" %s\n", *argv);
108 		} while(*++argv);
109 
110 		if (dototal) {
111 			if (doline)
112 				printf(" %7ld", tlinect);
113 			if (doword)
114 				printf(" %7ld", twordct);
115 			if (dochar)
116 				printf(" %7ld", tcharct);
117 			puts(" total");
118 		}
119 	}
120 
121 	exit(0);
122 }
123 
124 cnt(file)
125 	char *file;
126 {
127 	register u_char *C;
128 	register short gotsp;
129 	register int len;
130 	register long linect, wordct, charct;
131 	struct stat sbuf;
132 	int fd;
133 	u_char buf[MAXBSIZE];
134 
135 	linect = wordct = charct = 0;
136 	if (file) {
137 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
138 			fprintf (stderr, "wc: %s: %s\n", file, strerror(errno));
139 			exit(1);
140 		}
141 		if (!doword) {
142 			/*
143 			 * line counting is split out because it's a lot
144 			 * faster to get lines than to get words, since
145 			 * the word count requires some logic.
146 			 */
147 			if (doline) {
148 				while(len = read(fd, buf, MAXBSIZE)) {
149 					if (len == -1) {
150 						fprintf (stderr, "wc: %s: %s\n",
151 							file, strerror(errno));
152 						exit(1);
153 					}
154 					charct += len;
155 					for (C = buf; len--; ++C)
156 						if (*C == '\n')
157 							++linect;
158 				}
159 				tlinect += linect;
160 				printf(" %7ld", linect);
161 				if (dochar) {
162 					tcharct += charct;
163 					printf(" %7ld", charct);
164 				}
165 				close(fd);
166 				return;
167 			}
168 			/*
169 			 * if all we need is the number of characters and
170 			 * it's a directory or a regular or linked file, just
171 			 * stat the puppy.  We avoid testing for it not being
172 			 * a special device in case someone adds a new type
173 			 * of inode.
174 			 */
175 			if (dochar) {
176 				int ifmt;
177 
178 				if (fstat(fd, &sbuf)) {
179 					fprintf (stderr, "wc: %s: %s\n",
180 						file, strerror(errno));
181 					exit(1);
182 				}
183 
184 				ifmt = sbuf.st_mode & S_IFMT;
185 				if (ifmt == S_IFREG || ifmt == S_IFLNK
186 					|| ifmt == S_IFDIR) {
187 					printf(" %7ld", sbuf.st_size);
188 					tcharct += sbuf.st_size;
189 					close(fd);
190 					return;
191 				}
192 			}
193 		}
194 	}
195 	else
196 		fd = 0;
197 	/* do it the hard way... */
198 	for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
199 		if (len == -1) {
200 			fprintf (stderr, "wc: %s: %s\n", file, strerror(errno));
201 			exit(1);
202 		}
203 		charct += len;
204 		for (C = buf; len--; ++C) {
205 			if (isspace (*C)) {
206 				gotsp = 1;
207 				if (*C == NL) {
208 					++linect;
209 				}
210 			} else {
211 #if 0
212 				/*
213 				 * This line of code implements the
214 				 * original V7 wc algorithm, i.e.
215 				 * a non-printing character doesn't
216 				 * toggle the "word" count, so that
217 				 * "  ^D^F  " counts as 6 spaces,
218 				 * while "foo^D^Fbar" counts as 8
219 				 * characters.
220 				 *
221 				 * test order is important -- gotsp
222 				 * will normally be NO, so test it
223 				 * first
224 				 */
225 				if (gotsp && *C > SPACE && *C < DEL) ...
226 #endif
227 				/*
228 				 * This line implements the POSIX
229 				 * spec, i.e. a word is a "maximal
230 				 * string of characters delimited by
231 				 * whitespace."  Notice nothing was
232 				 * said about a character being
233 				 * printing or non-printing.
234 				 */
235 				if (gotsp) {
236 					gotsp = 0;
237 					++wordct;
238 				}
239 			}
240 		}
241 	}
242 	if (doline) {
243 		tlinect += linect;
244 		printf(" %7ld", linect);
245 	}
246 	if (doword) {
247 		twordct += wordct;
248 		printf(" %7ld", wordct);
249 	}
250 	if (dochar) {
251 		tcharct += charct;
252 		printf(" %7ld", charct);
253 	}
254 	close(fd);
255 }
256