xref: /netbsd-src/usr.bin/wc/wc.c (revision cda4f8f6ee55684e8d311b86c99ea59191e6b74f)
1 /*
2  * Copyright (c) 1980, 1987 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 char copyright[] =
36 "@(#) Copyright (c) 1980, 1987 Regents of the University of California.\n\
37  All rights reserved.\n";
38 #endif /* not lint */
39 
40 #ifndef lint
41 static char sccsid[] = "@(#)wc.c	5.7 (Berkeley) 3/2/91";
42 #endif /* not lint */
43 
44 /* wc line, word and char count */
45 
46 #include <sys/param.h>
47 #include <sys/stat.h>
48 #include <sys/file.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <errno.h>
54 
/* ASCII codes, in octal, for the characters the counting code refers to. */
#define DEL	0177			/* delete */
#define NL	012			/* newline */
#define SPACE	040			/* space */
#define TAB	011			/* horizontal tab */

/* Running totals accumulated across every input that is counted. */
static long	tlinect;
static long	twordct;
static long	tcharct;

/* Nonzero when the matching count (lines, words, characters) is wanted. */
static int	doline;
static int	doword;
static int	dochar;
62 
63 main(argc, argv)
64 	int argc;
65 	char **argv;
66 {
67 	extern int optind;
68 	register int ch;
69 
70 	while ((ch = getopt(argc, argv, "lwc")) != EOF)
71 		switch((char)ch) {
72 		case 'l':
73 			doline = 1;
74 			break;
75 		case 'w':
76 			doword = 1;
77 			break;
78 		case 'c':
79 			dochar = 1;
80 			break;
81 		case '?':
82 		default:
83 			fputs("usage: wc [-lwc] [files]\n", stderr);
84 			exit(1);
85 		}
86 	argv += optind;
87 	argc -= optind;
88 
89 	/*
90 	 * wc is unusual in that its flags are on by default, so,
91 	 * if you don't get any arguments, you have to turn them
92 	 * all on.
93 	 */
94 	if (!doline && !doword && !dochar) {
95 		doline = doword = dochar = 1;
96 	}
97 
98 	if (!*argv) {
99 		cnt((char *)NULL);
100 		putchar('\n');
101 	} else {
102 		int dototal = (argc > 1);
103 
104 		do {
105 			cnt(*argv);
106 			printf(" %s\n", *argv);
107 		} while(*++argv);
108 
109 		if (dototal) {
110 			if (doline)
111 				printf(" %7ld", tlinect);
112 			if (doword)
113 				printf(" %7ld", twordct);
114 			if (dochar)
115 				printf(" %7ld", tcharct);
116 			puts(" total");
117 		}
118 	}
119 
120 	exit(0);
121 }
122 
123 cnt(file)
124 	char *file;
125 {
126 	register u_char *C;
127 	register short gotsp;
128 	register int len;
129 	register long linect, wordct, charct;
130 	struct stat sbuf;
131 	int fd;
132 	u_char buf[MAXBSIZE];
133 
134 	linect = wordct = charct = 0;
135 	if (file) {
136 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
137 			fprintf (stderr, "wc: %s: %s\n", file, strerror(errno));
138 			exit(1);
139 		}
140 		if (!doword) {
141 			/*
142 			 * line counting is split out because it's a lot
143 			 * faster to get lines than to get words, since
144 			 * the word count requires some logic.
145 			 */
146 			if (doline) {
147 				while(len = read(fd, buf, MAXBSIZE)) {
148 					if (len == -1) {
149 						fprintf (stderr, "wc: %s: %s\n",
150 							file, strerror(errno));
151 						exit(1);
152 					}
153 					charct += len;
154 					for (C = buf; len--; ++C)
155 						if (*C == '\n')
156 							++linect;
157 				}
158 				tlinect += linect;
159 				printf(" %7ld", linect);
160 				if (dochar) {
161 					tcharct += charct;
162 					printf(" %7ld", charct);
163 				}
164 				close(fd);
165 				return;
166 			}
167 			/*
168 			 * if all we need is the number of characters and
169 			 * it's a directory or a regular or linked file, just
170 			 * stat the puppy.  We avoid testing for it not being
171 			 * a special device in case someone adds a new type
172 			 * of inode.
173 			 */
174 			if (dochar) {
175 				int ifmt;
176 
177 				if (fstat(fd, &sbuf)) {
178 					fprintf (stderr, "wc: %s: %s\n",
179 						file, strerror(errno));
180 					exit(1);
181 				}
182 
183 				ifmt = sbuf.st_mode & S_IFMT;
184 				if (ifmt == S_IFREG || ifmt == S_IFLNK
185 					|| ifmt == S_IFDIR) {
186 					printf(" %7ld", sbuf.st_size);
187 					tcharct += sbuf.st_size;
188 					close(fd);
189 					return;
190 				}
191 			}
192 		}
193 	}
194 	else
195 		fd = 0;
196 	/* do it the hard way... */
197 	for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
198 		if (len == -1) {
199 			fprintf (stderr, "wc: %s: %s\n", file, strerror(errno));
200 			exit(1);
201 		}
202 		charct += len;
203 		for (C = buf; len--; ++C) {
204 			if (isspace (*C)) {
205 				gotsp = 1;
206 				if (*C == NL) {
207 					++linect;
208 				}
209 			} else {
210 #if 0
211 				/*
212 				 * This line of code implements the
213 				 * original V7 wc algorithm, i.e.
214 				 * a non-printing character doesn't
215 				 * toggle the "word" count, so that
216 				 * "  ^D^F  " counts as 6 spaces,
217 				 * while "foo^D^Fbar" counts as 8
218 				 * characters.
219 				 *
220 				 * test order is important -- gotsp
221 				 * will normally be NO, so test it
222 				 * first
223 				 */
224 				if (gotsp && *C > SPACE && *C < DEL) ...
225 #endif
226 				/*
227 				 * This line implements the POSIX
228 				 * spec, i.e. a word is a "maximal
229 				 * string of characters delimited by
230 				 * whitespace."  Notice nothing was
231 				 * said about a character being
232 				 * printing or non-printing.
233 				 */
234 				if (gotsp) {
235 					gotsp = 0;
236 					++wordct;
237 				}
238 			}
239 		}
240 	}
241 	if (doline) {
242 		tlinect += linect;
243 		printf(" %7ld", linect);
244 	}
245 	if (doword) {
246 		twordct += wordct;
247 		printf(" %7ld", wordct);
248 	}
249 	if (dochar) {
250 		tcharct += charct;
251 		printf(" %7ld", charct);
252 	}
253 	close(fd);
254 }
255