xref: /netbsd-src/usr.bin/wc/wc.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*	$NetBSD: wc.c,v 1.20 1999/03/05 22:52:09 kleink Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
45 #else
46 __RCSID("$NetBSD: wc.c,v 1.20 1999/03/05 22:52:09 kleink Exp $");
47 #endif
48 #endif /* not lint */
49 
50 /* wc line, word and char count */
51 
52 #include <sys/param.h>
53 #include <sys/stat.h>
54 #include <sys/types.h>
55 
56 #include <fcntl.h>
57 #include <unistd.h>
58 #include <errno.h>
59 #include <stdio.h>
60 
61 #include <stdlib.h>
62 #include <string.h>
63 #include <locale.h>
64 #include <ctype.h>
65 #include <errno.h>
66 #include <sys/param.h>
67 #include <sys/stat.h>
68 #include <sys/file.h>
69 #include <unistd.h>
70 #include <err.h>
71 
72 #ifdef NO_QUAD
73 typedef u_long wc_count_t;
74 # define WCFMT	" %7lu"
75 # define WCCAST unsigned long
76 #else
77 typedef u_quad_t wc_count_t;
78 # define WCFMT	" %7llu"
79 # define WCCAST	unsigned long long
80 #endif
81 
82 static wc_count_t	tlinect, twordct, tcharct;
83 static int		doline, doword, dochar;
84 static int 		rval = 0;
85 
86 static void	cnt __P((char *));
87 static void	print_counts __P((wc_count_t, wc_count_t, wc_count_t, char *));
88 static void	usage __P((void));
89 int	main __P((int, char *[]));
90 
91 int
92 main(argc, argv)
93 	int argc;
94 	char *argv[];
95 {
96 	int ch;
97 
98 	setlocale(LC_ALL, "");
99 
100 	while ((ch = getopt(argc, argv, "lwcm")) != -1)
101 		switch((char)ch) {
102 		case 'l':
103 			doline = 1;
104 			break;
105 		case 'w':
106 			doword = 1;
107 			break;
108 		case 'c':
109 		case 'm':
110 			dochar = 1;
111 			break;
112 		case '?':
113 		default:
114 			usage();
115 		}
116 	argv += optind;
117 	argc -= optind;
118 
119 	/* Wc's flags are on by default. */
120 	if (doline + doword + dochar == 0)
121 		doline = doword = dochar = 1;
122 
123 	if (!*argv) {
124 		cnt(NULL);
125 	} else {
126 		int dototal = (argc > 1);
127 
128 		do {
129 			cnt(*argv);
130 		} while(*++argv);
131 
132 		if (dototal)
133 			print_counts(tlinect, twordct, tcharct, "total");
134 	}
135 
136 	exit(rval);
137 }
138 
139 static void
140 cnt(file)
141 	char *file;
142 {
143 	u_char *C;
144 	short gotsp;
145 	int len;
146 	wc_count_t linect, wordct, charct;
147 	struct stat sb;
148 	int fd;
149 	u_char buf[MAXBSIZE];
150 
151 	linect = wordct = charct = 0;
152 	if (file) {
153 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
154 			warn("%s", file);
155 			rval = 1;
156 			return;
157 		}
158 	} else  {
159 		fd = STDIN_FILENO;
160 	}
161 
162 	if (!doword) {
163 		/*
164 		 * line counting is split out because it's a lot
165 		 * faster to get lines than to get words, since
166 		 * the word count requires some logic.
167 		 */
168 		if (doline) {
169 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
170 				charct += len;
171 				for (C = buf; len--; ++C)
172 					if (*C == '\n')
173 						++linect;
174 			}
175 			if (len == -1) {
176 				warn ("%s", file);
177 				rval = 1;
178 			}
179 		}
180 
181 		/*
182 		 * if all we need is the number of characters and
183 		 * it's a directory or a regular or linked file, just
184 		 * stat the puppy.  We avoid testing for it not being
185 		 * a special device in case someone adds a new type
186 		 * of inode.
187 		 */
188 		else if (dochar) {
189 			if (fstat(fd, &sb)) {
190 				warn("%s", file);
191 				rval = 1;
192 			} else {
193 				if (S_ISREG(sb.st_mode) ||
194 				    S_ISLNK(sb.st_mode) ||
195 				    S_ISDIR(sb.st_mode)) {
196 					charct = sb.st_size;
197 				} else {
198 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
199 						charct += len;
200 					if (len == -1) {
201 						warn ("%s", file);
202 						rval = 1;
203 					}
204 				}
205 			}
206 		}
207 	}
208 	else
209 	{
210 		/* do it the hard way... */
211 		gotsp = 1;
212 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
213 			charct += len;
214 			for (C = buf; len--; ++C) {
215 				if (isspace(*C)) {
216 					gotsp = 1;
217 					if (*C == '\n') {
218 						++linect;
219 					}
220 				} else {
221 					/*
222 					 * This line implements the POSIX
223 					 * spec, i.e. a word is a "maximal
224 					 * string of characters delimited by
225 					 * whitespace."  Notice nothing was
226 					 * said about a character being
227 					 * printing or non-printing.
228 					 */
229 					if (gotsp) {
230 						gotsp = 0;
231 						++wordct;
232 					}
233 				}
234 			}
235 		}
236 		if (len == -1) {
237 			warn("%s", file);
238 			rval = 1;
239 		}
240 	}
241 
242 	print_counts(linect, wordct, charct, file ? file : 0);
243 
244 	/* don't bother checkint doline, doword, or dochar --- speeds
245            up the common case */
246 	tlinect += linect;
247 	twordct += wordct;
248 	tcharct += charct;
249 
250 	if (close(fd)) {
251 		warn ("%s", file);
252 		rval = 1;
253 	}
254 }
255 
256 static void
257 print_counts(lines, words, chars, name)
258 	wc_count_t lines;
259 	wc_count_t words;
260 	wc_count_t chars;
261 	char *name;
262 {
263 
264 	if (doline)
265 		printf(WCFMT, (WCCAST)lines);
266 	if (doword)
267 		printf(WCFMT, (WCCAST)words);
268 	if (dochar)
269 		printf(WCFMT, (WCCAST)chars);
270 
271 	if (name)
272 		printf(" %s\n", name);
273 	else
274 		printf("\n");
275 }
276 
/*
 * usage --
 *	Print the command synopsis to standard error and exit with status 1.
 *	The synopsis lists every option main() accepts through its getopt
 *	string "lwcm", including -m.
 */
static void
usage()
{
	(void)fprintf(stderr, "usage: wc [-clmw] [file ...]\n");
	exit(1);
}
283