xref: /openbsd-src/usr.bin/wc/wc.c (revision c93d6c4984abdab2ee26d4ff062f55ec64f70f0e)
1 /*	$OpenBSD: wc.c,v 1.11 2005/10/19 21:49:02 espie Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #ifndef lint
33 static char copyright[] =
34 "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
35 	The Regents of the University of California.  All rights reserved.\n";
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
41 #else
42 static char rcsid[] = "$OpenBSD: wc.c,v 1.11 2005/10/19 21:49:02 espie Exp $";
43 #endif
44 #endif /* not lint */
45 
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <locale.h>
50 #include <ctype.h>
51 #include <err.h>
52 #include <sys/param.h>
53 #include <sys/stat.h>
54 #include <sys/file.h>
55 #include <unistd.h>
56 #include <util.h>
57 
/* Running totals; cnt() adds each input's counts, main() prints them. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Option flags set by main() while parsing the command line. */
int	doline;		/* -l: print the line count */
int	doword;		/* -w: print the word count */
int	dochar;		/* -c or -m: print the character count */
int	humanchar;	/* -h: print counts via fmt_scaled() */

/* Value handed to exit(); cnt() sets it to 1 whenever it reports a failure. */
int	rval;

/* Program name, used in the usage message. */
extern char *__progname;

void	print_counts(int64_t lines, int64_t words, int64_t chars, char *name);
void	cnt(char *file);
65 
66 int
67 main(int argc, char *argv[])
68 {
69 	int ch;
70 
71 	setlocale(LC_ALL, "");
72 
73 	while ((ch = getopt(argc, argv, "lwchm")) != -1)
74 		switch((char)ch) {
75 		case 'l':
76 			doline = 1;
77 			break;
78 		case 'w':
79 			doword = 1;
80 			break;
81 		case 'c':
82 		case 'm':
83 			dochar = 1;
84 			break;
85 		case 'h':
86 			humanchar = 1;
87 			break;
88 		case '?':
89 		default:
90 			(void)fprintf(stderr,
91 			    "usage: %s [-c | -m] [-hlw] [file ...]\n",
92 			    __progname);
93 			exit(1);
94 		}
95 	argv += optind;
96 	argc -= optind;
97 
98 	/*
99 	 * wc is unusual in that its flags are on by default, so,
100 	 * if you don't get any arguments, you have to turn them
101 	 * all on.
102 	 */
103 	if (!doline && !doword && !dochar)
104 		doline = doword = dochar = 1;
105 
106 	if (!*argv) {
107 		cnt((char *)NULL);
108 	} else {
109 		int dototal = (argc > 1);
110 
111 		do {
112 			cnt(*argv);
113 		} while(*++argv);
114 
115 		if (dototal)
116 			print_counts(tlinect, twordct, tcharct, "total");
117 	}
118 
119 	exit(rval);
120 }
121 
122 void
123 cnt(char *file)
124 {
125 	u_char *C;
126 	short gotsp;
127 	int len;
128 	int64_t linect, wordct, charct;
129 	struct stat sbuf;
130 	int fd;
131 	u_char buf[MAXBSIZE];
132 
133 	linect = wordct = charct = 0;
134 	if (file) {
135 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
136 			warn("%s", file);
137 			rval = 1;
138 			return;
139 		}
140 	} else  {
141 		fd = STDIN_FILENO;
142 	}
143 
144 	if (!doword) {
145 		/*
146 		 * Line counting is split out because it's a lot
147 		 * faster to get lines than to get words, since
148 		 * the word count requires some logic.
149 		 */
150 		if (doline) {
151 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
152 				charct += len;
153 				for (C = buf; len--; ++C)
154 					if (*C == '\n')
155 						++linect;
156 			}
157 			if (len == -1) {
158 				warn("%s", file);
159 				rval = 1;
160 			}
161 		}
162 		/*
163 		 * If all we need is the number of characters and
164 		 * it's a directory or a regular or linked file, just
165 		 * stat the puppy.  We avoid testing for it not being
166 		 * a special device in case someone adds a new type
167 		 * of inode.
168 		 */
169 		else if (dochar) {
170 			mode_t ifmt;
171 
172 			if (fstat(fd, &sbuf)) {
173 				warn("%s", file);
174 				rval = 1;
175 			} else {
176 				ifmt = sbuf.st_mode & S_IFMT;
177 				if (ifmt == S_IFREG || ifmt == S_IFLNK
178 				    || ifmt == S_IFDIR) {
179 					charct = sbuf.st_size;
180 				} else {
181 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
182 						charct += len;
183 					if (len == -1) {
184 						warn("%s", file);
185 						rval = 1;
186 					}
187 				}
188 			}
189 		}
190 	} else {
191 		/* Do it the hard way... */
192 		gotsp = 1;
193 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
194 			/*
195 			 * This loses in the presence of multi-byte characters.
196 			 * To do it right would require a function to return a
197 			 * character while knowing how many bytes it consumed.
198 			 */
199 			charct += len;
200 			for (C = buf; len--; ++C) {
201 				if (isspace (*C)) {
202 					gotsp = 1;
203 					if (*C == '\n')
204 						++linect;
205 				} else {
206 					/*
207 					 * This line implements the POSIX
208 					 * spec, i.e. a word is a "maximal
209 					 * string of characters delimited by
210 					 * whitespace."  Notice nothing was
211 					 * said about a character being
212 					 * printing or non-printing.
213 					 */
214 					if (gotsp) {
215 						gotsp = 0;
216 						++wordct;
217 					}
218 				}
219 			}
220 		}
221 		if (len == -1) {
222 			warn("%s", file);
223 			rval = 1;
224 		}
225 	}
226 
227 	print_counts(linect, wordct, charct, file ? file : "");
228 
229 	/*
230 	 * Don't bother checking doline, doword, or dochar -- speeds
231 	 * up the common case
232 	 */
233 	tlinect += linect;
234 	twordct += wordct;
235 	tcharct += charct;
236 
237 	if (close(fd) != 0) {
238 		warn("%s", file);
239 		rval = 1;
240 	}
241 }
242 
243 void
244 format_and_print(long long v)
245 {
246 	if (humanchar) {
247 		char result[FMT_SCALED_STRSIZE];
248 
249 		(void)fmt_scaled(v, result);
250 		(void)printf("%7s", result);
251 	} else {
252 		(void)printf(" %7lld", v);
253 	}
254 }
255 
256 void
257 print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
258 {
259 	if (doline)
260 		format_and_print((long long)lines);
261 	if (doword)
262 		format_and_print((long long)words);
263 	if (dochar)
264 		format_and_print((long long)chars);
265 
266 	(void)printf(" %s\n", name);
267 }
268