xref: /openbsd-src/usr.bin/wc/wc.c (revision 898184e3e61f9129feb5978fad5a8c6865f00b92)
1 /*	$OpenBSD: wc.c,v 1.13 2009/10/27 23:59:49 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <locale.h>
36 #include <ctype.h>
37 #include <err.h>
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 #include <sys/file.h>
41 #include <unistd.h>
42 #include <util.h>
43 
/* Running totals over every input counted; main() prints them as "total". */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Which columns to print (-l, -w, -c/-m) and whether to scale them (-h). */
int	doline;
int	doword;
int	dochar;
int	humanchar;

/* Exit status handed to exit(); set to 1 whenever an input fails. */
int	rval;

/* Program name used in the usage message; defined outside this file. */
extern char *__progname;

void	print_counts(int64_t lines, int64_t words, int64_t chars, char *name);
void	cnt(char *file);
51 
52 int
53 main(int argc, char *argv[])
54 {
55 	int ch;
56 
57 	setlocale(LC_ALL, "");
58 
59 	while ((ch = getopt(argc, argv, "lwchm")) != -1)
60 		switch((char)ch) {
61 		case 'l':
62 			doline = 1;
63 			break;
64 		case 'w':
65 			doword = 1;
66 			break;
67 		case 'c':
68 		case 'm':
69 			dochar = 1;
70 			break;
71 		case 'h':
72 			humanchar = 1;
73 			break;
74 		case '?':
75 		default:
76 			(void)fprintf(stderr,
77 			    "usage: %s [-c | -m] [-hlw] [file ...]\n",
78 			    __progname);
79 			exit(1);
80 		}
81 	argv += optind;
82 	argc -= optind;
83 
84 	/*
85 	 * wc is unusual in that its flags are on by default, so,
86 	 * if you don't get any arguments, you have to turn them
87 	 * all on.
88 	 */
89 	if (!doline && !doword && !dochar)
90 		doline = doword = dochar = 1;
91 
92 	if (!*argv) {
93 		cnt((char *)NULL);
94 	} else {
95 		int dototal = (argc > 1);
96 
97 		do {
98 			cnt(*argv);
99 		} while(*++argv);
100 
101 		if (dototal)
102 			print_counts(tlinect, twordct, tcharct, "total");
103 	}
104 
105 	exit(rval);
106 }
107 
108 void
109 cnt(char *file)
110 {
111 	u_char *C;
112 	short gotsp;
113 	int len;
114 	int64_t linect, wordct, charct;
115 	struct stat sbuf;
116 	int fd;
117 	u_char buf[MAXBSIZE];
118 
119 	linect = wordct = charct = 0;
120 	if (file) {
121 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
122 			warn("%s", file);
123 			rval = 1;
124 			return;
125 		}
126 	} else  {
127 		fd = STDIN_FILENO;
128 	}
129 
130 	if (!doword) {
131 		/*
132 		 * Line counting is split out because it's a lot
133 		 * faster to get lines than to get words, since
134 		 * the word count requires some logic.
135 		 */
136 		if (doline) {
137 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
138 				charct += len;
139 				for (C = buf; len--; ++C)
140 					if (*C == '\n')
141 						++linect;
142 			}
143 			if (len == -1) {
144 				warn("%s", file);
145 				rval = 1;
146 			}
147 		}
148 		/*
149 		 * If all we need is the number of characters and
150 		 * it's a directory or a regular or linked file, just
151 		 * stat the puppy.  We avoid testing for it not being
152 		 * a special device in case someone adds a new type
153 		 * of inode.
154 		 */
155 		else if (dochar) {
156 			mode_t ifmt;
157 
158 			if (fstat(fd, &sbuf)) {
159 				warn("%s", file);
160 				rval = 1;
161 			} else {
162 				ifmt = sbuf.st_mode & S_IFMT;
163 				if (ifmt == S_IFREG || ifmt == S_IFLNK
164 				    || ifmt == S_IFDIR) {
165 					charct = sbuf.st_size;
166 				} else {
167 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
168 						charct += len;
169 					if (len == -1) {
170 						warn("%s", file);
171 						rval = 1;
172 					}
173 				}
174 			}
175 		}
176 	} else {
177 		/* Do it the hard way... */
178 		gotsp = 1;
179 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
180 			/*
181 			 * This loses in the presence of multi-byte characters.
182 			 * To do it right would require a function to return a
183 			 * character while knowing how many bytes it consumed.
184 			 */
185 			charct += len;
186 			for (C = buf; len--; ++C) {
187 				if (isspace (*C)) {
188 					gotsp = 1;
189 					if (*C == '\n')
190 						++linect;
191 				} else {
192 					/*
193 					 * This line implements the POSIX
194 					 * spec, i.e. a word is a "maximal
195 					 * string of characters delimited by
196 					 * whitespace."  Notice nothing was
197 					 * said about a character being
198 					 * printing or non-printing.
199 					 */
200 					if (gotsp) {
201 						gotsp = 0;
202 						++wordct;
203 					}
204 				}
205 			}
206 		}
207 		if (len == -1) {
208 			warn("%s", file);
209 			rval = 1;
210 		}
211 	}
212 
213 	print_counts(linect, wordct, charct, file);
214 
215 	/*
216 	 * Don't bother checking doline, doword, or dochar -- speeds
217 	 * up the common case
218 	 */
219 	tlinect += linect;
220 	twordct += wordct;
221 	tcharct += charct;
222 
223 	if (close(fd) != 0) {
224 		warn("%s", file);
225 		rval = 1;
226 	}
227 }
228 
229 void
230 format_and_print(long long v)
231 {
232 	if (humanchar) {
233 		char result[FMT_SCALED_STRSIZE];
234 
235 		(void)fmt_scaled(v, result);
236 		(void)printf("%7s", result);
237 	} else {
238 		(void)printf(" %7lld", v);
239 	}
240 }
241 
242 void
243 print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
244 {
245 	if (doline)
246 		format_and_print((long long)lines);
247 	if (doword)
248 		format_and_print((long long)words);
249 	if (dochar)
250 		format_and_print((long long)chars);
251 
252 	if (name)
253 		(void)printf(" %s\n", name);
254 	else
255 		(void)printf("\n");
256 }
257