xref: /openbsd-src/usr.bin/wc/wc.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: wc.c,v 1.21 2016/09/16 09:25:23 fcambus Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>	/* MAXBSIZE */
33 #include <sys/stat.h>
34 #include <sys/file.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <locale.h>
38 #include <ctype.h>
39 #include <err.h>
40 #include <unistd.h>
41 #include <util.h>
42 #include <wchar.h>
43 #include <wctype.h>
44 
/* Running totals across all inputs, reported on the "total" line. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Option flags set while parsing the command line in main(). */
int	doline;		/* -l: report line counts */
int	doword;		/* -w: report word counts */
int	dochar;		/* -c or -m: report character counts */
int	humanchar;	/* -h: print counts in scaled form */
int	multibyte;	/* -m given while MB_CUR_MAX > 1 */

/* Exit status returned by main(); set to 1 after any reported failure. */
int	rval;

extern char *__progname;

static void print_counts(int64_t lines, int64_t words, int64_t chars,
    char *name);
static void format_and_print(int64_t v);
static void cnt(char *file);
53 
54 int
55 main(int argc, char *argv[])
56 {
57 	int ch;
58 
59 	setlocale(LC_CTYPE, "");
60 
61 	if (pledge("stdio rpath", NULL) == -1)
62 		err(1, "pledge");
63 
64 	while ((ch = getopt(argc, argv, "lwchm")) != -1)
65 		switch(ch) {
66 		case 'l':
67 			doline = 1;
68 			break;
69 		case 'w':
70 			doword = 1;
71 			break;
72 		case 'm':
73 			if (MB_CUR_MAX > 1)
74 				multibyte = 1;
75 			/* FALLTHROUGH */
76 		case 'c':
77 			dochar = 1;
78 			break;
79 		case 'h':
80 			humanchar = 1;
81 			break;
82 		case '?':
83 		default:
84 			fprintf(stderr,
85 			    "usage: %s [-c | -m] [-hlw] [file ...]\n",
86 			    __progname);
87 			return 1;
88 		}
89 	argv += optind;
90 	argc -= optind;
91 
92 	/*
93 	 * wc is unusual in that its flags are on by default, so,
94 	 * if you don't get any arguments, you have to turn them
95 	 * all on.
96 	 */
97 	if (!doline && !doword && !dochar)
98 		doline = doword = dochar = 1;
99 
100 	if (!*argv) {
101 		cnt(NULL);
102 	} else {
103 		int dototal = (argc > 1);
104 
105 		do {
106 			cnt(*argv);
107 		} while(*++argv);
108 
109 		if (dototal)
110 			print_counts(tlinect, twordct, tcharct, "total");
111 	}
112 
113 	return rval;
114 }
115 
116 static void
117 cnt(char *file)
118 {
119 	static char *buf;
120 	static size_t bufsz;
121 
122 	FILE *stream;
123 	char *C;
124 	wchar_t wc;
125 	short gotsp;
126 	ssize_t len;
127 	int64_t linect, wordct, charct;
128 	struct stat sbuf;
129 	int fd;
130 
131 	linect = wordct = charct = 0;
132 	stream = NULL;
133 	if (file) {
134 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
135 			warn("%s", file);
136 			rval = 1;
137 			return;
138 		}
139 	} else  {
140 		fd = STDIN_FILENO;
141 	}
142 
143 	if (!doword && !multibyte) {
144 		if (bufsz < MAXBSIZE &&
145 		    (buf = realloc(buf, MAXBSIZE)) == NULL)
146 			err(1, NULL);
147 		/*
148 		 * Line counting is split out because it's a lot
149 		 * faster to get lines than to get words, since
150 		 * the word count requires some logic.
151 		 */
152 		if (doline) {
153 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
154 				charct += len;
155 				for (C = buf; len--; ++C)
156 					if (*C == '\n')
157 						++linect;
158 			}
159 			if (len == -1) {
160 				warn("%s", file);
161 				rval = 1;
162 			}
163 		}
164 		/*
165 		 * If all we need is the number of characters and
166 		 * it's a directory or a regular or linked file, just
167 		 * stat the puppy.  We avoid testing for it not being
168 		 * a special device in case someone adds a new type
169 		 * of inode.
170 		 */
171 		else if (dochar) {
172 			mode_t ifmt;
173 
174 			if (fstat(fd, &sbuf)) {
175 				warn("%s", file);
176 				rval = 1;
177 			} else {
178 				ifmt = sbuf.st_mode & S_IFMT;
179 				if (ifmt == S_IFREG || ifmt == S_IFLNK
180 				    || ifmt == S_IFDIR) {
181 					charct = sbuf.st_size;
182 				} else {
183 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
184 						charct += len;
185 					if (len == -1) {
186 						warn("%s", file);
187 						rval = 1;
188 					}
189 				}
190 			}
191 		}
192 	} else {
193 		if (file == NULL)
194 			stream = stdin;
195 		else if ((stream = fdopen(fd, "r")) == NULL) {
196 			warn("%s", file);
197 			close(fd);
198 			rval = 1;
199 			return;
200 		}
201 
202 		/*
203 		 * Do it the hard way.
204 		 * According to POSIX, a word is a "maximal string of
205 		 * characters delimited by whitespace."  Nothing is said
206 		 * about a character being printing or non-printing.
207 		 */
208 		gotsp = 1;
209 		while ((len = getline(&buf, &bufsz, stream)) > 0) {
210 			if (multibyte) {
211 				for (C = buf; *C != '\0'; C += len) {
212 					++charct;
213 					len = mbtowc(&wc, C, MB_CUR_MAX);
214 					if (len == -1) {
215 						mbtowc(NULL, NULL,
216 						    MB_CUR_MAX);
217 						len = 1;
218 						wc = L' ';
219 					}
220 					if (iswspace(wc)) {
221 						gotsp = 1;
222 						if (wc == L'\n')
223 							++linect;
224 					} else if (gotsp) {
225 						gotsp = 0;
226 						++wordct;
227 					}
228 				}
229 			} else {
230 				charct += len;
231 				for (C = buf; *C != '\0'; ++C) {
232 					if (isspace((unsigned char)*C)) {
233 						gotsp = 1;
234 						if (*C == '\n')
235 							++linect;
236 					} else if (gotsp) {
237 						gotsp = 0;
238 						++wordct;
239 					}
240 				}
241 			}
242 		}
243 		if (ferror(stream)) {
244 			warn("%s", file);
245 			rval = 1;
246 		}
247 	}
248 
249 	print_counts(linect, wordct, charct, file);
250 
251 	/*
252 	 * Don't bother checking doline, doword, or dochar -- speeds
253 	 * up the common case
254 	 */
255 	tlinect += linect;
256 	twordct += wordct;
257 	tcharct += charct;
258 
259 	if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
260 		warn("%s", file);
261 		rval = 1;
262 	}
263 }
264 
265 static void
266 format_and_print(int64_t v)
267 {
268 	if (humanchar) {
269 		char result[FMT_SCALED_STRSIZE];
270 
271 		fmt_scaled((long long)v, result);
272 		printf("%7s", result);
273 	} else {
274 		printf(" %7lld", v);
275 	}
276 }
277 
278 static void
279 print_counts(int64_t lines, int64_t words, int64_t chars, char *name)
280 {
281 	if (doline)
282 		format_and_print(lines);
283 	if (doword)
284 		format_and_print(words);
285 	if (dochar)
286 		format_and_print(chars);
287 
288 	if (name)
289 		printf(" %s\n", name);
290 	else
291 		printf("\n");
292 }
293