xref: /openbsd-src/usr.bin/wc/wc.c (revision ef70a379631ec7e97481e1d0e500e21496e6c4aa)
1 /*	$OpenBSD: wc.c,v 1.28 2021/11/16 23:34:24 cheloha Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>	/* MAXBSIZE */
33 #include <sys/stat.h>
34 
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <locale.h>
39 #include <ctype.h>
40 #include <err.h>
41 #include <unistd.h>
42 #include <util.h>
43 #include <wchar.h>
44 #include <wctype.h>
45 
/* Running totals over every input processed; printed as the "total" line. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Output selectors set from the command line (-l, -w, -c/-m). */
int	doline;
int	doword;
int	dochar;
/* -h: print counts through fmt_scaled() instead of as plain numbers. */
int	humanchar;
/* -m in a locale where MB_CUR_MAX > 1: count characters, not bytes. */
int	multibyte;

/* Exit status of the program; set to 1 whenever an input fails. */
int	rval;
extern char *__progname;

static void cnt(const char *);
static void format_and_print(int64_t);
static void print_counts(int64_t, int64_t, int64_t, const char *);
54 
55 int
56 main(int argc, char *argv[])
57 {
58 	int ch;
59 
60 	setlocale(LC_CTYPE, "");
61 
62 	if (pledge("stdio rpath", NULL) == -1)
63 		err(1, "pledge");
64 
65 	while ((ch = getopt(argc, argv, "lwchm")) != -1)
66 		switch(ch) {
67 		case 'l':
68 			doline = 1;
69 			break;
70 		case 'w':
71 			doword = 1;
72 			break;
73 		case 'm':
74 			if (MB_CUR_MAX > 1)
75 				multibyte = 1;
76 			/* FALLTHROUGH */
77 		case 'c':
78 			dochar = 1;
79 			break;
80 		case 'h':
81 			humanchar = 1;
82 			break;
83 		case '?':
84 		default:
85 			fprintf(stderr,
86 			    "usage: %s [-c | -m] [-hlw] [file ...]\n",
87 			    __progname);
88 			return 1;
89 		}
90 	argv += optind;
91 	argc -= optind;
92 
93 	/*
94 	 * wc is unusual in that its flags are on by default, so,
95 	 * if you don't get any arguments, you have to turn them
96 	 * all on.
97 	 */
98 	if (!doline && !doword && !dochar)
99 		doline = doword = dochar = 1;
100 
101 	if (!*argv) {
102 		cnt(NULL);
103 	} else {
104 		int dototal = (argc > 1);
105 
106 		do {
107 			cnt(*argv);
108 		} while(*++argv);
109 
110 		if (dototal)
111 			print_counts(tlinect, twordct, tcharct, "total");
112 	}
113 
114 	return rval;
115 }
116 
117 static void
118 cnt(const char *path)
119 {
120 	static char *buf;
121 	static size_t bufsz;
122 
123 	FILE *stream;
124 	const char *file;
125 	char *C;
126 	wchar_t wc;
127 	short gotsp;
128 	ssize_t len;
129 	int64_t linect, wordct, charct;
130 	struct stat sbuf;
131 	int fd;
132 
133 	linect = wordct = charct = 0;
134 	stream = NULL;
135 	if (path != NULL) {
136 		file = path;
137 		if ((fd = open(file, O_RDONLY)) == -1) {
138 			warn("%s", file);
139 			rval = 1;
140 			return;
141 		}
142 	} else  {
143 		file = "(stdin)";
144 		fd = STDIN_FILENO;
145 	}
146 
147 	if (!doword && !multibyte) {
148 		if (bufsz < MAXBSIZE &&
149 		    (buf = realloc(buf, MAXBSIZE)) == NULL)
150 			err(1, NULL);
151 		/*
152 		 * Line counting is split out because it's a lot
153 		 * faster to get lines than to get words, since
154 		 * the word count requires some logic.
155 		 */
156 		if (doline) {
157 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
158 				charct += len;
159 				for (C = buf; len--; ++C)
160 					if (*C == '\n')
161 						++linect;
162 			}
163 			if (len == -1) {
164 				warn("%s", file);
165 				rval = 1;
166 			}
167 		}
168 		/*
169 		 * If all we need is the number of characters and
170 		 * it's a directory or a regular or linked file, just
171 		 * stat the puppy.  We avoid testing for it not being
172 		 * a special device in case someone adds a new type
173 		 * of inode.
174 		 */
175 		else if (dochar) {
176 			mode_t ifmt;
177 
178 			if (fstat(fd, &sbuf)) {
179 				warn("%s", file);
180 				rval = 1;
181 			} else {
182 				ifmt = sbuf.st_mode & S_IFMT;
183 				if (ifmt == S_IFREG || ifmt == S_IFLNK
184 				    || ifmt == S_IFDIR) {
185 					charct = sbuf.st_size;
186 				} else {
187 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
188 						charct += len;
189 					if (len == -1) {
190 						warn("%s", file);
191 						rval = 1;
192 					}
193 				}
194 			}
195 		}
196 	} else {
197 		if (path == NULL)
198 			stream = stdin;
199 		else if ((stream = fdopen(fd, "r")) == NULL) {
200 			warn("%s", file);
201 			close(fd);
202 			rval = 1;
203 			return;
204 		}
205 
206 		/*
207 		 * Do it the hard way.
208 		 * According to POSIX, a word is a "maximal string of
209 		 * characters delimited by whitespace."  Nothing is said
210 		 * about a character being printing or non-printing.
211 		 */
212 		gotsp = 1;
213 		while ((len = getline(&buf, &bufsz, stream)) > 0) {
214 			if (multibyte) {
215 				const char *end = buf + len;
216 				for (C = buf; C < end; C += len) {
217 					++charct;
218 					len = mbtowc(&wc, C, MB_CUR_MAX);
219 					if (len == -1) {
220 						mbtowc(NULL, NULL,
221 						    MB_CUR_MAX);
222 						len = 1;
223 						wc = L'?';
224 					} else if (len == 0)
225 						len = 1;
226 					if (iswspace(wc)) {
227 						gotsp = 1;
228 						if (wc == L'\n')
229 							++linect;
230 					} else if (gotsp) {
231 						gotsp = 0;
232 						++wordct;
233 					}
234 				}
235 			} else {
236 				charct += len;
237 				for (C = buf; len--; ++C) {
238 					if (isspace((unsigned char)*C)) {
239 						gotsp = 1;
240 						if (*C == '\n')
241 							++linect;
242 					} else if (gotsp) {
243 						gotsp = 0;
244 						++wordct;
245 					}
246 				}
247 			}
248 		}
249 		if (ferror(stream)) {
250 			warn("%s", file);
251 			rval = 1;
252 		}
253 	}
254 
255 	print_counts(linect, wordct, charct, path);
256 
257 	/*
258 	 * Don't bother checking doline, doword, or dochar -- speeds
259 	 * up the common case
260 	 */
261 	tlinect += linect;
262 	twordct += wordct;
263 	tcharct += charct;
264 
265 	if ((stream == NULL ? close(fd) : fclose(stream)) != 0) {
266 		warn("%s", file);
267 		rval = 1;
268 	}
269 }
270 
271 static void
272 format_and_print(int64_t v)
273 {
274 	if (humanchar) {
275 		char result[FMT_SCALED_STRSIZE];
276 
277 		fmt_scaled((long long)v, result);
278 		printf("%7s", result);
279 	} else {
280 		printf(" %7lld", v);
281 	}
282 }
283 
284 static void
285 print_counts(int64_t lines, int64_t words, int64_t chars, const char *name)
286 {
287 	if (doline)
288 		format_and_print(lines);
289 	if (doword)
290 		format_and_print(words);
291 	if (dochar)
292 		format_and_print(chars);
293 
294 	if (name)
295 		printf(" %s\n", name);
296 	else
297 		printf("\n");
298 }
299