xref: /openbsd-src/usr.bin/wc/wc.c (revision 3a3fbb3f2e2521ab7c4a56b7ff7462ebd9095ec5)
1 /*	$OpenBSD: wc.c,v 1.6 2001/11/19 19:02:17 mpech Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1987, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 static char copyright[] =
38 "@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
45 #else
46 static char rcsid[] = "$OpenBSD: wc.c,v 1.6 2001/11/19 19:02:17 mpech Exp $";
47 #endif
48 #endif /* not lint */
49 
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <locale.h>
54 #include <ctype.h>
55 #include <err.h>
56 #include <sys/param.h>
57 #include <sys/stat.h>
58 #include <sys/file.h>
59 #include <unistd.h>
60 
/* Running totals accumulated over every input that has been counted. */
int64_t	tlinect;
int64_t	twordct;
int64_t	tcharct;

/* Output selectors: non-zero when that count has been requested. */
int	doline;
int	doword;
int	dochar;

/* Value handed to exit(); set to 1 whenever an input fails. */
int	rval;

/* Program name used in the usage message; defined outside this file. */
extern char *__progname;

void	print_counts(int64_t, int64_t, int64_t, char *);
void	cnt(char *);
68 
69 int
70 main(argc, argv)
71 	int argc;
72 	char *argv[];
73 {
74 	int ch;
75 
76 	setlocale(LC_ALL, "");
77 
78 	while ((ch = getopt(argc, argv, "lwcm")) != -1)
79 		switch((char)ch) {
80 		case 'l':
81 			doline = 1;
82 			break;
83 		case 'w':
84 			doword = 1;
85 			break;
86 		case 'c':
87 		case 'm':
88 			dochar = 1;
89 			break;
90 		case '?':
91 		default:
92 			(void)fprintf(stderr,
93 			    "usage: %s [-c | -m] [-lw] [file ...]\n",
94 			    __progname);
95 			exit(1);
96 		}
97 	argv += optind;
98 	argc -= optind;
99 
100 	/*
101 	 * wc is unusual in that its flags are on by default, so,
102 	 * if you don't get any arguments, you have to turn them
103 	 * all on.
104 	 */
105 	if (!doline && !doword && !dochar)
106 		doline = doword = dochar = 1;
107 
108 	if (!*argv) {
109 		cnt((char *)NULL);
110 	} else {
111 		int dototal = (argc > 1);
112 
113 		do {
114 			cnt(*argv);
115 		} while(*++argv);
116 
117 		if (dototal)
118 			print_counts(tlinect, twordct, tcharct, "total");
119 	}
120 
121 	exit(rval);
122 }
123 
124 void
125 cnt(file)
126 	char *file;
127 {
128 	u_char *C;
129 	short gotsp;
130 	int len;
131 	int64_t linect, wordct, charct;
132 	struct stat sbuf;
133 	int fd;
134 	u_char buf[MAXBSIZE];
135 
136 	linect = wordct = charct = 0;
137 	if (file) {
138 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
139 			warn("%s", file);
140 			rval = 1;
141 			return;
142 		}
143 	} else  {
144 		fd = STDIN_FILENO;
145 	}
146 
147 	if (!doword) {
148 		/*
149 		 * Line counting is split out because it's a lot
150 		 * faster to get lines than to get words, since
151 		 * the word count requires some logic.
152 		 */
153 		if (doline) {
154 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
155 				charct += len;
156 				for (C = buf; len--; ++C)
157 					if (*C == '\n')
158 						++linect;
159 			}
160 			if (len == -1) {
161 				warn("%s", file);
162 				rval = 1;
163 			}
164 		}
165 		/*
166 		 * If all we need is the number of characters and
167 		 * it's a directory or a regular or linked file, just
168 		 * stat the puppy.  We avoid testing for it not being
169 		 * a special device in case someone adds a new type
170 		 * of inode.
171 		 */
172 		else if (dochar) {
173 			mode_t ifmt;
174 
175 			if (fstat(fd, &sbuf)) {
176 				warn("%s", file);
177 				rval = 1;
178 			} else {
179 				ifmt = sbuf.st_mode & S_IFMT;
180 				if (ifmt == S_IFREG || ifmt == S_IFLNK
181 				    || ifmt == S_IFDIR) {
182 					charct = sbuf.st_size;
183 				} else {
184 					while ((len = read(fd, buf, MAXBSIZE)) > 0)
185 						charct += len;
186 					if (len == -1) {
187 						warn("%s", file);
188 						rval = 1;
189 					}
190 				}
191 			}
192 		}
193 	} else {
194 		/* Do it the hard way... */
195 		gotsp = 1;
196 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
197 			/*
198 			 * This loses in the presence of multi-byte characters.
199 			 * To do it right would require a function to return a
200 			 * character while knowing how many bytes it consumed.
201 			 */
202 			charct += len;
203 			for (C = buf; len--; ++C) {
204 				if (isspace (*C)) {
205 					gotsp = 1;
206 					if (*C == '\n')
207 						++linect;
208 				} else {
209 					/*
210 					 * This line implements the POSIX
211 					 * spec, i.e. a word is a "maximal
212 					 * string of characters delimited by
213 					 * whitespace."  Notice nothing was
214 					 * said about a character being
215 					 * printing or non-printing.
216 					 */
217 					if (gotsp) {
218 						gotsp = 0;
219 						++wordct;
220 					}
221 				}
222 			}
223 		}
224 		if (len == -1) {
225 			warn("%s", file);
226 			rval = 1;
227 		}
228 	}
229 
230 	print_counts(linect, wordct, charct, file ? file : "");
231 
232 	/*
233 	 * Don't bother checking doline, doword, or dochar -- speeds
234          * up the common case
235 	 */
236 	tlinect += linect;
237 	twordct += wordct;
238 	tcharct += charct;
239 
240 	if (close(fd) != 0) {
241 		warn("%s", file);
242 		rval = 1;
243 	}
244 }
245 
246 void
247 print_counts(lines, words, chars, name)
248 	int64_t lines;
249 	int64_t words;
250 	int64_t chars;
251 	char *name;
252 {
253 
254 	if (doline)
255 		(void)printf(" %7lld", (long long)lines);
256 	if (doword)
257 		(void)printf(" %7lld", (long long)words);
258 	if (dochar)
259 		(void)printf(" %7lld", (long long)chars);
260 
261 	(void)printf(" %s\n", name);
262 }
263