xref: /netbsd-src/usr.bin/wc/wc.c (revision 61f282557f0bc41c0b762c629a2f4c14be8b7591)
1 /*
2  * Copyright (c) 1980, 1987 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
/*
 * Identification strings: the copyright notice and the SCCS version tag.
 * Both definitions are compiled out when `lint' is defined.
 */
#ifndef lint
char copyright[] =
    "@(#) Copyright (c) 1980, 1987 Regents of the University of California.\n"
    " All rights reserved.\n";
static char sccsid[] = "@(#)wc.c	5.7 (Berkeley) 3/2/91";
#endif /* not lint */
43 
44 /* wc line, word and char count */
45 
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
50 
/* Character values referred to by name in the counting code. */
#define DEL	'\177'			/* delete */
#define NL	'\n'			/* newline */
#define SPACE	' '			/* space */
#define TAB	'\t'			/* horizontal tab */
55 
/* Running totals over every input counted so far. */
static long	tlinect;		/* lines */
static long	twordct;		/* words */
static long	tcharct;		/* characters */

/* Non-zero when the corresponding count is to be printed. */
static int	doline;
static int	doword;
static int	dochar;
58 
59 main(argc, argv)
60 	int argc;
61 	char **argv;
62 {
63 	extern int optind;
64 	register int ch;
65 	int total;
66 
67 	/*
68 	 * wc is unusual in that its flags are on by default, so,
69 	 * if you don't get any arguments, you have to turn them
70 	 * all on.
71 	 */
72 	if (argc > 1 && argv[1][0] == '-' && argv[1][1]) {
73 		while ((ch = getopt(argc, argv, "lwc")) != EOF)
74 			switch((char)ch) {
75 			case 'l':
76 				doline = 1;
77 				break;
78 			case 'w':
79 				doword = 1;
80 				break;
81 			case 'c':
82 				dochar = 1;
83 				break;
84 			case '?':
85 			default:
86 				fputs("usage: wc [-lwc] [files]\n", stderr);
87 				exit(1);
88 			}
89 		argv += optind;
90 		argc -= optind;
91 	}
92 	else {
93 		++argv;
94 		--argc;
95 		doline = doword = dochar = 1;
96 	}
97 
98 	total = 0;
99 	if (!*argv) {
100 		cnt((char *)NULL);
101 		putchar('\n');
102 	}
103 	else do {
104 		cnt(*argv);
105 		printf(" %s\n", *argv);
106 		++total;
107 	} while(*++argv);
108 
109 	if (total > 1) {
110 		if (doline)
111 			printf(" %7ld", tlinect);
112 		if (doword)
113 			printf(" %7ld", twordct);
114 		if (dochar)
115 			printf(" %7ld", tcharct);
116 		puts(" total");
117 	}
118 	exit(0);
119 }
120 
121 cnt(file)
122 	char *file;
123 {
124 	register u_char *C;
125 	register short gotsp;
126 	register int len;
127 	register long linect, wordct, charct;
128 	struct stat sbuf;
129 	int fd;
130 	u_char buf[MAXBSIZE];
131 
132 	linect = wordct = charct = 0;
133 	if (file) {
134 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
135 			perror(file);
136 			exit(1);
137 		}
138 		if (!doword) {
139 			/*
140 			 * line counting is split out because it's a lot
141 			 * faster to get lines than to get words, since
142 			 * the word count requires some logic.
143 			 */
144 			if (doline) {
145 				while(len = read(fd, buf, MAXBSIZE)) {
146 					if (len == -1) {
147 						perror(file);
148 						exit(1);
149 					}
150 					charct += len;
151 					for (C = buf; len--; ++C)
152 						if (*C == '\n')
153 							++linect;
154 				}
155 				tlinect += linect;
156 				printf(" %7ld", linect);
157 				if (dochar) {
158 					tcharct += charct;
159 					printf(" %7ld", charct);
160 				}
161 				close(fd);
162 				return;
163 			}
164 			/*
165 			 * if all we need is the number of characters and
166 			 * it's a directory or a regular or linked file, just
167 			 * stat the puppy.  We avoid testing for it not being
168 			 * a special device in case someone adds a new type
169 			 * of inode.
170 			 */
171 			if (dochar) {
172 				int ifmt;
173 
174 				if (fstat(fd, &sbuf)) {
175 					perror(file);
176 					exit(1);
177 				}
178 
179 				ifmt = sbuf.st_mode & S_IFMT;
180 				if (ifmt == S_IFREG || ifmt == S_IFLNK
181 					|| ifmt == S_IFDIR) {
182 					printf(" %7ld", sbuf.st_size);
183 					tcharct += sbuf.st_size;
184 					close(fd);
185 					return;
186 				}
187 			}
188 		}
189 	}
190 	else
191 		fd = 0;
192 	/* do it the hard way... */
193 	for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
194 		if (len == -1) {
195 			perror(file);
196 			exit(1);
197 		}
198 		charct += len;
199 		for (C = buf; len--; ++C)
200 			switch(*C) {
201 				case NL:
202 					++linect;
203 				case TAB:
204 				case SPACE:
205 					gotsp = 1;
206 					continue;
207 				default:
208 #ifdef notdef
209 					/*
210 					 * This line of code implements the
211 					 * original V7 wc algorithm, i.e.
212 					 * a non-printing character doesn't
213 					 * toggle the "word" count, so that
214 					 * "  ^D^F  " counts as 6 spaces,
215 					 * while "foo^D^Fbar" counts as 8
216 					 * characters.
217 					 *
218 					 * test order is important -- gotsp
219 					 * will normally be NO, so test it
220 					 * first
221 					 */
222 					if (gotsp && *C > SPACE && *C < DEL) {
223 #endif
224 					/*
225 					 * This line implements the manual
226 					 * page, i.e. a word is a "maximal
227 					 * string of characters delimited by
228 					 * spaces, tabs or newlines."  Notice
229 					 * nothing was said about a character
230 					 * being printing or non-printing.
231 					 */
232 					if (gotsp) {
233 						gotsp = 0;
234 						++wordct;
235 					}
236 			}
237 	}
238 	if (doline) {
239 		tlinect += linect;
240 		printf(" %7ld", linect);
241 	}
242 	if (doword) {
243 		twordct += wordct;
244 		printf(" %7ld", wordct);
245 	}
246 	if (dochar) {
247 		tcharct += charct;
248 		printf(" %7ld", charct);
249 	}
250 	close(fd);
251 }
252