1 /* $NetBSD: wc.c,v 1.37 2024/01/14 17:39:19 christos Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
41 #else
42 __RCSID("$NetBSD: wc.c,v 1.37 2024/01/14 17:39:19 christos Exp $");
43 #endif
44 #endif /* not lint */
45
46 /* wc line, word, char count and optionally longest line. */
47
48 #include <sys/param.h>
49 #include <sys/file.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <fcntl.h>
54 #include <err.h>
55 #include <errno.h>
56 #include <locale.h>
57 #include <stdbool.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 #include <wchar.h>
63 #include <wctype.h>
64
65 #ifdef NO_QUAD
66 typedef u_long wc_count_t;
67 # define WCFMT " %7lu"
68 # define WCCAST unsigned long
69 #else
70 typedef u_quad_t wc_count_t;
71 # define WCFMT " %7llu"
72 # define WCCAST unsigned long long
73 #endif
74
75 static wc_count_t tlinect, twordct, tcharct, tlongest;
76 static bool doline, doword, dobyte, dochar, dolongest;
77 static int rval = 0;
78
79 static void cnt(const char *);
80 static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
81 const char *);
82 __dead static void usage(void);
83 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *,
84 size_t *, const char *);
85
86 int
main(int argc,char * argv[])87 main(int argc, char *argv[])
88 {
89 int ch;
90
91 setlocale(LC_ALL, "");
92
93 while ((ch = getopt(argc, argv, "lwcmL")) != -1)
94 switch (ch) {
95 case 'l':
96 doline = true;
97 break;
98 case 'w':
99 doword = true;
100 break;
101 case 'm':
102 dochar = true;
103 dobyte = 0;
104 break;
105 case 'c':
106 dochar = 0;
107 dobyte = true;
108 break;
109 case 'L':
110 dolongest = true;
111 break;
112 case '?':
113 default:
114 usage();
115 }
116 argv += optind;
117 argc -= optind;
118
119 /* Wc's flags are on by default. */
120 if (!(doline || doword || dobyte || dochar || dolongest))
121 doline = doword = dobyte = true;
122
123 if (*argv == NULL) {
124 cnt(NULL);
125 } else {
126 bool dototal = (argc > 1);
127
128 do {
129 cnt(*argv);
130 } while(*++argv);
131
132 if (dototal) {
133 print_counts(tlinect, twordct, tcharct, tlongest,
134 "total");
135 }
136 }
137
138 exit(rval);
139 }
140
141 static size_t
do_mb(wchar_t * wc,const char * p,size_t len,mbstate_t * st,size_t * retcnt,const char * file)142 do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
143 size_t *retcnt, const char *file)
144 {
145 size_t r;
146 size_t c = 0;
147
148 do {
149 r = mbrtowc(wc, p, len, st);
150 if (r == (size_t)-1) {
151 warnx("%s: invalid byte sequence", file);
152 rval = 1;
153
154 /* XXX skip 1 byte */
155 len--;
156 p++;
157 memset(st, 0, sizeof(*st));
158 continue;
159 } else if (r == (size_t)-2)
160 break;
161 else if (r == 0)
162 r = 1;
163 c++;
164 if (wc)
165 wc++;
166 len -= r;
167 p += r;
168 } while (len > 0);
169
170 *retcnt = c;
171
172 return (r);
173 }
174
175 static void
cnt(const char * file)176 cnt(const char *file)
177 {
178 u_char buf[MAXBSIZE];
179 wchar_t wbuf[MAXBSIZE];
180 struct stat sb;
181 wc_count_t charct, linect, wordct, longest;
182 mbstate_t st;
183 u_char *C;
184 wchar_t *WC;
185 const char *name; /* filename or <stdin> */
186 size_t r = 0;
187 int fd, len = 0;
188
189 linect = wordct = charct = longest = 0;
190 if (file != NULL) {
191 if ((fd = open(file, O_RDONLY, 0)) < 0) {
192 warn("%s", file);
193 rval = 1;
194 return;
195 }
196 name = file;
197 } else {
198 fd = STDIN_FILENO;
199 name = "<stdin>";
200 }
201
202 if (dochar || doword || dolongest)
203 (void)memset(&st, 0, sizeof(st));
204
205 if (!(doword || dolongest)) {
206 /*
207 * line counting is split out because it's a lot
208 * faster to get lines than to get words, since
209 * the word count requires some logic.
210 */
211 if (doline || dochar) {
212 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
213 if (dochar) {
214 size_t wlen;
215
216 r = do_mb(0, (char *)buf, (size_t)len,
217 &st, &wlen, name);
218 charct += wlen;
219 } else if (dobyte)
220 charct += len;
221 if (doline) {
222 for (C = buf; len--; ++C) {
223 if (*C == '\n')
224 ++linect;
225 }
226 }
227 }
228 }
229
230 /*
231 * if all we need is the number of characters and
232 * it's a directory or a regular or linked file, just
233 * stat the puppy. We avoid testing for it not being
234 * a special device in case someone adds a new type
235 * of inode.
236 */
237 else if (dobyte) {
238 if (fstat(fd, &sb)) {
239 warn("%s", name);
240 rval = 1;
241 } else {
242 if (sb.st_size != 0 &&
243 (S_ISREG(sb.st_mode) ||
244 S_ISLNK(sb.st_mode) ||
245 S_ISDIR(sb.st_mode))) {
246 charct = sb.st_size;
247 } else {
248 while ((len =
249 read(fd, buf, MAXBSIZE)) > 0)
250 charct += len;
251 }
252 }
253 }
254 } else {
255 /* do it the hard way... */
256 wc_count_t linelen;
257 bool gotsp;
258
259 linelen = 0;
260 gotsp = true;
261 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
262 size_t wlen;
263
264 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
265 name);
266 if (dochar) {
267 charct += wlen;
268 } else if (dobyte) {
269 charct += len;
270 }
271 for (WC = wbuf; wlen--; ++WC) {
272 if (iswspace(*WC)) {
273 gotsp = true;
274 if (*WC == L'\n') {
275 ++linect;
276 if (linelen > longest)
277 longest = linelen;
278 linelen = 0;
279 } else {
280 linelen++;
281 }
282 } else {
283 /*
284 * This line implements the POSIX
285 * spec, i.e. a word is a "maximal
286 * string of characters delimited by
287 * whitespace." Notice nothing was
288 * said about a character being
289 * printing or non-printing.
290 */
291 if (gotsp) {
292 gotsp = false;
293 ++wordct;
294 }
295
296 linelen++;
297 }
298 }
299 }
300 }
301
302 if (len == -1) {
303 warn("%s", name);
304 rval = 1;
305 }
306 if (dochar && r == (size_t)-2) {
307 warnx("%s: incomplete multibyte character", name);
308 rval = 1;
309 }
310
311 print_counts(linect, wordct, charct, longest, file);
312
313 /*
314 * don't bother checkint doline, doword, or dobyte --- speeds
315 * up the common case
316 */
317 tlinect += linect;
318 twordct += wordct;
319 tcharct += charct;
320 if (dolongest && longest > tlongest)
321 tlongest = longest;
322
323 if (close(fd)) {
324 warn("%s", name);
325 rval = 1;
326 }
327 }
328
329 static void
print_counts(wc_count_t lines,wc_count_t words,wc_count_t chars,wc_count_t longest,const char * name)330 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
331 wc_count_t longest, const char *name)
332 {
333
334 if (doline)
335 (void)printf(WCFMT, (WCCAST)lines);
336 if (doword)
337 (void)printf(WCFMT, (WCCAST)words);
338 if (dobyte || dochar)
339 (void)printf(WCFMT, (WCCAST)chars);
340 if (dolongest)
341 (void)printf(WCFMT, (WCCAST)longest);
342
343 if (name != NULL)
344 (void)printf(" %s\n", name);
345 else
346 (void)putchar('\n');
347 }
348
/*
 * Write the command synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: wc [-c | -m] [-Llw] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
356