1 /* $NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\ 35 The Regents of the University of California. All rights reserved."); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; 41 #else 42 __RCSID("$NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $"); 43 #endif 44 #endif /* not lint */ 45 46 /* wc line, word, char count and optionally longest line. */ 47 48 #include <sys/param.h> 49 #include <sys/file.h> 50 #include <sys/stat.h> 51 52 #include <ctype.h> 53 #include <fcntl.h> 54 #include <err.h> 55 #include <errno.h> 56 #include <locale.h> 57 #include <stdbool.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <unistd.h> 62 #include <wchar.h> 63 #include <wctype.h> 64 65 #ifdef NO_QUAD 66 typedef u_long wc_count_t; 67 # define WCFMT " %7lu" 68 # define WCCAST unsigned long 69 #else 70 typedef u_quad_t wc_count_t; 71 # define WCFMT " %7llu" 72 # define WCCAST unsigned long long 73 #endif 74 75 static wc_count_t tlinect, twordct, tcharct, tlongest; 76 static bool doline, doword, dobyte, dochar, dolongest; 77 static int rval = 0; 78 79 static void cnt(const char *); 80 static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t, 81 const char *); 82 static void usage(void); 83 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, 84 size_t *, const char *); 85 int main(int, char *[]); 86 87 int 88 main(int argc, char *argv[]) 89 { 90 int ch; 91 92 setlocale(LC_ALL, ""); 93 94 while ((ch = getopt(argc, argv, "lwcmL")) != -1) 95 switch (ch) { 96 case 'l': 97 doline = true; 98 break; 99 case 'w': 100 doword = true; 101 break; 102 case 'm': 103 dochar = true; 104 dobyte = 0; 105 break; 106 case 'c': 107 dochar = 0; 108 dobyte = true; 109 break; 110 case 'L': 111 dolongest = true; 112 break; 113 case '?': 114 default: 115 usage(); 116 } 117 argv += optind; 118 argc -= optind; 119 120 /* Wc's flags are on by default. */ 121 if (!(doline || doword || dobyte || dochar || dolongest)) 122 doline = doword = dobyte = true; 123 124 if (*argv == NULL) { 125 cnt(NULL); 126 } else { 127 bool dototal = (argc > 1); 128 129 do { 130 cnt(*argv); 131 } while(*++argv); 132 133 if (dototal) { 134 print_counts(tlinect, twordct, tcharct, tlongest, 135 "total"); 136 } 137 } 138 139 exit(rval); 140 } 141 142 static size_t 143 do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st, 144 size_t *retcnt, const char *file) 145 { 146 size_t r; 147 size_t c = 0; 148 149 do { 150 r = mbrtowc(wc, p, len, st); 151 if (r == (size_t)-1) { 152 warnx("%s: invalid byte sequence", file); 153 rval = 1; 154 155 /* XXX skip 1 byte */ 156 len--; 157 p++; 158 memset(st, 0, sizeof(*st)); 159 continue; 160 } else if (r == (size_t)-2) 161 break; 162 else if (r == 0) 163 r = 1; 164 c++; 165 if (wc) 166 wc++; 167 len -= r; 168 p += r; 169 } while (len > 0); 170 171 *retcnt = c; 172 173 return (r); 174 } 175 176 static void 177 cnt(const char *file) 178 { 179 u_char buf[MAXBSIZE]; 180 wchar_t wbuf[MAXBSIZE]; 181 struct stat sb; 182 wc_count_t charct, linect, wordct, longest; 183 mbstate_t st; 184 u_char *C; 185 wchar_t *WC; 186 const char *name; /* filename or <stdin> */ 187 size_t r = 0; 188 int fd, len = 0; 189 190 linect = wordct = charct = longest = 0; 191 if (file != NULL) { 192 if ((fd = open(file, O_RDONLY, 0)) < 0) { 193 warn("%s", file); 194 rval = 1; 195 return; 196 } 197 name = file; 198 } else { 199 fd = STDIN_FILENO; 200 name = "<stdin>"; 201 } 202 203 if (dochar || doword || dolongest) 204 (void)memset(&st, 0, sizeof(st)); 205 206 if (!(doword || dolongest)) { 207 /* 208 * line counting is split out because it's a lot 209 * faster to get lines than to get words, since 210 * the word count requires some logic. 211 */ 212 if (doline || dochar) { 213 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 214 if (dochar) { 215 size_t wlen; 216 217 r = do_mb(0, (char *)buf, (size_t)len, 218 &st, &wlen, name); 219 charct += wlen; 220 } else if (dobyte) 221 charct += len; 222 if (doline) { 223 for (C = buf; len--; ++C) { 224 if (*C == '\n') 225 ++linect; 226 } 227 } 228 } 229 } 230 231 /* 232 * if all we need is the number of characters and 233 * it's a directory or a regular or linked file, just 234 * stat the puppy. We avoid testing for it not being 235 * a special device in case someone adds a new type 236 * of inode. 237 */ 238 else if (dobyte) { 239 if (fstat(fd, &sb)) { 240 warn("%s", name); 241 rval = 1; 242 } else { 243 if (S_ISREG(sb.st_mode) || 244 S_ISLNK(sb.st_mode) || 245 S_ISDIR(sb.st_mode)) { 246 charct = sb.st_size; 247 } else { 248 while ((len = 249 read(fd, buf, MAXBSIZE)) > 0) 250 charct += len; 251 } 252 } 253 } 254 } else { 255 /* do it the hard way... */ 256 wc_count_t linelen; 257 bool gotsp; 258 259 linelen = 0; 260 gotsp = true; 261 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 262 size_t wlen; 263 264 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, 265 name); 266 if (dochar) { 267 charct += wlen; 268 } else if (dobyte) { 269 charct += len; 270 } 271 for (WC = wbuf; wlen--; ++WC) { 272 if (iswspace(*WC)) { 273 gotsp = true; 274 if (*WC == L'\n') { 275 ++linect; 276 if (linelen > longest) 277 longest = linelen; 278 linelen = 0; 279 } else { 280 linelen++; 281 } 282 } else { 283 /* 284 * This line implements the POSIX 285 * spec, i.e. a word is a "maximal 286 * string of characters delimited by 287 * whitespace." Notice nothing was 288 * said about a character being 289 * printing or non-printing. 290 */ 291 if (gotsp) { 292 gotsp = false; 293 ++wordct; 294 } 295 296 linelen++; 297 } 298 } 299 } 300 } 301 302 if (len == -1) { 303 warn("%s", name); 304 rval = 1; 305 } 306 if (dochar && r == (size_t)-2) { 307 warnx("%s: incomplete multibyte character", name); 308 rval = 1; 309 } 310 311 print_counts(linect, wordct, charct, longest, file); 312 313 /* 314 * don't bother checkint doline, doword, or dobyte --- speeds 315 * up the common case 316 */ 317 tlinect += linect; 318 twordct += wordct; 319 tcharct += charct; 320 if (dolongest && longest > tlongest) 321 tlongest = longest; 322 323 if (close(fd)) { 324 warn("%s", name); 325 rval = 1; 326 } 327 } 328 329 static void 330 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, 331 wc_count_t longest, const char *name) 332 { 333 334 if (doline) 335 (void)printf(WCFMT, (WCCAST)lines); 336 if (doword) 337 (void)printf(WCFMT, (WCCAST)words); 338 if (dobyte || dochar) 339 (void)printf(WCFMT, (WCCAST)chars); 340 if (dolongest) 341 (void)printf(WCFMT, (WCCAST)longest); 342 343 if (name != NULL) 344 (void)printf(" %s\n", name); 345 else 346 (void)putchar('\n'); 347 } 348 349 static void 350 usage(void) 351 { 352 353 (void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n"); 354 exit(1); 355 } 356