1 /* $NetBSD: wc.c,v 1.30 2006/01/04 01:58:05 perry Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\n\ 35 The Regents of the University of California. All rights reserved.\n"); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; 41 #else 42 __RCSID("$NetBSD: wc.c,v 1.30 2006/01/04 01:58:05 perry Exp $"); 43 #endif 44 #endif /* not lint */ 45 46 /* wc line, word and char count */ 47 48 #include <sys/param.h> 49 #include <sys/file.h> 50 #include <sys/stat.h> 51 52 #include <ctype.h> 53 #include <fcntl.h> 54 #include <err.h> 55 #include <errno.h> 56 #include <locale.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 #include <wchar.h> 62 #include <wctype.h> 63 64 #ifdef NO_QUAD 65 typedef u_long wc_count_t; 66 # define WCFMT " %7lu" 67 # define WCCAST unsigned long 68 #else 69 typedef u_quad_t wc_count_t; 70 # define WCFMT " %7llu" 71 # define WCCAST unsigned long long 72 #endif 73 74 static wc_count_t tlinect, twordct, tcharct; 75 static int doline, doword, dobyte, dochar; 76 static int rval = 0; 77 78 static void cnt(char *); 79 static void print_counts(wc_count_t, wc_count_t, wc_count_t, char *); 80 static void usage(void); 81 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, 82 size_t *, const char *); 83 int main(int, char *[]); 84 85 int 86 main(int argc, char *argv[]) 87 { 88 int ch; 89 90 setlocale(LC_ALL, ""); 91 92 while ((ch = getopt(argc, argv, "lwcm")) != -1) 93 switch (ch) { 94 case 'l': 95 doline = 1; 96 break; 97 case 'w': 98 doword = 1; 99 break; 100 case 'm': 101 dochar = 1; 102 dobyte = 0; 103 break; 104 case 'c': 105 dochar = 0; 106 dobyte = 1; 107 break; 108 case '?': 109 default: 110 usage(); 111 } 112 argv += optind; 113 argc -= optind; 114 115 /* Wc's flags are on by default. */ 116 if (doline + doword + dobyte + dochar == 0) 117 doline = doword = dobyte = 1; 118 119 if (!*argv) { 120 cnt(NULL); 121 } else { 122 int dototal = (argc > 1); 123 124 do { 125 cnt(*argv); 126 } while(*++argv); 127 128 if (dototal) 129 print_counts(tlinect, twordct, tcharct, "total"); 130 } 131 132 exit(rval); 133 } 134 135 static size_t 136 do_mb(wchar_t *wc, const char *p, size_t mblen, mbstate_t *st, 137 size_t *cnt, const char *file) 138 { 139 size_t r; 140 size_t c = 0; 141 142 do { 143 r = mbrtowc(wc, p, mblen, st); 144 if (r == (size_t)-1) { 145 warnx("%s: invalid byte sequence", file); 146 rval = 1; 147 148 /* XXX skip 1 byte */ 149 mblen--; 150 p++; 151 memset(st, 0, sizeof(*st)); 152 continue; 153 } else if (r == (size_t)-2) 154 break; 155 else if (r == 0) 156 r = 1; 157 c++; 158 if (wc) 159 wc++; 160 mblen -= r; 161 p += r; 162 } while (mblen > 0); 163 164 *cnt = c; 165 166 return (r); 167 } 168 169 static void 170 cnt(char *file) 171 { 172 u_char buf[MAXBSIZE]; 173 wchar_t wbuf[MAXBSIZE]; 174 struct stat sb; 175 wc_count_t charct, linect, wordct; 176 mbstate_t st; 177 u_char *C; 178 wchar_t *WC; 179 char *name; /* filename or <stdin> */ 180 size_t r = 0; 181 int fd, gotsp, len = 0; 182 183 linect = wordct = charct = 0; 184 if (file) { 185 if ((fd = open(file, O_RDONLY, 0)) < 0) { 186 warn("%s", file); 187 rval = 1; 188 return; 189 } 190 name = file; 191 } else { 192 fd = STDIN_FILENO; 193 name = "<stdin>"; 194 } 195 196 if (dochar || doword) 197 memset(&st, 0, sizeof(st)); 198 199 if (!doword) { 200 /* 201 * line counting is split out because it's a lot 202 * faster to get lines than to get words, since 203 * the word count requires some logic. 204 */ 205 if (doline || dochar) { 206 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 207 if (dochar) { 208 size_t wlen; 209 210 r = do_mb(0, (char *)buf, (size_t)len, 211 &st, &wlen, name); 212 charct += wlen; 213 } else if (dobyte) 214 charct += len; 215 if (doline) 216 for (C = buf; len--; ++C) 217 if (*C == '\n') 218 ++linect; 219 } 220 } 221 222 /* 223 * if all we need is the number of characters and 224 * it's a directory or a regular or linked file, just 225 * stat the puppy. We avoid testing for it not being 226 * a special device in case someone adds a new type 227 * of inode. 228 */ 229 else if (dobyte) { 230 if (fstat(fd, &sb)) { 231 warn("%s", name); 232 rval = 1; 233 } else { 234 if (S_ISREG(sb.st_mode) || 235 S_ISLNK(sb.st_mode) || 236 S_ISDIR(sb.st_mode)) { 237 charct = sb.st_size; 238 } else { 239 while ((len = 240 read(fd, buf, MAXBSIZE)) > 0) 241 charct += len; 242 } 243 } 244 } 245 } else { 246 /* do it the hard way... */ 247 gotsp = 1; 248 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 249 size_t wlen; 250 251 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, 252 name); 253 if (dochar) { 254 charct += wlen; 255 } else if (dobyte) 256 charct += len; 257 for (WC = wbuf; wlen--; ++WC) { 258 if (iswspace(*WC)) { 259 gotsp = 1; 260 if (*WC == L'\n') { 261 ++linect; 262 } 263 } else { 264 /* 265 * This line implements the POSIX 266 * spec, i.e. a word is a "maximal 267 * string of characters delimited by 268 * whitespace." Notice nothing was 269 * said about a character being 270 * printing or non-printing. 271 */ 272 if (gotsp) { 273 gotsp = 0; 274 ++wordct; 275 } 276 } 277 } 278 } 279 } 280 281 if (len == -1) { 282 warn("%s", name); 283 rval = 1; 284 } 285 if (dochar && r == (size_t)-2) { 286 warnx("%s: incomplete multibyte character", name); 287 rval = 1; 288 } 289 290 print_counts(linect, wordct, charct, file); 291 292 /* 293 * don't bother checkint doline, doword, or dobyte --- speeds 294 * up the common case 295 */ 296 tlinect += linect; 297 twordct += wordct; 298 tcharct += charct; 299 300 if (close(fd)) { 301 warn("%s", name); 302 rval = 1; 303 } 304 } 305 306 static void 307 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, char *name) 308 { 309 310 if (doline) 311 printf(WCFMT, (WCCAST)lines); 312 if (doword) 313 printf(WCFMT, (WCCAST)words); 314 if (dobyte || dochar) 315 printf(WCFMT, (WCCAST)chars); 316 317 if (name) 318 printf(" %s\n", name); 319 else 320 printf("\n"); 321 } 322 323 static void 324 usage(void) 325 { 326 327 (void)fprintf(stderr, "usage: wc [-c | -m] [-lw] [file ...]\n"); 328 exit(1); 329 } 330