1 /* $NetBSD: wc.c,v 1.21 2001/10/19 06:09:56 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\n\ 39 The Regents of the University of California. All rights reserved.\n"); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; 45 #else 46 __RCSID("$NetBSD: wc.c,v 1.21 2001/10/19 06:09:56 yamt Exp $"); 47 #endif 48 #endif /* not lint */ 49 50 /* wc line, word and char count */ 51 52 #include <sys/param.h> 53 #include <sys/stat.h> 54 #include <sys/types.h> 55 56 #include <fcntl.h> 57 #include <unistd.h> 58 #include <errno.h> 59 #include <stdio.h> 60 61 #include <stdlib.h> 62 #include <string.h> 63 #include <locale.h> 64 #include <ctype.h> 65 #include <errno.h> 66 #include <sys/param.h> 67 #include <sys/stat.h> 68 #include <sys/file.h> 69 #include <unistd.h> 70 #include <err.h> 71 #include <wchar.h> 72 #include <wctype.h> 73 74 #ifdef NO_QUAD 75 typedef u_long wc_count_t; 76 # define WCFMT " %7lu" 77 # define WCCAST unsigned long 78 #else 79 typedef u_quad_t wc_count_t; 80 # define WCFMT " %7llu" 81 # define WCCAST unsigned long long 82 #endif 83 84 static wc_count_t tlinect, twordct, tcharct; 85 static int doline, doword, dobyte, dochar; 86 static int rval = 0; 87 88 static void cnt __P((char *)); 89 static void print_counts __P((wc_count_t, wc_count_t, wc_count_t, char *)); 90 static void usage __P((void)); 91 static size_t do_mb __P((wchar_t *, const char *, size_t, mbstate_t *, 92 size_t *, const char *)); 93 int main __P((int, char *[])); 94 95 int 96 main(argc, argv) 97 int argc; 98 char *argv[]; 99 { 100 int ch; 101 102 setlocale(LC_ALL, ""); 103 104 while ((ch = getopt(argc, argv, "lwcm")) != -1) 105 switch((char)ch) { 106 case 'l': 107 doline = 1; 108 break; 109 case 'w': 110 doword = 1; 111 break; 112 case 'm': 113 dochar = 1; 114 dobyte = 0; 115 break; 116 case 'c': 117 dochar = 0; 118 dobyte = 1; 119 break; 120 case '?': 121 default: 122 usage(); 123 } 124 argv += optind; 125 argc -= optind; 126 127 /* Wc's flags are on by default. */ 128 if (doline + doword + dobyte + dochar == 0) 129 doline = doword = dobyte = 1; 130 131 if (!*argv) { 132 cnt(NULL); 133 } else { 134 int dototal = (argc > 1); 135 136 do { 137 cnt(*argv); 138 } while(*++argv); 139 140 if (dototal) 141 print_counts(tlinect, twordct, tcharct, "total"); 142 } 143 144 exit(rval); 145 } 146 147 static size_t 148 do_mb(wc, p, mblen, st, cnt, file) 149 wchar_t *wc; 150 const char *p; 151 size_t mblen; 152 mbstate_t *st; 153 size_t *cnt; 154 const char *file; 155 { 156 size_t r; 157 size_t c = 0; 158 159 do { 160 r = mbrtowc(wc, p, mblen, st); 161 if (r == (size_t)-1) { 162 warnx("%s: invalid byte sequence", file); 163 rval = 1; 164 165 /* XXX skip 1 byte */ 166 mblen --; 167 p ++; 168 memset(st, 0, sizeof(*st)); 169 } 170 else if (r == (size_t)-2) 171 break; 172 else if (r == 0) 173 r = 1; 174 c ++; 175 if (wc) 176 wc ++; 177 mblen -= r; 178 p += r; 179 } while (mblen > 0); 180 181 *cnt = c; 182 183 return r; 184 } 185 186 static void 187 cnt(file) 188 char *file; 189 { 190 u_char *C; 191 wchar_t *WC; 192 short gotsp; 193 int len = 0; 194 wc_count_t linect, wordct, charct; 195 struct stat sb; 196 int fd; 197 u_char buf[MAXBSIZE]; 198 wchar_t wbuf[MAXBSIZE]; 199 size_t r = 0; 200 mbstate_t st; 201 202 linect = wordct = charct = 0; 203 if (file) { 204 if ((fd = open(file, O_RDONLY, 0)) < 0) { 205 warn("%s", file); 206 rval = 1; 207 return; 208 } 209 } else { 210 fd = STDIN_FILENO; 211 } 212 213 if (dochar || doword) 214 memset(&st, 0, sizeof(st)); 215 216 if (!doword) { 217 /* 218 * line counting is split out because it's a lot 219 * faster to get lines than to get words, since 220 * the word count requires some logic. 221 */ 222 if (doline || dochar) { 223 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 224 if (dochar) { 225 size_t wlen; 226 227 r = do_mb(0, (char *)buf, (size_t)len, &st, &wlen, file); 228 charct += wlen; 229 } 230 else if (dobyte) 231 charct += len; 232 if (doline) 233 for (C = buf; len--; ++C) 234 if (*C == '\n') 235 ++linect; 236 } 237 } 238 239 /* 240 * if all we need is the number of characters and 241 * it's a directory or a regular or linked file, just 242 * stat the puppy. We avoid testing for it not being 243 * a special device in case someone adds a new type 244 * of inode. 245 */ 246 else if (dobyte) { 247 if (fstat(fd, &sb)) { 248 warn("%s", file); 249 rval = 1; 250 } else { 251 if (S_ISREG(sb.st_mode) || 252 S_ISLNK(sb.st_mode) || 253 S_ISDIR(sb.st_mode)) { 254 charct = sb.st_size; 255 } else { 256 while ((len = read(fd, buf, MAXBSIZE)) > 0) 257 charct += len; 258 } 259 } 260 } 261 } 262 else 263 { 264 /* do it the hard way... */ 265 gotsp = 1; 266 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 267 size_t wlen; 268 269 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, file); 270 if (dochar) { 271 charct += wlen; 272 } 273 else if (dobyte) 274 charct += len; 275 for (WC = wbuf; wlen--; ++WC) { 276 if (iswspace(*WC)) { 277 gotsp = 1; 278 if (*WC == L'\n') { 279 ++linect; 280 } 281 } else { 282 /* 283 * This line implements the POSIX 284 * spec, i.e. a word is a "maximal 285 * string of characters delimited by 286 * whitespace." Notice nothing was 287 * said about a character being 288 * printing or non-printing. 289 */ 290 if (gotsp) { 291 gotsp = 0; 292 ++wordct; 293 } 294 } 295 } 296 } 297 } 298 299 if (len == -1) { 300 warn ("%s", file); 301 rval = 1; 302 } 303 if (dochar && r == (size_t)-2) { 304 warnx ("%s: incomplete multibyte character", file); 305 rval = 1; 306 } 307 308 print_counts(linect, wordct, charct, file ? file : 0); 309 310 /* don't bother checkint doline, doword, or dobyte --- speeds 311 up the common case */ 312 tlinect += linect; 313 twordct += wordct; 314 tcharct += charct; 315 316 if (close(fd)) { 317 warn ("%s", file); 318 rval = 1; 319 } 320 } 321 322 static void 323 print_counts(lines, words, chars, name) 324 wc_count_t lines; 325 wc_count_t words; 326 wc_count_t chars; 327 char *name; 328 { 329 330 if (doline) 331 printf(WCFMT, (WCCAST)lines); 332 if (doword) 333 printf(WCFMT, (WCCAST)words); 334 if (dobyte || dochar) 335 printf(WCFMT, (WCCAST)chars); 336 337 if (name) 338 printf(" %s\n", name); 339 else 340 printf("\n"); 341 } 342 343 static void 344 usage() 345 { 346 (void)fprintf(stderr, "usage: wc [-clw] [file ...]\n"); 347 exit(1); 348 } 349