1 /* $OpenBSD: wc.c,v 1.21 2016/09/16 09:25:23 fcambus Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> /* MAXBSIZE */ 33 #include <sys/stat.h> 34 #include <sys/file.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <locale.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <unistd.h> 41 #include <util.h> 42 #include <wchar.h> 43 #include <wctype.h> 44 45 int64_t tlinect, twordct, tcharct; 46 int doline, doword, dochar, humanchar, multibyte; 47 int rval; 48 extern char *__progname; 49 50 static void print_counts(int64_t, int64_t, int64_t, char *); 51 static void format_and_print(int64_t); 52 static void cnt(char *); 53 54 int 55 main(int argc, char *argv[]) 56 { 57 int ch; 58 59 setlocale(LC_CTYPE, ""); 60 61 if (pledge("stdio rpath", NULL) == -1) 62 err(1, "pledge"); 63 64 while ((ch = getopt(argc, argv, "lwchm")) != -1) 65 switch(ch) { 66 case 'l': 67 doline = 1; 68 break; 69 case 'w': 70 doword = 1; 71 break; 72 case 'm': 73 if (MB_CUR_MAX > 1) 74 multibyte = 1; 75 /* FALLTHROUGH */ 76 case 'c': 77 dochar = 1; 78 break; 79 case 'h': 80 humanchar = 1; 81 break; 82 case '?': 83 default: 84 fprintf(stderr, 85 "usage: %s [-c | -m] [-hlw] [file ...]\n", 86 __progname); 87 return 1; 88 } 89 argv += optind; 90 argc -= optind; 91 92 /* 93 * wc is unusual in that its flags are on by default, so, 94 * if you don't get any arguments, you have to turn them 95 * all on. 96 */ 97 if (!doline && !doword && !dochar) 98 doline = doword = dochar = 1; 99 100 if (!*argv) { 101 cnt(NULL); 102 } else { 103 int dototal = (argc > 1); 104 105 do { 106 cnt(*argv); 107 } while(*++argv); 108 109 if (dototal) 110 print_counts(tlinect, twordct, tcharct, "total"); 111 } 112 113 return rval; 114 } 115 116 static void 117 cnt(char *file) 118 { 119 static char *buf; 120 static size_t bufsz; 121 122 FILE *stream; 123 char *C; 124 wchar_t wc; 125 short gotsp; 126 ssize_t len; 127 int64_t linect, wordct, charct; 128 struct stat sbuf; 129 int fd; 130 131 linect = wordct = charct = 0; 132 stream = NULL; 133 if (file) { 134 if ((fd = open(file, O_RDONLY, 0)) < 0) { 135 warn("%s", file); 136 rval = 1; 137 return; 138 } 139 } else { 140 fd = STDIN_FILENO; 141 } 142 143 if (!doword && !multibyte) { 144 if (bufsz < MAXBSIZE && 145 (buf = realloc(buf, MAXBSIZE)) == NULL) 146 err(1, NULL); 147 /* 148 * Line counting is split out because it's a lot 149 * faster to get lines than to get words, since 150 * the word count requires some logic. 151 */ 152 if (doline) { 153 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 154 charct += len; 155 for (C = buf; len--; ++C) 156 if (*C == '\n') 157 ++linect; 158 } 159 if (len == -1) { 160 warn("%s", file); 161 rval = 1; 162 } 163 } 164 /* 165 * If all we need is the number of characters and 166 * it's a directory or a regular or linked file, just 167 * stat the puppy. We avoid testing for it not being 168 * a special device in case someone adds a new type 169 * of inode. 170 */ 171 else if (dochar) { 172 mode_t ifmt; 173 174 if (fstat(fd, &sbuf)) { 175 warn("%s", file); 176 rval = 1; 177 } else { 178 ifmt = sbuf.st_mode & S_IFMT; 179 if (ifmt == S_IFREG || ifmt == S_IFLNK 180 || ifmt == S_IFDIR) { 181 charct = sbuf.st_size; 182 } else { 183 while ((len = read(fd, buf, MAXBSIZE)) > 0) 184 charct += len; 185 if (len == -1) { 186 warn("%s", file); 187 rval = 1; 188 } 189 } 190 } 191 } 192 } else { 193 if (file == NULL) 194 stream = stdin; 195 else if ((stream = fdopen(fd, "r")) == NULL) { 196 warn("%s", file); 197 close(fd); 198 rval = 1; 199 return; 200 } 201 202 /* 203 * Do it the hard way. 204 * According to POSIX, a word is a "maximal string of 205 * characters delimited by whitespace." Nothing is said 206 * about a character being printing or non-printing. 207 */ 208 gotsp = 1; 209 while ((len = getline(&buf, &bufsz, stream)) > 0) { 210 if (multibyte) { 211 for (C = buf; *C != '\0'; C += len) { 212 ++charct; 213 len = mbtowc(&wc, C, MB_CUR_MAX); 214 if (len == -1) { 215 mbtowc(NULL, NULL, 216 MB_CUR_MAX); 217 len = 1; 218 wc = L' '; 219 } 220 if (iswspace(wc)) { 221 gotsp = 1; 222 if (wc == L'\n') 223 ++linect; 224 } else if (gotsp) { 225 gotsp = 0; 226 ++wordct; 227 } 228 } 229 } else { 230 charct += len; 231 for (C = buf; *C != '\0'; ++C) { 232 if (isspace((unsigned char)*C)) { 233 gotsp = 1; 234 if (*C == '\n') 235 ++linect; 236 } else if (gotsp) { 237 gotsp = 0; 238 ++wordct; 239 } 240 } 241 } 242 } 243 if (ferror(stream)) { 244 warn("%s", file); 245 rval = 1; 246 } 247 } 248 249 print_counts(linect, wordct, charct, file); 250 251 /* 252 * Don't bother checking doline, doword, or dochar -- speeds 253 * up the common case 254 */ 255 tlinect += linect; 256 twordct += wordct; 257 tcharct += charct; 258 259 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { 260 warn("%s", file); 261 rval = 1; 262 } 263 } 264 265 static void 266 format_and_print(int64_t v) 267 { 268 if (humanchar) { 269 char result[FMT_SCALED_STRSIZE]; 270 271 fmt_scaled((long long)v, result); 272 printf("%7s", result); 273 } else { 274 printf(" %7lld", v); 275 } 276 } 277 278 static void 279 print_counts(int64_t lines, int64_t words, int64_t chars, char *name) 280 { 281 if (doline) 282 format_and_print(lines); 283 if (doword) 284 format_and_print(words); 285 if (dochar) 286 format_and_print(chars); 287 288 if (name) 289 printf(" %s\n", name); 290 else 291 printf("\n"); 292 } 293