1 /* $OpenBSD: wc.c,v 1.28 2021/11/16 23:34:24 cheloha Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> /* MAXBSIZE */ 33 #include <sys/stat.h> 34 35 #include <fcntl.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <locale.h> 39 #include <ctype.h> 40 #include <err.h> 41 #include <unistd.h> 42 #include <util.h> 43 #include <wchar.h> 44 #include <wctype.h> 45 46 int64_t tlinect, twordct, tcharct; 47 int doline, doword, dochar, humanchar, multibyte; 48 int rval; 49 extern char *__progname; 50 51 static void print_counts(int64_t, int64_t, int64_t, const char *); 52 static void format_and_print(int64_t); 53 static void cnt(const char *); 54 55 int 56 main(int argc, char *argv[]) 57 { 58 int ch; 59 60 setlocale(LC_CTYPE, ""); 61 62 if (pledge("stdio rpath", NULL) == -1) 63 err(1, "pledge"); 64 65 while ((ch = getopt(argc, argv, "lwchm")) != -1) 66 switch(ch) { 67 case 'l': 68 doline = 1; 69 break; 70 case 'w': 71 doword = 1; 72 break; 73 case 'm': 74 if (MB_CUR_MAX > 1) 75 multibyte = 1; 76 /* FALLTHROUGH */ 77 case 'c': 78 dochar = 1; 79 break; 80 case 'h': 81 humanchar = 1; 82 break; 83 case '?': 84 default: 85 fprintf(stderr, 86 "usage: %s [-c | -m] [-hlw] [file ...]\n", 87 __progname); 88 return 1; 89 } 90 argv += optind; 91 argc -= optind; 92 93 /* 94 * wc is unusual in that its flags are on by default, so, 95 * if you don't get any arguments, you have to turn them 96 * all on. 97 */ 98 if (!doline && !doword && !dochar) 99 doline = doword = dochar = 1; 100 101 if (!*argv) { 102 cnt(NULL); 103 } else { 104 int dototal = (argc > 1); 105 106 do { 107 cnt(*argv); 108 } while(*++argv); 109 110 if (dototal) 111 print_counts(tlinect, twordct, tcharct, "total"); 112 } 113 114 return rval; 115 } 116 117 static void 118 cnt(const char *path) 119 { 120 static char *buf; 121 static size_t bufsz; 122 123 FILE *stream; 124 const char *file; 125 char *C; 126 wchar_t wc; 127 short gotsp; 128 ssize_t len; 129 int64_t linect, wordct, charct; 130 struct stat sbuf; 131 int fd; 132 133 linect = wordct = charct = 0; 134 stream = NULL; 135 if (path != NULL) { 136 file = path; 137 if ((fd = open(file, O_RDONLY)) == -1) { 138 warn("%s", file); 139 rval = 1; 140 return; 141 } 142 } else { 143 file = "(stdin)"; 144 fd = STDIN_FILENO; 145 } 146 147 if (!doword && !multibyte) { 148 if (bufsz < MAXBSIZE && 149 (buf = realloc(buf, MAXBSIZE)) == NULL) 150 err(1, NULL); 151 /* 152 * Line counting is split out because it's a lot 153 * faster to get lines than to get words, since 154 * the word count requires some logic. 155 */ 156 if (doline) { 157 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 158 charct += len; 159 for (C = buf; len--; ++C) 160 if (*C == '\n') 161 ++linect; 162 } 163 if (len == -1) { 164 warn("%s", file); 165 rval = 1; 166 } 167 } 168 /* 169 * If all we need is the number of characters and 170 * it's a directory or a regular or linked file, just 171 * stat the puppy. We avoid testing for it not being 172 * a special device in case someone adds a new type 173 * of inode. 174 */ 175 else if (dochar) { 176 mode_t ifmt; 177 178 if (fstat(fd, &sbuf)) { 179 warn("%s", file); 180 rval = 1; 181 } else { 182 ifmt = sbuf.st_mode & S_IFMT; 183 if (ifmt == S_IFREG || ifmt == S_IFLNK 184 || ifmt == S_IFDIR) { 185 charct = sbuf.st_size; 186 } else { 187 while ((len = read(fd, buf, MAXBSIZE)) > 0) 188 charct += len; 189 if (len == -1) { 190 warn("%s", file); 191 rval = 1; 192 } 193 } 194 } 195 } 196 } else { 197 if (path == NULL) 198 stream = stdin; 199 else if ((stream = fdopen(fd, "r")) == NULL) { 200 warn("%s", file); 201 close(fd); 202 rval = 1; 203 return; 204 } 205 206 /* 207 * Do it the hard way. 208 * According to POSIX, a word is a "maximal string of 209 * characters delimited by whitespace." Nothing is said 210 * about a character being printing or non-printing. 211 */ 212 gotsp = 1; 213 while ((len = getline(&buf, &bufsz, stream)) > 0) { 214 if (multibyte) { 215 const char *end = buf + len; 216 for (C = buf; C < end; C += len) { 217 ++charct; 218 len = mbtowc(&wc, C, MB_CUR_MAX); 219 if (len == -1) { 220 mbtowc(NULL, NULL, 221 MB_CUR_MAX); 222 len = 1; 223 wc = L'?'; 224 } else if (len == 0) 225 len = 1; 226 if (iswspace(wc)) { 227 gotsp = 1; 228 if (wc == L'\n') 229 ++linect; 230 } else if (gotsp) { 231 gotsp = 0; 232 ++wordct; 233 } 234 } 235 } else { 236 charct += len; 237 for (C = buf; len--; ++C) { 238 if (isspace((unsigned char)*C)) { 239 gotsp = 1; 240 if (*C == '\n') 241 ++linect; 242 } else if (gotsp) { 243 gotsp = 0; 244 ++wordct; 245 } 246 } 247 } 248 } 249 if (ferror(stream)) { 250 warn("%s", file); 251 rval = 1; 252 } 253 } 254 255 print_counts(linect, wordct, charct, path); 256 257 /* 258 * Don't bother checking doline, doword, or dochar -- speeds 259 * up the common case 260 */ 261 tlinect += linect; 262 twordct += wordct; 263 tcharct += charct; 264 265 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { 266 warn("%s", file); 267 rval = 1; 268 } 269 } 270 271 static void 272 format_and_print(int64_t v) 273 { 274 if (humanchar) { 275 char result[FMT_SCALED_STRSIZE]; 276 277 fmt_scaled((long long)v, result); 278 printf("%7s", result); 279 } else { 280 printf(" %7lld", v); 281 } 282 } 283 284 static void 285 print_counts(int64_t lines, int64_t words, int64_t chars, const char *name) 286 { 287 if (doline) 288 format_and_print(lines); 289 if (doword) 290 format_and_print(words); 291 if (dochar) 292 format_and_print(chars); 293 294 if (name) 295 printf(" %s\n", name); 296 else 297 printf("\n"); 298 } 299