1 /* $OpenBSD: wc.c,v 1.29 2021/11/28 19:28:42 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/stat.h> 33 34 #include <fcntl.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <locale.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <unistd.h> 41 #include <util.h> 42 #include <wchar.h> 43 #include <wctype.h> 44 45 #define _MAXBSIZE (64 * 1024) 46 47 int64_t tlinect, twordct, tcharct; 48 int doline, doword, dochar, humanchar, multibyte; 49 int rval; 50 extern char *__progname; 51 52 static void print_counts(int64_t, int64_t, int64_t, const char *); 53 static void format_and_print(int64_t); 54 static void cnt(const char *); 55 56 int 57 main(int argc, char *argv[]) 58 { 59 int ch; 60 61 setlocale(LC_CTYPE, ""); 62 63 if (pledge("stdio rpath", NULL) == -1) 64 err(1, "pledge"); 65 66 while ((ch = getopt(argc, argv, "lwchm")) != -1) 67 switch(ch) { 68 case 'l': 69 doline = 1; 70 break; 71 case 'w': 72 doword = 1; 73 break; 74 case 'm': 75 if (MB_CUR_MAX > 1) 76 multibyte = 1; 77 /* FALLTHROUGH */ 78 case 'c': 79 dochar = 1; 80 break; 81 case 'h': 82 humanchar = 1; 83 break; 84 case '?': 85 default: 86 fprintf(stderr, 87 "usage: %s [-c | -m] [-hlw] [file ...]\n", 88 __progname); 89 return 1; 90 } 91 argv += optind; 92 argc -= optind; 93 94 /* 95 * wc is unusual in that its flags are on by default, so, 96 * if you don't get any arguments, you have to turn them 97 * all on. 98 */ 99 if (!doline && !doword && !dochar) 100 doline = doword = dochar = 1; 101 102 if (!*argv) { 103 cnt(NULL); 104 } else { 105 int dototal = (argc > 1); 106 107 do { 108 cnt(*argv); 109 } while(*++argv); 110 111 if (dototal) 112 print_counts(tlinect, twordct, tcharct, "total"); 113 } 114 115 return rval; 116 } 117 118 static void 119 cnt(const char *path) 120 { 121 static char *buf; 122 static size_t bufsz; 123 124 FILE *stream; 125 const char *file; 126 char *C; 127 wchar_t wc; 128 short gotsp; 129 ssize_t len; 130 int64_t linect, wordct, charct; 131 struct stat sbuf; 132 int fd; 133 134 linect = wordct = charct = 0; 135 stream = NULL; 136 if (path != NULL) { 137 file = path; 138 if ((fd = open(file, O_RDONLY)) == -1) { 139 warn("%s", file); 140 rval = 1; 141 return; 142 } 143 } else { 144 file = "(stdin)"; 145 fd = STDIN_FILENO; 146 } 147 148 if (!doword && !multibyte) { 149 if (bufsz < _MAXBSIZE && 150 (buf = realloc(buf, _MAXBSIZE)) == NULL) 151 err(1, NULL); 152 /* 153 * Line counting is split out because it's a lot 154 * faster to get lines than to get words, since 155 * the word count requires some logic. 156 */ 157 if (doline) { 158 while ((len = read(fd, buf, _MAXBSIZE)) > 0) { 159 charct += len; 160 for (C = buf; len--; ++C) 161 if (*C == '\n') 162 ++linect; 163 } 164 if (len == -1) { 165 warn("%s", file); 166 rval = 1; 167 } 168 } 169 /* 170 * If all we need is the number of characters and 171 * it's a directory or a regular or linked file, just 172 * stat the puppy. We avoid testing for it not being 173 * a special device in case someone adds a new type 174 * of inode. 175 */ 176 else if (dochar) { 177 mode_t ifmt; 178 179 if (fstat(fd, &sbuf)) { 180 warn("%s", file); 181 rval = 1; 182 } else { 183 ifmt = sbuf.st_mode & S_IFMT; 184 if (ifmt == S_IFREG || ifmt == S_IFLNK 185 || ifmt == S_IFDIR) { 186 charct = sbuf.st_size; 187 } else { 188 while ((len = read(fd, buf, _MAXBSIZE)) > 0) 189 charct += len; 190 if (len == -1) { 191 warn("%s", file); 192 rval = 1; 193 } 194 } 195 } 196 } 197 } else { 198 if (path == NULL) 199 stream = stdin; 200 else if ((stream = fdopen(fd, "r")) == NULL) { 201 warn("%s", file); 202 close(fd); 203 rval = 1; 204 return; 205 } 206 207 /* 208 * Do it the hard way. 209 * According to POSIX, a word is a "maximal string of 210 * characters delimited by whitespace." Nothing is said 211 * about a character being printing or non-printing. 212 */ 213 gotsp = 1; 214 while ((len = getline(&buf, &bufsz, stream)) > 0) { 215 if (multibyte) { 216 const char *end = buf + len; 217 for (C = buf; C < end; C += len) { 218 ++charct; 219 len = mbtowc(&wc, C, MB_CUR_MAX); 220 if (len == -1) { 221 mbtowc(NULL, NULL, 222 MB_CUR_MAX); 223 len = 1; 224 wc = L'?'; 225 } else if (len == 0) 226 len = 1; 227 if (iswspace(wc)) { 228 gotsp = 1; 229 if (wc == L'\n') 230 ++linect; 231 } else if (gotsp) { 232 gotsp = 0; 233 ++wordct; 234 } 235 } 236 } else { 237 charct += len; 238 for (C = buf; len--; ++C) { 239 if (isspace((unsigned char)*C)) { 240 gotsp = 1; 241 if (*C == '\n') 242 ++linect; 243 } else if (gotsp) { 244 gotsp = 0; 245 ++wordct; 246 } 247 } 248 } 249 } 250 if (ferror(stream)) { 251 warn("%s", file); 252 rval = 1; 253 } 254 } 255 256 print_counts(linect, wordct, charct, path); 257 258 /* 259 * Don't bother checking doline, doword, or dochar -- speeds 260 * up the common case 261 */ 262 tlinect += linect; 263 twordct += wordct; 264 tcharct += charct; 265 266 if ((stream == NULL ? close(fd) : fclose(stream)) != 0) { 267 warn("%s", file); 268 rval = 1; 269 } 270 } 271 272 static void 273 format_and_print(int64_t v) 274 { 275 if (humanchar) { 276 char result[FMT_SCALED_STRSIZE]; 277 278 fmt_scaled((long long)v, result); 279 printf("%7s", result); 280 } else { 281 printf(" %7lld", v); 282 } 283 } 284 285 static void 286 print_counts(int64_t lines, int64_t words, int64_t chars, const char *name) 287 { 288 if (doline) 289 format_and_print(lines); 290 if (doword) 291 format_and_print(words); 292 if (dochar) 293 format_and_print(chars); 294 295 if (name) 296 printf(" %s\n", name); 297 else 298 printf("\n"); 299 } 300