xref: /csrg-svn/usr.bin/wc/wc.c (revision 35284)
121588Sdist /*
233163Sbostic  * Copyright (c) 1980, 1987 Regents of the University of California.
333163Sbostic  * All rights reserved.
433163Sbostic  *
533163Sbostic  * Redistribution and use in source and binary forms are permitted
634911Sbostic  * provided that the above copyright notice and this paragraph are
734911Sbostic  * duplicated in all such forms and that any documentation,
834911Sbostic  * advertising materials, and other materials related to such
934911Sbostic  * distribution and use acknowledge that the software was developed
1034911Sbostic  * by the University of California, Berkeley.  The name of the
1134911Sbostic  * University may not be used to endorse or promote products derived
1234911Sbostic  * from this software without specific prior written permission.
1334911Sbostic  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1434911Sbostic  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1534911Sbostic  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1621588Sdist  */
1721588Sdist 
1821588Sdist #ifndef lint
1921588Sdist char copyright[] =
2033163Sbostic "@(#) Copyright (c) 1980, 1987 Regents of the University of California.\n\
2121588Sdist  All rights reserved.\n";
2233163Sbostic #endif /* not lint */
2321588Sdist 
2421588Sdist #ifndef lint
25*35284Sbostic static char sccsid[] = "@(#)wc.c	5.5 (Berkeley) 07/25/88";
2633163Sbostic #endif /* not lint */
2721588Sdist 
/* wc -- count the lines, words and characters in files or standard input */
291161Sbill 
3030103Sbostic #include <sys/param.h>
3130103Sbostic #include <sys/stat.h>
3230103Sbostic #include <sys/file.h>
331161Sbill #include <stdio.h>
341161Sbill 
/* ASCII codes of the characters that get special treatment when counting. */
#define DEL	0x7f			/* delete */
#define NL	0x0a			/* newline */
#define SPACE	0x20			/* blank */
#define TAB	0x09			/* horizontal tab */

/* Running totals accumulated over every input that is counted. */
static long	tlinect;		/* lines */
static long	twordct;		/* words */
static long	tcharct;		/* characters */

/* Non-zero when the corresponding count has been requested. */
static int	doline;			/* report line counts */
static int	doword;			/* report word counts */
static int	dochar;			/* report character counts */
4230103Sbostic 
43*35284Sbostic main(argc, argv)
4433163Sbostic 	int argc;
4533163Sbostic 	char **argv;
461161Sbill {
4733163Sbostic 	extern int optind;
4833163Sbostic 	register int ch;
49*35284Sbostic 	int total;
501161Sbill 
5130103Sbostic 	/*
5230103Sbostic 	 * wc is unusual in that its flags are on by default, so,
5330103Sbostic 	 * if you don't get any arguments, you have to turn them
5430103Sbostic 	 * all on.
5530103Sbostic 	 */
5630103Sbostic 	if (argc > 1 && argv[1][0] == '-' && argv[1][1]) {
57*35284Sbostic 		while ((ch = getopt(argc, argv, "lwc")) != EOF)
5830103Sbostic 			switch((char)ch) {
5933163Sbostic 			case 'l':
6033163Sbostic 				doline = 1;
6133163Sbostic 				break;
6233163Sbostic 			case 'w':
6333163Sbostic 				doword = 1;
6433163Sbostic 				break;
6533163Sbostic 			case 'c':
6633163Sbostic 				dochar = 1;
6733163Sbostic 				break;
6833163Sbostic 			case '?':
6933163Sbostic 			default:
70*35284Sbostic 				fputs("usage: wc [-lwc] [files]\n", stderr);
7133163Sbostic 				exit(1);
7230103Sbostic 			}
7330103Sbostic 		argv += optind;
7430103Sbostic 		argc -= optind;
7530103Sbostic 	}
7630103Sbostic 	else {
7730103Sbostic 		++argv;
7830103Sbostic 		--argc;
7933163Sbostic 		doline = doword = dochar = 1;
8030103Sbostic 	}
8130103Sbostic 
82*35284Sbostic 	total = 0;
83*35284Sbostic 	if (!*argv) {
84*35284Sbostic 		cnt((char *)NULL);
85*35284Sbostic 		putchar('\n');
861161Sbill 	}
87*35284Sbostic 	else do {
88*35284Sbostic 		cnt(*argv);
89*35284Sbostic 		printf(" %s\n", *argv);
90*35284Sbostic 		++total;
9130103Sbostic 	} while(*++argv);
9230103Sbostic 
93*35284Sbostic 	if (total > 1) {
94*35284Sbostic 		if (doline)
95*35284Sbostic 			printf(" %7ld", tlinect);
96*35284Sbostic 		if (doword)
97*35284Sbostic 			printf(" %7ld", twordct);
98*35284Sbostic 		if (dochar)
99*35284Sbostic 			printf(" %7ld", tcharct);
100*35284Sbostic 		puts(" total");
101*35284Sbostic 	}
10233163Sbostic 	exit(0);
10330103Sbostic }
10430103Sbostic 
10530103Sbostic static
10630103Sbostic cnt(file)
10733163Sbostic 	char *file;
10830103Sbostic {
10933163Sbostic 	register u_char *C;
11033163Sbostic 	register short gotsp;
11133163Sbostic 	register int len;
112*35284Sbostic 	register long linect, wordct, charct;
11333163Sbostic 	struct stat sbuf;
11433163Sbostic 	int fd;
11533163Sbostic 	u_char buf[MAXBSIZE];
11630103Sbostic 
11730103Sbostic 	linect = wordct = charct = 0;
11830103Sbostic 	if (file) {
11933163Sbostic 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
12030103Sbostic 			perror(file);
12133163Sbostic 			exit(1);
12230103Sbostic 		}
12330103Sbostic 		if (!doword) {
12430103Sbostic 			/*
12530103Sbostic 			 * line counting is split out because it's a lot
12630103Sbostic 			 * faster to get lines than to get words, since
12730103Sbostic 			 * the word count requires some logic.
12830103Sbostic 			 */
12930103Sbostic 			if (doline) {
13033163Sbostic 				while(len = read(fd, buf, MAXBSIZE)) {
13130103Sbostic 					if (len == -1) {
13230103Sbostic 						perror(file);
13333163Sbostic 						exit(1);
13430103Sbostic 					}
13530103Sbostic 					charct += len;
13633163Sbostic 					for (C = buf; len--; ++C)
13730103Sbostic 						if (*C == '\n')
13830103Sbostic 							++linect;
1391161Sbill 				}
14030103Sbostic 				tlinect += linect;
14133163Sbostic 				printf(" %7ld", linect);
14230103Sbostic 				if (dochar) {
14330103Sbostic 					tcharct += charct;
144*35284Sbostic 					printf(" %7ld", charct);
14530103Sbostic 				}
14630103Sbostic 				close(fd);
14730103Sbostic 				return;
1481161Sbill 			}
14930103Sbostic 			/*
15030103Sbostic 			 * if all we need is the number of characters and
15130103Sbostic 			 * it's a directory or a regular or linked file, just
15230103Sbostic 			 * stat the puppy.  We avoid testing for it not being
15330103Sbostic 			 * a special device in case someone adds a new type
15430103Sbostic 			 * of inode.
15530103Sbostic 			 */
15630103Sbostic 			if (dochar) {
15733163Sbostic 				if (fstat(fd, &sbuf)) {
15830103Sbostic 					perror(file);
15933163Sbostic 					exit(1);
16030103Sbostic 				}
16130103Sbostic 				if (sbuf.st_mode & (S_IFREG | S_IFLNK | S_IFDIR)) {
16233163Sbostic 					printf(" %7ld", sbuf.st_size);
16330103Sbostic 					tcharct += sbuf.st_size;
16430103Sbostic 					close(fd);
16530103Sbostic 					return;
16630103Sbostic 				}
1671161Sbill 			}
1681161Sbill 		}
16930103Sbostic 	}
17030103Sbostic 	else
17130103Sbostic 		fd = 0;
17230103Sbostic 	/* do it the hard way... */
17333163Sbostic 	for (gotsp = 1; len = read(fd, buf, MAXBSIZE);) {
17430103Sbostic 		if (len == -1) {
17530103Sbostic 			perror(file);
17633163Sbostic 			exit(1);
17730103Sbostic 		}
17830103Sbostic 		charct += len;
17933163Sbostic 		for (C = buf; len--; ++C)
18030103Sbostic 			switch(*C) {
18130103Sbostic 				case NL:
18230103Sbostic 					++linect;
18330103Sbostic 				case TAB:
18430103Sbostic 				case SPACE:
18533163Sbostic 					gotsp = 1;
18630103Sbostic 					continue;
18730103Sbostic 				default:
18833163Sbostic #ifdef notdef
18930103Sbostic 					/*
19030103Sbostic 					 * This line of code implements the
19130103Sbostic 					 * original V7 wc algorithm, i.e.
19230103Sbostic 					 * a non-printing character doesn't
19330103Sbostic 					 * toggle the "word" count, so that
19430103Sbostic 					 * "  ^D^F  " counts as 6 spaces,
19530103Sbostic 					 * while "foo^D^Fbar" counts as 8
19630103Sbostic 					 * characters.
19730103Sbostic 					 *
19830103Sbostic 					 * test order is important -- gotsp
19930103Sbostic 					 * will normally be NO, so test it
20030103Sbostic 					 * first
20130103Sbostic 					 */
20230103Sbostic 					if (gotsp && *C > SPACE && *C < DEL) {
20333163Sbostic #endif
20430103Sbostic 					/*
20530103Sbostic 					 * This line implements the manual
20630103Sbostic 					 * page, i.e. a word is a "maximal
20730103Sbostic 					 * string of characters delimited by
20830103Sbostic 					 * spaces, tabs or newlines."  Notice
20930103Sbostic 					 * nothing was said about a character
21030103Sbostic 					 * being printing or non-printing.
21130103Sbostic 					 */
21230103Sbostic 					if (gotsp) {
21333163Sbostic 						gotsp = 0;
21430103Sbostic 						++wordct;
21530103Sbostic 					}
21630103Sbostic 			}
21730103Sbostic 	}
21830103Sbostic 	if (doline) {
2191161Sbill 		tlinect += linect;
22033163Sbostic 		printf(" %7ld", linect);
22130103Sbostic 	}
22230103Sbostic 	if (doword) {
2231161Sbill 		twordct += wordct;
22433163Sbostic 		printf(" %7ld", wordct);
22530103Sbostic 	}
22630103Sbostic 	if (dochar) {
2271161Sbill 		tcharct += charct;
22833163Sbostic 		printf(" %7ld", charct);
2291161Sbill 	}
23030103Sbostic 	close(fd);
2311161Sbill }
232