xref: /onnv-gate/usr/src/lib/libcmd/common/wclib.c (revision 12068:08a39a083754)
14887Schin /***********************************************************************
24887Schin *                                                                      *
34887Schin *               This software is part of the ast package               *
4*12068SRoger.Faulkner@Oracle.COM *          Copyright (c) 1992-2010 AT&T Intellectual Property          *
54887Schin *                      and is licensed under the                       *
64887Schin *                  Common Public License, Version 1.0                  *
78462SApril.Chin@Sun.COM *                    by AT&T Intellectual Property                     *
84887Schin *                                                                      *
94887Schin *                A copy of the License is available at                 *
104887Schin *            http://www.opensource.org/licenses/cpl1.0.txt             *
114887Schin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
124887Schin *                                                                      *
134887Schin *              Information and Software Systems Research               *
144887Schin *                            AT&T Research                             *
154887Schin *                           Florham Park NJ                            *
164887Schin *                                                                      *
174887Schin *                 Glenn Fowler <gsf@research.att.com>                  *
184887Schin *                  David Korn <dgk@research.att.com>                   *
194887Schin *                                                                      *
204887Schin ***********************************************************************/
214887Schin #pragma prototyped
224887Schin /*
234887Schin  * David Korn
244887Schin  * AT&T Bell Laboratories
254887Schin  *
264887Schin  * library interface for word count
274887Schin  */
284887Schin 
294887Schin #include <cmd.h>
304887Schin #include <wc.h>
314887Schin #include <ctype.h>
324887Schin 
338462SApril.Chin@Sun.COM #if _hdr_wchar && _hdr_wctype && _lib_iswctype
344887Schin 
354887Schin #include <wchar.h>
364887Schin #include <wctype.h>
3710898Sroland.mainz@nrubsig.org #include <lc.h>
384887Schin 
394887Schin #else
404887Schin 
414887Schin #ifndef iswspace
424887Schin #define iswspace(x)	isspace(x)
434887Schin #endif
444887Schin 
454887Schin #endif
464887Schin 
/*
 * Per-byte classification flags stored in Wc_t.type[].
 * The low three bits (mask 0x07) are left free: wc_init() stores the
 * number of continuation bytes that follow a UTF-8 lead byte there
 * (WC_MB+1 .. WC_MB+5) and wc_count() reads it back with (c&7).
 */
#define	WC_SP		0x08	/* byte separates words */
#define	WC_NL		0x10	/* byte is a new-line */
#define	WC_MB		0x20	/* byte belongs to a multibyte sequence */
#define	WC_ERR		0x40	/* byte can never appear in valid UTF-8 */

/* flag tests on a type[] entry; each yields 0 or the flag value */
#define eol(c)		((c)&WC_NL)
#define mbc(c)		((c)&WC_MB)
#define spc(c)		((c)&WC_SP)

/*
 * convert up to n bytes at p into the wide character w via ast.mb_towc
 * arguments and the expansion are parenthesized so that arbitrary
 * expressions can be passed safely
 */
#define mb2wc(w,p,n)	((*ast.mb_towc)(&(w),(char*)(p),(n)))
5610898Sroland.mainz@nrubsig.org 
wc_init(int mode)5710898Sroland.mainz@nrubsig.org Wc_t* wc_init(int mode)
584887Schin {
594887Schin 	register int	n;
604887Schin 	register int	w;
614887Schin 	Wc_t*		wp;
624887Schin 
6310898Sroland.mainz@nrubsig.org 	if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
6410898Sroland.mainz@nrubsig.org 		return 0;
6510898Sroland.mainz@nrubsig.org 	if (!mbwide())
6610898Sroland.mainz@nrubsig.org 		wp->mb = 0;
6710898Sroland.mainz@nrubsig.org #if _hdr_wchar && _hdr_wctype && _lib_iswctype
6810898Sroland.mainz@nrubsig.org 	else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
6910898Sroland.mainz@nrubsig.org 		wp->mb = 1;
7010898Sroland.mainz@nrubsig.org #endif
7110898Sroland.mainz@nrubsig.org 	else
7210898Sroland.mainz@nrubsig.org 		wp->mb = -1;
734887Schin 	w = mode & WC_WORDS;
7410898Sroland.mainz@nrubsig.org 	for (n = (1<<CHAR_BIT); --n >= 0;)
7510898Sroland.mainz@nrubsig.org 		wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
7610898Sroland.mainz@nrubsig.org 	wp->type['\n'] = WC_SP|WC_NL;
7710898Sroland.mainz@nrubsig.org 	if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
7810898Sroland.mainz@nrubsig.org 	{
7910898Sroland.mainz@nrubsig.org 		for (n = 0; n < 64; n++)
8010898Sroland.mainz@nrubsig.org 		{
8110898Sroland.mainz@nrubsig.org 			wp->type[0x80+n] |= WC_MB;
8210898Sroland.mainz@nrubsig.org 			if (n<32)
8310898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+1;
8410898Sroland.mainz@nrubsig.org 			else if (n<48)
8510898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+2;
8610898Sroland.mainz@nrubsig.org 			else if (n<56)
8710898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+3;
8810898Sroland.mainz@nrubsig.org 			else if (n<60)
8910898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+4;
9010898Sroland.mainz@nrubsig.org 			else if (n<62)
9110898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+5;
9210898Sroland.mainz@nrubsig.org 		}
9310898Sroland.mainz@nrubsig.org 		wp->type[0xc0] = WC_MB|WC_ERR;
9410898Sroland.mainz@nrubsig.org 		wp->type[0xc1] = WC_MB|WC_ERR;
9510898Sroland.mainz@nrubsig.org 		wp->type[0xfe] = WC_MB|WC_ERR;
9610898Sroland.mainz@nrubsig.org 		wp->type[0xff] = WC_MB|WC_ERR;
9710898Sroland.mainz@nrubsig.org 	}
9810898Sroland.mainz@nrubsig.org 	wp->mode = mode;
9910898Sroland.mainz@nrubsig.org 	return wp;
10010898Sroland.mainz@nrubsig.org }
10110898Sroland.mainz@nrubsig.org 
invalid(const char * file,int nlines)10210898Sroland.mainz@nrubsig.org static int invalid(const char *file, int nlines)
10310898Sroland.mainz@nrubsig.org {
10410898Sroland.mainz@nrubsig.org 	error_info.file = (char*)file;
10510898Sroland.mainz@nrubsig.org 	error_info.line = nlines;
10610898Sroland.mainz@nrubsig.org 	error(ERROR_SYSTEM|1, "invalid multibyte character");
10710898Sroland.mainz@nrubsig.org 	error_info.file = 0;
10810898Sroland.mainz@nrubsig.org 	error_info.line = 0;
10910898Sroland.mainz@nrubsig.org 	return nlines;
11010898Sroland.mainz@nrubsig.org }
11110898Sroland.mainz@nrubsig.org 
/*
 * handle utf space characters
 *
 * one step of the small state machine wc_count() uses to recognize
 * multibyte UTF-8 white space without decoding it
 *
 * <state> is the current state and <c> the next byte of the sequence
 * (the byte after the lead byte on the first call)
 * wc_count() seeds the state from a three byte lead with
 * (lead&3), i.e. 1 for 0xe1, 2 for 0xe2, 3 for 0xe3, and uses 8
 * after a 0xc2 lead
 *
 * returns 10 when the sequence is a space, 0 when it is not, and
 * any other value as the state for the next byte
 *
 *	E1 9A 80	U+1680		1 -> 4 -> 10
 *	E2 80 80..8B	U+2000..U+200B	2 -> 6 -> 10 (U+2007 asks iswspace)
 *	E2 80 A8..A9	U+2028..U+2029	2 -> 6 -> 10
 *	E2 80 AF	U+202F		2 -> 6 -> 10 if iswspace(0x202f)
 *	E2 81 9F	U+205F		2 -> 7 -> 10
 *	E3 80 80	U+3000		3 -> 5 -> 10
 *	C2 xx		U+00xx		8 -> 10 if iswspace(xx)
 */

static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	case 1:
		state = (c==0x9a?4:0);
		break;
	case 2:
		/* 0x80 -> state 6, 0x81 -> state 7 */
		state = ((c==0x80||c==0x81)?6+(c&1):0);
		break;
	case 3:
		state = (c==0x80?5:0);
		break;
	case 4:
		state = (c==0x80?10:0);
		break;
	case 5:
		state = (c==0x80?10:0);
		break;
	case 6:
		state = 0;
		/*
		 * third byte of E2 80 xx, code point 0x2000+(xx&0x3f)
		 * U+2028 and U+2029 (line and paragraph separator) are
		 * 0xa8 and 0xa9; 0xa0 and 0xa1 would be U+2020 and
		 * U+2021, the dagger marks, which are not spaces
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		else if((c&0xf0)== 0x80)
		{
			if((c&=0xf)==7)
				return(iswspace(0x2007)?10:0);
			if(c<=0xb)
				return(10);
		}
		else if(c==0xaf && iswspace(0x202f))
			return(10);
		break;
	case 7:
		state = (c==0x9f?10:0);
		break;
	case 8:
		/* after a 0xc2 lead the second byte equals the code point */
		return (iswspace(c)?10:0);
	default:
		/* any other state is handed back unchanged */
		break;
	}
	return state;
}
1574887Schin 
1584887Schin /*
1594887Schin  * compute the line, word, and character count for file <fd>
1604887Schin  */
16110898Sroland.mainz@nrubsig.org 
wc_count(Wc_t * wp,Sfio_t * fd,const char * file)1624887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
1634887Schin {
16410898Sroland.mainz@nrubsig.org 	register char*		type = wp->type;
16510898Sroland.mainz@nrubsig.org 	register unsigned char*	cp;
166*12068SRoger.Faulkner@Oracle.COM 	register Sfoff_t	nbytes;
1674887Schin 	register Sfoff_t	nchars;
1684887Schin 	register Sfoff_t	nwords;
1694887Schin 	register Sfoff_t	nlines;
17010898Sroland.mainz@nrubsig.org 	register Sfoff_t	eline = -1;
17110898Sroland.mainz@nrubsig.org 	register Sfoff_t	longest = 0;
1724887Schin 	register ssize_t	c;
17310898Sroland.mainz@nrubsig.org 	register unsigned char*	endbuff;
17410898Sroland.mainz@nrubsig.org 	register int		lasttype = WC_SP;
1754887Schin 	unsigned int		lastchar;
17610898Sroland.mainz@nrubsig.org 	ssize_t			n;
17710898Sroland.mainz@nrubsig.org 	ssize_t			o;
17810898Sroland.mainz@nrubsig.org 	unsigned char*		buff;
1794887Schin 	wchar_t			x;
18010898Sroland.mainz@nrubsig.org 	unsigned char		side[32];
1814887Schin 
1824887Schin 	sfset(fd,SF_WRITE,1);
183*12068SRoger.Faulkner@Oracle.COM 	nlines = nwords = nchars = nbytes = 0;
1844887Schin 	wp->longest = 0;
18510898Sroland.mainz@nrubsig.org 	if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
1864887Schin 	{
1874887Schin 		cp = buff = endbuff = 0;
1884887Schin 		for (;;)
1894887Schin 		{
190*12068SRoger.Faulkner@Oracle.COM 			if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0)
1914887Schin 			{
19210898Sroland.mainz@nrubsig.org 				if ((o = endbuff-cp) < sizeof(side))
19310898Sroland.mainz@nrubsig.org 				{
19410898Sroland.mainz@nrubsig.org 					if (buff)
19510898Sroland.mainz@nrubsig.org 					{
19610898Sroland.mainz@nrubsig.org 						if (o)
19710898Sroland.mainz@nrubsig.org 							memcpy(side, cp, o);
19810898Sroland.mainz@nrubsig.org 						mbinit();
19910898Sroland.mainz@nrubsig.org 					}
20010898Sroland.mainz@nrubsig.org 					else
20110898Sroland.mainz@nrubsig.org 						o = 0;
20210898Sroland.mainz@nrubsig.org 					cp = side + o;
20310898Sroland.mainz@nrubsig.org 					if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
20410898Sroland.mainz@nrubsig.org 					{
20510898Sroland.mainz@nrubsig.org 						if ((nchars - longest) > wp->longest)
20610898Sroland.mainz@nrubsig.org 							wp->longest = nchars - longest;
20710898Sroland.mainz@nrubsig.org 						break;
20810898Sroland.mainz@nrubsig.org 					}
209*12068SRoger.Faulkner@Oracle.COM 					nbytes += n;
21010898Sroland.mainz@nrubsig.org 					if ((c = sizeof(side) - o) > n)
21110898Sroland.mainz@nrubsig.org 						c = n;
21210898Sroland.mainz@nrubsig.org 					if (c)
21310898Sroland.mainz@nrubsig.org 						memcpy(cp, buff, c);
21410898Sroland.mainz@nrubsig.org 					endbuff = buff + n;
21510898Sroland.mainz@nrubsig.org 					cp = side;
21610898Sroland.mainz@nrubsig.org 					x = mbchar(cp);
21710898Sroland.mainz@nrubsig.org 					if ((cp-side) < o)
21810898Sroland.mainz@nrubsig.org 					{
21910898Sroland.mainz@nrubsig.org 						cp = buff;
22010898Sroland.mainz@nrubsig.org 						nchars += (cp-side) - 1;
22110898Sroland.mainz@nrubsig.org 					}
22210898Sroland.mainz@nrubsig.org 					else
22310898Sroland.mainz@nrubsig.org 						cp = buff + (cp-side) - o;
22410898Sroland.mainz@nrubsig.org 				}
22510898Sroland.mainz@nrubsig.org 				else
22610898Sroland.mainz@nrubsig.org 				{
22710898Sroland.mainz@nrubsig.org 					cp++;
22810898Sroland.mainz@nrubsig.org 					x = -1;
22910898Sroland.mainz@nrubsig.org 				}
23010898Sroland.mainz@nrubsig.org 				if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
23110898Sroland.mainz@nrubsig.org 					eline = invalid(file, nlines);
2324887Schin 			}
23310898Sroland.mainz@nrubsig.org 			else
23410898Sroland.mainz@nrubsig.org 				cp += n ? n : 1;
23510898Sroland.mainz@nrubsig.org 			if (x == '\n')
2364887Schin 			{
2374887Schin 				if ((nchars - longest) > wp->longest)
2384887Schin 					wp->longest = nchars - longest;
23910898Sroland.mainz@nrubsig.org 				longest = nchars + 1;
2404887Schin 				nlines++;
2414887Schin 				lasttype = 1;
2424887Schin 			}
2434887Schin 			else if (iswspace(x))
2444887Schin 				lasttype = 1;
2454887Schin 			else if (lasttype)
2464887Schin 			{
2474887Schin 				lasttype = 0;
2484887Schin 				nwords++;
2494887Schin 			}
25010898Sroland.mainz@nrubsig.org 			nchars++;
25110898Sroland.mainz@nrubsig.org 		}
252*12068SRoger.Faulkner@Oracle.COM 		if (!(wp->mode & WC_MBYTE))
253*12068SRoger.Faulkner@Oracle.COM 			nchars = nbytes;
25410898Sroland.mainz@nrubsig.org 	}
25510898Sroland.mainz@nrubsig.org 	else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
25610898Sroland.mainz@nrubsig.org 	{
25710898Sroland.mainz@nrubsig.org 		if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
25810898Sroland.mainz@nrubsig.org 		{
25910898Sroland.mainz@nrubsig.org 			while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
26010898Sroland.mainz@nrubsig.org 			{
26110898Sroland.mainz@nrubsig.org 				nchars += c;
26210898Sroland.mainz@nrubsig.org 				endbuff = cp + c;
26310898Sroland.mainz@nrubsig.org 				if (*--endbuff == '\n')
26410898Sroland.mainz@nrubsig.org 					nlines++;
26510898Sroland.mainz@nrubsig.org 				else
26610898Sroland.mainz@nrubsig.org 					*endbuff = '\n';
26710898Sroland.mainz@nrubsig.org 				for (;;)
26810898Sroland.mainz@nrubsig.org 					if (*cp++ == '\n')
26910898Sroland.mainz@nrubsig.org 					{
27010898Sroland.mainz@nrubsig.org 						if (cp > endbuff)
27110898Sroland.mainz@nrubsig.org 							break;
27210898Sroland.mainz@nrubsig.org 						nlines++;
27310898Sroland.mainz@nrubsig.org 					}
27410898Sroland.mainz@nrubsig.org 			}
27510898Sroland.mainz@nrubsig.org 		}
27610898Sroland.mainz@nrubsig.org 		else
27710898Sroland.mainz@nrubsig.org 		{
27810898Sroland.mainz@nrubsig.org 			while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
27910898Sroland.mainz@nrubsig.org 			{
28010898Sroland.mainz@nrubsig.org 				nchars += c;
28110898Sroland.mainz@nrubsig.org 				/* check to see whether first character terminates word */
28210898Sroland.mainz@nrubsig.org 				if (c==1)
28310898Sroland.mainz@nrubsig.org 				{
28410898Sroland.mainz@nrubsig.org 					if (eol(lasttype))
28510898Sroland.mainz@nrubsig.org 						nlines++;
28610898Sroland.mainz@nrubsig.org 					if ((c = type[*cp]) && !lasttype)
28710898Sroland.mainz@nrubsig.org 						nwords++;
28810898Sroland.mainz@nrubsig.org 					lasttype = c;
28910898Sroland.mainz@nrubsig.org 					continue;
29010898Sroland.mainz@nrubsig.org 				}
29110898Sroland.mainz@nrubsig.org 				if (!lasttype && type[*cp])
29210898Sroland.mainz@nrubsig.org 					nwords++;
29310898Sroland.mainz@nrubsig.org 				lastchar = cp[--c];
29410898Sroland.mainz@nrubsig.org 				*(endbuff = cp+c) = '\n';
29510898Sroland.mainz@nrubsig.org 				c = lasttype;
29610898Sroland.mainz@nrubsig.org 				/* process each buffer */
29710898Sroland.mainz@nrubsig.org 				for (;;)
29810898Sroland.mainz@nrubsig.org 				{
29910898Sroland.mainz@nrubsig.org 					/* process spaces and new-lines */
30010898Sroland.mainz@nrubsig.org 					do
30110898Sroland.mainz@nrubsig.org 					{
30210898Sroland.mainz@nrubsig.org 						if (eol(c))
30310898Sroland.mainz@nrubsig.org 							for (;;)
30410898Sroland.mainz@nrubsig.org 							{
30510898Sroland.mainz@nrubsig.org 								/* check for end of buffer */
30610898Sroland.mainz@nrubsig.org 								if (cp > endbuff)
30710898Sroland.mainz@nrubsig.org 									goto beob;
30810898Sroland.mainz@nrubsig.org 								nlines++;
30910898Sroland.mainz@nrubsig.org 								if (*cp != '\n')
31010898Sroland.mainz@nrubsig.org 									break;
31110898Sroland.mainz@nrubsig.org 								cp++;
31210898Sroland.mainz@nrubsig.org 							}
31310898Sroland.mainz@nrubsig.org 					} while (c = type[*cp++]);
31410898Sroland.mainz@nrubsig.org 					/* skip over word characters */
31510898Sroland.mainz@nrubsig.org 					while (!(c = type[*cp++]));
31610898Sroland.mainz@nrubsig.org 					nwords++;
31710898Sroland.mainz@nrubsig.org 				}
31810898Sroland.mainz@nrubsig.org 			beob:
31910898Sroland.mainz@nrubsig.org 				if ((cp -= 2) >= buff)
32010898Sroland.mainz@nrubsig.org 					c = type[*cp];
32110898Sroland.mainz@nrubsig.org 				else
32210898Sroland.mainz@nrubsig.org 					c = lasttype;
32310898Sroland.mainz@nrubsig.org 				lasttype = type[lastchar];
32410898Sroland.mainz@nrubsig.org 				/* see if was in word */
32510898Sroland.mainz@nrubsig.org 				if (!c && !lasttype)
32610898Sroland.mainz@nrubsig.org 					nwords--;
32710898Sroland.mainz@nrubsig.org 			}
32810898Sroland.mainz@nrubsig.org 			if (eol(lasttype))
32910898Sroland.mainz@nrubsig.org 				nlines++;
33010898Sroland.mainz@nrubsig.org 			else if (!lasttype)
33110898Sroland.mainz@nrubsig.org 				nwords++;
3324887Schin 		}
3334887Schin 	}
3344887Schin 	else
3354887Schin 	{
33610898Sroland.mainz@nrubsig.org 		int		lineoff=0;
33710898Sroland.mainz@nrubsig.org 		int		skip=0;
33810898Sroland.mainz@nrubsig.org 		int		adjust=0;
33910898Sroland.mainz@nrubsig.org 		int		state=0;
34010898Sroland.mainz@nrubsig.org 		int		oldc;
34110898Sroland.mainz@nrubsig.org 		int		xspace;
34210898Sroland.mainz@nrubsig.org 		int		wasspace = 1;
34310898Sroland.mainz@nrubsig.org 		unsigned char*	start;
34410898Sroland.mainz@nrubsig.org 
34510898Sroland.mainz@nrubsig.org 		lastchar = 0;
34610898Sroland.mainz@nrubsig.org 		start = (endbuff = side) + 1;
34710898Sroland.mainz@nrubsig.org 		xspace = iswspace(0xa0) || iswspace(0x85);
34810898Sroland.mainz@nrubsig.org 		while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
3494887Schin 		{
350*12068SRoger.Faulkner@Oracle.COM 			nbytes += c;
3514887Schin 			nchars += c;
35210898Sroland.mainz@nrubsig.org 			start = cp-lineoff;
3534887Schin 			/* check to see whether first character terminates word */
3544887Schin 			if(c==1)
3554887Schin 			{
35610898Sroland.mainz@nrubsig.org 				if(eol(lasttype))
3574887Schin 					nlines++;
35810898Sroland.mainz@nrubsig.org 				if((c = type[*cp]) && !lasttype)
3594887Schin 					nwords++;
3604887Schin 				lasttype = c;
36110898Sroland.mainz@nrubsig.org 				endbuff = start;
3624887Schin 				continue;
3634887Schin 			}
36410898Sroland.mainz@nrubsig.org 			lastchar = cp[--c];
36510898Sroland.mainz@nrubsig.org 			endbuff = cp+c;
36610898Sroland.mainz@nrubsig.org 			cp[c] = '\n';
36710898Sroland.mainz@nrubsig.org 			if(mbc(lasttype))
36810898Sroland.mainz@nrubsig.org 			{
36910898Sroland.mainz@nrubsig.org 				c = lasttype;
37010898Sroland.mainz@nrubsig.org 				goto mbyte;
37110898Sroland.mainz@nrubsig.org 			}
37210898Sroland.mainz@nrubsig.org 			if(!lasttype && spc(type[*cp]))
3734887Schin 				nwords++;
3744887Schin 			c = lasttype;
3754887Schin 			/* process each buffer */
3764887Schin 			for (;;)
3774887Schin 			{
3784887Schin 				/* process spaces and new-lines */
37910898Sroland.mainz@nrubsig.org 			spaces:
38010898Sroland.mainz@nrubsig.org 				do
3814887Schin 				{
38210898Sroland.mainz@nrubsig.org 					if (eol(c))
3834887Schin 					{
3844887Schin 						/* check for end of buffer */
3854887Schin 						if (cp > endbuff)
3864887Schin 							goto eob;
38710898Sroland.mainz@nrubsig.org 						if(wp->mode&WC_LONGEST)
38810898Sroland.mainz@nrubsig.org 						{
38910898Sroland.mainz@nrubsig.org 							if((cp-start)-adjust > longest)
39010898Sroland.mainz@nrubsig.org 								longest = (cp-start)-adjust-1;
39110898Sroland.mainz@nrubsig.org 							start = cp;
39210898Sroland.mainz@nrubsig.org 						}
3934887Schin 						nlines++;
39410898Sroland.mainz@nrubsig.org 						nchars -= adjust;
39510898Sroland.mainz@nrubsig.org 						adjust = 0;
39610898Sroland.mainz@nrubsig.org 					}
39710898Sroland.mainz@nrubsig.org 				} while (spc(c = type[*cp++]));
39810898Sroland.mainz@nrubsig.org 				wasspace=1;
39910898Sroland.mainz@nrubsig.org 				if(mbc(c))
40010898Sroland.mainz@nrubsig.org 				{
40110898Sroland.mainz@nrubsig.org 				mbyte:
40210898Sroland.mainz@nrubsig.org 					do
40310898Sroland.mainz@nrubsig.org 					{
40410898Sroland.mainz@nrubsig.org 						if(c&WC_ERR)
40510898Sroland.mainz@nrubsig.org 							goto err;
40610898Sroland.mainz@nrubsig.org 						if(skip && (c&7))
4074887Schin 							break;
40810898Sroland.mainz@nrubsig.org 						if(!skip)
40910898Sroland.mainz@nrubsig.org 						{
41010898Sroland.mainz@nrubsig.org 							if(!(c&7))
41110898Sroland.mainz@nrubsig.org 							{
41210898Sroland.mainz@nrubsig.org 								skip=1;
41310898Sroland.mainz@nrubsig.org 								break;
41410898Sroland.mainz@nrubsig.org 							}
41510898Sroland.mainz@nrubsig.org 							skip = (c&7);
41610898Sroland.mainz@nrubsig.org 							adjust += skip;
41710898Sroland.mainz@nrubsig.org 							state = 0;
41810898Sroland.mainz@nrubsig.org 							if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
41910898Sroland.mainz@nrubsig.org 								oldc = *cp;
42010898Sroland.mainz@nrubsig.org 							else if(xspace && cp[-1]==0xc2)
42110898Sroland.mainz@nrubsig.org 							{
42210898Sroland.mainz@nrubsig.org 								state = 8;
42310898Sroland.mainz@nrubsig.org 								oldc = *cp;
42410898Sroland.mainz@nrubsig.org 							}
42510898Sroland.mainz@nrubsig.org 						}
42610898Sroland.mainz@nrubsig.org 						else
42710898Sroland.mainz@nrubsig.org 						{
42810898Sroland.mainz@nrubsig.org 							skip--;
42910898Sroland.mainz@nrubsig.org 							if(state && (state=chkstate(state,oldc)))
43010898Sroland.mainz@nrubsig.org 							{
43110898Sroland.mainz@nrubsig.org 								if(state==10)
43210898Sroland.mainz@nrubsig.org 								{
43310898Sroland.mainz@nrubsig.org 									if(!wasspace)
43410898Sroland.mainz@nrubsig.org 										nwords++;
43510898Sroland.mainz@nrubsig.org 									wasspace = 1;
43610898Sroland.mainz@nrubsig.org 									state=0;
43710898Sroland.mainz@nrubsig.org 									goto spaces;
43810898Sroland.mainz@nrubsig.org 								}
43910898Sroland.mainz@nrubsig.org 								oldc = *cp;
44010898Sroland.mainz@nrubsig.org 							}
44110898Sroland.mainz@nrubsig.org 						}
44210898Sroland.mainz@nrubsig.org 					} while (mbc(c = type[*cp++]));
44310898Sroland.mainz@nrubsig.org 					wasspace = 0;
44410898Sroland.mainz@nrubsig.org 					if(skip)
44510898Sroland.mainz@nrubsig.org 					{
44610898Sroland.mainz@nrubsig.org 						if(eol(c) && (cp > endbuff))
44710898Sroland.mainz@nrubsig.org 							goto eob;
44810898Sroland.mainz@nrubsig.org 				err:
44910898Sroland.mainz@nrubsig.org 						skip = 0;
45010898Sroland.mainz@nrubsig.org 						state = 0;
45110898Sroland.mainz@nrubsig.org 						if(eline!=nlines && !(wp->mode & WC_QUIET))
45210898Sroland.mainz@nrubsig.org 							eline = invalid(file, nlines);
45310898Sroland.mainz@nrubsig.org 						while(mbc(c) && ((c|WC_ERR) || (c&7)==0))
45410898Sroland.mainz@nrubsig.org 							c=type[*cp++];
45510898Sroland.mainz@nrubsig.org 						if(eol(c) && (cp > endbuff))
45610898Sroland.mainz@nrubsig.org 						{
45710898Sroland.mainz@nrubsig.org 							c = WC_MB|WC_ERR;
45810898Sroland.mainz@nrubsig.org 							goto eob;
45910898Sroland.mainz@nrubsig.org 						}
46010898Sroland.mainz@nrubsig.org 						if(mbc(c))
46110898Sroland.mainz@nrubsig.org 							goto mbyte;
46210898Sroland.mainz@nrubsig.org 						else if(c&WC_SP)
46310898Sroland.mainz@nrubsig.org 							goto spaces;
4644887Schin 					}
46510898Sroland.mainz@nrubsig.org 					if(spc(c))
46610898Sroland.mainz@nrubsig.org 					{
46710898Sroland.mainz@nrubsig.org 						nwords++;
46810898Sroland.mainz@nrubsig.org 						continue;
46910898Sroland.mainz@nrubsig.org 					}
47010898Sroland.mainz@nrubsig.org 				}
4714887Schin 				/* skip over word characters */
47210898Sroland.mainz@nrubsig.org 				while(!(c = type[*cp++]));
47310898Sroland.mainz@nrubsig.org 				if(mbc(c))
47410898Sroland.mainz@nrubsig.org 					goto mbyte;
4754887Schin 				nwords++;
4764887Schin 			}
4774887Schin 		eob:
47810898Sroland.mainz@nrubsig.org 			lineoff = cp-start;
4794887Schin 			if((cp -= 2) >= buff)
48010898Sroland.mainz@nrubsig.org 				c = type[*cp];
4814887Schin 			else
48210898Sroland.mainz@nrubsig.org 				c = lasttype;
48310898Sroland.mainz@nrubsig.org 			lasttype = type[lastchar];
4844887Schin 			/* see if was in word */
4854887Schin 			if(!c && !lasttype)
4864887Schin 				nwords--;
4874887Schin 		}
48810898Sroland.mainz@nrubsig.org 		if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
48910898Sroland.mainz@nrubsig.org 			longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
49010898Sroland.mainz@nrubsig.org 		wp->longest = longest;
49110898Sroland.mainz@nrubsig.org 		if (eol(lasttype))
4924887Schin 			nlines++;
49310898Sroland.mainz@nrubsig.org 		else if (!lasttype)
4944887Schin 			nwords++;
495*12068SRoger.Faulkner@Oracle.COM 		if (wp->mode & WC_MBYTE)
496*12068SRoger.Faulkner@Oracle.COM 			nchars -= adjust;
497*12068SRoger.Faulkner@Oracle.COM 		else
498*12068SRoger.Faulkner@Oracle.COM 			nchars = nbytes;
4994887Schin 	}
5004887Schin 	wp->chars = nchars;
5014887Schin 	wp->words = nwords;
5024887Schin 	wp->lines = nlines;
50310898Sroland.mainz@nrubsig.org 	return 0;
5044887Schin }
50510898Sroland.mainz@nrubsig.org 
506