xref: /onnv-gate/usr/src/lib/libcmd/common/wclib.c (revision 10898)
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2009 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
214887Schin #pragma prototyped
224887Schin /*
234887Schin  * David Korn
244887Schin  * AT&T Bell Laboratories
254887Schin  *
264887Schin  * library interface for word count
274887Schin  */
284887Schin 
294887Schin #include <cmd.h>
304887Schin #include <wc.h>
314887Schin #include <ctype.h>
324887Schin 
338462SApril.Chin@Sun.COM #if _hdr_wchar && _hdr_wctype && _lib_iswctype
344887Schin 
354887Schin #include <wchar.h>
364887Schin #include <wctype.h>
37*10898Sroland.mainz@nrubsig.org #include <lc.h>
384887Schin 
394887Schin #else
404887Schin 
414887Schin #ifndef iswspace
424887Schin #define iswspace(x)	isspace(x)
434887Schin #endif
444887Schin 
454887Schin #endif
464887Schin 
/*
 * per byte classification flags kept in the Wc_t type[] table
 *
 * wc_init() adds the number of continuation bytes that follow a
 * UTF-8 lead byte to WC_MB, so that count is found in the low three
 * bits of the table entry
 */
#define	WC_SP		0x08	/* space byte (new-line included) */
#define	WC_NL		0x10	/* new-line byte */
#define	WC_MB		0x20	/* byte >= 0x80, part of a UTF-8 sequence */
#define	WC_ERR		0x40	/* 0xc0 0xc1 0xfe 0xff: flagged invalid */

#define eol(c)		((c)&WC_NL)
#define mbc(c)		((c)&WC_MB)
#define spc(c)		((c)&WC_SP)
/* convert at most <n> bytes at <p> into the wide character <w> */
#define mbwc(w,p,n)	(*ast.mb_towc)(&(w),(char*)(p),(n))

57*10898Sroland.mainz@nrubsig.org Wc_t* wc_init(int mode)
584887Schin {
594887Schin 	register int	n;
604887Schin 	register int	w;
614887Schin 	Wc_t*		wp;
624887Schin 
63*10898Sroland.mainz@nrubsig.org 	if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
64*10898Sroland.mainz@nrubsig.org 		return 0;
65*10898Sroland.mainz@nrubsig.org 	if (!mbwide())
66*10898Sroland.mainz@nrubsig.org 		wp->mb = 0;
67*10898Sroland.mainz@nrubsig.org #if _hdr_wchar && _hdr_wctype && _lib_iswctype
68*10898Sroland.mainz@nrubsig.org 	else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
69*10898Sroland.mainz@nrubsig.org 		wp->mb = 1;
70*10898Sroland.mainz@nrubsig.org #endif
71*10898Sroland.mainz@nrubsig.org 	else
72*10898Sroland.mainz@nrubsig.org 		wp->mb = -1;
734887Schin 	w = mode & WC_WORDS;
74*10898Sroland.mainz@nrubsig.org 	for (n = (1<<CHAR_BIT); --n >= 0;)
75*10898Sroland.mainz@nrubsig.org 		wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
76*10898Sroland.mainz@nrubsig.org 	wp->type['\n'] = WC_SP|WC_NL;
77*10898Sroland.mainz@nrubsig.org 	if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
78*10898Sroland.mainz@nrubsig.org 	{
79*10898Sroland.mainz@nrubsig.org 		for (n = 0; n < 64; n++)
80*10898Sroland.mainz@nrubsig.org 		{
81*10898Sroland.mainz@nrubsig.org 			wp->type[0x80+n] |= WC_MB;
82*10898Sroland.mainz@nrubsig.org 			if (n<32)
83*10898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+1;
84*10898Sroland.mainz@nrubsig.org 			else if (n<48)
85*10898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+2;
86*10898Sroland.mainz@nrubsig.org 			else if (n<56)
87*10898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+3;
88*10898Sroland.mainz@nrubsig.org 			else if (n<60)
89*10898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+4;
90*10898Sroland.mainz@nrubsig.org 			else if (n<62)
91*10898Sroland.mainz@nrubsig.org 				wp->type[0xc0+n] |= WC_MB+5;
92*10898Sroland.mainz@nrubsig.org 		}
93*10898Sroland.mainz@nrubsig.org 		wp->type[0xc0] = WC_MB|WC_ERR;
94*10898Sroland.mainz@nrubsig.org 		wp->type[0xc1] = WC_MB|WC_ERR;
95*10898Sroland.mainz@nrubsig.org 		wp->type[0xfe] = WC_MB|WC_ERR;
96*10898Sroland.mainz@nrubsig.org 		wp->type[0xff] = WC_MB|WC_ERR;
97*10898Sroland.mainz@nrubsig.org 	}
98*10898Sroland.mainz@nrubsig.org 	wp->mode = mode;
99*10898Sroland.mainz@nrubsig.org 	return wp;
100*10898Sroland.mainz@nrubsig.org }
101*10898Sroland.mainz@nrubsig.org 
102*10898Sroland.mainz@nrubsig.org static int invalid(const char *file, int nlines)
103*10898Sroland.mainz@nrubsig.org {
104*10898Sroland.mainz@nrubsig.org 	error_info.file = (char*)file;
105*10898Sroland.mainz@nrubsig.org 	error_info.line = nlines;
106*10898Sroland.mainz@nrubsig.org 	error(ERROR_SYSTEM|1, "invalid multibyte character");
107*10898Sroland.mainz@nrubsig.org 	error_info.file = 0;
108*10898Sroland.mainz@nrubsig.org 	error_info.line = 0;
109*10898Sroland.mainz@nrubsig.org 	return nlines;
110*10898Sroland.mainz@nrubsig.org }
111*10898Sroland.mainz@nrubsig.org 
/*
 * handle utf space characters
 *
 * advance the space recognizer by one byte: <state> is the current
 * state and <c> the next byte of the UTF-8 sequence
 *
 * wc_count() enters with these states after a lead byte
 *	1	0xe1
 *	2	0xe2
 *	3	0xe3
 *	8	0xc2 (only if the locale has U+0085 or U+00A0 as space)
 * and these intermediate states are produced here
 *	4	0xe1 0x9a
 *	5	0xe3 0x80
 *	6	0xe2 0x80
 *	7	0xe2 0x81
 *
 * returns 10 when the sequence is a space character, 0 when it is not,
 * otherwise the next intermediate state; a <state> not listed above is
 * returned unchanged
 */

static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	case 1:
		state = (c==0x9a?4:0);
		break;
	case 2:
		state = ((c==0x80||c==0x81)?6+(c&1):0);
		break;
	case 3:
		state = (c==0x80?5:0);
		break;
	case 4:
		/* 0xe1 0x9a 0x80 is U+1680 */
		state = (c==0x80?10:0);
		break;
	case 5:
		/* 0xe3 0x80 0x80 is U+3000 */
		state = (c==0x80?10:0);
		break;
	case 6:
		state = 0;
		/*
		 * 0xe2 0x80 0xa8 and 0xe2 0x80 0xa9 are the line and
		 * paragraph separators U+2028 and U+2029; the third
		 * bytes 0xa0 and 0xa1 tested previously encode U+2020
		 * and U+2021 (dagger and double dagger)
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		else if((c&0xf0)== 0x80)
		{
			/* U+2000 - U+200B, U+2007 only if the locale agrees */
			if((c&=0xf)==7)
				return(iswspace(0x2007)?10:0);
			if(c<=0xb)
				return(10);
		}
		else if(c==0xaf && iswspace(0x202f))
			return(10);
		break;
	case 7:
		/* 0xe2 0x81 0x9f is U+205F */
		state = (c==0x9f?10:0);
		break;
	case 8:
		/* 0xc2 <c> encodes the code point <c> itself */
		return (iswspace(c)?10:0);
	default:
		break;
	}
	return state;
}

1584887Schin /*
1594887Schin  * compute the line, word, and character count for file <fd>
1604887Schin  */
161*10898Sroland.mainz@nrubsig.org 
1624887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
1634887Schin {
164*10898Sroland.mainz@nrubsig.org 	register char*		type = wp->type;
165*10898Sroland.mainz@nrubsig.org 	register unsigned char*	cp;
1664887Schin 	register Sfoff_t	nchars;
1674887Schin 	register Sfoff_t	nwords;
1684887Schin 	register Sfoff_t	nlines;
169*10898Sroland.mainz@nrubsig.org 	register Sfoff_t	eline = -1;
170*10898Sroland.mainz@nrubsig.org 	register Sfoff_t	longest = 0;
1714887Schin 	register ssize_t	c;
172*10898Sroland.mainz@nrubsig.org 	register unsigned char*	endbuff;
173*10898Sroland.mainz@nrubsig.org 	register int		lasttype = WC_SP;
1744887Schin 	unsigned int		lastchar;
175*10898Sroland.mainz@nrubsig.org 	ssize_t			n;
176*10898Sroland.mainz@nrubsig.org 	ssize_t			o;
177*10898Sroland.mainz@nrubsig.org 	unsigned char*		buff;
1784887Schin 	wchar_t			x;
179*10898Sroland.mainz@nrubsig.org 	unsigned char		side[32];
1804887Schin 
1814887Schin 	sfset(fd,SF_WRITE,1);
1824887Schin 	nlines = nwords = nchars = 0;
1834887Schin 	wp->longest = 0;
184*10898Sroland.mainz@nrubsig.org 	if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
1854887Schin 	{
1864887Schin 		cp = buff = endbuff = 0;
1874887Schin 		for (;;)
1884887Schin 		{
189*10898Sroland.mainz@nrubsig.org 			if (cp >= endbuff || (n = mbwc(x, cp, endbuff-cp)) < 0)
1904887Schin 			{
191*10898Sroland.mainz@nrubsig.org 				if ((o = endbuff-cp) < sizeof(side))
192*10898Sroland.mainz@nrubsig.org 				{
193*10898Sroland.mainz@nrubsig.org 					if (buff)
194*10898Sroland.mainz@nrubsig.org 					{
195*10898Sroland.mainz@nrubsig.org 						if (o)
196*10898Sroland.mainz@nrubsig.org 							memcpy(side, cp, o);
197*10898Sroland.mainz@nrubsig.org 						mbinit();
198*10898Sroland.mainz@nrubsig.org 					}
199*10898Sroland.mainz@nrubsig.org 					else
200*10898Sroland.mainz@nrubsig.org 						o = 0;
201*10898Sroland.mainz@nrubsig.org 					cp = side + o;
202*10898Sroland.mainz@nrubsig.org 					if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
203*10898Sroland.mainz@nrubsig.org 					{
204*10898Sroland.mainz@nrubsig.org 						if ((nchars - longest) > wp->longest)
205*10898Sroland.mainz@nrubsig.org 							wp->longest = nchars - longest;
206*10898Sroland.mainz@nrubsig.org 						break;
207*10898Sroland.mainz@nrubsig.org 					}
208*10898Sroland.mainz@nrubsig.org 					if ((c = sizeof(side) - o) > n)
209*10898Sroland.mainz@nrubsig.org 						c = n;
210*10898Sroland.mainz@nrubsig.org 					if (c)
211*10898Sroland.mainz@nrubsig.org 						memcpy(cp, buff, c);
212*10898Sroland.mainz@nrubsig.org 					endbuff = buff + n;
213*10898Sroland.mainz@nrubsig.org 					cp = side;
214*10898Sroland.mainz@nrubsig.org 					x = mbchar(cp);
215*10898Sroland.mainz@nrubsig.org 					if ((cp-side) < o)
216*10898Sroland.mainz@nrubsig.org 					{
217*10898Sroland.mainz@nrubsig.org 						cp = buff;
218*10898Sroland.mainz@nrubsig.org 						nchars += (cp-side) - 1;
219*10898Sroland.mainz@nrubsig.org 					}
220*10898Sroland.mainz@nrubsig.org 					else
221*10898Sroland.mainz@nrubsig.org 						cp = buff + (cp-side) - o;
222*10898Sroland.mainz@nrubsig.org 				}
223*10898Sroland.mainz@nrubsig.org 				else
224*10898Sroland.mainz@nrubsig.org 				{
225*10898Sroland.mainz@nrubsig.org 					cp++;
226*10898Sroland.mainz@nrubsig.org 					x = -1;
227*10898Sroland.mainz@nrubsig.org 				}
228*10898Sroland.mainz@nrubsig.org 				if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
229*10898Sroland.mainz@nrubsig.org 					eline = invalid(file, nlines);
2304887Schin 			}
231*10898Sroland.mainz@nrubsig.org 			else
232*10898Sroland.mainz@nrubsig.org 				cp += n ? n : 1;
233*10898Sroland.mainz@nrubsig.org 			if (x == '\n')
2344887Schin 			{
2354887Schin 				if ((nchars - longest) > wp->longest)
2364887Schin 					wp->longest = nchars - longest;
237*10898Sroland.mainz@nrubsig.org 				longest = nchars + 1;
2384887Schin 				nlines++;
2394887Schin 				lasttype = 1;
2404887Schin 			}
2414887Schin 			else if (iswspace(x))
2424887Schin 				lasttype = 1;
2434887Schin 			else if (lasttype)
2444887Schin 			{
2454887Schin 				lasttype = 0;
2464887Schin 				nwords++;
2474887Schin 			}
248*10898Sroland.mainz@nrubsig.org 			nchars++;
249*10898Sroland.mainz@nrubsig.org 		}
250*10898Sroland.mainz@nrubsig.org 	}
251*10898Sroland.mainz@nrubsig.org 	else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
252*10898Sroland.mainz@nrubsig.org 	{
253*10898Sroland.mainz@nrubsig.org 		if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
254*10898Sroland.mainz@nrubsig.org 		{
255*10898Sroland.mainz@nrubsig.org 			while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
256*10898Sroland.mainz@nrubsig.org 			{
257*10898Sroland.mainz@nrubsig.org 				nchars += c;
258*10898Sroland.mainz@nrubsig.org 				endbuff = cp + c;
259*10898Sroland.mainz@nrubsig.org 				if (*--endbuff == '\n')
260*10898Sroland.mainz@nrubsig.org 					nlines++;
261*10898Sroland.mainz@nrubsig.org 				else
262*10898Sroland.mainz@nrubsig.org 					*endbuff = '\n';
263*10898Sroland.mainz@nrubsig.org 				for (;;)
264*10898Sroland.mainz@nrubsig.org 					if (*cp++ == '\n')
265*10898Sroland.mainz@nrubsig.org 					{
266*10898Sroland.mainz@nrubsig.org 						if (cp > endbuff)
267*10898Sroland.mainz@nrubsig.org 							break;
268*10898Sroland.mainz@nrubsig.org 						nlines++;
269*10898Sroland.mainz@nrubsig.org 					}
270*10898Sroland.mainz@nrubsig.org 			}
271*10898Sroland.mainz@nrubsig.org 		}
272*10898Sroland.mainz@nrubsig.org 		else
273*10898Sroland.mainz@nrubsig.org 		{
274*10898Sroland.mainz@nrubsig.org 			while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
275*10898Sroland.mainz@nrubsig.org 			{
276*10898Sroland.mainz@nrubsig.org 				nchars += c;
277*10898Sroland.mainz@nrubsig.org 				/* check to see whether first character terminates word */
278*10898Sroland.mainz@nrubsig.org 				if (c==1)
279*10898Sroland.mainz@nrubsig.org 				{
280*10898Sroland.mainz@nrubsig.org 					if (eol(lasttype))
281*10898Sroland.mainz@nrubsig.org 						nlines++;
282*10898Sroland.mainz@nrubsig.org 					if ((c = type[*cp]) && !lasttype)
283*10898Sroland.mainz@nrubsig.org 						nwords++;
284*10898Sroland.mainz@nrubsig.org 					lasttype = c;
285*10898Sroland.mainz@nrubsig.org 					continue;
286*10898Sroland.mainz@nrubsig.org 				}
287*10898Sroland.mainz@nrubsig.org 				if (!lasttype && type[*cp])
288*10898Sroland.mainz@nrubsig.org 					nwords++;
289*10898Sroland.mainz@nrubsig.org 				lastchar = cp[--c];
290*10898Sroland.mainz@nrubsig.org 				*(endbuff = cp+c) = '\n';
291*10898Sroland.mainz@nrubsig.org 				c = lasttype;
292*10898Sroland.mainz@nrubsig.org 				/* process each buffer */
293*10898Sroland.mainz@nrubsig.org 				for (;;)
294*10898Sroland.mainz@nrubsig.org 				{
295*10898Sroland.mainz@nrubsig.org 					/* process spaces and new-lines */
296*10898Sroland.mainz@nrubsig.org 					do
297*10898Sroland.mainz@nrubsig.org 					{
298*10898Sroland.mainz@nrubsig.org 						if (eol(c))
299*10898Sroland.mainz@nrubsig.org 							for (;;)
300*10898Sroland.mainz@nrubsig.org 							{
301*10898Sroland.mainz@nrubsig.org 								/* check for end of buffer */
302*10898Sroland.mainz@nrubsig.org 								if (cp > endbuff)
303*10898Sroland.mainz@nrubsig.org 									goto beob;
304*10898Sroland.mainz@nrubsig.org 								nlines++;
305*10898Sroland.mainz@nrubsig.org 								if (*cp != '\n')
306*10898Sroland.mainz@nrubsig.org 									break;
307*10898Sroland.mainz@nrubsig.org 								cp++;
308*10898Sroland.mainz@nrubsig.org 							}
309*10898Sroland.mainz@nrubsig.org 					} while (c = type[*cp++]);
310*10898Sroland.mainz@nrubsig.org 					/* skip over word characters */
311*10898Sroland.mainz@nrubsig.org 					while (!(c = type[*cp++]));
312*10898Sroland.mainz@nrubsig.org 					nwords++;
313*10898Sroland.mainz@nrubsig.org 				}
314*10898Sroland.mainz@nrubsig.org 			beob:
315*10898Sroland.mainz@nrubsig.org 				if ((cp -= 2) >= buff)
316*10898Sroland.mainz@nrubsig.org 					c = type[*cp];
317*10898Sroland.mainz@nrubsig.org 				else
318*10898Sroland.mainz@nrubsig.org 					c = lasttype;
319*10898Sroland.mainz@nrubsig.org 				lasttype = type[lastchar];
320*10898Sroland.mainz@nrubsig.org 				/* see if was in word */
321*10898Sroland.mainz@nrubsig.org 				if (!c && !lasttype)
322*10898Sroland.mainz@nrubsig.org 					nwords--;
323*10898Sroland.mainz@nrubsig.org 			}
324*10898Sroland.mainz@nrubsig.org 			if (eol(lasttype))
325*10898Sroland.mainz@nrubsig.org 				nlines++;
326*10898Sroland.mainz@nrubsig.org 			else if (!lasttype)
327*10898Sroland.mainz@nrubsig.org 				nwords++;
3284887Schin 		}
3294887Schin 	}
3304887Schin 	else
3314887Schin 	{
332*10898Sroland.mainz@nrubsig.org 		int		lineoff=0;
333*10898Sroland.mainz@nrubsig.org 		int		skip=0;
334*10898Sroland.mainz@nrubsig.org 		int		adjust=0;
335*10898Sroland.mainz@nrubsig.org 		int		state=0;
336*10898Sroland.mainz@nrubsig.org 		int		oldc;
337*10898Sroland.mainz@nrubsig.org 		int		xspace;
338*10898Sroland.mainz@nrubsig.org 		int		wasspace = 1;
339*10898Sroland.mainz@nrubsig.org 		unsigned char*	start;
340*10898Sroland.mainz@nrubsig.org 
341*10898Sroland.mainz@nrubsig.org 		lastchar = 0;
342*10898Sroland.mainz@nrubsig.org 		start = (endbuff = side) + 1;
343*10898Sroland.mainz@nrubsig.org 		xspace = iswspace(0xa0) || iswspace(0x85);
344*10898Sroland.mainz@nrubsig.org 		while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
3454887Schin 		{
3464887Schin 			nchars += c;
347*10898Sroland.mainz@nrubsig.org 			start = cp-lineoff;
3484887Schin 			/* check to see whether first character terminates word */
3494887Schin 			if(c==1)
3504887Schin 			{
351*10898Sroland.mainz@nrubsig.org 				if(eol(lasttype))
3524887Schin 					nlines++;
353*10898Sroland.mainz@nrubsig.org 				if((c = type[*cp]) && !lasttype)
3544887Schin 					nwords++;
3554887Schin 				lasttype = c;
356*10898Sroland.mainz@nrubsig.org 				endbuff = start;
3574887Schin 				continue;
3584887Schin 			}
359*10898Sroland.mainz@nrubsig.org 			lastchar = cp[--c];
360*10898Sroland.mainz@nrubsig.org 			endbuff = cp+c;
361*10898Sroland.mainz@nrubsig.org 			cp[c] = '\n';
362*10898Sroland.mainz@nrubsig.org 			if(mbc(lasttype))
363*10898Sroland.mainz@nrubsig.org 			{
364*10898Sroland.mainz@nrubsig.org 				c = lasttype;
365*10898Sroland.mainz@nrubsig.org 				goto mbyte;
366*10898Sroland.mainz@nrubsig.org 			}
367*10898Sroland.mainz@nrubsig.org 			if(!lasttype && spc(type[*cp]))
3684887Schin 				nwords++;
3694887Schin 			c = lasttype;
3704887Schin 			/* process each buffer */
3714887Schin 			for (;;)
3724887Schin 			{
3734887Schin 				/* process spaces and new-lines */
374*10898Sroland.mainz@nrubsig.org 			spaces:
375*10898Sroland.mainz@nrubsig.org 				do
3764887Schin 				{
377*10898Sroland.mainz@nrubsig.org 					if (eol(c))
3784887Schin 					{
3794887Schin 						/* check for end of buffer */
3804887Schin 						if (cp > endbuff)
3814887Schin 							goto eob;
382*10898Sroland.mainz@nrubsig.org 						if(wp->mode&WC_LONGEST)
383*10898Sroland.mainz@nrubsig.org 						{
384*10898Sroland.mainz@nrubsig.org 							if((cp-start)-adjust > longest)
385*10898Sroland.mainz@nrubsig.org 								longest = (cp-start)-adjust-1;
386*10898Sroland.mainz@nrubsig.org 							start = cp;
387*10898Sroland.mainz@nrubsig.org 						}
3884887Schin 						nlines++;
389*10898Sroland.mainz@nrubsig.org 						nchars -= adjust;
390*10898Sroland.mainz@nrubsig.org 						adjust = 0;
391*10898Sroland.mainz@nrubsig.org 					}
392*10898Sroland.mainz@nrubsig.org 				} while (spc(c = type[*cp++]));
393*10898Sroland.mainz@nrubsig.org 				wasspace=1;
394*10898Sroland.mainz@nrubsig.org 				if(mbc(c))
395*10898Sroland.mainz@nrubsig.org 				{
396*10898Sroland.mainz@nrubsig.org 				mbyte:
397*10898Sroland.mainz@nrubsig.org 					do
398*10898Sroland.mainz@nrubsig.org 					{
399*10898Sroland.mainz@nrubsig.org 						if(c&WC_ERR)
400*10898Sroland.mainz@nrubsig.org 							goto err;
401*10898Sroland.mainz@nrubsig.org 						if(skip && (c&7))
4024887Schin 							break;
403*10898Sroland.mainz@nrubsig.org 						if(!skip)
404*10898Sroland.mainz@nrubsig.org 						{
405*10898Sroland.mainz@nrubsig.org 							if(!(c&7))
406*10898Sroland.mainz@nrubsig.org 							{
407*10898Sroland.mainz@nrubsig.org 								skip=1;
408*10898Sroland.mainz@nrubsig.org 								break;
409*10898Sroland.mainz@nrubsig.org 							}
410*10898Sroland.mainz@nrubsig.org 							skip = (c&7);
411*10898Sroland.mainz@nrubsig.org 							adjust += skip;
412*10898Sroland.mainz@nrubsig.org 							state = 0;
413*10898Sroland.mainz@nrubsig.org 							if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
414*10898Sroland.mainz@nrubsig.org 								oldc = *cp;
415*10898Sroland.mainz@nrubsig.org 							else if(xspace && cp[-1]==0xc2)
416*10898Sroland.mainz@nrubsig.org 							{
417*10898Sroland.mainz@nrubsig.org 								state = 8;
418*10898Sroland.mainz@nrubsig.org 								oldc = *cp;
419*10898Sroland.mainz@nrubsig.org 							}
420*10898Sroland.mainz@nrubsig.org 						}
421*10898Sroland.mainz@nrubsig.org 						else
422*10898Sroland.mainz@nrubsig.org 						{
423*10898Sroland.mainz@nrubsig.org 							skip--;
424*10898Sroland.mainz@nrubsig.org 							if(state && (state=chkstate(state,oldc)))
425*10898Sroland.mainz@nrubsig.org 							{
426*10898Sroland.mainz@nrubsig.org 								if(state==10)
427*10898Sroland.mainz@nrubsig.org 								{
428*10898Sroland.mainz@nrubsig.org 									if(!wasspace)
429*10898Sroland.mainz@nrubsig.org 										nwords++;
430*10898Sroland.mainz@nrubsig.org 									wasspace = 1;
431*10898Sroland.mainz@nrubsig.org 									state=0;
432*10898Sroland.mainz@nrubsig.org 									goto spaces;
433*10898Sroland.mainz@nrubsig.org 								}
434*10898Sroland.mainz@nrubsig.org 								oldc = *cp;
435*10898Sroland.mainz@nrubsig.org 							}
436*10898Sroland.mainz@nrubsig.org 						}
437*10898Sroland.mainz@nrubsig.org 					} while (mbc(c = type[*cp++]));
438*10898Sroland.mainz@nrubsig.org 					wasspace = 0;
439*10898Sroland.mainz@nrubsig.org 					if(skip)
440*10898Sroland.mainz@nrubsig.org 					{
441*10898Sroland.mainz@nrubsig.org 						if(eol(c) && (cp > endbuff))
442*10898Sroland.mainz@nrubsig.org 							goto eob;
443*10898Sroland.mainz@nrubsig.org 				err:
444*10898Sroland.mainz@nrubsig.org 						skip = 0;
445*10898Sroland.mainz@nrubsig.org 						state = 0;
446*10898Sroland.mainz@nrubsig.org 						if(eline!=nlines && !(wp->mode & WC_QUIET))
447*10898Sroland.mainz@nrubsig.org 							eline = invalid(file, nlines);
448*10898Sroland.mainz@nrubsig.org 						while(mbc(c) && ((c|WC_ERR) || (c&7)==0))
449*10898Sroland.mainz@nrubsig.org 							c=type[*cp++];
450*10898Sroland.mainz@nrubsig.org 						if(eol(c) && (cp > endbuff))
451*10898Sroland.mainz@nrubsig.org 						{
452*10898Sroland.mainz@nrubsig.org 							c = WC_MB|WC_ERR;
453*10898Sroland.mainz@nrubsig.org 							goto eob;
454*10898Sroland.mainz@nrubsig.org 						}
455*10898Sroland.mainz@nrubsig.org 						if(mbc(c))
456*10898Sroland.mainz@nrubsig.org 							goto mbyte;
457*10898Sroland.mainz@nrubsig.org 						else if(c&WC_SP)
458*10898Sroland.mainz@nrubsig.org 							goto spaces;
4594887Schin 					}
460*10898Sroland.mainz@nrubsig.org 					if(spc(c))
461*10898Sroland.mainz@nrubsig.org 					{
462*10898Sroland.mainz@nrubsig.org 						nwords++;
463*10898Sroland.mainz@nrubsig.org 						continue;
464*10898Sroland.mainz@nrubsig.org 					}
465*10898Sroland.mainz@nrubsig.org 				}
4664887Schin 				/* skip over word characters */
467*10898Sroland.mainz@nrubsig.org 				while(!(c = type[*cp++]));
468*10898Sroland.mainz@nrubsig.org 				if(mbc(c))
469*10898Sroland.mainz@nrubsig.org 					goto mbyte;
4704887Schin 				nwords++;
4714887Schin 			}
4724887Schin 		eob:
473*10898Sroland.mainz@nrubsig.org 			lineoff = cp-start;
4744887Schin 			if((cp -= 2) >= buff)
475*10898Sroland.mainz@nrubsig.org 				c = type[*cp];
4764887Schin 			else
477*10898Sroland.mainz@nrubsig.org 				c = lasttype;
478*10898Sroland.mainz@nrubsig.org 			lasttype = type[lastchar];
4794887Schin 			/* see if was in word */
4804887Schin 			if(!c && !lasttype)
4814887Schin 				nwords--;
4824887Schin 		}
483*10898Sroland.mainz@nrubsig.org 		if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
484*10898Sroland.mainz@nrubsig.org 			longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
485*10898Sroland.mainz@nrubsig.org 		wp->longest = longest;
486*10898Sroland.mainz@nrubsig.org 		if (eol(lasttype))
4874887Schin 			nlines++;
488*10898Sroland.mainz@nrubsig.org 		else if (!lasttype)
4894887Schin 			nwords++;
490*10898Sroland.mainz@nrubsig.org 		nchars -= adjust;
4914887Schin 	}
4924887Schin 	wp->chars = nchars;
4934887Schin 	wp->words = nwords;
4944887Schin 	wp->lines = nlines;
495*10898Sroland.mainz@nrubsig.org 	return 0;
4964887Schin }
497*10898Sroland.mainz@nrubsig.org 
498