14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1992-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * *
204887Schin ***********************************************************************/
214887Schin #pragma prototyped
224887Schin /*
234887Schin * David Korn
244887Schin * AT&T Bell Laboratories
254887Schin *
264887Schin * library interface for word count
274887Schin */
284887Schin
294887Schin #include <cmd.h>
304887Schin #include <wc.h>
314887Schin #include <ctype.h>
324887Schin
338462SApril.Chin@Sun.COM #if _hdr_wchar && _hdr_wctype && _lib_iswctype
344887Schin
354887Schin #include <wchar.h>
364887Schin #include <wctype.h>
3710898Sroland.mainz@nrubsig.org #include <lc.h>
384887Schin
394887Schin #else
404887Schin
414887Schin #ifndef iswspace
424887Schin #define iswspace(x) isspace(x)
434887Schin #endif
444887Schin
454887Schin #endif
464887Schin
/*
 * per-byte classification flags stored in Wc_t.type[]
 *
 * for UTF-8 lead bytes wc_init() adds the number of continuation
 * bytes that follow (1-5) to WC_MB, so the low three bits (c&7)
 * of a WC_MB entry hold that count; continuation bytes have
 * WC_MB with a zero count
 */
#define WC_SP		0x08	/* byte is white space (word separator) */
#define WC_NL		0x10	/* byte is a newline */
#define WC_MB		0x20	/* byte belongs to a multibyte sequence */
#define WC_ERR		0x40	/* byte can never appear in valid UTF-8 */

#define eol(c)		((c)&WC_NL)
#define mbc(c)		((c)&WC_MB)
#define spc(c)		((c)&WC_SP)
/* arguments are parenthesized so expression arguments expand safely */
#define mb2wc(w,p,n)	(*ast.mb_towc)(&(w),(char*)(p),(n))
5610898Sroland.mainz@nrubsig.org
wc_init(int mode)5710898Sroland.mainz@nrubsig.org Wc_t* wc_init(int mode)
584887Schin {
594887Schin register int n;
604887Schin register int w;
614887Schin Wc_t* wp;
624887Schin
6310898Sroland.mainz@nrubsig.org if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
6410898Sroland.mainz@nrubsig.org return 0;
6510898Sroland.mainz@nrubsig.org if (!mbwide())
6610898Sroland.mainz@nrubsig.org wp->mb = 0;
6710898Sroland.mainz@nrubsig.org #if _hdr_wchar && _hdr_wctype && _lib_iswctype
6810898Sroland.mainz@nrubsig.org else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8))
6910898Sroland.mainz@nrubsig.org wp->mb = 1;
7010898Sroland.mainz@nrubsig.org #endif
7110898Sroland.mainz@nrubsig.org else
7210898Sroland.mainz@nrubsig.org wp->mb = -1;
734887Schin w = mode & WC_WORDS;
7410898Sroland.mainz@nrubsig.org for (n = (1<<CHAR_BIT); --n >= 0;)
7510898Sroland.mainz@nrubsig.org wp->type[n] = (w && isspace(n)) ? WC_SP : 0;
7610898Sroland.mainz@nrubsig.org wp->type['\n'] = WC_SP|WC_NL;
7710898Sroland.mainz@nrubsig.org if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0)
7810898Sroland.mainz@nrubsig.org {
7910898Sroland.mainz@nrubsig.org for (n = 0; n < 64; n++)
8010898Sroland.mainz@nrubsig.org {
8110898Sroland.mainz@nrubsig.org wp->type[0x80+n] |= WC_MB;
8210898Sroland.mainz@nrubsig.org if (n<32)
8310898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+1;
8410898Sroland.mainz@nrubsig.org else if (n<48)
8510898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+2;
8610898Sroland.mainz@nrubsig.org else if (n<56)
8710898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+3;
8810898Sroland.mainz@nrubsig.org else if (n<60)
8910898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+4;
9010898Sroland.mainz@nrubsig.org else if (n<62)
9110898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+5;
9210898Sroland.mainz@nrubsig.org }
9310898Sroland.mainz@nrubsig.org wp->type[0xc0] = WC_MB|WC_ERR;
9410898Sroland.mainz@nrubsig.org wp->type[0xc1] = WC_MB|WC_ERR;
9510898Sroland.mainz@nrubsig.org wp->type[0xfe] = WC_MB|WC_ERR;
9610898Sroland.mainz@nrubsig.org wp->type[0xff] = WC_MB|WC_ERR;
9710898Sroland.mainz@nrubsig.org }
9810898Sroland.mainz@nrubsig.org wp->mode = mode;
9910898Sroland.mainz@nrubsig.org return wp;
10010898Sroland.mainz@nrubsig.org }
10110898Sroland.mainz@nrubsig.org
invalid(const char * file,int nlines)10210898Sroland.mainz@nrubsig.org static int invalid(const char *file, int nlines)
10310898Sroland.mainz@nrubsig.org {
10410898Sroland.mainz@nrubsig.org error_info.file = (char*)file;
10510898Sroland.mainz@nrubsig.org error_info.line = nlines;
10610898Sroland.mainz@nrubsig.org error(ERROR_SYSTEM|1, "invalid multibyte character");
10710898Sroland.mainz@nrubsig.org error_info.file = 0;
10810898Sroland.mainz@nrubsig.org error_info.line = 0;
10910898Sroland.mainz@nrubsig.org return nlines;
11010898Sroland.mainz@nrubsig.org }
11110898Sroland.mainz@nrubsig.org
/*
 * handle utf space characters
 *
 * one step of a small state machine that recognizes multibyte
 * UTF-8 white space; <state> is the current state and <c> is the
 * next byte of the sequence (for state 8 the caller passes the byte
 * that follows a 0xc2 lead byte)
 *
 * wc_count() enters with state 1, 2 or 3 (the low two bits of a
 * 0xe1, 0xe2 or 0xe3 lead byte) or with state 8
 *
 *	1	0xe1 seen	0x9a -> 4
 *	2	0xe2 seen	0x80 -> 6, 0x81 -> 7
 *	3	0xe3 seen	0x80 -> 5
 *	4	0xe1 0x9a seen	0x80 is U+1680
 *	5	0xe3 0x80 seen	0x80 is U+3000
 *	6	0xe2 0x80 seen	U+2000..U+200B, U+2028, U+2029, U+202F
 *	7	0xe2 0x81 seen	0x9f is U+205F
 *	8	0xc2 seen	iswspace(<c>) decides
 *
 * returns 10 when a space character has been recognized, 0 when the
 * sequence is not a space, otherwise the next state
 */

static int chkstate(int state, register unsigned int c)
{
	switch(state)
	{
	    case 1:
		state = (c==0x9a?4:0);
		break;
	    case 2:
		state = ((c==0x80||c==0x81)?6+(c&1):0);
		break;
	    case 3:
		state = (c==0x80?5:0);
		break;
	    case 4:
		state = (c==0x80?10:0);
		break;
	    case 5:
		state = (c==0x80?10:0);
		break;
	    case 6:
		state = 0;
		/*
		 * U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR
		 * end in 0xa8/0xa9; 0xa0/0xa1 would be U+2020/U+2021,
		 * the dagger characters, which are not white space
		 */
		if(c==0xa8 || c==0xa9)
			return(10);
		else if((c&0xf0)== 0x80)
		{
			/* U+2007 FIGURE SPACE is a space only if the locale says so */
			if((c&=0xf)==7)
				return(iswspace(0x2007)?10:0);
			if(c<=0xb)
				return(10);
		}
		else if(c==0xaf && iswspace(0x202f))
			return(10);
		break;
	    case 7:
		state = (c==0x9f?10:0);
		break;
	    case 8:
		return (iswspace(c)?10:0);
	}
	return state;
}
1574887Schin
1584887Schin /*
1594887Schin * compute the line, word, and character count for file <fd>
1604887Schin */
16110898Sroland.mainz@nrubsig.org
wc_count(Wc_t * wp,Sfio_t * fd,const char * file)1624887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
1634887Schin {
16410898Sroland.mainz@nrubsig.org register char* type = wp->type;
16510898Sroland.mainz@nrubsig.org register unsigned char* cp;
166*12068SRoger.Faulkner@Oracle.COM register Sfoff_t nbytes;
1674887Schin register Sfoff_t nchars;
1684887Schin register Sfoff_t nwords;
1694887Schin register Sfoff_t nlines;
17010898Sroland.mainz@nrubsig.org register Sfoff_t eline = -1;
17110898Sroland.mainz@nrubsig.org register Sfoff_t longest = 0;
1724887Schin register ssize_t c;
17310898Sroland.mainz@nrubsig.org register unsigned char* endbuff;
17410898Sroland.mainz@nrubsig.org register int lasttype = WC_SP;
1754887Schin unsigned int lastchar;
17610898Sroland.mainz@nrubsig.org ssize_t n;
17710898Sroland.mainz@nrubsig.org ssize_t o;
17810898Sroland.mainz@nrubsig.org unsigned char* buff;
1794887Schin wchar_t x;
18010898Sroland.mainz@nrubsig.org unsigned char side[32];
1814887Schin
1824887Schin sfset(fd,SF_WRITE,1);
183*12068SRoger.Faulkner@Oracle.COM nlines = nwords = nchars = nbytes = 0;
1844887Schin wp->longest = 0;
18510898Sroland.mainz@nrubsig.org if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS)))
1864887Schin {
1874887Schin cp = buff = endbuff = 0;
1884887Schin for (;;)
1894887Schin {
190*12068SRoger.Faulkner@Oracle.COM if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0)
1914887Schin {
19210898Sroland.mainz@nrubsig.org if ((o = endbuff-cp) < sizeof(side))
19310898Sroland.mainz@nrubsig.org {
19410898Sroland.mainz@nrubsig.org if (buff)
19510898Sroland.mainz@nrubsig.org {
19610898Sroland.mainz@nrubsig.org if (o)
19710898Sroland.mainz@nrubsig.org memcpy(side, cp, o);
19810898Sroland.mainz@nrubsig.org mbinit();
19910898Sroland.mainz@nrubsig.org }
20010898Sroland.mainz@nrubsig.org else
20110898Sroland.mainz@nrubsig.org o = 0;
20210898Sroland.mainz@nrubsig.org cp = side + o;
20310898Sroland.mainz@nrubsig.org if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0)
20410898Sroland.mainz@nrubsig.org {
20510898Sroland.mainz@nrubsig.org if ((nchars - longest) > wp->longest)
20610898Sroland.mainz@nrubsig.org wp->longest = nchars - longest;
20710898Sroland.mainz@nrubsig.org break;
20810898Sroland.mainz@nrubsig.org }
209*12068SRoger.Faulkner@Oracle.COM nbytes += n;
21010898Sroland.mainz@nrubsig.org if ((c = sizeof(side) - o) > n)
21110898Sroland.mainz@nrubsig.org c = n;
21210898Sroland.mainz@nrubsig.org if (c)
21310898Sroland.mainz@nrubsig.org memcpy(cp, buff, c);
21410898Sroland.mainz@nrubsig.org endbuff = buff + n;
21510898Sroland.mainz@nrubsig.org cp = side;
21610898Sroland.mainz@nrubsig.org x = mbchar(cp);
21710898Sroland.mainz@nrubsig.org if ((cp-side) < o)
21810898Sroland.mainz@nrubsig.org {
21910898Sroland.mainz@nrubsig.org cp = buff;
22010898Sroland.mainz@nrubsig.org nchars += (cp-side) - 1;
22110898Sroland.mainz@nrubsig.org }
22210898Sroland.mainz@nrubsig.org else
22310898Sroland.mainz@nrubsig.org cp = buff + (cp-side) - o;
22410898Sroland.mainz@nrubsig.org }
22510898Sroland.mainz@nrubsig.org else
22610898Sroland.mainz@nrubsig.org {
22710898Sroland.mainz@nrubsig.org cp++;
22810898Sroland.mainz@nrubsig.org x = -1;
22910898Sroland.mainz@nrubsig.org }
23010898Sroland.mainz@nrubsig.org if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET))
23110898Sroland.mainz@nrubsig.org eline = invalid(file, nlines);
2324887Schin }
23310898Sroland.mainz@nrubsig.org else
23410898Sroland.mainz@nrubsig.org cp += n ? n : 1;
23510898Sroland.mainz@nrubsig.org if (x == '\n')
2364887Schin {
2374887Schin if ((nchars - longest) > wp->longest)
2384887Schin wp->longest = nchars - longest;
23910898Sroland.mainz@nrubsig.org longest = nchars + 1;
2404887Schin nlines++;
2414887Schin lasttype = 1;
2424887Schin }
2434887Schin else if (iswspace(x))
2444887Schin lasttype = 1;
2454887Schin else if (lasttype)
2464887Schin {
2474887Schin lasttype = 0;
2484887Schin nwords++;
2494887Schin }
25010898Sroland.mainz@nrubsig.org nchars++;
25110898Sroland.mainz@nrubsig.org }
252*12068SRoger.Faulkner@Oracle.COM if (!(wp->mode & WC_MBYTE))
253*12068SRoger.Faulkner@Oracle.COM nchars = nbytes;
25410898Sroland.mainz@nrubsig.org }
25510898Sroland.mainz@nrubsig.org else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
25610898Sroland.mainz@nrubsig.org {
25710898Sroland.mainz@nrubsig.org if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST)))
25810898Sroland.mainz@nrubsig.org {
25910898Sroland.mainz@nrubsig.org while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
26010898Sroland.mainz@nrubsig.org {
26110898Sroland.mainz@nrubsig.org nchars += c;
26210898Sroland.mainz@nrubsig.org endbuff = cp + c;
26310898Sroland.mainz@nrubsig.org if (*--endbuff == '\n')
26410898Sroland.mainz@nrubsig.org nlines++;
26510898Sroland.mainz@nrubsig.org else
26610898Sroland.mainz@nrubsig.org *endbuff = '\n';
26710898Sroland.mainz@nrubsig.org for (;;)
26810898Sroland.mainz@nrubsig.org if (*cp++ == '\n')
26910898Sroland.mainz@nrubsig.org {
27010898Sroland.mainz@nrubsig.org if (cp > endbuff)
27110898Sroland.mainz@nrubsig.org break;
27210898Sroland.mainz@nrubsig.org nlines++;
27310898Sroland.mainz@nrubsig.org }
27410898Sroland.mainz@nrubsig.org }
27510898Sroland.mainz@nrubsig.org }
27610898Sroland.mainz@nrubsig.org else
27710898Sroland.mainz@nrubsig.org {
27810898Sroland.mainz@nrubsig.org while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
27910898Sroland.mainz@nrubsig.org {
28010898Sroland.mainz@nrubsig.org nchars += c;
28110898Sroland.mainz@nrubsig.org /* check to see whether first character terminates word */
28210898Sroland.mainz@nrubsig.org if (c==1)
28310898Sroland.mainz@nrubsig.org {
28410898Sroland.mainz@nrubsig.org if (eol(lasttype))
28510898Sroland.mainz@nrubsig.org nlines++;
28610898Sroland.mainz@nrubsig.org if ((c = type[*cp]) && !lasttype)
28710898Sroland.mainz@nrubsig.org nwords++;
28810898Sroland.mainz@nrubsig.org lasttype = c;
28910898Sroland.mainz@nrubsig.org continue;
29010898Sroland.mainz@nrubsig.org }
29110898Sroland.mainz@nrubsig.org if (!lasttype && type[*cp])
29210898Sroland.mainz@nrubsig.org nwords++;
29310898Sroland.mainz@nrubsig.org lastchar = cp[--c];
29410898Sroland.mainz@nrubsig.org *(endbuff = cp+c) = '\n';
29510898Sroland.mainz@nrubsig.org c = lasttype;
29610898Sroland.mainz@nrubsig.org /* process each buffer */
29710898Sroland.mainz@nrubsig.org for (;;)
29810898Sroland.mainz@nrubsig.org {
29910898Sroland.mainz@nrubsig.org /* process spaces and new-lines */
30010898Sroland.mainz@nrubsig.org do
30110898Sroland.mainz@nrubsig.org {
30210898Sroland.mainz@nrubsig.org if (eol(c))
30310898Sroland.mainz@nrubsig.org for (;;)
30410898Sroland.mainz@nrubsig.org {
30510898Sroland.mainz@nrubsig.org /* check for end of buffer */
30610898Sroland.mainz@nrubsig.org if (cp > endbuff)
30710898Sroland.mainz@nrubsig.org goto beob;
30810898Sroland.mainz@nrubsig.org nlines++;
30910898Sroland.mainz@nrubsig.org if (*cp != '\n')
31010898Sroland.mainz@nrubsig.org break;
31110898Sroland.mainz@nrubsig.org cp++;
31210898Sroland.mainz@nrubsig.org }
31310898Sroland.mainz@nrubsig.org } while (c = type[*cp++]);
31410898Sroland.mainz@nrubsig.org /* skip over word characters */
31510898Sroland.mainz@nrubsig.org while (!(c = type[*cp++]));
31610898Sroland.mainz@nrubsig.org nwords++;
31710898Sroland.mainz@nrubsig.org }
31810898Sroland.mainz@nrubsig.org beob:
31910898Sroland.mainz@nrubsig.org if ((cp -= 2) >= buff)
32010898Sroland.mainz@nrubsig.org c = type[*cp];
32110898Sroland.mainz@nrubsig.org else
32210898Sroland.mainz@nrubsig.org c = lasttype;
32310898Sroland.mainz@nrubsig.org lasttype = type[lastchar];
32410898Sroland.mainz@nrubsig.org /* see if was in word */
32510898Sroland.mainz@nrubsig.org if (!c && !lasttype)
32610898Sroland.mainz@nrubsig.org nwords--;
32710898Sroland.mainz@nrubsig.org }
32810898Sroland.mainz@nrubsig.org if (eol(lasttype))
32910898Sroland.mainz@nrubsig.org nlines++;
33010898Sroland.mainz@nrubsig.org else if (!lasttype)
33110898Sroland.mainz@nrubsig.org nwords++;
3324887Schin }
3334887Schin }
3344887Schin else
3354887Schin {
33610898Sroland.mainz@nrubsig.org int lineoff=0;
33710898Sroland.mainz@nrubsig.org int skip=0;
33810898Sroland.mainz@nrubsig.org int adjust=0;
33910898Sroland.mainz@nrubsig.org int state=0;
34010898Sroland.mainz@nrubsig.org int oldc;
34110898Sroland.mainz@nrubsig.org int xspace;
34210898Sroland.mainz@nrubsig.org int wasspace = 1;
34310898Sroland.mainz@nrubsig.org unsigned char* start;
34410898Sroland.mainz@nrubsig.org
34510898Sroland.mainz@nrubsig.org lastchar = 0;
34610898Sroland.mainz@nrubsig.org start = (endbuff = side) + 1;
34710898Sroland.mainz@nrubsig.org xspace = iswspace(0xa0) || iswspace(0x85);
34810898Sroland.mainz@nrubsig.org while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0)
3494887Schin {
350*12068SRoger.Faulkner@Oracle.COM nbytes += c;
3514887Schin nchars += c;
35210898Sroland.mainz@nrubsig.org start = cp-lineoff;
3534887Schin /* check to see whether first character terminates word */
3544887Schin if(c==1)
3554887Schin {
35610898Sroland.mainz@nrubsig.org if(eol(lasttype))
3574887Schin nlines++;
35810898Sroland.mainz@nrubsig.org if((c = type[*cp]) && !lasttype)
3594887Schin nwords++;
3604887Schin lasttype = c;
36110898Sroland.mainz@nrubsig.org endbuff = start;
3624887Schin continue;
3634887Schin }
36410898Sroland.mainz@nrubsig.org lastchar = cp[--c];
36510898Sroland.mainz@nrubsig.org endbuff = cp+c;
36610898Sroland.mainz@nrubsig.org cp[c] = '\n';
36710898Sroland.mainz@nrubsig.org if(mbc(lasttype))
36810898Sroland.mainz@nrubsig.org {
36910898Sroland.mainz@nrubsig.org c = lasttype;
37010898Sroland.mainz@nrubsig.org goto mbyte;
37110898Sroland.mainz@nrubsig.org }
37210898Sroland.mainz@nrubsig.org if(!lasttype && spc(type[*cp]))
3734887Schin nwords++;
3744887Schin c = lasttype;
3754887Schin /* process each buffer */
3764887Schin for (;;)
3774887Schin {
3784887Schin /* process spaces and new-lines */
37910898Sroland.mainz@nrubsig.org spaces:
38010898Sroland.mainz@nrubsig.org do
3814887Schin {
38210898Sroland.mainz@nrubsig.org if (eol(c))
3834887Schin {
3844887Schin /* check for end of buffer */
3854887Schin if (cp > endbuff)
3864887Schin goto eob;
38710898Sroland.mainz@nrubsig.org if(wp->mode&WC_LONGEST)
38810898Sroland.mainz@nrubsig.org {
38910898Sroland.mainz@nrubsig.org if((cp-start)-adjust > longest)
39010898Sroland.mainz@nrubsig.org longest = (cp-start)-adjust-1;
39110898Sroland.mainz@nrubsig.org start = cp;
39210898Sroland.mainz@nrubsig.org }
3934887Schin nlines++;
39410898Sroland.mainz@nrubsig.org nchars -= adjust;
39510898Sroland.mainz@nrubsig.org adjust = 0;
39610898Sroland.mainz@nrubsig.org }
39710898Sroland.mainz@nrubsig.org } while (spc(c = type[*cp++]));
39810898Sroland.mainz@nrubsig.org wasspace=1;
39910898Sroland.mainz@nrubsig.org if(mbc(c))
40010898Sroland.mainz@nrubsig.org {
40110898Sroland.mainz@nrubsig.org mbyte:
40210898Sroland.mainz@nrubsig.org do
40310898Sroland.mainz@nrubsig.org {
40410898Sroland.mainz@nrubsig.org if(c&WC_ERR)
40510898Sroland.mainz@nrubsig.org goto err;
40610898Sroland.mainz@nrubsig.org if(skip && (c&7))
4074887Schin break;
40810898Sroland.mainz@nrubsig.org if(!skip)
40910898Sroland.mainz@nrubsig.org {
41010898Sroland.mainz@nrubsig.org if(!(c&7))
41110898Sroland.mainz@nrubsig.org {
41210898Sroland.mainz@nrubsig.org skip=1;
41310898Sroland.mainz@nrubsig.org break;
41410898Sroland.mainz@nrubsig.org }
41510898Sroland.mainz@nrubsig.org skip = (c&7);
41610898Sroland.mainz@nrubsig.org adjust += skip;
41710898Sroland.mainz@nrubsig.org state = 0;
41810898Sroland.mainz@nrubsig.org if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3)))
41910898Sroland.mainz@nrubsig.org oldc = *cp;
42010898Sroland.mainz@nrubsig.org else if(xspace && cp[-1]==0xc2)
42110898Sroland.mainz@nrubsig.org {
42210898Sroland.mainz@nrubsig.org state = 8;
42310898Sroland.mainz@nrubsig.org oldc = *cp;
42410898Sroland.mainz@nrubsig.org }
42510898Sroland.mainz@nrubsig.org }
42610898Sroland.mainz@nrubsig.org else
42710898Sroland.mainz@nrubsig.org {
42810898Sroland.mainz@nrubsig.org skip--;
42910898Sroland.mainz@nrubsig.org if(state && (state=chkstate(state,oldc)))
43010898Sroland.mainz@nrubsig.org {
43110898Sroland.mainz@nrubsig.org if(state==10)
43210898Sroland.mainz@nrubsig.org {
43310898Sroland.mainz@nrubsig.org if(!wasspace)
43410898Sroland.mainz@nrubsig.org nwords++;
43510898Sroland.mainz@nrubsig.org wasspace = 1;
43610898Sroland.mainz@nrubsig.org state=0;
43710898Sroland.mainz@nrubsig.org goto spaces;
43810898Sroland.mainz@nrubsig.org }
43910898Sroland.mainz@nrubsig.org oldc = *cp;
44010898Sroland.mainz@nrubsig.org }
44110898Sroland.mainz@nrubsig.org }
44210898Sroland.mainz@nrubsig.org } while (mbc(c = type[*cp++]));
44310898Sroland.mainz@nrubsig.org wasspace = 0;
44410898Sroland.mainz@nrubsig.org if(skip)
44510898Sroland.mainz@nrubsig.org {
44610898Sroland.mainz@nrubsig.org if(eol(c) && (cp > endbuff))
44710898Sroland.mainz@nrubsig.org goto eob;
44810898Sroland.mainz@nrubsig.org err:
44910898Sroland.mainz@nrubsig.org skip = 0;
45010898Sroland.mainz@nrubsig.org state = 0;
45110898Sroland.mainz@nrubsig.org if(eline!=nlines && !(wp->mode & WC_QUIET))
45210898Sroland.mainz@nrubsig.org eline = invalid(file, nlines);
45310898Sroland.mainz@nrubsig.org while(mbc(c) && ((c|WC_ERR) || (c&7)==0))
45410898Sroland.mainz@nrubsig.org c=type[*cp++];
45510898Sroland.mainz@nrubsig.org if(eol(c) && (cp > endbuff))
45610898Sroland.mainz@nrubsig.org {
45710898Sroland.mainz@nrubsig.org c = WC_MB|WC_ERR;
45810898Sroland.mainz@nrubsig.org goto eob;
45910898Sroland.mainz@nrubsig.org }
46010898Sroland.mainz@nrubsig.org if(mbc(c))
46110898Sroland.mainz@nrubsig.org goto mbyte;
46210898Sroland.mainz@nrubsig.org else if(c&WC_SP)
46310898Sroland.mainz@nrubsig.org goto spaces;
4644887Schin }
46510898Sroland.mainz@nrubsig.org if(spc(c))
46610898Sroland.mainz@nrubsig.org {
46710898Sroland.mainz@nrubsig.org nwords++;
46810898Sroland.mainz@nrubsig.org continue;
46910898Sroland.mainz@nrubsig.org }
47010898Sroland.mainz@nrubsig.org }
4714887Schin /* skip over word characters */
47210898Sroland.mainz@nrubsig.org while(!(c = type[*cp++]));
47310898Sroland.mainz@nrubsig.org if(mbc(c))
47410898Sroland.mainz@nrubsig.org goto mbyte;
4754887Schin nwords++;
4764887Schin }
4774887Schin eob:
47810898Sroland.mainz@nrubsig.org lineoff = cp-start;
4794887Schin if((cp -= 2) >= buff)
48010898Sroland.mainz@nrubsig.org c = type[*cp];
4814887Schin else
48210898Sroland.mainz@nrubsig.org c = lasttype;
48310898Sroland.mainz@nrubsig.org lasttype = type[lastchar];
4844887Schin /* see if was in word */
4854887Schin if(!c && !lasttype)
4864887Schin nwords--;
4874887Schin }
48810898Sroland.mainz@nrubsig.org if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest)
48910898Sroland.mainz@nrubsig.org longest = (endbuff + 1 - start) - adjust - (lastchar == '\n');
49010898Sroland.mainz@nrubsig.org wp->longest = longest;
49110898Sroland.mainz@nrubsig.org if (eol(lasttype))
4924887Schin nlines++;
49310898Sroland.mainz@nrubsig.org else if (!lasttype)
4944887Schin nwords++;
495*12068SRoger.Faulkner@Oracle.COM if (wp->mode & WC_MBYTE)
496*12068SRoger.Faulkner@Oracle.COM nchars -= adjust;
497*12068SRoger.Faulkner@Oracle.COM else
498*12068SRoger.Faulkner@Oracle.COM nchars = nbytes;
4994887Schin }
5004887Schin wp->chars = nchars;
5014887Schin wp->words = nwords;
5024887Schin wp->lines = nlines;
50310898Sroland.mainz@nrubsig.org return 0;
5044887Schin }
50510898Sroland.mainz@nrubsig.org
506