14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1992-2010 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 78462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * * 204887Schin ***********************************************************************/ 214887Schin #pragma prototyped 224887Schin /* 234887Schin * David Korn 244887Schin * AT&T Bell Laboratories 254887Schin * 264887Schin * library interface for word count 274887Schin */ 284887Schin 294887Schin #include <cmd.h> 304887Schin #include <wc.h> 314887Schin #include <ctype.h> 324887Schin 338462SApril.Chin@Sun.COM #if _hdr_wchar && _hdr_wctype && _lib_iswctype 344887Schin 354887Schin #include <wchar.h> 364887Schin #include <wctype.h> 3710898Sroland.mainz@nrubsig.org #include <lc.h> 384887Schin 394887Schin #else 404887Schin 414887Schin #ifndef iswspace 424887Schin #define iswspace(x) isspace(x) 434887Schin #endif 444887Schin 454887Schin #endif 464887Schin 4710898Sroland.mainz@nrubsig.org #define WC_SP 0x08 4810898Sroland.mainz@nrubsig.org #define WC_NL 0x10 4910898Sroland.mainz@nrubsig.org #define WC_MB 0x20 5010898Sroland.mainz@nrubsig.org #define WC_ERR 0x40 514887Schin 5210898Sroland.mainz@nrubsig.org #define eol(c) ((c)&WC_NL) 5310898Sroland.mainz@nrubsig.org #define mbc(c) ((c)&WC_MB) 5410898Sroland.mainz@nrubsig.org #define spc(c) ((c)&WC_SP) 55*12068SRoger.Faulkner@Oracle.COM #define mb2wc(w,p,n) (*ast.mb_towc)(&w,(char*)p,n) 5610898Sroland.mainz@nrubsig.org 5710898Sroland.mainz@nrubsig.org Wc_t* wc_init(int mode) 584887Schin { 594887Schin register int n; 604887Schin register int w; 614887Schin Wc_t* wp; 624887Schin 6310898Sroland.mainz@nrubsig.org if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 6410898Sroland.mainz@nrubsig.org return 0; 6510898Sroland.mainz@nrubsig.org if (!mbwide()) 6610898Sroland.mainz@nrubsig.org wp->mb = 0; 6710898Sroland.mainz@nrubsig.org #if _hdr_wchar && _hdr_wctype && _lib_iswctype 6810898Sroland.mainz@nrubsig.org else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8)) 6910898Sroland.mainz@nrubsig.org wp->mb = 1; 7010898Sroland.mainz@nrubsig.org #endif 7110898Sroland.mainz@nrubsig.org else 7210898Sroland.mainz@nrubsig.org wp->mb = -1; 734887Schin w = mode & WC_WORDS; 7410898Sroland.mainz@nrubsig.org for (n = (1<<CHAR_BIT); --n >= 0;) 7510898Sroland.mainz@nrubsig.org wp->type[n] = (w && isspace(n)) ? WC_SP : 0; 7610898Sroland.mainz@nrubsig.org wp->type['\n'] = WC_SP|WC_NL; 7710898Sroland.mainz@nrubsig.org if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0) 7810898Sroland.mainz@nrubsig.org { 7910898Sroland.mainz@nrubsig.org for (n = 0; n < 64; n++) 8010898Sroland.mainz@nrubsig.org { 8110898Sroland.mainz@nrubsig.org wp->type[0x80+n] |= WC_MB; 8210898Sroland.mainz@nrubsig.org if (n<32) 8310898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+1; 8410898Sroland.mainz@nrubsig.org else if (n<48) 8510898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+2; 8610898Sroland.mainz@nrubsig.org else if (n<56) 8710898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+3; 8810898Sroland.mainz@nrubsig.org else if (n<60) 8910898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+4; 9010898Sroland.mainz@nrubsig.org else if (n<62) 9110898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+5; 9210898Sroland.mainz@nrubsig.org } 9310898Sroland.mainz@nrubsig.org wp->type[0xc0] = WC_MB|WC_ERR; 9410898Sroland.mainz@nrubsig.org wp->type[0xc1] = WC_MB|WC_ERR; 9510898Sroland.mainz@nrubsig.org wp->type[0xfe] = WC_MB|WC_ERR; 9610898Sroland.mainz@nrubsig.org wp->type[0xff] = WC_MB|WC_ERR; 9710898Sroland.mainz@nrubsig.org } 9810898Sroland.mainz@nrubsig.org wp->mode = mode; 9910898Sroland.mainz@nrubsig.org return wp; 10010898Sroland.mainz@nrubsig.org } 10110898Sroland.mainz@nrubsig.org 10210898Sroland.mainz@nrubsig.org static int invalid(const char *file, int nlines) 10310898Sroland.mainz@nrubsig.org { 10410898Sroland.mainz@nrubsig.org error_info.file = (char*)file; 10510898Sroland.mainz@nrubsig.org error_info.line = nlines; 10610898Sroland.mainz@nrubsig.org error(ERROR_SYSTEM|1, "invalid multibyte character"); 10710898Sroland.mainz@nrubsig.org error_info.file = 0; 10810898Sroland.mainz@nrubsig.org error_info.line = 0; 10910898Sroland.mainz@nrubsig.org return nlines; 11010898Sroland.mainz@nrubsig.org } 11110898Sroland.mainz@nrubsig.org 11210898Sroland.mainz@nrubsig.org /* 11310898Sroland.mainz@nrubsig.org * handle utf space characters 11410898Sroland.mainz@nrubsig.org */ 11510898Sroland.mainz@nrubsig.org 11610898Sroland.mainz@nrubsig.org static int chkstate(int state, register unsigned int c) 11710898Sroland.mainz@nrubsig.org { 11810898Sroland.mainz@nrubsig.org switch(state) 11910898Sroland.mainz@nrubsig.org { 12010898Sroland.mainz@nrubsig.org case 1: 12110898Sroland.mainz@nrubsig.org state = (c==0x9a?4:0); 12210898Sroland.mainz@nrubsig.org break; 12310898Sroland.mainz@nrubsig.org case 2: 12410898Sroland.mainz@nrubsig.org state = ((c==0x80||c==0x81)?6+(c&1):0); 12510898Sroland.mainz@nrubsig.org break; 12610898Sroland.mainz@nrubsig.org case 3: 12710898Sroland.mainz@nrubsig.org state = (c==0x80?5:0); 12810898Sroland.mainz@nrubsig.org break; 12910898Sroland.mainz@nrubsig.org case 4: 13010898Sroland.mainz@nrubsig.org state = (c==0x80?10:0); 13110898Sroland.mainz@nrubsig.org break; 13210898Sroland.mainz@nrubsig.org case 5: 13310898Sroland.mainz@nrubsig.org state = (c==0x80?10:0); 13410898Sroland.mainz@nrubsig.org break; 13510898Sroland.mainz@nrubsig.org case 6: 13610898Sroland.mainz@nrubsig.org state = 0; 13710898Sroland.mainz@nrubsig.org if(c==0xa0 || c==0xa1) 13810898Sroland.mainz@nrubsig.org return(10); 13910898Sroland.mainz@nrubsig.org else if((c&0xf0)== 0x80) 14010898Sroland.mainz@nrubsig.org { 14110898Sroland.mainz@nrubsig.org if((c&=0xf)==7) 14210898Sroland.mainz@nrubsig.org return(iswspace(0x2007)?10:0); 14310898Sroland.mainz@nrubsig.org if(c<=0xb) 14410898Sroland.mainz@nrubsig.org return(10); 14510898Sroland.mainz@nrubsig.org } 14610898Sroland.mainz@nrubsig.org else if(c==0xaf && iswspace(0x202f)) 14710898Sroland.mainz@nrubsig.org return(10); 14810898Sroland.mainz@nrubsig.org break; 14910898Sroland.mainz@nrubsig.org case 7: 15010898Sroland.mainz@nrubsig.org state = (c==0x9f?10:0); 15110898Sroland.mainz@nrubsig.org break; 15210898Sroland.mainz@nrubsig.org case 8: 15310898Sroland.mainz@nrubsig.org return (iswspace(c)?10:0); 15410898Sroland.mainz@nrubsig.org } 15510898Sroland.mainz@nrubsig.org return state; 1564887Schin } 1574887Schin 1584887Schin /* 1594887Schin * compute the line, word, and character count for file <fd> 1604887Schin */ 16110898Sroland.mainz@nrubsig.org 1624887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 1634887Schin { 16410898Sroland.mainz@nrubsig.org register char* type = wp->type; 16510898Sroland.mainz@nrubsig.org register unsigned char* cp; 166*12068SRoger.Faulkner@Oracle.COM register Sfoff_t nbytes; 1674887Schin register Sfoff_t nchars; 1684887Schin register Sfoff_t nwords; 1694887Schin register Sfoff_t nlines; 17010898Sroland.mainz@nrubsig.org register Sfoff_t eline = -1; 17110898Sroland.mainz@nrubsig.org register Sfoff_t longest = 0; 1724887Schin register ssize_t c; 17310898Sroland.mainz@nrubsig.org register unsigned char* endbuff; 17410898Sroland.mainz@nrubsig.org register int lasttype = WC_SP; 1754887Schin unsigned int lastchar; 17610898Sroland.mainz@nrubsig.org ssize_t n; 17710898Sroland.mainz@nrubsig.org ssize_t o; 17810898Sroland.mainz@nrubsig.org unsigned char* buff; 1794887Schin wchar_t x; 18010898Sroland.mainz@nrubsig.org unsigned char side[32]; 1814887Schin 1824887Schin sfset(fd,SF_WRITE,1); 183*12068SRoger.Faulkner@Oracle.COM nlines = nwords = nchars = nbytes = 0; 1844887Schin wp->longest = 0; 18510898Sroland.mainz@nrubsig.org if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS))) 1864887Schin { 1874887Schin cp = buff = endbuff = 0; 1884887Schin for (;;) 1894887Schin { 190*12068SRoger.Faulkner@Oracle.COM if (cp >= endbuff || (n = mb2wc(x, cp, endbuff-cp)) < 0) 1914887Schin { 19210898Sroland.mainz@nrubsig.org if ((o = endbuff-cp) < sizeof(side)) 19310898Sroland.mainz@nrubsig.org { 19410898Sroland.mainz@nrubsig.org if (buff) 19510898Sroland.mainz@nrubsig.org { 19610898Sroland.mainz@nrubsig.org if (o) 19710898Sroland.mainz@nrubsig.org memcpy(side, cp, o); 19810898Sroland.mainz@nrubsig.org mbinit(); 19910898Sroland.mainz@nrubsig.org } 20010898Sroland.mainz@nrubsig.org else 20110898Sroland.mainz@nrubsig.org o = 0; 20210898Sroland.mainz@nrubsig.org cp = side + o; 20310898Sroland.mainz@nrubsig.org if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0) 20410898Sroland.mainz@nrubsig.org { 20510898Sroland.mainz@nrubsig.org if ((nchars - longest) > wp->longest) 20610898Sroland.mainz@nrubsig.org wp->longest = nchars - longest; 20710898Sroland.mainz@nrubsig.org break; 20810898Sroland.mainz@nrubsig.org } 209*12068SRoger.Faulkner@Oracle.COM nbytes += n; 21010898Sroland.mainz@nrubsig.org if ((c = sizeof(side) - o) > n) 21110898Sroland.mainz@nrubsig.org c = n; 21210898Sroland.mainz@nrubsig.org if (c) 21310898Sroland.mainz@nrubsig.org memcpy(cp, buff, c); 21410898Sroland.mainz@nrubsig.org endbuff = buff + n; 21510898Sroland.mainz@nrubsig.org cp = side; 21610898Sroland.mainz@nrubsig.org x = mbchar(cp); 21710898Sroland.mainz@nrubsig.org if ((cp-side) < o) 21810898Sroland.mainz@nrubsig.org { 21910898Sroland.mainz@nrubsig.org cp = buff; 22010898Sroland.mainz@nrubsig.org nchars += (cp-side) - 1; 22110898Sroland.mainz@nrubsig.org } 22210898Sroland.mainz@nrubsig.org else 22310898Sroland.mainz@nrubsig.org cp = buff + (cp-side) - o; 22410898Sroland.mainz@nrubsig.org } 22510898Sroland.mainz@nrubsig.org else 22610898Sroland.mainz@nrubsig.org { 22710898Sroland.mainz@nrubsig.org cp++; 22810898Sroland.mainz@nrubsig.org x = -1; 22910898Sroland.mainz@nrubsig.org } 23010898Sroland.mainz@nrubsig.org if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET)) 23110898Sroland.mainz@nrubsig.org eline = invalid(file, nlines); 2324887Schin } 23310898Sroland.mainz@nrubsig.org else 23410898Sroland.mainz@nrubsig.org cp += n ? n : 1; 23510898Sroland.mainz@nrubsig.org if (x == '\n') 2364887Schin { 2374887Schin if ((nchars - longest) > wp->longest) 2384887Schin wp->longest = nchars - longest; 23910898Sroland.mainz@nrubsig.org longest = nchars + 1; 2404887Schin nlines++; 2414887Schin lasttype = 1; 2424887Schin } 2434887Schin else if (iswspace(x)) 2444887Schin lasttype = 1; 2454887Schin else if (lasttype) 2464887Schin { 2474887Schin lasttype = 0; 2484887Schin nwords++; 2494887Schin } 25010898Sroland.mainz@nrubsig.org nchars++; 25110898Sroland.mainz@nrubsig.org } 252*12068SRoger.Faulkner@Oracle.COM if (!(wp->mode & WC_MBYTE)) 253*12068SRoger.Faulkner@Oracle.COM nchars = nbytes; 25410898Sroland.mainz@nrubsig.org } 25510898Sroland.mainz@nrubsig.org else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))) 25610898Sroland.mainz@nrubsig.org { 25710898Sroland.mainz@nrubsig.org if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))) 25810898Sroland.mainz@nrubsig.org { 25910898Sroland.mainz@nrubsig.org while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 26010898Sroland.mainz@nrubsig.org { 26110898Sroland.mainz@nrubsig.org nchars += c; 26210898Sroland.mainz@nrubsig.org endbuff = cp + c; 26310898Sroland.mainz@nrubsig.org if (*--endbuff == '\n') 26410898Sroland.mainz@nrubsig.org nlines++; 26510898Sroland.mainz@nrubsig.org else 26610898Sroland.mainz@nrubsig.org *endbuff = '\n'; 26710898Sroland.mainz@nrubsig.org for (;;) 26810898Sroland.mainz@nrubsig.org if (*cp++ == '\n') 26910898Sroland.mainz@nrubsig.org { 27010898Sroland.mainz@nrubsig.org if (cp > endbuff) 27110898Sroland.mainz@nrubsig.org break; 27210898Sroland.mainz@nrubsig.org nlines++; 27310898Sroland.mainz@nrubsig.org } 27410898Sroland.mainz@nrubsig.org } 27510898Sroland.mainz@nrubsig.org } 27610898Sroland.mainz@nrubsig.org else 27710898Sroland.mainz@nrubsig.org { 27810898Sroland.mainz@nrubsig.org while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 27910898Sroland.mainz@nrubsig.org { 28010898Sroland.mainz@nrubsig.org nchars += c; 28110898Sroland.mainz@nrubsig.org /* check to see whether first character terminates word */ 28210898Sroland.mainz@nrubsig.org if (c==1) 28310898Sroland.mainz@nrubsig.org { 28410898Sroland.mainz@nrubsig.org if (eol(lasttype)) 28510898Sroland.mainz@nrubsig.org nlines++; 28610898Sroland.mainz@nrubsig.org if ((c = type[*cp]) && !lasttype) 28710898Sroland.mainz@nrubsig.org nwords++; 28810898Sroland.mainz@nrubsig.org lasttype = c; 28910898Sroland.mainz@nrubsig.org continue; 29010898Sroland.mainz@nrubsig.org } 29110898Sroland.mainz@nrubsig.org if (!lasttype && type[*cp]) 29210898Sroland.mainz@nrubsig.org nwords++; 29310898Sroland.mainz@nrubsig.org lastchar = cp[--c]; 29410898Sroland.mainz@nrubsig.org *(endbuff = cp+c) = '\n'; 29510898Sroland.mainz@nrubsig.org c = lasttype; 29610898Sroland.mainz@nrubsig.org /* process each buffer */ 29710898Sroland.mainz@nrubsig.org for (;;) 29810898Sroland.mainz@nrubsig.org { 29910898Sroland.mainz@nrubsig.org /* process spaces and new-lines */ 30010898Sroland.mainz@nrubsig.org do 30110898Sroland.mainz@nrubsig.org { 30210898Sroland.mainz@nrubsig.org if (eol(c)) 30310898Sroland.mainz@nrubsig.org for (;;) 30410898Sroland.mainz@nrubsig.org { 30510898Sroland.mainz@nrubsig.org /* check for end of buffer */ 30610898Sroland.mainz@nrubsig.org if (cp > endbuff) 30710898Sroland.mainz@nrubsig.org goto beob; 30810898Sroland.mainz@nrubsig.org nlines++; 30910898Sroland.mainz@nrubsig.org if (*cp != '\n') 31010898Sroland.mainz@nrubsig.org break; 31110898Sroland.mainz@nrubsig.org cp++; 31210898Sroland.mainz@nrubsig.org } 31310898Sroland.mainz@nrubsig.org } while (c = type[*cp++]); 31410898Sroland.mainz@nrubsig.org /* skip over word characters */ 31510898Sroland.mainz@nrubsig.org while (!(c = type[*cp++])); 31610898Sroland.mainz@nrubsig.org nwords++; 31710898Sroland.mainz@nrubsig.org } 31810898Sroland.mainz@nrubsig.org beob: 31910898Sroland.mainz@nrubsig.org if ((cp -= 2) >= buff) 32010898Sroland.mainz@nrubsig.org c = type[*cp]; 32110898Sroland.mainz@nrubsig.org else 32210898Sroland.mainz@nrubsig.org c = lasttype; 32310898Sroland.mainz@nrubsig.org lasttype = type[lastchar]; 32410898Sroland.mainz@nrubsig.org /* see if was in word */ 32510898Sroland.mainz@nrubsig.org if (!c && !lasttype) 32610898Sroland.mainz@nrubsig.org nwords--; 32710898Sroland.mainz@nrubsig.org } 32810898Sroland.mainz@nrubsig.org if (eol(lasttype)) 32910898Sroland.mainz@nrubsig.org nlines++; 33010898Sroland.mainz@nrubsig.org else if (!lasttype) 33110898Sroland.mainz@nrubsig.org nwords++; 3324887Schin } 3334887Schin } 3344887Schin else 3354887Schin { 33610898Sroland.mainz@nrubsig.org int lineoff=0; 33710898Sroland.mainz@nrubsig.org int skip=0; 33810898Sroland.mainz@nrubsig.org int adjust=0; 33910898Sroland.mainz@nrubsig.org int state=0; 34010898Sroland.mainz@nrubsig.org int oldc; 34110898Sroland.mainz@nrubsig.org int xspace; 34210898Sroland.mainz@nrubsig.org int wasspace = 1; 34310898Sroland.mainz@nrubsig.org unsigned char* start; 34410898Sroland.mainz@nrubsig.org 34510898Sroland.mainz@nrubsig.org lastchar = 0; 34610898Sroland.mainz@nrubsig.org start = (endbuff = side) + 1; 34710898Sroland.mainz@nrubsig.org xspace = iswspace(0xa0) || iswspace(0x85); 34810898Sroland.mainz@nrubsig.org while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 3494887Schin { 350*12068SRoger.Faulkner@Oracle.COM nbytes += c; 3514887Schin nchars += c; 35210898Sroland.mainz@nrubsig.org start = cp-lineoff; 3534887Schin /* check to see whether first character terminates word */ 3544887Schin if(c==1) 3554887Schin { 35610898Sroland.mainz@nrubsig.org if(eol(lasttype)) 3574887Schin nlines++; 35810898Sroland.mainz@nrubsig.org if((c = type[*cp]) && !lasttype) 3594887Schin nwords++; 3604887Schin lasttype = c; 36110898Sroland.mainz@nrubsig.org endbuff = start; 3624887Schin continue; 3634887Schin } 36410898Sroland.mainz@nrubsig.org lastchar = cp[--c]; 36510898Sroland.mainz@nrubsig.org endbuff = cp+c; 36610898Sroland.mainz@nrubsig.org cp[c] = '\n'; 36710898Sroland.mainz@nrubsig.org if(mbc(lasttype)) 36810898Sroland.mainz@nrubsig.org { 36910898Sroland.mainz@nrubsig.org c = lasttype; 37010898Sroland.mainz@nrubsig.org goto mbyte; 37110898Sroland.mainz@nrubsig.org } 37210898Sroland.mainz@nrubsig.org if(!lasttype && spc(type[*cp])) 3734887Schin nwords++; 3744887Schin c = lasttype; 3754887Schin /* process each buffer */ 3764887Schin for (;;) 3774887Schin { 3784887Schin /* process spaces and new-lines */ 37910898Sroland.mainz@nrubsig.org spaces: 38010898Sroland.mainz@nrubsig.org do 3814887Schin { 38210898Sroland.mainz@nrubsig.org if (eol(c)) 3834887Schin { 3844887Schin /* check for end of buffer */ 3854887Schin if (cp > endbuff) 3864887Schin goto eob; 38710898Sroland.mainz@nrubsig.org if(wp->mode&WC_LONGEST) 38810898Sroland.mainz@nrubsig.org { 38910898Sroland.mainz@nrubsig.org if((cp-start)-adjust > longest) 39010898Sroland.mainz@nrubsig.org longest = (cp-start)-adjust-1; 39110898Sroland.mainz@nrubsig.org start = cp; 39210898Sroland.mainz@nrubsig.org } 3934887Schin nlines++; 39410898Sroland.mainz@nrubsig.org nchars -= adjust; 39510898Sroland.mainz@nrubsig.org adjust = 0; 39610898Sroland.mainz@nrubsig.org } 39710898Sroland.mainz@nrubsig.org } while (spc(c = type[*cp++])); 39810898Sroland.mainz@nrubsig.org wasspace=1; 39910898Sroland.mainz@nrubsig.org if(mbc(c)) 40010898Sroland.mainz@nrubsig.org { 40110898Sroland.mainz@nrubsig.org mbyte: 40210898Sroland.mainz@nrubsig.org do 40310898Sroland.mainz@nrubsig.org { 40410898Sroland.mainz@nrubsig.org if(c&WC_ERR) 40510898Sroland.mainz@nrubsig.org goto err; 40610898Sroland.mainz@nrubsig.org if(skip && (c&7)) 4074887Schin break; 40810898Sroland.mainz@nrubsig.org if(!skip) 40910898Sroland.mainz@nrubsig.org { 41010898Sroland.mainz@nrubsig.org if(!(c&7)) 41110898Sroland.mainz@nrubsig.org { 41210898Sroland.mainz@nrubsig.org skip=1; 41310898Sroland.mainz@nrubsig.org break; 41410898Sroland.mainz@nrubsig.org } 41510898Sroland.mainz@nrubsig.org skip = (c&7); 41610898Sroland.mainz@nrubsig.org adjust += skip; 41710898Sroland.mainz@nrubsig.org state = 0; 41810898Sroland.mainz@nrubsig.org if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3))) 41910898Sroland.mainz@nrubsig.org oldc = *cp; 42010898Sroland.mainz@nrubsig.org else if(xspace && cp[-1]==0xc2) 42110898Sroland.mainz@nrubsig.org { 42210898Sroland.mainz@nrubsig.org state = 8; 42310898Sroland.mainz@nrubsig.org oldc = *cp; 42410898Sroland.mainz@nrubsig.org } 42510898Sroland.mainz@nrubsig.org } 42610898Sroland.mainz@nrubsig.org else 42710898Sroland.mainz@nrubsig.org { 42810898Sroland.mainz@nrubsig.org skip--; 42910898Sroland.mainz@nrubsig.org if(state && (state=chkstate(state,oldc))) 43010898Sroland.mainz@nrubsig.org { 43110898Sroland.mainz@nrubsig.org if(state==10) 43210898Sroland.mainz@nrubsig.org { 43310898Sroland.mainz@nrubsig.org if(!wasspace) 43410898Sroland.mainz@nrubsig.org nwords++; 43510898Sroland.mainz@nrubsig.org wasspace = 1; 43610898Sroland.mainz@nrubsig.org state=0; 43710898Sroland.mainz@nrubsig.org goto spaces; 43810898Sroland.mainz@nrubsig.org } 43910898Sroland.mainz@nrubsig.org oldc = *cp; 44010898Sroland.mainz@nrubsig.org } 44110898Sroland.mainz@nrubsig.org } 44210898Sroland.mainz@nrubsig.org } while (mbc(c = type[*cp++])); 44310898Sroland.mainz@nrubsig.org wasspace = 0; 44410898Sroland.mainz@nrubsig.org if(skip) 44510898Sroland.mainz@nrubsig.org { 44610898Sroland.mainz@nrubsig.org if(eol(c) && (cp > endbuff)) 44710898Sroland.mainz@nrubsig.org goto eob; 44810898Sroland.mainz@nrubsig.org err: 44910898Sroland.mainz@nrubsig.org skip = 0; 45010898Sroland.mainz@nrubsig.org state = 0; 45110898Sroland.mainz@nrubsig.org if(eline!=nlines && !(wp->mode & WC_QUIET)) 45210898Sroland.mainz@nrubsig.org eline = invalid(file, nlines); 45310898Sroland.mainz@nrubsig.org while(mbc(c) && ((c|WC_ERR) || (c&7)==0)) 45410898Sroland.mainz@nrubsig.org c=type[*cp++]; 45510898Sroland.mainz@nrubsig.org if(eol(c) && (cp > endbuff)) 45610898Sroland.mainz@nrubsig.org { 45710898Sroland.mainz@nrubsig.org c = WC_MB|WC_ERR; 45810898Sroland.mainz@nrubsig.org goto eob; 45910898Sroland.mainz@nrubsig.org } 46010898Sroland.mainz@nrubsig.org if(mbc(c)) 46110898Sroland.mainz@nrubsig.org goto mbyte; 46210898Sroland.mainz@nrubsig.org else if(c&WC_SP) 46310898Sroland.mainz@nrubsig.org goto spaces; 4644887Schin } 46510898Sroland.mainz@nrubsig.org if(spc(c)) 46610898Sroland.mainz@nrubsig.org { 46710898Sroland.mainz@nrubsig.org nwords++; 46810898Sroland.mainz@nrubsig.org continue; 46910898Sroland.mainz@nrubsig.org } 47010898Sroland.mainz@nrubsig.org } 4714887Schin /* skip over word characters */ 47210898Sroland.mainz@nrubsig.org while(!(c = type[*cp++])); 47310898Sroland.mainz@nrubsig.org if(mbc(c)) 47410898Sroland.mainz@nrubsig.org goto mbyte; 4754887Schin nwords++; 4764887Schin } 4774887Schin eob: 47810898Sroland.mainz@nrubsig.org lineoff = cp-start; 4794887Schin if((cp -= 2) >= buff) 48010898Sroland.mainz@nrubsig.org c = type[*cp]; 4814887Schin else 48210898Sroland.mainz@nrubsig.org c = lasttype; 48310898Sroland.mainz@nrubsig.org lasttype = type[lastchar]; 4844887Schin /* see if was in word */ 4854887Schin if(!c && !lasttype) 4864887Schin nwords--; 4874887Schin } 48810898Sroland.mainz@nrubsig.org if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest) 48910898Sroland.mainz@nrubsig.org longest = (endbuff + 1 - start) - adjust - (lastchar == '\n'); 49010898Sroland.mainz@nrubsig.org wp->longest = longest; 49110898Sroland.mainz@nrubsig.org if (eol(lasttype)) 4924887Schin nlines++; 49310898Sroland.mainz@nrubsig.org else if (!lasttype) 4944887Schin nwords++; 495*12068SRoger.Faulkner@Oracle.COM if (wp->mode & WC_MBYTE) 496*12068SRoger.Faulkner@Oracle.COM nchars -= adjust; 497*12068SRoger.Faulkner@Oracle.COM else 498*12068SRoger.Faulkner@Oracle.COM nchars = nbytes; 4994887Schin } 5004887Schin wp->chars = nchars; 5014887Schin wp->words = nwords; 5024887Schin wp->lines = nlines; 50310898Sroland.mainz@nrubsig.org return 0; 5044887Schin } 50510898Sroland.mainz@nrubsig.org 506