14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*10898Sroland.mainz@nrubsig.org * Copyright (c) 1992-2009 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 78462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * * 204887Schin ***********************************************************************/ 214887Schin #pragma prototyped 224887Schin /* 234887Schin * David Korn 244887Schin * AT&T Bell Laboratories 254887Schin * 264887Schin * library interface for word count 274887Schin */ 284887Schin 294887Schin #include <cmd.h> 304887Schin #include <wc.h> 314887Schin #include <ctype.h> 324887Schin 338462SApril.Chin@Sun.COM #if _hdr_wchar && _hdr_wctype && _lib_iswctype 344887Schin 354887Schin #include <wchar.h> 364887Schin #include <wctype.h> 37*10898Sroland.mainz@nrubsig.org #include <lc.h> 384887Schin 394887Schin #else 404887Schin 414887Schin #ifndef iswspace 424887Schin #define iswspace(x) isspace(x) 434887Schin #endif 444887Schin 454887Schin #endif 464887Schin 47*10898Sroland.mainz@nrubsig.org #define WC_SP 0x08 48*10898Sroland.mainz@nrubsig.org #define WC_NL 0x10 49*10898Sroland.mainz@nrubsig.org #define WC_MB 0x20 50*10898Sroland.mainz@nrubsig.org #define WC_ERR 0x40 514887Schin 52*10898Sroland.mainz@nrubsig.org #define eol(c) ((c)&WC_NL) 53*10898Sroland.mainz@nrubsig.org #define mbc(c) ((c)&WC_MB) 54*10898Sroland.mainz@nrubsig.org #define spc(c) ((c)&WC_SP) 55*10898Sroland.mainz@nrubsig.org #define mbwc(w,p,n) (*ast.mb_towc)(&w,(char*)p,n) 56*10898Sroland.mainz@nrubsig.org 57*10898Sroland.mainz@nrubsig.org Wc_t* wc_init(int mode) 584887Schin { 594887Schin register int n; 604887Schin register int w; 614887Schin Wc_t* wp; 624887Schin 63*10898Sroland.mainz@nrubsig.org if (!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 64*10898Sroland.mainz@nrubsig.org return 0; 65*10898Sroland.mainz@nrubsig.org if (!mbwide()) 66*10898Sroland.mainz@nrubsig.org wp->mb = 0; 67*10898Sroland.mainz@nrubsig.org #if _hdr_wchar && _hdr_wctype && _lib_iswctype 68*10898Sroland.mainz@nrubsig.org else if (!(mode & WC_NOUTF8) && (lcinfo(LC_CTYPE)->lc->flags & LC_utf8)) 69*10898Sroland.mainz@nrubsig.org wp->mb = 1; 70*10898Sroland.mainz@nrubsig.org #endif 71*10898Sroland.mainz@nrubsig.org else 72*10898Sroland.mainz@nrubsig.org wp->mb = -1; 734887Schin w = mode & WC_WORDS; 74*10898Sroland.mainz@nrubsig.org for (n = (1<<CHAR_BIT); --n >= 0;) 75*10898Sroland.mainz@nrubsig.org wp->type[n] = (w && isspace(n)) ? WC_SP : 0; 76*10898Sroland.mainz@nrubsig.org wp->type['\n'] = WC_SP|WC_NL; 77*10898Sroland.mainz@nrubsig.org if ((mode & (WC_MBYTE|WC_WORDS)) && wp->mb > 0) 78*10898Sroland.mainz@nrubsig.org { 79*10898Sroland.mainz@nrubsig.org for (n = 0; n < 64; n++) 80*10898Sroland.mainz@nrubsig.org { 81*10898Sroland.mainz@nrubsig.org wp->type[0x80+n] |= WC_MB; 82*10898Sroland.mainz@nrubsig.org if (n<32) 83*10898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+1; 84*10898Sroland.mainz@nrubsig.org else if (n<48) 85*10898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+2; 86*10898Sroland.mainz@nrubsig.org else if (n<56) 87*10898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+3; 88*10898Sroland.mainz@nrubsig.org else if (n<60) 89*10898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+4; 90*10898Sroland.mainz@nrubsig.org else if (n<62) 91*10898Sroland.mainz@nrubsig.org wp->type[0xc0+n] |= WC_MB+5; 92*10898Sroland.mainz@nrubsig.org } 93*10898Sroland.mainz@nrubsig.org wp->type[0xc0] = WC_MB|WC_ERR; 94*10898Sroland.mainz@nrubsig.org wp->type[0xc1] = WC_MB|WC_ERR; 95*10898Sroland.mainz@nrubsig.org wp->type[0xfe] = WC_MB|WC_ERR; 96*10898Sroland.mainz@nrubsig.org wp->type[0xff] = WC_MB|WC_ERR; 97*10898Sroland.mainz@nrubsig.org } 98*10898Sroland.mainz@nrubsig.org wp->mode = mode; 99*10898Sroland.mainz@nrubsig.org return wp; 100*10898Sroland.mainz@nrubsig.org } 101*10898Sroland.mainz@nrubsig.org 102*10898Sroland.mainz@nrubsig.org static int invalid(const char *file, int nlines) 103*10898Sroland.mainz@nrubsig.org { 104*10898Sroland.mainz@nrubsig.org error_info.file = (char*)file; 105*10898Sroland.mainz@nrubsig.org error_info.line = nlines; 106*10898Sroland.mainz@nrubsig.org error(ERROR_SYSTEM|1, "invalid multibyte character"); 107*10898Sroland.mainz@nrubsig.org error_info.file = 0; 108*10898Sroland.mainz@nrubsig.org error_info.line = 0; 109*10898Sroland.mainz@nrubsig.org return nlines; 110*10898Sroland.mainz@nrubsig.org } 111*10898Sroland.mainz@nrubsig.org 112*10898Sroland.mainz@nrubsig.org /* 113*10898Sroland.mainz@nrubsig.org * handle utf space characters 114*10898Sroland.mainz@nrubsig.org */ 115*10898Sroland.mainz@nrubsig.org 116*10898Sroland.mainz@nrubsig.org static int chkstate(int state, register unsigned int c) 117*10898Sroland.mainz@nrubsig.org { 118*10898Sroland.mainz@nrubsig.org switch(state) 119*10898Sroland.mainz@nrubsig.org { 120*10898Sroland.mainz@nrubsig.org case 1: 121*10898Sroland.mainz@nrubsig.org state = (c==0x9a?4:0); 122*10898Sroland.mainz@nrubsig.org break; 123*10898Sroland.mainz@nrubsig.org case 2: 124*10898Sroland.mainz@nrubsig.org state = ((c==0x80||c==0x81)?6+(c&1):0); 125*10898Sroland.mainz@nrubsig.org break; 126*10898Sroland.mainz@nrubsig.org case 3: 127*10898Sroland.mainz@nrubsig.org state = (c==0x80?5:0); 128*10898Sroland.mainz@nrubsig.org break; 129*10898Sroland.mainz@nrubsig.org case 4: 130*10898Sroland.mainz@nrubsig.org state = (c==0x80?10:0); 131*10898Sroland.mainz@nrubsig.org break; 132*10898Sroland.mainz@nrubsig.org case 5: 133*10898Sroland.mainz@nrubsig.org state = (c==0x80?10:0); 134*10898Sroland.mainz@nrubsig.org break; 135*10898Sroland.mainz@nrubsig.org case 6: 136*10898Sroland.mainz@nrubsig.org state = 0; 137*10898Sroland.mainz@nrubsig.org if(c==0xa0 || c==0xa1) 138*10898Sroland.mainz@nrubsig.org return(10); 139*10898Sroland.mainz@nrubsig.org else if((c&0xf0)== 0x80) 140*10898Sroland.mainz@nrubsig.org { 141*10898Sroland.mainz@nrubsig.org if((c&=0xf)==7) 142*10898Sroland.mainz@nrubsig.org return(iswspace(0x2007)?10:0); 143*10898Sroland.mainz@nrubsig.org if(c<=0xb) 144*10898Sroland.mainz@nrubsig.org return(10); 145*10898Sroland.mainz@nrubsig.org } 146*10898Sroland.mainz@nrubsig.org else if(c==0xaf && iswspace(0x202f)) 147*10898Sroland.mainz@nrubsig.org return(10); 148*10898Sroland.mainz@nrubsig.org break; 149*10898Sroland.mainz@nrubsig.org case 7: 150*10898Sroland.mainz@nrubsig.org state = (c==0x9f?10:0); 151*10898Sroland.mainz@nrubsig.org break; 152*10898Sroland.mainz@nrubsig.org case 8: 153*10898Sroland.mainz@nrubsig.org return (iswspace(c)?10:0); 154*10898Sroland.mainz@nrubsig.org } 155*10898Sroland.mainz@nrubsig.org return state; 1564887Schin } 1574887Schin 1584887Schin /* 1594887Schin * compute the line, word, and character count for file <fd> 1604887Schin */ 161*10898Sroland.mainz@nrubsig.org 1624887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 1634887Schin { 164*10898Sroland.mainz@nrubsig.org register char* type = wp->type; 165*10898Sroland.mainz@nrubsig.org register unsigned char* cp; 1664887Schin register Sfoff_t nchars; 1674887Schin register Sfoff_t nwords; 1684887Schin register Sfoff_t nlines; 169*10898Sroland.mainz@nrubsig.org register Sfoff_t eline = -1; 170*10898Sroland.mainz@nrubsig.org register Sfoff_t longest = 0; 1714887Schin register ssize_t c; 172*10898Sroland.mainz@nrubsig.org register unsigned char* endbuff; 173*10898Sroland.mainz@nrubsig.org register int lasttype = WC_SP; 1744887Schin unsigned int lastchar; 175*10898Sroland.mainz@nrubsig.org ssize_t n; 176*10898Sroland.mainz@nrubsig.org ssize_t o; 177*10898Sroland.mainz@nrubsig.org unsigned char* buff; 1784887Schin wchar_t x; 179*10898Sroland.mainz@nrubsig.org unsigned char side[32]; 1804887Schin 1814887Schin sfset(fd,SF_WRITE,1); 1824887Schin nlines = nwords = nchars = 0; 1834887Schin wp->longest = 0; 184*10898Sroland.mainz@nrubsig.org if (wp->mb < 0 && (wp->mode & (WC_MBYTE|WC_WORDS))) 1854887Schin { 1864887Schin cp = buff = endbuff = 0; 1874887Schin for (;;) 1884887Schin { 189*10898Sroland.mainz@nrubsig.org if (cp >= endbuff || (n = mbwc(x, cp, endbuff-cp)) < 0) 1904887Schin { 191*10898Sroland.mainz@nrubsig.org if ((o = endbuff-cp) < sizeof(side)) 192*10898Sroland.mainz@nrubsig.org { 193*10898Sroland.mainz@nrubsig.org if (buff) 194*10898Sroland.mainz@nrubsig.org { 195*10898Sroland.mainz@nrubsig.org if (o) 196*10898Sroland.mainz@nrubsig.org memcpy(side, cp, o); 197*10898Sroland.mainz@nrubsig.org mbinit(); 198*10898Sroland.mainz@nrubsig.org } 199*10898Sroland.mainz@nrubsig.org else 200*10898Sroland.mainz@nrubsig.org o = 0; 201*10898Sroland.mainz@nrubsig.org cp = side + o; 202*10898Sroland.mainz@nrubsig.org if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) || (n = sfvalue(fd)) <= 0) 203*10898Sroland.mainz@nrubsig.org { 204*10898Sroland.mainz@nrubsig.org if ((nchars - longest) > wp->longest) 205*10898Sroland.mainz@nrubsig.org wp->longest = nchars - longest; 206*10898Sroland.mainz@nrubsig.org break; 207*10898Sroland.mainz@nrubsig.org } 208*10898Sroland.mainz@nrubsig.org if ((c = sizeof(side) - o) > n) 209*10898Sroland.mainz@nrubsig.org c = n; 210*10898Sroland.mainz@nrubsig.org if (c) 211*10898Sroland.mainz@nrubsig.org memcpy(cp, buff, c); 212*10898Sroland.mainz@nrubsig.org endbuff = buff + n; 213*10898Sroland.mainz@nrubsig.org cp = side; 214*10898Sroland.mainz@nrubsig.org x = mbchar(cp); 215*10898Sroland.mainz@nrubsig.org if ((cp-side) < o) 216*10898Sroland.mainz@nrubsig.org { 217*10898Sroland.mainz@nrubsig.org cp = buff; 218*10898Sroland.mainz@nrubsig.org nchars += (cp-side) - 1; 219*10898Sroland.mainz@nrubsig.org } 220*10898Sroland.mainz@nrubsig.org else 221*10898Sroland.mainz@nrubsig.org cp = buff + (cp-side) - o; 222*10898Sroland.mainz@nrubsig.org } 223*10898Sroland.mainz@nrubsig.org else 224*10898Sroland.mainz@nrubsig.org { 225*10898Sroland.mainz@nrubsig.org cp++; 226*10898Sroland.mainz@nrubsig.org x = -1; 227*10898Sroland.mainz@nrubsig.org } 228*10898Sroland.mainz@nrubsig.org if (x == -1 && eline != nlines && !(wp->mode & WC_QUIET)) 229*10898Sroland.mainz@nrubsig.org eline = invalid(file, nlines); 2304887Schin } 231*10898Sroland.mainz@nrubsig.org else 232*10898Sroland.mainz@nrubsig.org cp += n ? n : 1; 233*10898Sroland.mainz@nrubsig.org if (x == '\n') 2344887Schin { 2354887Schin if ((nchars - longest) > wp->longest) 2364887Schin wp->longest = nchars - longest; 237*10898Sroland.mainz@nrubsig.org longest = nchars + 1; 2384887Schin nlines++; 2394887Schin lasttype = 1; 2404887Schin } 2414887Schin else if (iswspace(x)) 2424887Schin lasttype = 1; 2434887Schin else if (lasttype) 2444887Schin { 2454887Schin lasttype = 0; 2464887Schin nwords++; 2474887Schin } 248*10898Sroland.mainz@nrubsig.org nchars++; 249*10898Sroland.mainz@nrubsig.org } 250*10898Sroland.mainz@nrubsig.org } 251*10898Sroland.mainz@nrubsig.org else if (!wp->mb && !(wp->mode & WC_LONGEST) || wp->mb > 0 && !(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))) 252*10898Sroland.mainz@nrubsig.org { 253*10898Sroland.mainz@nrubsig.org if (!(wp->mode & (WC_MBYTE|WC_WORDS|WC_LONGEST))) 254*10898Sroland.mainz@nrubsig.org { 255*10898Sroland.mainz@nrubsig.org while ((cp = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 256*10898Sroland.mainz@nrubsig.org { 257*10898Sroland.mainz@nrubsig.org nchars += c; 258*10898Sroland.mainz@nrubsig.org endbuff = cp + c; 259*10898Sroland.mainz@nrubsig.org if (*--endbuff == '\n') 260*10898Sroland.mainz@nrubsig.org nlines++; 261*10898Sroland.mainz@nrubsig.org else 262*10898Sroland.mainz@nrubsig.org *endbuff = '\n'; 263*10898Sroland.mainz@nrubsig.org for (;;) 264*10898Sroland.mainz@nrubsig.org if (*cp++ == '\n') 265*10898Sroland.mainz@nrubsig.org { 266*10898Sroland.mainz@nrubsig.org if (cp > endbuff) 267*10898Sroland.mainz@nrubsig.org break; 268*10898Sroland.mainz@nrubsig.org nlines++; 269*10898Sroland.mainz@nrubsig.org } 270*10898Sroland.mainz@nrubsig.org } 271*10898Sroland.mainz@nrubsig.org } 272*10898Sroland.mainz@nrubsig.org else 273*10898Sroland.mainz@nrubsig.org { 274*10898Sroland.mainz@nrubsig.org while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 275*10898Sroland.mainz@nrubsig.org { 276*10898Sroland.mainz@nrubsig.org nchars += c; 277*10898Sroland.mainz@nrubsig.org /* check to see whether first character terminates word */ 278*10898Sroland.mainz@nrubsig.org if (c==1) 279*10898Sroland.mainz@nrubsig.org { 280*10898Sroland.mainz@nrubsig.org if (eol(lasttype)) 281*10898Sroland.mainz@nrubsig.org nlines++; 282*10898Sroland.mainz@nrubsig.org if ((c = type[*cp]) && !lasttype) 283*10898Sroland.mainz@nrubsig.org nwords++; 284*10898Sroland.mainz@nrubsig.org lasttype = c; 285*10898Sroland.mainz@nrubsig.org continue; 286*10898Sroland.mainz@nrubsig.org } 287*10898Sroland.mainz@nrubsig.org if (!lasttype && type[*cp]) 288*10898Sroland.mainz@nrubsig.org nwords++; 289*10898Sroland.mainz@nrubsig.org lastchar = cp[--c]; 290*10898Sroland.mainz@nrubsig.org *(endbuff = cp+c) = '\n'; 291*10898Sroland.mainz@nrubsig.org c = lasttype; 292*10898Sroland.mainz@nrubsig.org /* process each buffer */ 293*10898Sroland.mainz@nrubsig.org for (;;) 294*10898Sroland.mainz@nrubsig.org { 295*10898Sroland.mainz@nrubsig.org /* process spaces and new-lines */ 296*10898Sroland.mainz@nrubsig.org do 297*10898Sroland.mainz@nrubsig.org { 298*10898Sroland.mainz@nrubsig.org if (eol(c)) 299*10898Sroland.mainz@nrubsig.org for (;;) 300*10898Sroland.mainz@nrubsig.org { 301*10898Sroland.mainz@nrubsig.org /* check for end of buffer */ 302*10898Sroland.mainz@nrubsig.org if (cp > endbuff) 303*10898Sroland.mainz@nrubsig.org goto beob; 304*10898Sroland.mainz@nrubsig.org nlines++; 305*10898Sroland.mainz@nrubsig.org if (*cp != '\n') 306*10898Sroland.mainz@nrubsig.org break; 307*10898Sroland.mainz@nrubsig.org cp++; 308*10898Sroland.mainz@nrubsig.org } 309*10898Sroland.mainz@nrubsig.org } while (c = type[*cp++]); 310*10898Sroland.mainz@nrubsig.org /* skip over word characters */ 311*10898Sroland.mainz@nrubsig.org while (!(c = type[*cp++])); 312*10898Sroland.mainz@nrubsig.org nwords++; 313*10898Sroland.mainz@nrubsig.org } 314*10898Sroland.mainz@nrubsig.org beob: 315*10898Sroland.mainz@nrubsig.org if ((cp -= 2) >= buff) 316*10898Sroland.mainz@nrubsig.org c = type[*cp]; 317*10898Sroland.mainz@nrubsig.org else 318*10898Sroland.mainz@nrubsig.org c = lasttype; 319*10898Sroland.mainz@nrubsig.org lasttype = type[lastchar]; 320*10898Sroland.mainz@nrubsig.org /* see if was in word */ 321*10898Sroland.mainz@nrubsig.org if (!c && !lasttype) 322*10898Sroland.mainz@nrubsig.org nwords--; 323*10898Sroland.mainz@nrubsig.org } 324*10898Sroland.mainz@nrubsig.org if (eol(lasttype)) 325*10898Sroland.mainz@nrubsig.org nlines++; 326*10898Sroland.mainz@nrubsig.org else if (!lasttype) 327*10898Sroland.mainz@nrubsig.org nwords++; 3284887Schin } 3294887Schin } 3304887Schin else 3314887Schin { 332*10898Sroland.mainz@nrubsig.org int lineoff=0; 333*10898Sroland.mainz@nrubsig.org int skip=0; 334*10898Sroland.mainz@nrubsig.org int adjust=0; 335*10898Sroland.mainz@nrubsig.org int state=0; 336*10898Sroland.mainz@nrubsig.org int oldc; 337*10898Sroland.mainz@nrubsig.org int xspace; 338*10898Sroland.mainz@nrubsig.org int wasspace = 1; 339*10898Sroland.mainz@nrubsig.org unsigned char* start; 340*10898Sroland.mainz@nrubsig.org 341*10898Sroland.mainz@nrubsig.org lastchar = 0; 342*10898Sroland.mainz@nrubsig.org start = (endbuff = side) + 1; 343*10898Sroland.mainz@nrubsig.org xspace = iswspace(0xa0) || iswspace(0x85); 344*10898Sroland.mainz@nrubsig.org while ((cp = buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, 0)) && (c = sfvalue(fd)) > 0) 3454887Schin { 3464887Schin nchars += c; 347*10898Sroland.mainz@nrubsig.org start = cp-lineoff; 3484887Schin /* check to see whether first character terminates word */ 3494887Schin if(c==1) 3504887Schin { 351*10898Sroland.mainz@nrubsig.org if(eol(lasttype)) 3524887Schin nlines++; 353*10898Sroland.mainz@nrubsig.org if((c = type[*cp]) && !lasttype) 3544887Schin nwords++; 3554887Schin lasttype = c; 356*10898Sroland.mainz@nrubsig.org endbuff = start; 3574887Schin continue; 3584887Schin } 359*10898Sroland.mainz@nrubsig.org lastchar = cp[--c]; 360*10898Sroland.mainz@nrubsig.org endbuff = cp+c; 361*10898Sroland.mainz@nrubsig.org cp[c] = '\n'; 362*10898Sroland.mainz@nrubsig.org if(mbc(lasttype)) 363*10898Sroland.mainz@nrubsig.org { 364*10898Sroland.mainz@nrubsig.org c = lasttype; 365*10898Sroland.mainz@nrubsig.org goto mbyte; 366*10898Sroland.mainz@nrubsig.org } 367*10898Sroland.mainz@nrubsig.org if(!lasttype && spc(type[*cp])) 3684887Schin nwords++; 3694887Schin c = lasttype; 3704887Schin /* process each buffer */ 3714887Schin for (;;) 3724887Schin { 3734887Schin /* process spaces and new-lines */ 374*10898Sroland.mainz@nrubsig.org spaces: 375*10898Sroland.mainz@nrubsig.org do 3764887Schin { 377*10898Sroland.mainz@nrubsig.org if (eol(c)) 3784887Schin { 3794887Schin /* check for end of buffer */ 3804887Schin if (cp > endbuff) 3814887Schin goto eob; 382*10898Sroland.mainz@nrubsig.org if(wp->mode&WC_LONGEST) 383*10898Sroland.mainz@nrubsig.org { 384*10898Sroland.mainz@nrubsig.org if((cp-start)-adjust > longest) 385*10898Sroland.mainz@nrubsig.org longest = (cp-start)-adjust-1; 386*10898Sroland.mainz@nrubsig.org start = cp; 387*10898Sroland.mainz@nrubsig.org } 3884887Schin nlines++; 389*10898Sroland.mainz@nrubsig.org nchars -= adjust; 390*10898Sroland.mainz@nrubsig.org adjust = 0; 391*10898Sroland.mainz@nrubsig.org } 392*10898Sroland.mainz@nrubsig.org } while (spc(c = type[*cp++])); 393*10898Sroland.mainz@nrubsig.org wasspace=1; 394*10898Sroland.mainz@nrubsig.org if(mbc(c)) 395*10898Sroland.mainz@nrubsig.org { 396*10898Sroland.mainz@nrubsig.org mbyte: 397*10898Sroland.mainz@nrubsig.org do 398*10898Sroland.mainz@nrubsig.org { 399*10898Sroland.mainz@nrubsig.org if(c&WC_ERR) 400*10898Sroland.mainz@nrubsig.org goto err; 401*10898Sroland.mainz@nrubsig.org if(skip && (c&7)) 4024887Schin break; 403*10898Sroland.mainz@nrubsig.org if(!skip) 404*10898Sroland.mainz@nrubsig.org { 405*10898Sroland.mainz@nrubsig.org if(!(c&7)) 406*10898Sroland.mainz@nrubsig.org { 407*10898Sroland.mainz@nrubsig.org skip=1; 408*10898Sroland.mainz@nrubsig.org break; 409*10898Sroland.mainz@nrubsig.org } 410*10898Sroland.mainz@nrubsig.org skip = (c&7); 411*10898Sroland.mainz@nrubsig.org adjust += skip; 412*10898Sroland.mainz@nrubsig.org state = 0; 413*10898Sroland.mainz@nrubsig.org if(skip==2 && (cp[-1]&0xc)==0 && (state=(cp[-1]&0x3))) 414*10898Sroland.mainz@nrubsig.org oldc = *cp; 415*10898Sroland.mainz@nrubsig.org else if(xspace && cp[-1]==0xc2) 416*10898Sroland.mainz@nrubsig.org { 417*10898Sroland.mainz@nrubsig.org state = 8; 418*10898Sroland.mainz@nrubsig.org oldc = *cp; 419*10898Sroland.mainz@nrubsig.org } 420*10898Sroland.mainz@nrubsig.org } 421*10898Sroland.mainz@nrubsig.org else 422*10898Sroland.mainz@nrubsig.org { 423*10898Sroland.mainz@nrubsig.org skip--; 424*10898Sroland.mainz@nrubsig.org if(state && (state=chkstate(state,oldc))) 425*10898Sroland.mainz@nrubsig.org { 426*10898Sroland.mainz@nrubsig.org if(state==10) 427*10898Sroland.mainz@nrubsig.org { 428*10898Sroland.mainz@nrubsig.org if(!wasspace) 429*10898Sroland.mainz@nrubsig.org nwords++; 430*10898Sroland.mainz@nrubsig.org wasspace = 1; 431*10898Sroland.mainz@nrubsig.org state=0; 432*10898Sroland.mainz@nrubsig.org goto spaces; 433*10898Sroland.mainz@nrubsig.org } 434*10898Sroland.mainz@nrubsig.org oldc = *cp; 435*10898Sroland.mainz@nrubsig.org } 436*10898Sroland.mainz@nrubsig.org } 437*10898Sroland.mainz@nrubsig.org } while (mbc(c = type[*cp++])); 438*10898Sroland.mainz@nrubsig.org wasspace = 0; 439*10898Sroland.mainz@nrubsig.org if(skip) 440*10898Sroland.mainz@nrubsig.org { 441*10898Sroland.mainz@nrubsig.org if(eol(c) && (cp > endbuff)) 442*10898Sroland.mainz@nrubsig.org goto eob; 443*10898Sroland.mainz@nrubsig.org err: 444*10898Sroland.mainz@nrubsig.org skip = 0; 445*10898Sroland.mainz@nrubsig.org state = 0; 446*10898Sroland.mainz@nrubsig.org if(eline!=nlines && !(wp->mode & WC_QUIET)) 447*10898Sroland.mainz@nrubsig.org eline = invalid(file, nlines); 448*10898Sroland.mainz@nrubsig.org while(mbc(c) && ((c|WC_ERR) || (c&7)==0)) 449*10898Sroland.mainz@nrubsig.org c=type[*cp++]; 450*10898Sroland.mainz@nrubsig.org if(eol(c) && (cp > endbuff)) 451*10898Sroland.mainz@nrubsig.org { 452*10898Sroland.mainz@nrubsig.org c = WC_MB|WC_ERR; 453*10898Sroland.mainz@nrubsig.org goto eob; 454*10898Sroland.mainz@nrubsig.org } 455*10898Sroland.mainz@nrubsig.org if(mbc(c)) 456*10898Sroland.mainz@nrubsig.org goto mbyte; 457*10898Sroland.mainz@nrubsig.org else if(c&WC_SP) 458*10898Sroland.mainz@nrubsig.org goto spaces; 4594887Schin } 460*10898Sroland.mainz@nrubsig.org if(spc(c)) 461*10898Sroland.mainz@nrubsig.org { 462*10898Sroland.mainz@nrubsig.org nwords++; 463*10898Sroland.mainz@nrubsig.org continue; 464*10898Sroland.mainz@nrubsig.org } 465*10898Sroland.mainz@nrubsig.org } 4664887Schin /* skip over word characters */ 467*10898Sroland.mainz@nrubsig.org while(!(c = type[*cp++])); 468*10898Sroland.mainz@nrubsig.org if(mbc(c)) 469*10898Sroland.mainz@nrubsig.org goto mbyte; 4704887Schin nwords++; 4714887Schin } 4724887Schin eob: 473*10898Sroland.mainz@nrubsig.org lineoff = cp-start; 4744887Schin if((cp -= 2) >= buff) 475*10898Sroland.mainz@nrubsig.org c = type[*cp]; 4764887Schin else 477*10898Sroland.mainz@nrubsig.org c = lasttype; 478*10898Sroland.mainz@nrubsig.org lasttype = type[lastchar]; 4794887Schin /* see if was in word */ 4804887Schin if(!c && !lasttype) 4814887Schin nwords--; 4824887Schin } 483*10898Sroland.mainz@nrubsig.org if ((wp->mode&WC_LONGEST) && ((endbuff + 1 - start) - adjust - (lastchar == '\n')) > longest) 484*10898Sroland.mainz@nrubsig.org longest = (endbuff + 1 - start) - adjust - (lastchar == '\n'); 485*10898Sroland.mainz@nrubsig.org wp->longest = longest; 486*10898Sroland.mainz@nrubsig.org if (eol(lasttype)) 4874887Schin nlines++; 488*10898Sroland.mainz@nrubsig.org else if (!lasttype) 4894887Schin nwords++; 490*10898Sroland.mainz@nrubsig.org nchars -= adjust; 4914887Schin } 4924887Schin wp->chars = nchars; 4934887Schin wp->words = nwords; 4944887Schin wp->lines = nlines; 495*10898Sroland.mainz@nrubsig.org return 0; 4964887Schin } 497*10898Sroland.mainz@nrubsig.org 498