1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1992-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * * 20*4887Schin ***********************************************************************/ 21*4887Schin #pragma prototyped 22*4887Schin /* 23*4887Schin * David Korn 24*4887Schin * AT&T Bell Laboratories 25*4887Schin * 26*4887Schin * library interface for word count 27*4887Schin */ 28*4887Schin 29*4887Schin #include <cmd.h> 30*4887Schin #include <wc.h> 31*4887Schin #include <ctype.h> 32*4887Schin 33*4887Schin #if _hdr_wchar && _hdr_wctype 34*4887Schin 35*4887Schin #include <wchar.h> 36*4887Schin #include <wctype.h> 37*4887Schin 38*4887Schin #else 39*4887Schin 40*4887Schin #ifndef iswspace 41*4887Schin #define iswspace(x) isspace(x) 42*4887Schin #endif 43*4887Schin 44*4887Schin #endif 45*4887Schin 46*4887Schin #define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) 47*4887Schin #define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1) 48*4887Schin 49*4887Schin Wc_t *wc_init(int mode) 50*4887Schin { 51*4887Schin register int n; 52*4887Schin register int w; 53*4887Schin Wc_t* wp; 54*4887Schin 55*4887Schin if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 56*4887Schin return(0); 57*4887Schin wp->mode = mode; 58*4887Schin w = mode & WC_WORDS; 59*4887Schin for(n=(1<<CHAR_BIT);--n >=0;) 60*4887Schin wp->space[n] = w ? !!isspace(n) : 0; 61*4887Schin wp->space['\n'] = -1; 62*4887Schin return(wp); 63*4887Schin } 64*4887Schin 65*4887Schin /* 66*4887Schin * compute the line, word, and character count for file <fd> 67*4887Schin */ 68*4887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 69*4887Schin { 70*4887Schin register signed char *space = wp->space; 71*4887Schin register unsigned char *cp; 72*4887Schin register Sfoff_t nchars; 73*4887Schin register Sfoff_t nwords; 74*4887Schin register Sfoff_t nlines; 75*4887Schin register Sfoff_t eline; 76*4887Schin register Sfoff_t longest; 77*4887Schin register ssize_t c; 78*4887Schin register unsigned char *endbuff; 79*4887Schin register int lasttype = 1; 80*4887Schin unsigned int lastchar; 81*4887Schin unsigned char *buff; 82*4887Schin wchar_t x; 83*4887Schin 84*4887Schin sfset(fd,SF_WRITE,1); 85*4887Schin nlines = nwords = nchars = 0; 86*4887Schin wp->longest = 0; 87*4887Schin if (wp->mode & (WC_LONGEST|WC_MBYTE)) 88*4887Schin { 89*4887Schin longest = 0; 90*4887Schin eline = -1; 91*4887Schin cp = buff = endbuff = 0; 92*4887Schin for (;;) 93*4887Schin { 94*4887Schin if (!mbok(cp, endbuff-cp)) 95*4887Schin { 96*4887Schin if (buff) 97*4887Schin sfread(fd, buff, cp-buff); 98*4887Schin if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR))) 99*4887Schin break; 100*4887Schin endbuff = (cp = buff) + sfvalue(fd); 101*4887Schin } 102*4887Schin nchars++; 103*4887Schin x = mbchar(cp); 104*4887Schin if (x == -1) 105*4887Schin { 106*4887Schin if (eline != nlines && !(wp->mode & WC_QUIET)) 107*4887Schin { 108*4887Schin error_info.file = (char*)file; 109*4887Schin error_info.line = eline = nlines; 110*4887Schin error(ERROR_SYSTEM|1, "invalid multibyte character"); 111*4887Schin error_info.file = 0; 112*4887Schin error_info.line = 0; 113*4887Schin } 114*4887Schin } 115*4887Schin else if (x == '\n') 116*4887Schin { 117*4887Schin if ((nchars - longest) > wp->longest) 118*4887Schin wp->longest = nchars - longest; 119*4887Schin longest = nchars; 120*4887Schin nlines++; 121*4887Schin lasttype = 1; 122*4887Schin } 123*4887Schin else if (iswspace(x)) 124*4887Schin lasttype = 1; 125*4887Schin else if (lasttype) 126*4887Schin { 127*4887Schin lasttype = 0; 128*4887Schin nwords++; 129*4887Schin } 130*4887Schin } 131*4887Schin } 132*4887Schin else 133*4887Schin { 134*4887Schin for (;;) 135*4887Schin { 136*4887Schin /* fill next buffer and check for end-of-file */ 137*4887Schin if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0) 138*4887Schin break; 139*4887Schin sfread(fd,(char*)(cp=buff),c); 140*4887Schin nchars += c; 141*4887Schin /* check to see whether first character terminates word */ 142*4887Schin if(c==1) 143*4887Schin { 144*4887Schin if(endline(lasttype)) 145*4887Schin nlines++; 146*4887Schin if((c = space[*cp]) && !lasttype) 147*4887Schin nwords++; 148*4887Schin lasttype = c; 149*4887Schin continue; 150*4887Schin } 151*4887Schin if(!lasttype && space[*cp]) 152*4887Schin nwords++; 153*4887Schin lastchar = cp[--c]; 154*4887Schin cp[c] = '\n'; 155*4887Schin endbuff = cp+c; 156*4887Schin c = lasttype; 157*4887Schin /* process each buffer */ 158*4887Schin for (;;) 159*4887Schin { 160*4887Schin /* process spaces and new-lines */ 161*4887Schin do if (endline(c)) 162*4887Schin { 163*4887Schin for (;;) 164*4887Schin { 165*4887Schin /* check for end of buffer */ 166*4887Schin if (cp > endbuff) 167*4887Schin goto eob; 168*4887Schin nlines++; 169*4887Schin if (*cp != '\n') 170*4887Schin break; 171*4887Schin cp++; 172*4887Schin } 173*4887Schin } while (c = space[*cp++]); 174*4887Schin /* skip over word characters */ 175*4887Schin while(!(c = space[*cp++])); 176*4887Schin nwords++; 177*4887Schin } 178*4887Schin eob: 179*4887Schin if((cp -= 2) >= buff) 180*4887Schin c = space[*cp]; 181*4887Schin else 182*4887Schin c = lasttype; 183*4887Schin lasttype = space[lastchar]; 184*4887Schin /* see if was in word */ 185*4887Schin if(!c && !lasttype) 186*4887Schin nwords--; 187*4887Schin } 188*4887Schin if(endline(lasttype)) 189*4887Schin nlines++; 190*4887Schin else if(!lasttype) 191*4887Schin nwords++; 192*4887Schin } 193*4887Schin wp->chars = nchars; 194*4887Schin wp->words = nwords; 195*4887Schin wp->lines = nlines; 196*4887Schin return(0); 197*4887Schin } 198