14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*8462SApril.Chin@Sun.COM * Copyright (c) 1992-2008 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 7*8462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * * 204887Schin ***********************************************************************/ 214887Schin #pragma prototyped 224887Schin /* 234887Schin * David Korn 244887Schin * AT&T Bell Laboratories 254887Schin * 264887Schin * library interface for word count 274887Schin */ 284887Schin 294887Schin #include <cmd.h> 304887Schin #include <wc.h> 314887Schin #include <ctype.h> 324887Schin 33*8462SApril.Chin@Sun.COM #if _hdr_wchar && _hdr_wctype && _lib_iswctype 344887Schin 354887Schin #include <wchar.h> 364887Schin #include <wctype.h> 374887Schin 384887Schin #else 394887Schin 404887Schin #ifndef iswspace 414887Schin #define iswspace(x) isspace(x) 424887Schin #endif 434887Schin 444887Schin #endif 454887Schin 464887Schin #define endline(c) (((signed char)-1)<0?(c)<0:(c)==((char)-1)) 474887Schin #define mbok(p,n) (((n)<1)?0:mbwide()?((*ast.mb_towc)(NiL,(char*)(p),n)>=0):1) 484887Schin 494887Schin Wc_t *wc_init(int mode) 504887Schin { 514887Schin register int n; 524887Schin register int w; 534887Schin Wc_t* wp; 544887Schin 554887Schin if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t)))) 564887Schin return(0); 574887Schin wp->mode = mode; 584887Schin w = mode & WC_WORDS; 594887Schin for(n=(1<<CHAR_BIT);--n >=0;) 604887Schin wp->space[n] = w ? !!isspace(n) : 0; 614887Schin wp->space['\n'] = -1; 624887Schin return(wp); 634887Schin } 644887Schin 654887Schin /* 664887Schin * compute the line, word, and character count for file <fd> 674887Schin */ 684887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file) 694887Schin { 704887Schin register signed char *space = wp->space; 714887Schin register unsigned char *cp; 724887Schin register Sfoff_t nchars; 734887Schin register Sfoff_t nwords; 744887Schin register Sfoff_t nlines; 754887Schin register Sfoff_t eline; 764887Schin register Sfoff_t longest; 774887Schin register ssize_t c; 784887Schin register unsigned char *endbuff; 794887Schin register int lasttype = 1; 804887Schin unsigned int lastchar; 814887Schin unsigned char *buff; 824887Schin wchar_t x; 834887Schin 844887Schin sfset(fd,SF_WRITE,1); 854887Schin nlines = nwords = nchars = 0; 864887Schin wp->longest = 0; 874887Schin if (wp->mode & (WC_LONGEST|WC_MBYTE)) 884887Schin { 894887Schin longest = 0; 904887Schin eline = -1; 914887Schin cp = buff = endbuff = 0; 924887Schin for (;;) 934887Schin { 944887Schin if (!mbok(cp, endbuff-cp)) 954887Schin { 964887Schin if (buff) 974887Schin sfread(fd, buff, cp-buff); 984887Schin if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR))) 994887Schin break; 1004887Schin endbuff = (cp = buff) + sfvalue(fd); 1014887Schin } 1024887Schin nchars++; 1034887Schin x = mbchar(cp); 1044887Schin if (x == -1) 1054887Schin { 1064887Schin if (eline != nlines && !(wp->mode & WC_QUIET)) 1074887Schin { 1084887Schin error_info.file = (char*)file; 1094887Schin error_info.line = eline = nlines; 1104887Schin error(ERROR_SYSTEM|1, "invalid multibyte character"); 1114887Schin error_info.file = 0; 1124887Schin error_info.line = 0; 1134887Schin } 1144887Schin } 1154887Schin else if (x == '\n') 1164887Schin { 1174887Schin if ((nchars - longest) > wp->longest) 1184887Schin wp->longest = nchars - longest; 1194887Schin longest = nchars; 1204887Schin nlines++; 1214887Schin lasttype = 1; 1224887Schin } 1234887Schin else if (iswspace(x)) 1244887Schin lasttype = 1; 1254887Schin else if (lasttype) 1264887Schin { 1274887Schin lasttype = 0; 1284887Schin nwords++; 1294887Schin } 1304887Schin } 1314887Schin } 1324887Schin else 1334887Schin { 1344887Schin for (;;) 1354887Schin { 1364887Schin /* fill next buffer and check for end-of-file */ 1374887Schin if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0) 1384887Schin break; 1394887Schin sfread(fd,(char*)(cp=buff),c); 1404887Schin nchars += c; 1414887Schin /* check to see whether first character terminates word */ 1424887Schin if(c==1) 1434887Schin { 1444887Schin if(endline(lasttype)) 1454887Schin nlines++; 1464887Schin if((c = space[*cp]) && !lasttype) 1474887Schin nwords++; 1484887Schin lasttype = c; 1494887Schin continue; 1504887Schin } 1514887Schin if(!lasttype && space[*cp]) 1524887Schin nwords++; 1534887Schin lastchar = cp[--c]; 1544887Schin cp[c] = '\n'; 1554887Schin endbuff = cp+c; 1564887Schin c = lasttype; 1574887Schin /* process each buffer */ 1584887Schin for (;;) 1594887Schin { 1604887Schin /* process spaces and new-lines */ 1614887Schin do if (endline(c)) 1624887Schin { 1634887Schin for (;;) 1644887Schin { 1654887Schin /* check for end of buffer */ 1664887Schin if (cp > endbuff) 1674887Schin goto eob; 1684887Schin nlines++; 1694887Schin if (*cp != '\n') 1704887Schin break; 1714887Schin cp++; 1724887Schin } 1734887Schin } while (c = space[*cp++]); 1744887Schin /* skip over word characters */ 1754887Schin while(!(c = space[*cp++])); 1764887Schin nwords++; 1774887Schin } 1784887Schin eob: 1794887Schin if((cp -= 2) >= buff) 1804887Schin c = space[*cp]; 1814887Schin else 1824887Schin c = lasttype; 1834887Schin lasttype = space[lastchar]; 1844887Schin /* see if was in word */ 1854887Schin if(!c && !lasttype) 1864887Schin nwords--; 1874887Schin } 1884887Schin if(endline(lasttype)) 1894887Schin nlines++; 1904887Schin else if(!lasttype) 1914887Schin nwords++; 1924887Schin } 1934887Schin wp->chars = nchars; 1944887Schin wp->words = nwords; 1954887Schin wp->lines = nlines; 1964887Schin return(0); 1974887Schin } 198