/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2008 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
214887Schin #pragma prototyped
/*
 * David Korn
 * AT&T Bell Laboratories
 *
 * library interface for word count
 */
284887Schin 
294887Schin #include <cmd.h>
304887Schin #include <wc.h>
314887Schin #include <ctype.h>
324887Schin 
/*
 * Wide-character classification.
 *
 * When _hdr_wchar, _hdr_wctype and _lib_iswctype are all non-zero the
 * native <wchar.h> and <wctype.h> headers are used.  Otherwise iswspace()
 * is mapped onto the single-byte isspace(), unless a macro of that name
 * already exists.
 */
#if !(_hdr_wchar && _hdr_wctype && _lib_iswctype)

#ifndef iswspace
#define iswspace(x)	isspace(x)
#endif

#else

#include <wchar.h>
#include <wctype.h>

#endif
454887Schin 
/*
 * endline(c): true when the classification value c is the newline marker.
 * wc_init() stores -1 in the table slot for '\n'.  Where (signed char)-1
 * is negative the test is simply c < 0; otherwise c is compared with
 * (char)-1.
 */
#define endline(c)	((((signed char)-1) < 0) ? ((c) < 0) : ((c) == ((char)-1)))

/*
 * mbok(p,n): non-zero when the n bytes starting at p are enough to decode
 * the next character.  Fewer than one byte is never enough; when mbwide()
 * is true the bytes must convert through ast.mb_towc with a non-negative
 * result; when mbwide() is false any single byte will do.
 */
#define mbok(p,n)	(((n) >= 1) && (!mbwide() || ((*ast.mb_towc)(NiL,(char*)(p),n) >= 0)))
484887Schin 
494887Schin Wc_t *wc_init(int mode)
504887Schin {
514887Schin 	register int	n;
524887Schin 	register int	w;
534887Schin 	Wc_t*		wp;
544887Schin 
554887Schin 	if(!(wp = (Wc_t*)stakalloc(sizeof(Wc_t))))
564887Schin 		return(0);
574887Schin 	wp->mode = mode;
584887Schin 	w = mode & WC_WORDS;
594887Schin 	for(n=(1<<CHAR_BIT);--n >=0;)
604887Schin 		wp->space[n] = w ? !!isspace(n) : 0;
614887Schin 	wp->space['\n'] = -1;
624887Schin 	return(wp);
634887Schin }
644887Schin 
654887Schin /*
664887Schin  * compute the line, word, and character count for file <fd>
674887Schin  */
684887Schin int wc_count(Wc_t *wp, Sfio_t *fd, const char* file)
694887Schin {
704887Schin 	register signed char	*space = wp->space;
714887Schin 	register unsigned char	*cp;
724887Schin 	register Sfoff_t	nchars;
734887Schin 	register Sfoff_t	nwords;
744887Schin 	register Sfoff_t	nlines;
754887Schin 	register Sfoff_t	eline;
764887Schin 	register Sfoff_t	longest;
774887Schin 	register ssize_t	c;
784887Schin 	register unsigned char	*endbuff;
794887Schin 	register int		lasttype = 1;
804887Schin 	unsigned int		lastchar;
814887Schin 	unsigned char		*buff;
824887Schin 	wchar_t			x;
834887Schin 
844887Schin 	sfset(fd,SF_WRITE,1);
854887Schin 	nlines = nwords = nchars = 0;
864887Schin 	wp->longest = 0;
874887Schin 	if (wp->mode & (WC_LONGEST|WC_MBYTE))
884887Schin 	{
894887Schin 		longest = 0;
904887Schin 		eline = -1;
914887Schin 		cp = buff = endbuff = 0;
924887Schin 		for (;;)
934887Schin 		{
944887Schin 			if (!mbok(cp, endbuff-cp))
954887Schin 			{
964887Schin 				if (buff)
974887Schin 					sfread(fd, buff, cp-buff);
984887Schin 				if (!(buff = (unsigned char*)sfreserve(fd, SF_UNBOUND, SF_LOCKR)))
994887Schin 					break;
1004887Schin 				endbuff = (cp = buff) + sfvalue(fd);
1014887Schin 			}
1024887Schin 			nchars++;
1034887Schin 			x = mbchar(cp);
1044887Schin 			if (x == -1)
1054887Schin 			{
1064887Schin 				if (eline != nlines && !(wp->mode & WC_QUIET))
1074887Schin 				{
1084887Schin 					error_info.file = (char*)file;
1094887Schin 					error_info.line = eline = nlines;
1104887Schin 					error(ERROR_SYSTEM|1, "invalid multibyte character");
1114887Schin 					error_info.file = 0;
1124887Schin 					error_info.line = 0;
1134887Schin 				}
1144887Schin 			}
1154887Schin 			else if (x == '\n')
1164887Schin 			{
1174887Schin 				if ((nchars - longest) > wp->longest)
1184887Schin 					wp->longest = nchars - longest;
1194887Schin 				longest = nchars;
1204887Schin 				nlines++;
1214887Schin 				lasttype = 1;
1224887Schin 			}
1234887Schin 			else if (iswspace(x))
1244887Schin 				lasttype = 1;
1254887Schin 			else if (lasttype)
1264887Schin 			{
1274887Schin 				lasttype = 0;
1284887Schin 				nwords++;
1294887Schin 			}
1304887Schin 		}
1314887Schin 	}
1324887Schin 	else
1334887Schin 	{
1344887Schin 		for (;;)
1354887Schin 		{
1364887Schin 			/* fill next buffer and check for end-of-file */
1374887Schin 			if (!(buff = (unsigned char*)sfreserve(fd, 0, 0)) || (c = sfvalue(fd)) <= 0)
1384887Schin 				break;
1394887Schin 			sfread(fd,(char*)(cp=buff),c);
1404887Schin 			nchars += c;
1414887Schin 			/* check to see whether first character terminates word */
1424887Schin 			if(c==1)
1434887Schin 			{
1444887Schin 				if(endline(lasttype))
1454887Schin 					nlines++;
1464887Schin 				if((c = space[*cp]) && !lasttype)
1474887Schin 					nwords++;
1484887Schin 				lasttype = c;
1494887Schin 				continue;
1504887Schin 			}
1514887Schin 			if(!lasttype && space[*cp])
1524887Schin 				nwords++;
1534887Schin 			lastchar = cp[--c];
1544887Schin 			cp[c] = '\n';
1554887Schin 			endbuff = cp+c;
1564887Schin 			c = lasttype;
1574887Schin 			/* process each buffer */
1584887Schin 			for (;;)
1594887Schin 			{
1604887Schin 				/* process spaces and new-lines */
1614887Schin 				do if (endline(c))
1624887Schin 				{
1634887Schin 					for (;;)
1644887Schin 					{
1654887Schin 						/* check for end of buffer */
1664887Schin 						if (cp > endbuff)
1674887Schin 							goto eob;
1684887Schin 						nlines++;
1694887Schin 						if (*cp != '\n')
1704887Schin 							break;
1714887Schin 						cp++;
1724887Schin 					}
1734887Schin 				} while (c = space[*cp++]);
1744887Schin 				/* skip over word characters */
1754887Schin 				while(!(c = space[*cp++]));
1764887Schin 				nwords++;
1774887Schin 			}
1784887Schin 		eob:
1794887Schin 			if((cp -= 2) >= buff)
1804887Schin 				c = space[*cp];
1814887Schin 			else
1824887Schin 				c  = lasttype;
1834887Schin 			lasttype = space[lastchar];
1844887Schin 			/* see if was in word */
1854887Schin 			if(!c && !lasttype)
1864887Schin 				nwords--;
1874887Schin 		}
1884887Schin 		if(endline(lasttype))
1894887Schin 			nlines++;
1904887Schin 		else if(!lasttype)
1914887Schin 			nwords++;
1924887Schin 	}
1934887Schin 	wp->chars = nchars;
1944887Schin 	wp->words = nwords;
1954887Schin 	wp->lines = nlines;
1964887Schin 	return(0);
1974887Schin }
198