xref: /onnv-gate/usr/src/lib/libcmd/common/cut.c (revision 12068:08a39a083754)
14887Schin /***********************************************************************
24887Schin *                                                                      *
34887Schin *               This software is part of the ast package               *
4*12068SRoger.Faulkner@Oracle.COM *          Copyright (c) 1992-2010 AT&T Intellectual Property          *
54887Schin *                      and is licensed under the                       *
64887Schin *                  Common Public License, Version 1.0                  *
78462SApril.Chin@Sun.COM *                    by AT&T Intellectual Property                     *
84887Schin *                                                                      *
94887Schin *                A copy of the License is available at                 *
104887Schin *            http://www.opensource.org/licenses/cpl1.0.txt             *
114887Schin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
124887Schin *                                                                      *
134887Schin *              Information and Software Systems Research               *
144887Schin *                            AT&T Research                             *
154887Schin *                           Florham Park NJ                            *
164887Schin *                                                                      *
174887Schin *                 Glenn Fowler <gsf@research.att.com>                  *
184887Schin *                  David Korn <dgk@research.att.com>                   *
194887Schin *                                                                      *
204887Schin ***********************************************************************/
214887Schin #pragma prototyped
/*
 * David Korn
 * AT&T Bell Laboratories
 *
 * cut selected bytes, characters, or fields from each line of a file
 */
284887Schin 
294887Schin static const char usage[] =
30*12068SRoger.Faulkner@Oracle.COM "[-?\n@(#)$Id: cut (AT&T Research) 2009-12-04 $\n]"
314887Schin USAGE_LICENSE
324887Schin "[+NAME?cut - cut out selected columns or fields of each line of a file]"
334887Schin "[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
344887Schin 	"from one or more files, contatenating them on standard output.]"
354887Schin "[+?The option argument \alist\a is a comma-separated or blank-separated "
364887Schin 	"list of positive numbers and ranges.  Ranges can be of three "
374887Schin 	"forms.  The first is two positive integers separated by a hyphen "
384887Schin 	"(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
394887Schin 	"\ahigh\a.  The second is a positive number preceded by a hyphen "
404887Schin 	"(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
414887Schin 	"\ahigh\a.  The last is a positive number followed by a hyphen "
424887Schin 	"(\alow\a\b-\b), which represents all fields from \alow\a to the "
434887Schin 	"last field, inclusive.  Elements in the \alist\a can be repeated, "
444887Schin 	"can overlap, and can appear in any order.  The order of the "
454887Schin 	"output is that of the input.]"
464887Schin "[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
474887Schin "[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
484887Schin         "cuts from standard input.   The start of the file is defined "
494887Schin         "as the current offset.]"
50*12068SRoger.Faulkner@Oracle.COM "[b:bytes]:[list?\bcut\b based on a list of byte counts.]"
51*12068SRoger.Faulkner@Oracle.COM "[c:characters]:[list?\bcut\b based on a list of character counts.]"
524887Schin "[d:delimiter]:[delim?The field character for the \b-f\b option is set "
534887Schin 	"to \adelim\a.  The default is the \btab\b character.]"
544887Schin "[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
554887Schin 	"character specified with the \b-d\b optiion.]"
56*12068SRoger.Faulkner@Oracle.COM "[n!:split?Split multibyte characters selected by the \b-b\b option.]"
574887Schin "[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
584887Schin 	"records of length \areclen\a when used with the \b-b\b or \b-c\b "
594887Schin 	"option.]"
604887Schin "[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
614887Schin 	"when used with the \b-f\b option.  By default, lines with no "
624887Schin 	"delimiters will be passsed in untouched.]"
634887Schin "[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
644887Schin 	"the \b-f\b option is set to \aldelim\a.  The default is the "
654887Schin 	"\bnewline\b character.]"
66*12068SRoger.Faulkner@Oracle.COM "[N!:newline?Output new-lines at end of each record when used "
674887Schin 	"with the \b-b\b or \b-c\b option.]"
684887Schin "\n"
694887Schin "\n[file ...]\n"
704887Schin "\n"
714887Schin "[+EXIT STATUS?]{"
724887Schin 	"[+0?All files processed successfully.]"
734887Schin 	"[+>0?One or more files failed to open or could not be read.]"
744887Schin "}"
754887Schin "[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
764887Schin ;
774887Schin 
784887Schin #include <cmd.h>
794887Schin #include <ctype.h>
804887Schin 
/*
 * a field or line delimiter as parsed from the command line by b_cut()
 */
typedef struct Delim_s
{
	char*		str;	/* option argument text; written out when len > 1 */
	int		len;	/* number of bytes in the delimiter */
	int		chr;	/* delimiter character value */
} Delim_t;
874887Schin 
884887Schin typedef struct Cut_s
894887Schin {
90*12068SRoger.Faulkner@Oracle.COM 	int		mb;
91*12068SRoger.Faulkner@Oracle.COM 	int		eob;
924887Schin 	int		cflag;
93*12068SRoger.Faulkner@Oracle.COM 	int		nosplit;
944887Schin 	int		sflag;
954887Schin 	int		nlflag;
964887Schin 	int		reclen;
97*12068SRoger.Faulkner@Oracle.COM 	Delim_t		wdelim;
98*12068SRoger.Faulkner@Oracle.COM 	Delim_t		ldelim;
99*12068SRoger.Faulkner@Oracle.COM 	unsigned char	space[UCHAR_MAX+1];
1004887Schin 	int		list[2];	/* NOTE: must be last member */
1014887Schin } Cut_t;
1024887Schin 
#define HUGE		INT_MAX		/* terminates cut->list */
#define BLOCK		(8*1024)	/* size argument passed to sftmp() */

/* mode bits collected by b_cut() and decoded by cutinit() */
#define C_BYTES		1
#define C_CHARS		2
#define C_FIELDS	4
#define C_SUPRESS	8
#define C_NOSPLIT	16
#define C_NONEWLINE	32

/* byte classes stored in cut->space[] (0 means ordinary byte) */
#define SP_LINE		1
#define SP_WORD		2
#define SP_WIDE		3

/* convert the multibyte character at p (at most n bytes) into w */
#define mb2wc(w,p,n)	(*ast.mb_towc)(&(w),(char*)(p),(n))
117*12068SRoger.Faulkner@Oracle.COM 
/*
 * qsort() comparison function for the int pairs built by cutinit():
 * only the first int of each pair takes part in the comparison
 *
 * returns -1, 0 or 1 as *a is less than, equal to or greater than *b
 */

static int
mycomp(register const void* a, register const void* b)
{
	const int	x = *(const int*)a;
	const int	y = *(const int*)b;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}
1314887Schin 
132*12068SRoger.Faulkner@Oracle.COM static Cut_t*
cutinit(int mode,char * str,Delim_t * wdelim,Delim_t * ldelim,size_t reclen)133*12068SRoger.Faulkner@Oracle.COM cutinit(int mode, char* str, Delim_t* wdelim, Delim_t* ldelim, size_t reclen)
1344887Schin {
135*12068SRoger.Faulkner@Oracle.COM 	register int*	lp;
136*12068SRoger.Faulkner@Oracle.COM 	register int	c;
137*12068SRoger.Faulkner@Oracle.COM 	register int	n = 0;
138*12068SRoger.Faulkner@Oracle.COM 	register int	range = 0;
139*12068SRoger.Faulkner@Oracle.COM 	register char*	cp = str;
140*12068SRoger.Faulkner@Oracle.COM 	Cut_t*		cut;
141*12068SRoger.Faulkner@Oracle.COM 
142*12068SRoger.Faulkner@Oracle.COM 	if (!(cut = (Cut_t*)stakalloc(sizeof(Cut_t) + strlen(cp) * sizeof(int))))
1434887Schin 		error(ERROR_exit(1), "out of space");
144*12068SRoger.Faulkner@Oracle.COM 	if (cut->mb = mbwide())
1454887Schin 	{
146*12068SRoger.Faulkner@Oracle.COM 		memset(cut->space, 0, sizeof(cut->space) / 2);
147*12068SRoger.Faulkner@Oracle.COM 		memset(cut->space + sizeof(cut->space) / 2, SP_WIDE, sizeof(cut->space) / 2);
148*12068SRoger.Faulkner@Oracle.COM 	}
149*12068SRoger.Faulkner@Oracle.COM 	else
150*12068SRoger.Faulkner@Oracle.COM 		memset(cut->space, 0, sizeof(cut->space));
151*12068SRoger.Faulkner@Oracle.COM 	cut->wdelim = *wdelim;
152*12068SRoger.Faulkner@Oracle.COM 	if (wdelim->len == 1)
153*12068SRoger.Faulkner@Oracle.COM 		cut->space[wdelim->chr] = SP_WORD;
154*12068SRoger.Faulkner@Oracle.COM 	cut->ldelim = *ldelim;
155*12068SRoger.Faulkner@Oracle.COM 	cut->eob = (ldelim->len == 1) ? ldelim->chr : 0;
156*12068SRoger.Faulkner@Oracle.COM 	cut->space[cut->eob] = SP_LINE;
157*12068SRoger.Faulkner@Oracle.COM 	cut->cflag = (mode&C_CHARS) && cut->mb;
158*12068SRoger.Faulkner@Oracle.COM 	cut->nosplit = (mode&(C_BYTES|C_NOSPLIT)) == (C_BYTES|C_NOSPLIT) && cut->mb;
159*12068SRoger.Faulkner@Oracle.COM 	cut->sflag = (mode&C_SUPRESS) != 0;
160*12068SRoger.Faulkner@Oracle.COM 	cut->nlflag = (mode&C_NONEWLINE) != 0;
161*12068SRoger.Faulkner@Oracle.COM 	cut->reclen = reclen;
162*12068SRoger.Faulkner@Oracle.COM 	lp = cut->list;
163*12068SRoger.Faulkner@Oracle.COM 	for (;;)
164*12068SRoger.Faulkner@Oracle.COM 		switch(c = *cp++)
165*12068SRoger.Faulkner@Oracle.COM 		{
1664887Schin 		case ' ':
1674887Schin 		case '\t':
1684887Schin 			while(*cp==' ' || *cp=='\t')
1694887Schin 				cp++;
170*12068SRoger.Faulkner@Oracle.COM 			/*FALLTHROUGH*/
1714887Schin 		case 0:
1724887Schin 		case ',':
1734887Schin 			if(range)
1744887Schin 			{
1754887Schin 				--range;
176*12068SRoger.Faulkner@Oracle.COM 				if((n = (n ? (n-range) : (HUGE-1))) < 0)
1774887Schin 					error(ERROR_exit(1),"invalid range for c/f option");
1784887Schin 				*lp++ = range;
1794887Schin 				*lp++ = n;
1804887Schin 			}
1814887Schin 			else
1824887Schin 			{
1834887Schin 				*lp++ = --n;
1844887Schin 				*lp++ = 1;
1854887Schin 			}
1864887Schin 			if(c==0)
1874887Schin 			{
1884887Schin 				register int *dp;
1894887Schin 				*lp = HUGE;
190*12068SRoger.Faulkner@Oracle.COM 				n = 1 + (lp-cut->list)/2;
191*12068SRoger.Faulkner@Oracle.COM 				qsort(lp=cut->list,n,2*sizeof(*lp),mycomp);
1924887Schin 				/* eliminate overlapping regions */
1934887Schin 				for(n=0,range= -2,dp=lp; *lp!=HUGE; lp+=2)
1944887Schin 				{
1954887Schin 					if(lp[0] <= range)
1964887Schin 					{
1974887Schin 						if(lp[1]==HUGE)
1984887Schin 						{
1994887Schin 							dp[-1] = HUGE;
2004887Schin 							break;
2014887Schin 						}
2024887Schin 						if((c = lp[0]+lp[1]-range)>0)
2034887Schin 						{
2044887Schin 							range += c;
2054887Schin 							dp[-1] += c;
2064887Schin 						}
2074887Schin 					}
2084887Schin 					else
2094887Schin 					{
2104887Schin 						range = *dp++ = lp[0];
2114887Schin 						if(lp[1]==HUGE)
2124887Schin 						{
2134887Schin 							*dp++ = HUGE;
2144887Schin 							break;
2154887Schin 						}
2164887Schin 						range += (*dp++ = lp[1]);
2174887Schin 					}
2184887Schin 				}
2194887Schin 				*dp = HUGE;
220*12068SRoger.Faulkner@Oracle.COM 				lp = cut->list;
2214887Schin 				/* convert ranges into gaps */
2224887Schin 				for(n=0; *lp!=HUGE; lp+=2)
2234887Schin 				{
2244887Schin 					c = *lp;
2254887Schin 					*lp -= n;
2264887Schin 					n = c+lp[1];
2274887Schin 				}
228*12068SRoger.Faulkner@Oracle.COM 				return cut;
2294887Schin 			}
2304887Schin 			n = range = 0;
2314887Schin 			break;
2324887Schin 
2334887Schin 		case '-':
2344887Schin 			if(range)
2354887Schin 				error(ERROR_exit(1),"bad list for c/f option");
2364887Schin 			range = n?n:1;
2374887Schin 			n = 0;
2384887Schin 			break;
2394887Schin 
2404887Schin 		default:
2414887Schin 			if(!isdigit(c))
2424887Schin 				error(ERROR_exit(1),"bad list for c/f option");
2434887Schin 			n = 10*n + (c-'0');
244*12068SRoger.Faulkner@Oracle.COM 			break;
245*12068SRoger.Faulkner@Oracle.COM 		}
2464887Schin 	/* NOTREACHED */
2474887Schin }
2484887Schin 
2494887Schin /*
2504887Schin  * cut each line of file <fdin> and put results to <fdout> using list <list>
2514887Schin  */
2524887Schin 
253*12068SRoger.Faulkner@Oracle.COM static void
cutcols(Cut_t * cut,Sfio_t * fdin,Sfio_t * fdout)254*12068SRoger.Faulkner@Oracle.COM cutcols(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
2554887Schin {
256*12068SRoger.Faulkner@Oracle.COM 	register int		c;
257*12068SRoger.Faulkner@Oracle.COM 	register int		len;
258*12068SRoger.Faulkner@Oracle.COM 	register int		ncol = 0;
259*12068SRoger.Faulkner@Oracle.COM 	register const int*	lp = cut->list;
260*12068SRoger.Faulkner@Oracle.COM 	register char*		bp;
2614887Schin 	register int		skip; /* non-zero for don't copy */
262*12068SRoger.Faulkner@Oracle.COM 	int			must;
263*12068SRoger.Faulkner@Oracle.COM 	char*			ep;
264*12068SRoger.Faulkner@Oracle.COM 	const char*		xx;
265*12068SRoger.Faulkner@Oracle.COM 
266*12068SRoger.Faulkner@Oracle.COM 	for (;;)
2674887Schin 	{
268*12068SRoger.Faulkner@Oracle.COM 		if (len = cut->reclen)
269*12068SRoger.Faulkner@Oracle.COM 			bp = sfreserve(fdin, len, -1);
2704887Schin 		else
271*12068SRoger.Faulkner@Oracle.COM 			bp = sfgetr(fdin, '\n', 0);
272*12068SRoger.Faulkner@Oracle.COM 		if (!bp && !(bp = sfgetr(fdin, 0, SF_LASTR)))
2734887Schin 			break;
2744887Schin 		len = sfvalue(fdin);
275*12068SRoger.Faulkner@Oracle.COM 		ep = bp + len;
276*12068SRoger.Faulkner@Oracle.COM 		xx = 0;
277*12068SRoger.Faulkner@Oracle.COM 		if (!(ncol = skip  = *(lp = cut->list)))
2784887Schin 			ncol = *++lp;
279*12068SRoger.Faulkner@Oracle.COM 		must = 1;
280*12068SRoger.Faulkner@Oracle.COM 		do
2814887Schin 		{
282*12068SRoger.Faulkner@Oracle.COM 			if (cut->nosplit)
283*12068SRoger.Faulkner@Oracle.COM 			{
284*12068SRoger.Faulkner@Oracle.COM 				register const char*	s = bp;
285*12068SRoger.Faulkner@Oracle.COM 				register int		w = len < ncol ? len : ncol;
286*12068SRoger.Faulkner@Oracle.COM 				register int		z;
287*12068SRoger.Faulkner@Oracle.COM 
288*12068SRoger.Faulkner@Oracle.COM 				while (w > 0)
289*12068SRoger.Faulkner@Oracle.COM 				{
290*12068SRoger.Faulkner@Oracle.COM 					if (!(*s & 0x80))
291*12068SRoger.Faulkner@Oracle.COM 						z = 1;
292*12068SRoger.Faulkner@Oracle.COM 					else if ((z = mblen(s, w)) <= 0)
293*12068SRoger.Faulkner@Oracle.COM 					{
294*12068SRoger.Faulkner@Oracle.COM 						if (s == bp && xx)
295*12068SRoger.Faulkner@Oracle.COM 						{
296*12068SRoger.Faulkner@Oracle.COM 							w += s - xx;
297*12068SRoger.Faulkner@Oracle.COM 							bp = (char*)(s = xx);
298*12068SRoger.Faulkner@Oracle.COM 							xx = 0;
299*12068SRoger.Faulkner@Oracle.COM 							continue;
300*12068SRoger.Faulkner@Oracle.COM 						}
301*12068SRoger.Faulkner@Oracle.COM 						xx = s;
302*12068SRoger.Faulkner@Oracle.COM 						if (skip)
303*12068SRoger.Faulkner@Oracle.COM 							s += w;
304*12068SRoger.Faulkner@Oracle.COM 						w = 0;
305*12068SRoger.Faulkner@Oracle.COM 						break;
306*12068SRoger.Faulkner@Oracle.COM 					}
307*12068SRoger.Faulkner@Oracle.COM 					s += z;
308*12068SRoger.Faulkner@Oracle.COM 					w -= z;
309*12068SRoger.Faulkner@Oracle.COM 				}
310*12068SRoger.Faulkner@Oracle.COM 				c = s - bp;
311*12068SRoger.Faulkner@Oracle.COM 				ncol = !w && ncol >= len;
312*12068SRoger.Faulkner@Oracle.COM 			}
313*12068SRoger.Faulkner@Oracle.COM 			else if (cut->cflag)
314*12068SRoger.Faulkner@Oracle.COM 			{
315*12068SRoger.Faulkner@Oracle.COM 				register const char*	s = bp;
316*12068SRoger.Faulkner@Oracle.COM 				register int		w = len;
317*12068SRoger.Faulkner@Oracle.COM 				register int		z;
318*12068SRoger.Faulkner@Oracle.COM 
319*12068SRoger.Faulkner@Oracle.COM 				while (w > 0 && ncol > 0)
320*12068SRoger.Faulkner@Oracle.COM 				{
321*12068SRoger.Faulkner@Oracle.COM 					ncol--;
322*12068SRoger.Faulkner@Oracle.COM 					if (!(*s & 0x80) || (z = mblen(s, w)) <= 0)
323*12068SRoger.Faulkner@Oracle.COM 						z = 1;
324*12068SRoger.Faulkner@Oracle.COM 					s += z;
325*12068SRoger.Faulkner@Oracle.COM 					w -= z;
326*12068SRoger.Faulkner@Oracle.COM 
327*12068SRoger.Faulkner@Oracle.COM 				}
328*12068SRoger.Faulkner@Oracle.COM 				c = s - bp;
329*12068SRoger.Faulkner@Oracle.COM 				ncol = !w && (ncol || !skip);
330*12068SRoger.Faulkner@Oracle.COM 			}
331*12068SRoger.Faulkner@Oracle.COM 			else
332*12068SRoger.Faulkner@Oracle.COM 			{
333*12068SRoger.Faulkner@Oracle.COM 				if ((c = ncol) > len)
334*12068SRoger.Faulkner@Oracle.COM 					c = len;
335*12068SRoger.Faulkner@Oracle.COM 				else if (c == len && !skip)
336*12068SRoger.Faulkner@Oracle.COM 					ncol++;
337*12068SRoger.Faulkner@Oracle.COM 				ncol -= c;
338*12068SRoger.Faulkner@Oracle.COM 			}
339*12068SRoger.Faulkner@Oracle.COM 			if (!skip && c)
340*12068SRoger.Faulkner@Oracle.COM 			{
341*12068SRoger.Faulkner@Oracle.COM 				if (sfwrite(fdout, (char*)bp, c) < 0)
342*12068SRoger.Faulkner@Oracle.COM 					return;
343*12068SRoger.Faulkner@Oracle.COM 				must = 0;
344*12068SRoger.Faulkner@Oracle.COM 			}
345*12068SRoger.Faulkner@Oracle.COM 			bp += c;
346*12068SRoger.Faulkner@Oracle.COM 			if (ncol)
3474887Schin 				break;
3484887Schin 			len -= c;
3494887Schin 			ncol = *++lp;
3504887Schin 			skip = !skip;
351*12068SRoger.Faulkner@Oracle.COM 		} while (ncol != HUGE);
352*12068SRoger.Faulkner@Oracle.COM 		if (!cut->nlflag && (skip || must || cut->reclen))
353*12068SRoger.Faulkner@Oracle.COM 		{
354*12068SRoger.Faulkner@Oracle.COM 			if (cut->ldelim.len > 1)
355*12068SRoger.Faulkner@Oracle.COM 				sfwrite(fdout, cut->ldelim.str, cut->ldelim.len);
356*12068SRoger.Faulkner@Oracle.COM 			else
357*12068SRoger.Faulkner@Oracle.COM 				sfputc(fdout, cut->ldelim.chr);
3584887Schin 		}
3594887Schin 	}
3604887Schin }
3614887Schin 
3624887Schin /*
3634887Schin  * cut each line of file <fdin> and put results to <fdout> using list <list>
3644887Schin  * stream <fdin> must be line buffered
3654887Schin  */
3664887Schin 
367*12068SRoger.Faulkner@Oracle.COM static void
cutfields(Cut_t * cut,Sfio_t * fdin,Sfio_t * fdout)368*12068SRoger.Faulkner@Oracle.COM cutfields(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
3694887Schin {
370*12068SRoger.Faulkner@Oracle.COM 	register unsigned char *sp = cut->space;
3714887Schin 	register unsigned char *cp;
372*12068SRoger.Faulkner@Oracle.COM 	register unsigned char *wp;
3734887Schin 	register int c, nfields;
374*12068SRoger.Faulkner@Oracle.COM 	register const int *lp = cut->list;
3754887Schin 	register unsigned char *copy;
3764887Schin 	register int nodelim, empty, inword=0;
377*12068SRoger.Faulkner@Oracle.COM 	register unsigned char *ep;
378*12068SRoger.Faulkner@Oracle.COM 	unsigned char *bp, *first;
3794887Schin 	int lastchar;
380*12068SRoger.Faulkner@Oracle.COM 	wchar_t w;
3814887Schin 	Sfio_t *fdtmp = 0;
3824887Schin 	long offset = 0;
383*12068SRoger.Faulkner@Oracle.COM 	unsigned char mb[8];
3844887Schin 	/* process each buffer */
385*12068SRoger.Faulkner@Oracle.COM 	while ((bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) && (c = sfvalue(fdin)) > 0)
3864887Schin 	{
387*12068SRoger.Faulkner@Oracle.COM 		cp = bp;
388*12068SRoger.Faulkner@Oracle.COM 		ep = cp + --c;
389*12068SRoger.Faulkner@Oracle.COM 		if((lastchar = cp[c]) != cut->eob)
390*12068SRoger.Faulkner@Oracle.COM 			*ep = cut->eob;
3914887Schin 		/* process each line in the buffer */
392*12068SRoger.Faulkner@Oracle.COM 		while (cp <= ep)
3934887Schin 		{
3944887Schin 			first = cp;
395*12068SRoger.Faulkner@Oracle.COM 			if (!inword)
3964887Schin 			{
3974887Schin 				nodelim = empty = 1;
3984887Schin 				copy = cp;
399*12068SRoger.Faulkner@Oracle.COM 				if (nfields = *(lp = cut->list))
4004887Schin 					copy = 0;
4014887Schin 				else
4024887Schin 					nfields = *++lp;
4034887Schin 			}
404*12068SRoger.Faulkner@Oracle.COM 			else if (copy)
4054887Schin 				copy = cp;
4064887Schin 			inword = 0;
407*12068SRoger.Faulkner@Oracle.COM 			do
4084887Schin 			{
4094887Schin 				/* skip over non-delimiter characters */
410*12068SRoger.Faulkner@Oracle.COM 				if (cut->mb)
411*12068SRoger.Faulkner@Oracle.COM 					for (;;)
412*12068SRoger.Faulkner@Oracle.COM 					{
413*12068SRoger.Faulkner@Oracle.COM 						switch (c = sp[*(unsigned char*)cp++])
414*12068SRoger.Faulkner@Oracle.COM 						{
415*12068SRoger.Faulkner@Oracle.COM 						case 0:
416*12068SRoger.Faulkner@Oracle.COM 							continue;
417*12068SRoger.Faulkner@Oracle.COM 						case SP_WIDE:
418*12068SRoger.Faulkner@Oracle.COM 							wp = --cp;
419*12068SRoger.Faulkner@Oracle.COM 							while ((c = mb2wc(w, cp, ep - cp)) <= 0)
420*12068SRoger.Faulkner@Oracle.COM 							{
421*12068SRoger.Faulkner@Oracle.COM 								/* mb char possibly spanning buffer boundary -- fun stuff */
422*12068SRoger.Faulkner@Oracle.COM 								if ((ep - cp) < mbmax())
423*12068SRoger.Faulkner@Oracle.COM 								{
424*12068SRoger.Faulkner@Oracle.COM 									int	i;
425*12068SRoger.Faulkner@Oracle.COM 									int	j;
426*12068SRoger.Faulkner@Oracle.COM 									int	k;
427*12068SRoger.Faulkner@Oracle.COM 
428*12068SRoger.Faulkner@Oracle.COM 									if (lastchar != cut->eob)
429*12068SRoger.Faulkner@Oracle.COM 									{
430*12068SRoger.Faulkner@Oracle.COM 										*ep = lastchar;
431*12068SRoger.Faulkner@Oracle.COM 										if ((c = mb2wc(w, cp, ep - cp)) > 0)
432*12068SRoger.Faulkner@Oracle.COM 											break;
433*12068SRoger.Faulkner@Oracle.COM 									}
434*12068SRoger.Faulkner@Oracle.COM 									if (copy)
435*12068SRoger.Faulkner@Oracle.COM 									{
436*12068SRoger.Faulkner@Oracle.COM 										empty = 0;
437*12068SRoger.Faulkner@Oracle.COM 										if ((c = cp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
438*12068SRoger.Faulkner@Oracle.COM 											goto failed;
439*12068SRoger.Faulkner@Oracle.COM 									}
440*12068SRoger.Faulkner@Oracle.COM 									for (i = 0; i <= (ep - cp); i++)
441*12068SRoger.Faulkner@Oracle.COM 										mb[i] = cp[i];
442*12068SRoger.Faulkner@Oracle.COM 									if (!(bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) || (c = sfvalue(fdin)) <= 0)
443*12068SRoger.Faulkner@Oracle.COM 										goto failed;
444*12068SRoger.Faulkner@Oracle.COM 									cp = bp;
445*12068SRoger.Faulkner@Oracle.COM 									ep = cp + --c;
446*12068SRoger.Faulkner@Oracle.COM 									if ((lastchar = cp[c]) != cut->eob)
447*12068SRoger.Faulkner@Oracle.COM 										*ep = cut->eob;
448*12068SRoger.Faulkner@Oracle.COM 									j = i;
449*12068SRoger.Faulkner@Oracle.COM 									k = 0;
450*12068SRoger.Faulkner@Oracle.COM 									while (j < mbmax())
451*12068SRoger.Faulkner@Oracle.COM 										mb[j++] = cp[k++];
452*12068SRoger.Faulkner@Oracle.COM 									if ((c = mb2wc(w, (char*)mb, j)) <= 0)
453*12068SRoger.Faulkner@Oracle.COM 									{
454*12068SRoger.Faulkner@Oracle.COM 										c = i;
455*12068SRoger.Faulkner@Oracle.COM 										w = 0;
456*12068SRoger.Faulkner@Oracle.COM 									}
457*12068SRoger.Faulkner@Oracle.COM 									first = bp = cp += c - i;
458*12068SRoger.Faulkner@Oracle.COM 									if (copy)
459*12068SRoger.Faulkner@Oracle.COM 									{
460*12068SRoger.Faulkner@Oracle.COM 										copy = bp;
461*12068SRoger.Faulkner@Oracle.COM 										if (w == cut->ldelim.chr)
462*12068SRoger.Faulkner@Oracle.COM 											lastchar = cut->ldelim.chr;
463*12068SRoger.Faulkner@Oracle.COM 										else if (w != cut->wdelim.chr)
464*12068SRoger.Faulkner@Oracle.COM 										{
465*12068SRoger.Faulkner@Oracle.COM 											empty = 0;
466*12068SRoger.Faulkner@Oracle.COM 											if (sfwrite(fdout, (char*)mb, c) < 0)
467*12068SRoger.Faulkner@Oracle.COM 												goto failed;
468*12068SRoger.Faulkner@Oracle.COM 										}
469*12068SRoger.Faulkner@Oracle.COM 									}
470*12068SRoger.Faulkner@Oracle.COM 									c = 0;
471*12068SRoger.Faulkner@Oracle.COM 								}
472*12068SRoger.Faulkner@Oracle.COM 								else
473*12068SRoger.Faulkner@Oracle.COM 								{
474*12068SRoger.Faulkner@Oracle.COM 									w = *cp;
475*12068SRoger.Faulkner@Oracle.COM 									c = 1;
476*12068SRoger.Faulkner@Oracle.COM 								}
477*12068SRoger.Faulkner@Oracle.COM 								break;
478*12068SRoger.Faulkner@Oracle.COM 							}
479*12068SRoger.Faulkner@Oracle.COM 							cp += c;
480*12068SRoger.Faulkner@Oracle.COM 							c = w;
481*12068SRoger.Faulkner@Oracle.COM 							if (c == cut->wdelim.chr)
482*12068SRoger.Faulkner@Oracle.COM 							{
483*12068SRoger.Faulkner@Oracle.COM 								c = SP_WORD;
484*12068SRoger.Faulkner@Oracle.COM 								break;
485*12068SRoger.Faulkner@Oracle.COM 							}
486*12068SRoger.Faulkner@Oracle.COM 							if (c == cut->ldelim.chr)
487*12068SRoger.Faulkner@Oracle.COM 							{
488*12068SRoger.Faulkner@Oracle.COM 								c = SP_LINE;
489*12068SRoger.Faulkner@Oracle.COM 								break;
490*12068SRoger.Faulkner@Oracle.COM 							}
491*12068SRoger.Faulkner@Oracle.COM 							continue;
492*12068SRoger.Faulkner@Oracle.COM 						default:
493*12068SRoger.Faulkner@Oracle.COM 							wp = cp - 1;
494*12068SRoger.Faulkner@Oracle.COM 							break;
495*12068SRoger.Faulkner@Oracle.COM 						}
4964887Schin 						break;
497*12068SRoger.Faulkner@Oracle.COM 					}
498*12068SRoger.Faulkner@Oracle.COM 				else
499*12068SRoger.Faulkner@Oracle.COM 				{
500*12068SRoger.Faulkner@Oracle.COM 					while (!(c = sp[*cp++]));
501*12068SRoger.Faulkner@Oracle.COM 					wp = cp - 1;
502*12068SRoger.Faulkner@Oracle.COM 				}
503*12068SRoger.Faulkner@Oracle.COM 				/* check for end-of-line */
504*12068SRoger.Faulkner@Oracle.COM 				if (c == SP_LINE)
505*12068SRoger.Faulkner@Oracle.COM 				{
506*12068SRoger.Faulkner@Oracle.COM 					if (cp <= ep)
5074887Schin 						break;
508*12068SRoger.Faulkner@Oracle.COM 					if (lastchar == cut->ldelim.chr)
509*12068SRoger.Faulkner@Oracle.COM 						break;
510*12068SRoger.Faulkner@Oracle.COM 					/* restore cut->last character */
511*12068SRoger.Faulkner@Oracle.COM 					if (lastchar != cut->eob)
512*12068SRoger.Faulkner@Oracle.COM 						*ep = lastchar;
5134887Schin 					inword++;
514*12068SRoger.Faulkner@Oracle.COM 					if (!sp[lastchar])
5154887Schin 						break;
5164887Schin 				}
5174887Schin 				nodelim = 0;
518*12068SRoger.Faulkner@Oracle.COM 				if (--nfields > 0)
5194887Schin 					continue;
5204887Schin 				nfields = *++lp;
521*12068SRoger.Faulkner@Oracle.COM 				if (copy)
5224887Schin 				{
5234887Schin 					empty = 0;
524*12068SRoger.Faulkner@Oracle.COM 					if ((c = wp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
5254887Schin 						goto failed;
5264887Schin 					copy = 0;
5274887Schin 				}
5284887Schin 				else
5294887Schin 					/* set to delimiter unless the first field */
530*12068SRoger.Faulkner@Oracle.COM 					copy = empty ? cp : wp;
531*12068SRoger.Faulkner@Oracle.COM 			} while (!inword);
532*12068SRoger.Faulkner@Oracle.COM 			if (!inword)
5334887Schin 			{
534*12068SRoger.Faulkner@Oracle.COM 				if (!copy)
5354887Schin 				{
536*12068SRoger.Faulkner@Oracle.COM 					if (nodelim)
5374887Schin 					{
538*12068SRoger.Faulkner@Oracle.COM 						if (!cut->sflag)
5394887Schin 						{
540*12068SRoger.Faulkner@Oracle.COM 							if (offset)
5414887Schin 							{
5424887Schin 								sfseek(fdtmp,(Sfoff_t)0,SEEK_SET);
5434887Schin 								sfmove(fdtmp,fdout,offset,-1);
5444887Schin 							}
5454887Schin 							copy = first;
5464887Schin 						}
5474887Schin 					}
5484887Schin 					else
5494887Schin 						sfputc(fdout,'\n');
5504887Schin 				}
551*12068SRoger.Faulkner@Oracle.COM 				if (offset)
5524887Schin 					sfseek(fdtmp,offset=0,SEEK_SET);
5534887Schin 			}
554*12068SRoger.Faulkner@Oracle.COM 			if (copy && (c=cp-copy)>0 && (!nodelim || !cut->sflag) && sfwrite(fdout,(char*)copy,c)< 0)
5554887Schin 				goto failed;
5564887Schin 		}
5574887Schin 		/* see whether to save in tmp file */
558*12068SRoger.Faulkner@Oracle.COM 		if(inword && nodelim && !cut->sflag && (c=cp-first)>0)
5594887Schin 		{
5604887Schin 			/* copy line to tmpfile in case no fields */
5614887Schin 			if(!fdtmp)
5624887Schin 				fdtmp = sftmp(BLOCK);
5634887Schin 			sfwrite(fdtmp,(char*)first,c);
5644887Schin 			offset +=c;
5654887Schin 		}
5664887Schin 	}
567*12068SRoger.Faulkner@Oracle.COM  failed:
5684887Schin 	if(fdtmp)
5694887Schin 		sfclose(fdtmp);
5704887Schin }
5714887Schin 
5724887Schin int
b_cut(int argc,char ** argv,void * context)573*12068SRoger.Faulkner@Oracle.COM b_cut(int argc, char** argv, void* context)
5744887Schin {
575*12068SRoger.Faulkner@Oracle.COM 	register char*		cp = 0;
576*12068SRoger.Faulkner@Oracle.COM 	register Sfio_t*	fp;
577*12068SRoger.Faulkner@Oracle.COM 	char*			s;
578*12068SRoger.Faulkner@Oracle.COM 	int			n;
579*12068SRoger.Faulkner@Oracle.COM 	Cut_t*			cut;
580*12068SRoger.Faulkner@Oracle.COM 	int			mode = 0;
581*12068SRoger.Faulkner@Oracle.COM 	Delim_t			wdelim;
582*12068SRoger.Faulkner@Oracle.COM 	Delim_t			ldelim;
583*12068SRoger.Faulkner@Oracle.COM 	size_t			reclen = 0;
5844887Schin 
5854887Schin 	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
586*12068SRoger.Faulkner@Oracle.COM 	wdelim.chr = '\t';
587*12068SRoger.Faulkner@Oracle.COM 	ldelim.chr = '\n';
588*12068SRoger.Faulkner@Oracle.COM 	wdelim.len = ldelim.len = 1;
589*12068SRoger.Faulkner@Oracle.COM 	for (;;)
5904887Schin 	{
591*12068SRoger.Faulkner@Oracle.COM 		switch (n = optget(argv, usage))
5924887Schin 		{
593*12068SRoger.Faulkner@Oracle.COM 		case 0:
594*12068SRoger.Faulkner@Oracle.COM 			break;
595*12068SRoger.Faulkner@Oracle.COM 		case 'b':
596*12068SRoger.Faulkner@Oracle.COM 		case 'c':
597*12068SRoger.Faulkner@Oracle.COM 			if(mode&C_FIELDS)
598*12068SRoger.Faulkner@Oracle.COM 			{
599*12068SRoger.Faulkner@Oracle.COM 				error(2, "f option already specified");
600*12068SRoger.Faulkner@Oracle.COM 				continue;
601*12068SRoger.Faulkner@Oracle.COM 			}
602*12068SRoger.Faulkner@Oracle.COM 			cp = opt_info.arg;
603*12068SRoger.Faulkner@Oracle.COM 			if(n=='b')
604*12068SRoger.Faulkner@Oracle.COM 				mode |= C_BYTES;
605*12068SRoger.Faulkner@Oracle.COM 			else
606*12068SRoger.Faulkner@Oracle.COM 				mode |= C_CHARS;
607*12068SRoger.Faulkner@Oracle.COM 			continue;
608*12068SRoger.Faulkner@Oracle.COM 		case 'D':
609*12068SRoger.Faulkner@Oracle.COM 			ldelim.str = opt_info.arg;
610*12068SRoger.Faulkner@Oracle.COM 			if (mbwide())
611*12068SRoger.Faulkner@Oracle.COM 			{
612*12068SRoger.Faulkner@Oracle.COM 				s = opt_info.arg;
613*12068SRoger.Faulkner@Oracle.COM 				ldelim.chr = mbchar(s);
614*12068SRoger.Faulkner@Oracle.COM 				if ((n = s - opt_info.arg) > 1)
615*12068SRoger.Faulkner@Oracle.COM 				{
616*12068SRoger.Faulkner@Oracle.COM 					ldelim.len = n;
617*12068SRoger.Faulkner@Oracle.COM 					continue;
618*12068SRoger.Faulkner@Oracle.COM 				}
619*12068SRoger.Faulkner@Oracle.COM 			}
620*12068SRoger.Faulkner@Oracle.COM 			ldelim.chr = *(unsigned char*)opt_info.arg;
621*12068SRoger.Faulkner@Oracle.COM 			ldelim.len = 1;
622*12068SRoger.Faulkner@Oracle.COM 			continue;
623*12068SRoger.Faulkner@Oracle.COM 		case 'd':
624*12068SRoger.Faulkner@Oracle.COM 			wdelim.str = opt_info.arg;
625*12068SRoger.Faulkner@Oracle.COM 			if (mbwide())
626*12068SRoger.Faulkner@Oracle.COM 			{
627*12068SRoger.Faulkner@Oracle.COM 				s = opt_info.arg;
628*12068SRoger.Faulkner@Oracle.COM 				wdelim.chr = mbchar(s);
629*12068SRoger.Faulkner@Oracle.COM 				if ((n = s - opt_info.arg) > 1)
630*12068SRoger.Faulkner@Oracle.COM 				{
631*12068SRoger.Faulkner@Oracle.COM 					wdelim.len = n;
632*12068SRoger.Faulkner@Oracle.COM 					continue;
633*12068SRoger.Faulkner@Oracle.COM 				}
634*12068SRoger.Faulkner@Oracle.COM 			}
635*12068SRoger.Faulkner@Oracle.COM 			wdelim.chr = *(unsigned char*)opt_info.arg;
636*12068SRoger.Faulkner@Oracle.COM 			wdelim.len = 1;
637*12068SRoger.Faulkner@Oracle.COM 			continue;
638*12068SRoger.Faulkner@Oracle.COM 		case 'f':
639*12068SRoger.Faulkner@Oracle.COM 			if(mode&(C_CHARS|C_BYTES))
640*12068SRoger.Faulkner@Oracle.COM 			{
641*12068SRoger.Faulkner@Oracle.COM 				error(2, "c option already specified");
642*12068SRoger.Faulkner@Oracle.COM 				continue;
643*12068SRoger.Faulkner@Oracle.COM 			}
644*12068SRoger.Faulkner@Oracle.COM 			cp = opt_info.arg;
645*12068SRoger.Faulkner@Oracle.COM 			mode |= C_FIELDS;
646*12068SRoger.Faulkner@Oracle.COM 			continue;
647*12068SRoger.Faulkner@Oracle.COM 		case 'n':
648*12068SRoger.Faulkner@Oracle.COM 			mode |= C_NOSPLIT;
649*12068SRoger.Faulkner@Oracle.COM 			continue;
650*12068SRoger.Faulkner@Oracle.COM 		case 'N':
651*12068SRoger.Faulkner@Oracle.COM 			mode |= C_NONEWLINE;
652*12068SRoger.Faulkner@Oracle.COM 			continue;
653*12068SRoger.Faulkner@Oracle.COM 		case 'R':
654*12068SRoger.Faulkner@Oracle.COM 		case 'r':
655*12068SRoger.Faulkner@Oracle.COM 			if(opt_info.num>0)
656*12068SRoger.Faulkner@Oracle.COM 				reclen = opt_info.num;
657*12068SRoger.Faulkner@Oracle.COM 			continue;
658*12068SRoger.Faulkner@Oracle.COM 		case 's':
659*12068SRoger.Faulkner@Oracle.COM 			mode |= C_SUPRESS;
660*12068SRoger.Faulkner@Oracle.COM 			continue;
661*12068SRoger.Faulkner@Oracle.COM 		case ':':
662*12068SRoger.Faulkner@Oracle.COM 			error(2, "%s", opt_info.arg);
663*12068SRoger.Faulkner@Oracle.COM 			break;
664*12068SRoger.Faulkner@Oracle.COM 		case '?':
665*12068SRoger.Faulkner@Oracle.COM 			error(ERROR_usage(2), "%s", opt_info.arg);
6664887Schin 			break;
6674887Schin 		}
6684887Schin 		break;
6694887Schin 	}
6704887Schin 	argv += opt_info.index;
6714887Schin 	if (error_info.errors)
6724887Schin 		error(ERROR_usage(2), "%s",optusage(NiL));
6734887Schin 	if(!cp)
6744887Schin 	{
6754887Schin 		error(2, "b, c or f option must be specified");
6764887Schin 		error(ERROR_usage(2), "%s", optusage(NiL));
6774887Schin 	}
6784887Schin 	if(!*cp)
6794887Schin 		error(3, "non-empty b, c or f option must be specified");
6804887Schin 	if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS)
6814887Schin 		error(3, "s option requires f option");
682*12068SRoger.Faulkner@Oracle.COM 	cut = cutinit(mode, cp, &wdelim, &ldelim, reclen);
6834887Schin 	if(cp = *argv)
6844887Schin 		argv++;
6854887Schin 	do
6864887Schin 	{
6874887Schin 		if(!cp || streq(cp,"-"))
6884887Schin 			fp = sfstdin;
6894887Schin 		else if(!(fp = sfopen(NiL,cp,"r")))
6904887Schin 		{
6914887Schin 			error(ERROR_system(0),"%s: cannot open",cp);
6924887Schin 			continue;
6934887Schin 		}
6944887Schin 		if(mode&C_FIELDS)
695*12068SRoger.Faulkner@Oracle.COM 			cutfields(cut,fp,sfstdout);
6964887Schin 		else
697*12068SRoger.Faulkner@Oracle.COM 			cutcols(cut,fp,sfstdout);
6984887Schin 		if(fp!=sfstdin)
6994887Schin 			sfclose(fp);
7008462SApril.Chin@Sun.COM 	} while(cp = *argv++);
7018462SApril.Chin@Sun.COM 	if (sfsync(sfstdout))
7028462SApril.Chin@Sun.COM 		error(ERROR_system(0), "write error");
703*12068SRoger.Faulkner@Oracle.COM 	return error_info.errors != 0;
7044887Schin }
705