14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1992-2010 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 78462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * * 204887Schin ***********************************************************************/ 214887Schin #pragma prototyped 224887Schin /* 234887Schin * David Korn 244887Schin * AT&T Bell Laboratories 254887Schin * 264887Schin * cut fields or columns from fields from a file 274887Schin */ 284887Schin 294887Schin static const char usage[] = 30*12068SRoger.Faulkner@Oracle.COM "[-?\n@(#)$Id: cut (AT&T Research) 2009-12-04 $\n]" 314887Schin USAGE_LICENSE 324887Schin "[+NAME?cut - cut out selected columns or fields of each line of a file]" 334887Schin "[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields " 344887Schin "from one or more files, contatenating them on standard output.]" 354887Schin "[+?The option argument \alist\a is a comma-separated or blank-separated " 364887Schin "list of positive numbers and ranges. Ranges can be of three " 374887Schin "forms. The first is two positive integers separated by a hyphen " 384887Schin "(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to " 394887Schin "\ahigh\a. The second is a positive number preceded by a hyphen " 404887Schin "(\b-\b\ahigh\a), which represents all fields from field \b1\b to " 414887Schin "\ahigh\a. The last is a positive number followed by a hyphen " 424887Schin "(\alow\a\b-\b), which represents all fields from \alow\a to the " 434887Schin "last field, inclusive. Elements in the \alist\a can be repeated, " 444887Schin "can overlap, and can appear in any order. The order of the " 454887Schin "output is that of the input.]" 464887Schin "[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]" 474887Schin "[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b " 484887Schin "cuts from standard input. The start of the file is defined " 494887Schin "as the current offset.]" 50*12068SRoger.Faulkner@Oracle.COM "[b:bytes]:[list?\bcut\b based on a list of byte counts.]" 51*12068SRoger.Faulkner@Oracle.COM "[c:characters]:[list?\bcut\b based on a list of character counts.]" 524887Schin "[d:delimiter]:[delim?The field character for the \b-f\b option is set " 534887Schin "to \adelim\a. The default is the \btab\b character.]" 544887Schin "[f:fields]:[list?\bcut\b based on fields separated by the delimiter " 554887Schin "character specified with the \b-d\b optiion.]" 56*12068SRoger.Faulkner@Oracle.COM "[n!:split?Split multibyte characters selected by the \b-b\b option.]" 574887Schin "[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length " 584887Schin "records of length \areclen\a when used with the \b-b\b or \b-c\b " 594887Schin "option.]" 604887Schin "[s:suppress|only-delimited?Suppress lines with no delimiter characters, " 614887Schin "when used with the \b-f\b option. By default, lines with no " 624887Schin "delimiters will be passsed in untouched.]" 634887Schin "[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for " 644887Schin "the \b-f\b option is set to \aldelim\a. The default is the " 654887Schin "\bnewline\b character.]" 66*12068SRoger.Faulkner@Oracle.COM "[N!:newline?Output new-lines at end of each record when used " 674887Schin "with the \b-b\b or \b-c\b option.]" 684887Schin "\n" 694887Schin "\n[file ...]\n" 704887Schin "\n" 714887Schin "[+EXIT STATUS?]{" 724887Schin "[+0?All files processed successfully.]" 734887Schin "[+>0?One or more files failed to open or could not be read.]" 744887Schin "}" 754887Schin "[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]" 764887Schin ; 774887Schin 784887Schin #include <cmd.h> 794887Schin #include <ctype.h> 804887Schin 81*12068SRoger.Faulkner@Oracle.COM typedef struct Delim_s 824887Schin { 83*12068SRoger.Faulkner@Oracle.COM char* str; 84*12068SRoger.Faulkner@Oracle.COM int len; 85*12068SRoger.Faulkner@Oracle.COM int chr; 86*12068SRoger.Faulkner@Oracle.COM } Delim_t; 874887Schin 884887Schin typedef struct Cut_s 894887Schin { 90*12068SRoger.Faulkner@Oracle.COM int mb; 91*12068SRoger.Faulkner@Oracle.COM int eob; 924887Schin int cflag; 93*12068SRoger.Faulkner@Oracle.COM int nosplit; 944887Schin int sflag; 954887Schin int nlflag; 964887Schin int reclen; 97*12068SRoger.Faulkner@Oracle.COM Delim_t wdelim; 98*12068SRoger.Faulkner@Oracle.COM Delim_t ldelim; 99*12068SRoger.Faulkner@Oracle.COM unsigned char space[UCHAR_MAX+1]; 1004887Schin int list[2]; /* NOTE: must be last member */ 1014887Schin } Cut_t; 1024887Schin 103*12068SRoger.Faulkner@Oracle.COM #define HUGE INT_MAX 1044887Schin #define BLOCK 8*1024 1054887Schin #define C_BYTES 1 1064887Schin #define C_CHARS 2 1074887Schin #define C_FIELDS 4 1084887Schin #define C_SUPRESS 8 109*12068SRoger.Faulkner@Oracle.COM #define C_NOSPLIT 16 1104887Schin #define C_NONEWLINE 32 1114887Schin 112*12068SRoger.Faulkner@Oracle.COM #define SP_LINE 1 113*12068SRoger.Faulkner@Oracle.COM #define SP_WORD 2 114*12068SRoger.Faulkner@Oracle.COM #define SP_WIDE 3 115*12068SRoger.Faulkner@Oracle.COM 116*12068SRoger.Faulkner@Oracle.COM #define mb2wc(w,p,n) (*ast.mb_towc)(&w,(char*)p,n) 117*12068SRoger.Faulkner@Oracle.COM 1184887Schin /* 1194887Schin * compare the first of an array of integers 1204887Schin */ 1214887Schin 122*12068SRoger.Faulkner@Oracle.COM static int 123*12068SRoger.Faulkner@Oracle.COM mycomp(register const void* a, register const void* b) 1244887Schin { 125*12068SRoger.Faulkner@Oracle.COM if (*((int*)a) < *((int*)b)) 126*12068SRoger.Faulkner@Oracle.COM return -1; 127*12068SRoger.Faulkner@Oracle.COM if (*((int*)a) > *((int*)b)) 128*12068SRoger.Faulkner@Oracle.COM return 1; 129*12068SRoger.Faulkner@Oracle.COM return 0; 1304887Schin } 1314887Schin 132*12068SRoger.Faulkner@Oracle.COM static Cut_t* 133*12068SRoger.Faulkner@Oracle.COM cutinit(int mode, char* str, Delim_t* wdelim, Delim_t* ldelim, size_t reclen) 1344887Schin { 135*12068SRoger.Faulkner@Oracle.COM register int* lp; 136*12068SRoger.Faulkner@Oracle.COM register int c; 137*12068SRoger.Faulkner@Oracle.COM register int n = 0; 138*12068SRoger.Faulkner@Oracle.COM register int range = 0; 139*12068SRoger.Faulkner@Oracle.COM register char* cp = str; 140*12068SRoger.Faulkner@Oracle.COM Cut_t* cut; 141*12068SRoger.Faulkner@Oracle.COM 142*12068SRoger.Faulkner@Oracle.COM if (!(cut = (Cut_t*)stakalloc(sizeof(Cut_t) + strlen(cp) * sizeof(int)))) 1434887Schin error(ERROR_exit(1), "out of space"); 144*12068SRoger.Faulkner@Oracle.COM if (cut->mb = mbwide()) 1454887Schin { 146*12068SRoger.Faulkner@Oracle.COM memset(cut->space, 0, sizeof(cut->space) / 2); 147*12068SRoger.Faulkner@Oracle.COM memset(cut->space + sizeof(cut->space) / 2, SP_WIDE, sizeof(cut->space) / 2); 148*12068SRoger.Faulkner@Oracle.COM } 149*12068SRoger.Faulkner@Oracle.COM else 150*12068SRoger.Faulkner@Oracle.COM memset(cut->space, 0, sizeof(cut->space)); 151*12068SRoger.Faulkner@Oracle.COM cut->wdelim = *wdelim; 152*12068SRoger.Faulkner@Oracle.COM if (wdelim->len == 1) 153*12068SRoger.Faulkner@Oracle.COM cut->space[wdelim->chr] = SP_WORD; 154*12068SRoger.Faulkner@Oracle.COM cut->ldelim = *ldelim; 155*12068SRoger.Faulkner@Oracle.COM cut->eob = (ldelim->len == 1) ? ldelim->chr : 0; 156*12068SRoger.Faulkner@Oracle.COM cut->space[cut->eob] = SP_LINE; 157*12068SRoger.Faulkner@Oracle.COM cut->cflag = (mode&C_CHARS) && cut->mb; 158*12068SRoger.Faulkner@Oracle.COM cut->nosplit = (mode&(C_BYTES|C_NOSPLIT)) == (C_BYTES|C_NOSPLIT) && cut->mb; 159*12068SRoger.Faulkner@Oracle.COM cut->sflag = (mode&C_SUPRESS) != 0; 160*12068SRoger.Faulkner@Oracle.COM cut->nlflag = (mode&C_NONEWLINE) != 0; 161*12068SRoger.Faulkner@Oracle.COM cut->reclen = reclen; 162*12068SRoger.Faulkner@Oracle.COM lp = cut->list; 163*12068SRoger.Faulkner@Oracle.COM for (;;) 164*12068SRoger.Faulkner@Oracle.COM switch(c = *cp++) 165*12068SRoger.Faulkner@Oracle.COM { 1664887Schin case ' ': 1674887Schin case '\t': 1684887Schin while(*cp==' ' || *cp=='\t') 1694887Schin cp++; 170*12068SRoger.Faulkner@Oracle.COM /*FALLTHROUGH*/ 1714887Schin case 0: 1724887Schin case ',': 1734887Schin if(range) 1744887Schin { 1754887Schin --range; 176*12068SRoger.Faulkner@Oracle.COM if((n = (n ? (n-range) : (HUGE-1))) < 0) 1774887Schin error(ERROR_exit(1),"invalid range for c/f option"); 1784887Schin *lp++ = range; 1794887Schin *lp++ = n; 1804887Schin } 1814887Schin else 1824887Schin { 1834887Schin *lp++ = --n; 1844887Schin *lp++ = 1; 1854887Schin } 1864887Schin if(c==0) 1874887Schin { 1884887Schin register int *dp; 1894887Schin *lp = HUGE; 190*12068SRoger.Faulkner@Oracle.COM n = 1 + (lp-cut->list)/2; 191*12068SRoger.Faulkner@Oracle.COM qsort(lp=cut->list,n,2*sizeof(*lp),mycomp); 1924887Schin /* eliminate overlapping regions */ 1934887Schin for(n=0,range= -2,dp=lp; *lp!=HUGE; lp+=2) 1944887Schin { 1954887Schin if(lp[0] <= range) 1964887Schin { 1974887Schin if(lp[1]==HUGE) 1984887Schin { 1994887Schin dp[-1] = HUGE; 2004887Schin break; 2014887Schin } 2024887Schin if((c = lp[0]+lp[1]-range)>0) 2034887Schin { 2044887Schin range += c; 2054887Schin dp[-1] += c; 2064887Schin } 2074887Schin } 2084887Schin else 2094887Schin { 2104887Schin range = *dp++ = lp[0]; 2114887Schin if(lp[1]==HUGE) 2124887Schin { 2134887Schin *dp++ = HUGE; 2144887Schin break; 2154887Schin } 2164887Schin range += (*dp++ = lp[1]); 2174887Schin } 2184887Schin } 2194887Schin *dp = HUGE; 220*12068SRoger.Faulkner@Oracle.COM lp = cut->list; 2214887Schin /* convert ranges into gaps */ 2224887Schin for(n=0; *lp!=HUGE; lp+=2) 2234887Schin { 2244887Schin c = *lp; 2254887Schin *lp -= n; 2264887Schin n = c+lp[1]; 2274887Schin } 228*12068SRoger.Faulkner@Oracle.COM return cut; 2294887Schin } 2304887Schin n = range = 0; 2314887Schin break; 2324887Schin 2334887Schin case '-': 2344887Schin if(range) 2354887Schin error(ERROR_exit(1),"bad list for c/f option"); 2364887Schin range = n?n:1; 2374887Schin n = 0; 2384887Schin break; 2394887Schin 2404887Schin default: 2414887Schin if(!isdigit(c)) 2424887Schin error(ERROR_exit(1),"bad list for c/f option"); 2434887Schin n = 10*n + (c-'0'); 244*12068SRoger.Faulkner@Oracle.COM break; 245*12068SRoger.Faulkner@Oracle.COM } 2464887Schin /* NOTREACHED */ 2474887Schin } 2484887Schin 2494887Schin /* 2504887Schin * cut each line of file <fdin> and put results to <fdout> using list <list> 2514887Schin */ 2524887Schin 253*12068SRoger.Faulkner@Oracle.COM static void 254*12068SRoger.Faulkner@Oracle.COM cutcols(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout) 2554887Schin { 256*12068SRoger.Faulkner@Oracle.COM register int c; 257*12068SRoger.Faulkner@Oracle.COM register int len; 258*12068SRoger.Faulkner@Oracle.COM register int ncol = 0; 259*12068SRoger.Faulkner@Oracle.COM register const int* lp = cut->list; 260*12068SRoger.Faulkner@Oracle.COM register char* bp; 2614887Schin register int skip; /* non-zero for don't copy */ 262*12068SRoger.Faulkner@Oracle.COM int must; 263*12068SRoger.Faulkner@Oracle.COM char* ep; 264*12068SRoger.Faulkner@Oracle.COM const char* xx; 265*12068SRoger.Faulkner@Oracle.COM 266*12068SRoger.Faulkner@Oracle.COM for (;;) 2674887Schin { 268*12068SRoger.Faulkner@Oracle.COM if (len = cut->reclen) 269*12068SRoger.Faulkner@Oracle.COM bp = sfreserve(fdin, len, -1); 2704887Schin else 271*12068SRoger.Faulkner@Oracle.COM bp = sfgetr(fdin, '\n', 0); 272*12068SRoger.Faulkner@Oracle.COM if (!bp && !(bp = sfgetr(fdin, 0, SF_LASTR))) 2734887Schin break; 2744887Schin len = sfvalue(fdin); 275*12068SRoger.Faulkner@Oracle.COM ep = bp + len; 276*12068SRoger.Faulkner@Oracle.COM xx = 0; 277*12068SRoger.Faulkner@Oracle.COM if (!(ncol = skip = *(lp = cut->list))) 2784887Schin ncol = *++lp; 279*12068SRoger.Faulkner@Oracle.COM must = 1; 280*12068SRoger.Faulkner@Oracle.COM do 2814887Schin { 282*12068SRoger.Faulkner@Oracle.COM if (cut->nosplit) 283*12068SRoger.Faulkner@Oracle.COM { 284*12068SRoger.Faulkner@Oracle.COM register const char* s = bp; 285*12068SRoger.Faulkner@Oracle.COM register int w = len < ncol ? len : ncol; 286*12068SRoger.Faulkner@Oracle.COM register int z; 287*12068SRoger.Faulkner@Oracle.COM 288*12068SRoger.Faulkner@Oracle.COM while (w > 0) 289*12068SRoger.Faulkner@Oracle.COM { 290*12068SRoger.Faulkner@Oracle.COM if (!(*s & 0x80)) 291*12068SRoger.Faulkner@Oracle.COM z = 1; 292*12068SRoger.Faulkner@Oracle.COM else if ((z = mblen(s, w)) <= 0) 293*12068SRoger.Faulkner@Oracle.COM { 294*12068SRoger.Faulkner@Oracle.COM if (s == bp && xx) 295*12068SRoger.Faulkner@Oracle.COM { 296*12068SRoger.Faulkner@Oracle.COM w += s - xx; 297*12068SRoger.Faulkner@Oracle.COM bp = (char*)(s = xx); 298*12068SRoger.Faulkner@Oracle.COM xx = 0; 299*12068SRoger.Faulkner@Oracle.COM continue; 300*12068SRoger.Faulkner@Oracle.COM } 301*12068SRoger.Faulkner@Oracle.COM xx = s; 302*12068SRoger.Faulkner@Oracle.COM if (skip) 303*12068SRoger.Faulkner@Oracle.COM s += w; 304*12068SRoger.Faulkner@Oracle.COM w = 0; 305*12068SRoger.Faulkner@Oracle.COM break; 306*12068SRoger.Faulkner@Oracle.COM } 307*12068SRoger.Faulkner@Oracle.COM s += z; 308*12068SRoger.Faulkner@Oracle.COM w -= z; 309*12068SRoger.Faulkner@Oracle.COM } 310*12068SRoger.Faulkner@Oracle.COM c = s - bp; 311*12068SRoger.Faulkner@Oracle.COM ncol = !w && ncol >= len; 312*12068SRoger.Faulkner@Oracle.COM } 313*12068SRoger.Faulkner@Oracle.COM else if (cut->cflag) 314*12068SRoger.Faulkner@Oracle.COM { 315*12068SRoger.Faulkner@Oracle.COM register const char* s = bp; 316*12068SRoger.Faulkner@Oracle.COM register int w = len; 317*12068SRoger.Faulkner@Oracle.COM register int z; 318*12068SRoger.Faulkner@Oracle.COM 319*12068SRoger.Faulkner@Oracle.COM while (w > 0 && ncol > 0) 320*12068SRoger.Faulkner@Oracle.COM { 321*12068SRoger.Faulkner@Oracle.COM ncol--; 322*12068SRoger.Faulkner@Oracle.COM if (!(*s & 0x80) || (z = mblen(s, w)) <= 0) 323*12068SRoger.Faulkner@Oracle.COM z = 1; 324*12068SRoger.Faulkner@Oracle.COM s += z; 325*12068SRoger.Faulkner@Oracle.COM w -= z; 326*12068SRoger.Faulkner@Oracle.COM 327*12068SRoger.Faulkner@Oracle.COM } 328*12068SRoger.Faulkner@Oracle.COM c = s - bp; 329*12068SRoger.Faulkner@Oracle.COM ncol = !w && (ncol || !skip); 330*12068SRoger.Faulkner@Oracle.COM } 331*12068SRoger.Faulkner@Oracle.COM else 332*12068SRoger.Faulkner@Oracle.COM { 333*12068SRoger.Faulkner@Oracle.COM if ((c = ncol) > len) 334*12068SRoger.Faulkner@Oracle.COM c = len; 335*12068SRoger.Faulkner@Oracle.COM else if (c == len && !skip) 336*12068SRoger.Faulkner@Oracle.COM ncol++; 337*12068SRoger.Faulkner@Oracle.COM ncol -= c; 338*12068SRoger.Faulkner@Oracle.COM } 339*12068SRoger.Faulkner@Oracle.COM if (!skip && c) 340*12068SRoger.Faulkner@Oracle.COM { 341*12068SRoger.Faulkner@Oracle.COM if (sfwrite(fdout, (char*)bp, c) < 0) 342*12068SRoger.Faulkner@Oracle.COM return; 343*12068SRoger.Faulkner@Oracle.COM must = 0; 344*12068SRoger.Faulkner@Oracle.COM } 345*12068SRoger.Faulkner@Oracle.COM bp += c; 346*12068SRoger.Faulkner@Oracle.COM if (ncol) 3474887Schin break; 3484887Schin len -= c; 3494887Schin ncol = *++lp; 3504887Schin skip = !skip; 351*12068SRoger.Faulkner@Oracle.COM } while (ncol != HUGE); 352*12068SRoger.Faulkner@Oracle.COM if (!cut->nlflag && (skip || must || cut->reclen)) 353*12068SRoger.Faulkner@Oracle.COM { 354*12068SRoger.Faulkner@Oracle.COM if (cut->ldelim.len > 1) 355*12068SRoger.Faulkner@Oracle.COM sfwrite(fdout, cut->ldelim.str, cut->ldelim.len); 356*12068SRoger.Faulkner@Oracle.COM else 357*12068SRoger.Faulkner@Oracle.COM sfputc(fdout, cut->ldelim.chr); 3584887Schin } 3594887Schin } 3604887Schin } 3614887Schin 3624887Schin /* 3634887Schin * cut each line of file <fdin> and put results to <fdout> using list <list> 3644887Schin * stream <fdin> must be line buffered 3654887Schin */ 3664887Schin 367*12068SRoger.Faulkner@Oracle.COM static void 368*12068SRoger.Faulkner@Oracle.COM cutfields(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout) 3694887Schin { 370*12068SRoger.Faulkner@Oracle.COM register unsigned char *sp = cut->space; 3714887Schin register unsigned char *cp; 372*12068SRoger.Faulkner@Oracle.COM register unsigned char *wp; 3734887Schin register int c, nfields; 374*12068SRoger.Faulkner@Oracle.COM register const int *lp = cut->list; 3754887Schin register unsigned char *copy; 3764887Schin register int nodelim, empty, inword=0; 377*12068SRoger.Faulkner@Oracle.COM register unsigned char *ep; 378*12068SRoger.Faulkner@Oracle.COM unsigned char *bp, *first; 3794887Schin int lastchar; 380*12068SRoger.Faulkner@Oracle.COM wchar_t w; 3814887Schin Sfio_t *fdtmp = 0; 3824887Schin long offset = 0; 383*12068SRoger.Faulkner@Oracle.COM unsigned char mb[8]; 3844887Schin /* process each buffer */ 385*12068SRoger.Faulkner@Oracle.COM while ((bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) && (c = sfvalue(fdin)) > 0) 3864887Schin { 387*12068SRoger.Faulkner@Oracle.COM cp = bp; 388*12068SRoger.Faulkner@Oracle.COM ep = cp + --c; 389*12068SRoger.Faulkner@Oracle.COM if((lastchar = cp[c]) != cut->eob) 390*12068SRoger.Faulkner@Oracle.COM *ep = cut->eob; 3914887Schin /* process each line in the buffer */ 392*12068SRoger.Faulkner@Oracle.COM while (cp <= ep) 3934887Schin { 3944887Schin first = cp; 395*12068SRoger.Faulkner@Oracle.COM if (!inword) 3964887Schin { 3974887Schin nodelim = empty = 1; 3984887Schin copy = cp; 399*12068SRoger.Faulkner@Oracle.COM if (nfields = *(lp = cut->list)) 4004887Schin copy = 0; 4014887Schin else 4024887Schin nfields = *++lp; 4034887Schin } 404*12068SRoger.Faulkner@Oracle.COM else if (copy) 4054887Schin copy = cp; 4064887Schin inword = 0; 407*12068SRoger.Faulkner@Oracle.COM do 4084887Schin { 4094887Schin /* skip over non-delimiter characters */ 410*12068SRoger.Faulkner@Oracle.COM if (cut->mb) 411*12068SRoger.Faulkner@Oracle.COM for (;;) 412*12068SRoger.Faulkner@Oracle.COM { 413*12068SRoger.Faulkner@Oracle.COM switch (c = sp[*(unsigned char*)cp++]) 414*12068SRoger.Faulkner@Oracle.COM { 415*12068SRoger.Faulkner@Oracle.COM case 0: 416*12068SRoger.Faulkner@Oracle.COM continue; 417*12068SRoger.Faulkner@Oracle.COM case SP_WIDE: 418*12068SRoger.Faulkner@Oracle.COM wp = --cp; 419*12068SRoger.Faulkner@Oracle.COM while ((c = mb2wc(w, cp, ep - cp)) <= 0) 420*12068SRoger.Faulkner@Oracle.COM { 421*12068SRoger.Faulkner@Oracle.COM /* mb char possibly spanning buffer boundary -- fun stuff */ 422*12068SRoger.Faulkner@Oracle.COM if ((ep - cp) < mbmax()) 423*12068SRoger.Faulkner@Oracle.COM { 424*12068SRoger.Faulkner@Oracle.COM int i; 425*12068SRoger.Faulkner@Oracle.COM int j; 426*12068SRoger.Faulkner@Oracle.COM int k; 427*12068SRoger.Faulkner@Oracle.COM 428*12068SRoger.Faulkner@Oracle.COM if (lastchar != cut->eob) 429*12068SRoger.Faulkner@Oracle.COM { 430*12068SRoger.Faulkner@Oracle.COM *ep = lastchar; 431*12068SRoger.Faulkner@Oracle.COM if ((c = mb2wc(w, cp, ep - cp)) > 0) 432*12068SRoger.Faulkner@Oracle.COM break; 433*12068SRoger.Faulkner@Oracle.COM } 434*12068SRoger.Faulkner@Oracle.COM if (copy) 435*12068SRoger.Faulkner@Oracle.COM { 436*12068SRoger.Faulkner@Oracle.COM empty = 0; 437*12068SRoger.Faulkner@Oracle.COM if ((c = cp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0) 438*12068SRoger.Faulkner@Oracle.COM goto failed; 439*12068SRoger.Faulkner@Oracle.COM } 440*12068SRoger.Faulkner@Oracle.COM for (i = 0; i <= (ep - cp); i++) 441*12068SRoger.Faulkner@Oracle.COM mb[i] = cp[i]; 442*12068SRoger.Faulkner@Oracle.COM if (!(bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) || (c = sfvalue(fdin)) <= 0) 443*12068SRoger.Faulkner@Oracle.COM goto failed; 444*12068SRoger.Faulkner@Oracle.COM cp = bp; 445*12068SRoger.Faulkner@Oracle.COM ep = cp + --c; 446*12068SRoger.Faulkner@Oracle.COM if ((lastchar = cp[c]) != cut->eob) 447*12068SRoger.Faulkner@Oracle.COM *ep = cut->eob; 448*12068SRoger.Faulkner@Oracle.COM j = i; 449*12068SRoger.Faulkner@Oracle.COM k = 0; 450*12068SRoger.Faulkner@Oracle.COM while (j < mbmax()) 451*12068SRoger.Faulkner@Oracle.COM mb[j++] = cp[k++]; 452*12068SRoger.Faulkner@Oracle.COM if ((c = mb2wc(w, (char*)mb, j)) <= 0) 453*12068SRoger.Faulkner@Oracle.COM { 454*12068SRoger.Faulkner@Oracle.COM c = i; 455*12068SRoger.Faulkner@Oracle.COM w = 0; 456*12068SRoger.Faulkner@Oracle.COM } 457*12068SRoger.Faulkner@Oracle.COM first = bp = cp += c - i; 458*12068SRoger.Faulkner@Oracle.COM if (copy) 459*12068SRoger.Faulkner@Oracle.COM { 460*12068SRoger.Faulkner@Oracle.COM copy = bp; 461*12068SRoger.Faulkner@Oracle.COM if (w == cut->ldelim.chr) 462*12068SRoger.Faulkner@Oracle.COM lastchar = cut->ldelim.chr; 463*12068SRoger.Faulkner@Oracle.COM else if (w != cut->wdelim.chr) 464*12068SRoger.Faulkner@Oracle.COM { 465*12068SRoger.Faulkner@Oracle.COM empty = 0; 466*12068SRoger.Faulkner@Oracle.COM if (sfwrite(fdout, (char*)mb, c) < 0) 467*12068SRoger.Faulkner@Oracle.COM goto failed; 468*12068SRoger.Faulkner@Oracle.COM } 469*12068SRoger.Faulkner@Oracle.COM } 470*12068SRoger.Faulkner@Oracle.COM c = 0; 471*12068SRoger.Faulkner@Oracle.COM } 472*12068SRoger.Faulkner@Oracle.COM else 473*12068SRoger.Faulkner@Oracle.COM { 474*12068SRoger.Faulkner@Oracle.COM w = *cp; 475*12068SRoger.Faulkner@Oracle.COM c = 1; 476*12068SRoger.Faulkner@Oracle.COM } 477*12068SRoger.Faulkner@Oracle.COM break; 478*12068SRoger.Faulkner@Oracle.COM } 479*12068SRoger.Faulkner@Oracle.COM cp += c; 480*12068SRoger.Faulkner@Oracle.COM c = w; 481*12068SRoger.Faulkner@Oracle.COM if (c == cut->wdelim.chr) 482*12068SRoger.Faulkner@Oracle.COM { 483*12068SRoger.Faulkner@Oracle.COM c = SP_WORD; 484*12068SRoger.Faulkner@Oracle.COM break; 485*12068SRoger.Faulkner@Oracle.COM } 486*12068SRoger.Faulkner@Oracle.COM if (c == cut->ldelim.chr) 487*12068SRoger.Faulkner@Oracle.COM { 488*12068SRoger.Faulkner@Oracle.COM c = SP_LINE; 489*12068SRoger.Faulkner@Oracle.COM break; 490*12068SRoger.Faulkner@Oracle.COM } 491*12068SRoger.Faulkner@Oracle.COM continue; 492*12068SRoger.Faulkner@Oracle.COM default: 493*12068SRoger.Faulkner@Oracle.COM wp = cp - 1; 494*12068SRoger.Faulkner@Oracle.COM break; 495*12068SRoger.Faulkner@Oracle.COM } 4964887Schin break; 497*12068SRoger.Faulkner@Oracle.COM } 498*12068SRoger.Faulkner@Oracle.COM else 499*12068SRoger.Faulkner@Oracle.COM { 500*12068SRoger.Faulkner@Oracle.COM while (!(c = sp[*cp++])); 501*12068SRoger.Faulkner@Oracle.COM wp = cp - 1; 502*12068SRoger.Faulkner@Oracle.COM } 503*12068SRoger.Faulkner@Oracle.COM /* check for end-of-line */ 504*12068SRoger.Faulkner@Oracle.COM if (c == SP_LINE) 505*12068SRoger.Faulkner@Oracle.COM { 506*12068SRoger.Faulkner@Oracle.COM if (cp <= ep) 5074887Schin break; 508*12068SRoger.Faulkner@Oracle.COM if (lastchar == cut->ldelim.chr) 509*12068SRoger.Faulkner@Oracle.COM break; 510*12068SRoger.Faulkner@Oracle.COM /* restore cut->last character */ 511*12068SRoger.Faulkner@Oracle.COM if (lastchar != cut->eob) 512*12068SRoger.Faulkner@Oracle.COM *ep = lastchar; 5134887Schin inword++; 514*12068SRoger.Faulkner@Oracle.COM if (!sp[lastchar]) 5154887Schin break; 5164887Schin } 5174887Schin nodelim = 0; 518*12068SRoger.Faulkner@Oracle.COM if (--nfields > 0) 5194887Schin continue; 5204887Schin nfields = *++lp; 521*12068SRoger.Faulkner@Oracle.COM if (copy) 5224887Schin { 5234887Schin empty = 0; 524*12068SRoger.Faulkner@Oracle.COM if ((c = wp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0) 5254887Schin goto failed; 5264887Schin copy = 0; 5274887Schin } 5284887Schin else 5294887Schin /* set to delimiter unless the first field */ 530*12068SRoger.Faulkner@Oracle.COM copy = empty ? cp : wp; 531*12068SRoger.Faulkner@Oracle.COM } while (!inword); 532*12068SRoger.Faulkner@Oracle.COM if (!inword) 5334887Schin { 534*12068SRoger.Faulkner@Oracle.COM if (!copy) 5354887Schin { 536*12068SRoger.Faulkner@Oracle.COM if (nodelim) 5374887Schin { 538*12068SRoger.Faulkner@Oracle.COM if (!cut->sflag) 5394887Schin { 540*12068SRoger.Faulkner@Oracle.COM if (offset) 5414887Schin { 5424887Schin sfseek(fdtmp,(Sfoff_t)0,SEEK_SET); 5434887Schin sfmove(fdtmp,fdout,offset,-1); 5444887Schin } 5454887Schin copy = first; 5464887Schin } 5474887Schin } 5484887Schin else 5494887Schin sfputc(fdout,'\n'); 5504887Schin } 551*12068SRoger.Faulkner@Oracle.COM if (offset) 5524887Schin sfseek(fdtmp,offset=0,SEEK_SET); 5534887Schin } 554*12068SRoger.Faulkner@Oracle.COM if (copy && (c=cp-copy)>0 && (!nodelim || !cut->sflag) && sfwrite(fdout,(char*)copy,c)< 0) 5554887Schin goto failed; 5564887Schin } 5574887Schin /* see whether to save in tmp file */ 558*12068SRoger.Faulkner@Oracle.COM if(inword && nodelim && !cut->sflag && (c=cp-first)>0) 5594887Schin { 5604887Schin /* copy line to tmpfile in case no fields */ 5614887Schin if(!fdtmp) 5624887Schin fdtmp = sftmp(BLOCK); 5634887Schin sfwrite(fdtmp,(char*)first,c); 5644887Schin offset +=c; 5654887Schin } 5664887Schin } 567*12068SRoger.Faulkner@Oracle.COM failed: 5684887Schin if(fdtmp) 5694887Schin sfclose(fdtmp); 5704887Schin } 5714887Schin 5724887Schin int 573*12068SRoger.Faulkner@Oracle.COM b_cut(int argc, char** argv, void* context) 5744887Schin { 575*12068SRoger.Faulkner@Oracle.COM register char* cp = 0; 576*12068SRoger.Faulkner@Oracle.COM register Sfio_t* fp; 577*12068SRoger.Faulkner@Oracle.COM char* s; 578*12068SRoger.Faulkner@Oracle.COM int n; 579*12068SRoger.Faulkner@Oracle.COM Cut_t* cut; 580*12068SRoger.Faulkner@Oracle.COM int mode = 0; 581*12068SRoger.Faulkner@Oracle.COM Delim_t wdelim; 582*12068SRoger.Faulkner@Oracle.COM Delim_t ldelim; 583*12068SRoger.Faulkner@Oracle.COM size_t reclen = 0; 5844887Schin 5854887Schin cmdinit(argc, argv, context, ERROR_CATALOG, 0); 586*12068SRoger.Faulkner@Oracle.COM wdelim.chr = '\t'; 587*12068SRoger.Faulkner@Oracle.COM ldelim.chr = '\n'; 588*12068SRoger.Faulkner@Oracle.COM wdelim.len = ldelim.len = 1; 589*12068SRoger.Faulkner@Oracle.COM for (;;) 5904887Schin { 591*12068SRoger.Faulkner@Oracle.COM switch (n = optget(argv, usage)) 5924887Schin { 593*12068SRoger.Faulkner@Oracle.COM case 0: 594*12068SRoger.Faulkner@Oracle.COM break; 595*12068SRoger.Faulkner@Oracle.COM case 'b': 596*12068SRoger.Faulkner@Oracle.COM case 'c': 597*12068SRoger.Faulkner@Oracle.COM if(mode&C_FIELDS) 598*12068SRoger.Faulkner@Oracle.COM { 599*12068SRoger.Faulkner@Oracle.COM error(2, "f option already specified"); 600*12068SRoger.Faulkner@Oracle.COM continue; 601*12068SRoger.Faulkner@Oracle.COM } 602*12068SRoger.Faulkner@Oracle.COM cp = opt_info.arg; 603*12068SRoger.Faulkner@Oracle.COM if(n=='b') 604*12068SRoger.Faulkner@Oracle.COM mode |= C_BYTES; 605*12068SRoger.Faulkner@Oracle.COM else 606*12068SRoger.Faulkner@Oracle.COM mode |= C_CHARS; 607*12068SRoger.Faulkner@Oracle.COM continue; 608*12068SRoger.Faulkner@Oracle.COM case 'D': 609*12068SRoger.Faulkner@Oracle.COM ldelim.str = opt_info.arg; 610*12068SRoger.Faulkner@Oracle.COM if (mbwide()) 611*12068SRoger.Faulkner@Oracle.COM { 612*12068SRoger.Faulkner@Oracle.COM s = opt_info.arg; 613*12068SRoger.Faulkner@Oracle.COM ldelim.chr = mbchar(s); 614*12068SRoger.Faulkner@Oracle.COM if ((n = s - opt_info.arg) > 1) 615*12068SRoger.Faulkner@Oracle.COM { 616*12068SRoger.Faulkner@Oracle.COM ldelim.len = n; 617*12068SRoger.Faulkner@Oracle.COM continue; 618*12068SRoger.Faulkner@Oracle.COM } 619*12068SRoger.Faulkner@Oracle.COM } 620*12068SRoger.Faulkner@Oracle.COM ldelim.chr = *(unsigned char*)opt_info.arg; 621*12068SRoger.Faulkner@Oracle.COM ldelim.len = 1; 622*12068SRoger.Faulkner@Oracle.COM continue; 623*12068SRoger.Faulkner@Oracle.COM case 'd': 624*12068SRoger.Faulkner@Oracle.COM wdelim.str = opt_info.arg; 625*12068SRoger.Faulkner@Oracle.COM if (mbwide()) 626*12068SRoger.Faulkner@Oracle.COM { 627*12068SRoger.Faulkner@Oracle.COM s = opt_info.arg; 628*12068SRoger.Faulkner@Oracle.COM wdelim.chr = mbchar(s); 629*12068SRoger.Faulkner@Oracle.COM if ((n = s - opt_info.arg) > 1) 630*12068SRoger.Faulkner@Oracle.COM { 631*12068SRoger.Faulkner@Oracle.COM wdelim.len = n; 632*12068SRoger.Faulkner@Oracle.COM continue; 633*12068SRoger.Faulkner@Oracle.COM } 634*12068SRoger.Faulkner@Oracle.COM } 635*12068SRoger.Faulkner@Oracle.COM wdelim.chr = *(unsigned char*)opt_info.arg; 636*12068SRoger.Faulkner@Oracle.COM wdelim.len = 1; 637*12068SRoger.Faulkner@Oracle.COM continue; 638*12068SRoger.Faulkner@Oracle.COM case 'f': 639*12068SRoger.Faulkner@Oracle.COM if(mode&(C_CHARS|C_BYTES)) 640*12068SRoger.Faulkner@Oracle.COM { 641*12068SRoger.Faulkner@Oracle.COM error(2, "c option already specified"); 642*12068SRoger.Faulkner@Oracle.COM continue; 643*12068SRoger.Faulkner@Oracle.COM } 644*12068SRoger.Faulkner@Oracle.COM cp = opt_info.arg; 645*12068SRoger.Faulkner@Oracle.COM mode |= C_FIELDS; 646*12068SRoger.Faulkner@Oracle.COM continue; 647*12068SRoger.Faulkner@Oracle.COM case 'n': 648*12068SRoger.Faulkner@Oracle.COM mode |= C_NOSPLIT; 649*12068SRoger.Faulkner@Oracle.COM continue; 650*12068SRoger.Faulkner@Oracle.COM case 'N': 651*12068SRoger.Faulkner@Oracle.COM mode |= C_NONEWLINE; 652*12068SRoger.Faulkner@Oracle.COM continue; 653*12068SRoger.Faulkner@Oracle.COM case 'R': 654*12068SRoger.Faulkner@Oracle.COM case 'r': 655*12068SRoger.Faulkner@Oracle.COM if(opt_info.num>0) 656*12068SRoger.Faulkner@Oracle.COM reclen = opt_info.num; 657*12068SRoger.Faulkner@Oracle.COM continue; 658*12068SRoger.Faulkner@Oracle.COM case 's': 659*12068SRoger.Faulkner@Oracle.COM mode |= C_SUPRESS; 660*12068SRoger.Faulkner@Oracle.COM continue; 661*12068SRoger.Faulkner@Oracle.COM case ':': 662*12068SRoger.Faulkner@Oracle.COM error(2, "%s", opt_info.arg); 663*12068SRoger.Faulkner@Oracle.COM break; 664*12068SRoger.Faulkner@Oracle.COM case '?': 665*12068SRoger.Faulkner@Oracle.COM error(ERROR_usage(2), "%s", opt_info.arg); 6664887Schin break; 6674887Schin } 6684887Schin break; 6694887Schin } 6704887Schin argv += opt_info.index; 6714887Schin if (error_info.errors) 6724887Schin error(ERROR_usage(2), "%s",optusage(NiL)); 6734887Schin if(!cp) 6744887Schin { 6754887Schin error(2, "b, c or f option must be specified"); 6764887Schin error(ERROR_usage(2), "%s", optusage(NiL)); 6774887Schin } 6784887Schin if(!*cp) 6794887Schin error(3, "non-empty b, c or f option must be specified"); 6804887Schin if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS) 6814887Schin error(3, "s option requires f option"); 682*12068SRoger.Faulkner@Oracle.COM cut = cutinit(mode, cp, &wdelim, &ldelim, reclen); 6834887Schin if(cp = *argv) 6844887Schin argv++; 6854887Schin do 6864887Schin { 6874887Schin if(!cp || streq(cp,"-")) 6884887Schin fp = sfstdin; 6894887Schin else if(!(fp = sfopen(NiL,cp,"r"))) 6904887Schin { 6914887Schin error(ERROR_system(0),"%s: cannot open",cp); 6924887Schin continue; 6934887Schin } 6944887Schin if(mode&C_FIELDS) 695*12068SRoger.Faulkner@Oracle.COM cutfields(cut,fp,sfstdout); 6964887Schin else 697*12068SRoger.Faulkner@Oracle.COM cutcols(cut,fp,sfstdout); 6984887Schin if(fp!=sfstdin) 6994887Schin sfclose(fp); 7008462SApril.Chin@Sun.COM } while(cp = *argv++); 7018462SApril.Chin@Sun.COM if (sfsync(sfstdout)) 7028462SApril.Chin@Sun.COM error(ERROR_system(0), "write error"); 703*12068SRoger.Faulkner@Oracle.COM return error_info.errors != 0; 7044887Schin } 705