1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 char copyright[] = 39 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 40 All rights reserved.\n"; 41 #endif /* not lint */ 42 43 #ifndef lint 44 /*static char sccsid[] = "from: @(#)cut.c 5.4 (Berkeley) 10/30/90";*/ 45 static char rcsid[] = "$Id: cut.c,v 1.6 1993/12/31 19:24:42 jtc Exp $"; 46 #endif /* not lint */ 47 48 #include <stdio.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <limits.h> 52 #include <locale.h> 53 #include <ctype.h> 54 #include <err.h> 55 56 int cflag; 57 char dchar; 58 int dflag; 59 int fflag; 60 int sflag; 61 62 int 63 main(argc, argv) 64 int argc; 65 char **argv; 66 { 67 extern char *optarg; 68 extern int errno, optind; 69 FILE *fp; 70 int ch, (*fcn)(), c_cut(), f_cut(); 71 72 setlocale (LC_ALL, ""); 73 74 dchar = '\t'; /* default delimiter is \t */ 75 76 /* Since we don't support multi-byte characters, the -c and -b 77 options are equivalent, and the -n option is meaningless. */ 78 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != EOF) 79 switch(ch) { 80 case 'b': 81 case 'c': 82 fcn = c_cut; 83 get_list(optarg); 84 cflag = 1; 85 break; 86 case 'd': 87 dchar = *optarg; 88 dflag = 1; 89 break; 90 case 'f': 91 get_list(optarg); 92 fcn = f_cut; 93 fflag = 1; 94 break; 95 case 's': 96 sflag = 1; 97 break; 98 case 'n': 99 break; 100 case '?': 101 default: 102 usage(); 103 } 104 argc -= optind; 105 argv += optind; 106 107 if (fflag) { 108 if (cflag) 109 usage(); 110 } else if (!cflag || dflag || sflag) 111 usage(); 112 113 if (*argv) 114 for (; *argv; ++argv) { 115 if (!(fp = fopen(*argv, "r"))) { 116 err(1, "%s", *argv); 117 /* NOTREACHED */ 118 } 119 fcn(fp, *argv); 120 } 121 else 122 fcn(stdin, "stdin"); 123 exit(0); 124 } 125 126 int autostart, autostop, maxval; 127 128 char positions[_POSIX2_LINE_MAX + 1]; 129 130 get_list(list) 131 char *list; 132 { 133 register char *pos; 134 register int setautostart, start, stop; 135 char *p; 136 137 /* 138 * set a byte in the positions array to indicate if a field or 139 * column is to be selected; use +1, it's 1-based, not 0-based. 140 * This parser is less restrictive than the Draft 9 POSIX spec. 141 * POSIX doesn't allow lists that aren't in increasing order or 142 * overlapping lists. We also handle "-3-5" although there's no 143 * real reason too. 144 */ 145 for (; p = strtok(list, ", \t"); list = NULL) { 146 setautostart = start = stop = 0; 147 if (*p == '-') { 148 ++p; 149 setautostart = 1; 150 } 151 if (isdigit(*p)) { 152 start = stop = strtol(p, &p, 10); 153 if (setautostart && start > autostart) 154 autostart = start; 155 } 156 if (*p == '-') { 157 if (isdigit(p[1])) 158 stop = strtol(p + 1, &p, 10); 159 if (*p == '-') { 160 ++p; 161 if (!autostop || autostop > stop) 162 autostop = stop; 163 } 164 } 165 if (*p) 166 badlist("illegal list value"); 167 if (!stop || !start) 168 badlist("values may not include zero"); 169 if (stop > _POSIX2_LINE_MAX) { 170 /* positions used rather than allocate a new buffer */ 171 (void)sprintf(positions, "%d too large (max %d)", 172 stop, _POSIX2_LINE_MAX); 173 badlist(positions); 174 } 175 if (maxval < stop) 176 maxval = stop; 177 for (pos = positions + start; start++ <= stop; *pos++ = 1); 178 } 179 180 /* overlapping ranges */ 181 if (autostop && maxval > autostop) 182 maxval = autostop; 183 184 /* set autostart */ 185 if (autostart) 186 memset(positions + 1, '1', autostart); 187 } 188 189 /* ARGSUSED */ 190 c_cut(fp, fname) 191 FILE *fp; 192 char *fname; 193 { 194 register int ch, col; 195 register char *pos; 196 197 for (;;) { 198 pos = positions + 1; 199 for (col = maxval; col; --col) { 200 if ((ch = getc(fp)) == EOF) 201 return; 202 if (ch == '\n') 203 break; 204 if (*pos++) 205 putchar(ch); 206 } 207 if (ch != '\n') 208 if (autostop) 209 while ((ch = getc(fp)) != EOF && ch != '\n') 210 putchar(ch); 211 else 212 while ((ch = getc(fp)) != EOF && ch != '\n'); 213 putchar('\n'); 214 } 215 } 216 217 f_cut(fp, fname) 218 FILE *fp; 219 char *fname; 220 { 221 register int ch, field, isdelim; 222 register char *pos, *p, sep; 223 int output; 224 char lbuf[_POSIX2_LINE_MAX + 1]; 225 226 for (sep = dchar, output = 0; fgets(lbuf, sizeof(lbuf), fp); output = 0) { 227 for (isdelim = 0, p = lbuf;; ++p) { 228 if (!(ch = *p)) { 229 (void)fprintf(stderr, 230 "cut: %s: line too long.\n", fname); 231 exit(1); 232 } 233 /* this should work if newline is delimiter */ 234 if (ch == sep) 235 isdelim = 1; 236 if (ch == '\n') { 237 if (!isdelim && !sflag) 238 (void)printf("%s", lbuf); 239 break; 240 } 241 } 242 if (!isdelim) 243 continue; 244 245 pos = positions + 1; 246 for (field = maxval, p = lbuf; field; --field, ++pos) { 247 if (*pos) { 248 if (output++) 249 putchar(sep); 250 while ((ch = *p++) != '\n' && ch != sep) 251 putchar(ch); 252 } else 253 while ((ch = *p++) != '\n' && ch != sep); 254 if (ch == '\n') 255 break; 256 } 257 if (ch != '\n') 258 if (autostop) { 259 if (output) 260 putchar(sep); 261 for (; (ch = *p) != '\n'; ++p) 262 putchar(ch); 263 } else 264 for (; (ch = *p) != '\n'; ++p); 265 putchar('\n'); 266 } 267 } 268 269 badlist(msg) 270 char *msg; 271 { 272 (void)fprintf(stderr, "cut: [-cf] list: %s.\n", msg); 273 exit(1); 274 } 275 276 usage() 277 { 278 (void)fprintf(stderr, 279 "usage:\tcut -c list [file1 ...]\n\tcut -f list [-s] [-d delim] [file ...]\n"); 280 exit(1); 281 } 282