1 /* $OpenBSD: cut.c,v 1.7 2000/06/04 23:52:19 aaron Exp $ */ 2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #ifndef lint 41 static char copyright[] = 42 "@(#) Copyright (c) 1989, 1993\n\ 43 The Regents of the University of California. All rights reserved.\n"; 44 #endif /* not lint */ 45 46 #ifndef lint 47 #if 0 48 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 49 #endif 50 static char rcsid[] = "$OpenBSD: cut.c,v 1.7 2000/06/04 23:52:19 aaron Exp $"; 51 #endif /* not lint */ 52 53 #include <ctype.h> 54 #include <err.h> 55 #include <errno.h> 56 #include <limits.h> 57 #include <locale.h> 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <unistd.h> 62 63 int cflag; 64 char dchar; 65 int dflag; 66 int fflag; 67 int sflag; 68 69 void c_cut __P((FILE *, char *)); 70 void f_cut __P((FILE *, char *)); 71 void get_list __P((char *)); 72 void usage __P((void)); 73 74 int 75 main(argc, argv) 76 int argc; 77 char *argv[]; 78 { 79 FILE *fp; 80 void (*fcn) __P((FILE *, char *)); 81 int ch; 82 83 setlocale (LC_ALL, ""); 84 85 dchar = '\t'; /* default delimiter is \t */ 86 87 /* Since we don't support multi-byte characters, the -c and -b 88 options are equivalent, and the -n option is meaningless. */ 89 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 90 switch(ch) { 91 case 'b': 92 case 'c': 93 fcn = c_cut; 94 get_list(optarg); 95 cflag = 1; 96 break; 97 case 'd': 98 dchar = *optarg; 99 dflag = 1; 100 break; 101 case 'f': 102 get_list(optarg); 103 fcn = f_cut; 104 fflag = 1; 105 break; 106 case 's': 107 sflag = 1; 108 break; 109 case 'n': 110 break; 111 case '?': 112 default: 113 usage(); 114 } 115 argc -= optind; 116 argv += optind; 117 118 if (fflag) { 119 if (cflag) 120 usage(); 121 } else if (!cflag || dflag || sflag) 122 usage(); 123 124 if (*argv) 125 for (; *argv; ++argv) { 126 if (!(fp = fopen(*argv, "r"))) 127 err(1, "%s", *argv); 128 fcn(fp, *argv); 129 (void)fclose(fp); 130 } 131 else 132 fcn(stdin, "stdin"); 133 exit(0); 134 } 135 136 int autostart, autostop, maxval; 137 138 char positions[_POSIX2_LINE_MAX + 1]; 139 140 void 141 get_list(list) 142 char *list; 143 { 144 register int setautostart, start, stop; 145 register char *pos; 146 char *p; 147 148 /* 149 * set a byte in the positions array to indicate if a field or 150 * column is to be selected; use +1, it's 1-based, not 0-based. 151 * This parser is less restrictive than the Draft 9 POSIX spec. 152 * POSIX doesn't allow lists that aren't in increasing order or 153 * overlapping lists. We also handle "-3-5" although there's no 154 * real reason too. 155 */ 156 while ((p = strsep(&list, ", \t"))) { 157 setautostart = start = stop = 0; 158 if (*p == '-') { 159 ++p; 160 setautostart = 1; 161 } 162 if (isdigit(*p)) { 163 start = stop = strtol(p, &p, 10); 164 if (setautostart && start > autostart) 165 autostart = start; 166 } 167 if (*p == '-') { 168 if (isdigit(p[1])) 169 stop = strtol(p + 1, &p, 10); 170 if (*p == '-') { 171 ++p; 172 if (!autostop || autostop > stop) 173 autostop = stop; 174 } 175 } 176 if (*p) 177 errx(1, "[-cf] list: illegal list value"); 178 if (!stop || !start) 179 errx(1, "[-cf] list: values may not include zero"); 180 if (stop > _POSIX2_LINE_MAX) 181 errx(1, "[-cf] list: %d too large (max %d)", 182 stop, _POSIX2_LINE_MAX); 183 if (maxval < stop) 184 maxval = stop; 185 for (pos = positions + start; start++ <= stop; *pos++ = 1) 186 ; 187 } 188 189 /* overlapping ranges */ 190 if (autostop && maxval > autostop) 191 maxval = autostop; 192 193 /* set autostart */ 194 if (autostart) 195 memset(positions + 1, '1', autostart); 196 } 197 198 /* ARGSUSED */ 199 void 200 c_cut(fp, fname) 201 FILE *fp; 202 char *fname; 203 { 204 register int ch, col; 205 register char *pos; 206 207 for (;;) { 208 pos = positions + 1; 209 for (col = maxval; col; --col) { 210 if ((ch = getc(fp)) == EOF) 211 return; 212 if (ch == '\n') 213 break; 214 if (*pos++) 215 (void)putchar(ch); 216 } 217 if (ch != '\n') { 218 if (autostop) 219 while ((ch = getc(fp)) != EOF && ch != '\n') 220 (void)putchar(ch); 221 else 222 while ((ch = getc(fp)) != EOF && ch != '\n') 223 ; 224 } 225 (void)putchar('\n'); 226 } 227 } 228 229 void 230 f_cut(fp, fname) 231 FILE *fp; 232 char *fname; 233 { 234 register int ch, field, isdelim; 235 register char *pos, *p, sep; 236 int output; 237 size_t len; 238 char *lbuf, *tbuf; 239 240 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len));) { 241 output = 0; 242 if (lbuf[len - 1] != '\n') { 243 /* no newline at the end of the last line so add one */ 244 if ((tbuf = (char *)malloc(len + 1)) == NULL) 245 err(1, NULL); 246 memcpy(tbuf, lbuf, len); 247 tbuf[len] = '\n'; 248 lbuf = tbuf; 249 } 250 for (isdelim = 0, p = lbuf;; ++p) { 251 ch = *p; 252 /* this should work if newline is delimiter */ 253 if (ch == sep) 254 isdelim = 1; 255 if (ch == '\n') { 256 if (!isdelim && !sflag) 257 (void)fwrite(lbuf, len, 1, stdout); 258 break; 259 } 260 } 261 if (!isdelim) 262 continue; 263 264 pos = positions + 1; 265 for (field = maxval, p = lbuf; field; --field, ++pos) { 266 if (*pos) { 267 if (output++) 268 (void)putchar(sep); 269 while ((ch = *p++) != '\n' && ch != sep) 270 (void)putchar(ch); 271 } else 272 while ((ch = *p++) != '\n' && ch != sep) 273 ; 274 if (ch == '\n') 275 break; 276 } 277 if (ch != '\n') { 278 if (autostop) { 279 if (output) 280 (void)putchar(sep); 281 for (; (ch = *p) != '\n'; ++p) 282 (void)putchar(ch); 283 } else 284 for (; (ch = *p) != '\n'; ++p) 285 ; 286 } 287 (void)putchar('\n'); 288 } 289 if (tbuf) 290 free(tbuf); 291 } 292 293 void 294 usage() 295 { 296 (void)fprintf(stderr, 297 "usage:\tcut -c list [file1 ...]\n" 298 "\tcut -f list [-s] [-d delim] [file ...]\n" 299 "\tcut -b list [-n] [file ...]\n"); 300 exit(1); 301 } 302