1 /* $NetBSD: cut.c,v 1.16 2003/08/07 11:13:32 agc Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ 38 The Regents of the University of California. All rights reserved.\n"); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: cut.c,v 1.16 2003/08/07 11:13:32 agc Exp $"); 46 #endif /* not lint */ 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <limits.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 int cflag; 59 char dchar; 60 int dflag; 61 int fflag; 62 int sflag; 63 64 void c_cut __P((FILE *, char *)); 65 void f_cut __P((FILE *, char *)); 66 void get_list __P((char *)); 67 int main __P((int, char **)); 68 void usage __P((void)); 69 70 int 71 main(argc, argv) 72 int argc; 73 char *argv[]; 74 { 75 FILE *fp; 76 void (*fcn) __P((FILE *, char *)); 77 int ch; 78 79 fcn = NULL; 80 setlocale (LC_ALL, ""); 81 82 dchar = '\t'; /* default delimiter is \t */ 83 84 /* Since we don't support multi-byte characters, the -c and -b 85 options are equivalent, and the -n option is meaningless. */ 86 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 87 switch(ch) { 88 case 'b': 89 case 'c': 90 fcn = c_cut; 91 get_list(optarg); 92 cflag = 1; 93 break; 94 case 'd': 95 dchar = *optarg; 96 dflag = 1; 97 break; 98 case 'f': 99 get_list(optarg); 100 fcn = f_cut; 101 fflag = 1; 102 break; 103 case 's': 104 sflag = 1; 105 break; 106 case 'n': 107 break; 108 case '?': 109 default: 110 usage(); 111 } 112 argc -= optind; 113 argv += optind; 114 115 if (fflag) { 116 if (cflag) 117 usage(); 118 } else if (!cflag || dflag || sflag) 119 usage(); 120 121 if (*argv) 122 for (; *argv; ++argv) { 123 if (!(fp = fopen(*argv, "r"))) 124 err(1, "%s", *argv); 125 fcn(fp, *argv); 126 (void)fclose(fp); 127 } 128 else 129 fcn(stdin, "stdin"); 130 exit(0); 131 } 132 133 int autostart, autostop, maxval; 134 135 char positions[_POSIX2_LINE_MAX + 1]; 136 137 void 138 get_list(list) 139 char *list; 140 { 141 int setautostart, start, stop; 142 char *pos; 143 char *p; 144 145 /* 146 * set a byte in the positions array to indicate if a field or 147 * column is to be selected; use +1, it's 1-based, not 0-based. 148 * This parser is less restrictive than the Draft 9 POSIX spec. 149 * POSIX doesn't allow lists that aren't in increasing order or 150 * overlapping lists. We also handle "-3-5" although there's no 151 * real reason too. 152 */ 153 for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 154 setautostart = start = stop = 0; 155 if (*p == '-') { 156 ++p; 157 setautostart = 1; 158 } 159 if (isdigit((unsigned char)*p)) { 160 start = stop = strtol(p, &p, 10); 161 if (setautostart && start > autostart) 162 autostart = start; 163 } 164 if (*p == '-') { 165 if (isdigit((unsigned char)p[1])) 166 stop = strtol(p + 1, &p, 10); 167 if (*p == '-') { 168 ++p; 169 if (!autostop || autostop > stop) 170 autostop = stop; 171 } 172 } 173 if (*p) 174 errx(1, "[-cf] list: illegal list value"); 175 if (!stop || !start) 176 errx(1, "[-cf] list: values may not include zero"); 177 if (stop > _POSIX2_LINE_MAX) 178 errx(1, "[-cf] list: %d too large (max %d)", 179 stop, _POSIX2_LINE_MAX); 180 if (maxval < stop) 181 maxval = stop; 182 for (pos = positions + start; start++ <= stop; *pos++ = 1); 183 } 184 185 /* overlapping ranges */ 186 if (autostop && maxval > autostop) 187 maxval = autostop; 188 189 /* set autostart */ 190 if (autostart) 191 memset(positions + 1, '1', autostart); 192 } 193 194 /* ARGSUSED */ 195 void 196 c_cut(fp, fname) 197 FILE *fp; 198 char *fname; 199 { 200 int ch, col; 201 char *pos; 202 203 ch = 0; 204 for (;;) { 205 pos = positions + 1; 206 for (col = maxval; col; --col) { 207 if ((ch = getc(fp)) == EOF) 208 return; 209 if (ch == '\n') 210 break; 211 if (*pos++) 212 (void)putchar(ch); 213 } 214 if (ch != '\n') { 215 if (autostop) 216 while ((ch = getc(fp)) != EOF && ch != '\n') 217 (void)putchar(ch); 218 else 219 while ((ch = getc(fp)) != EOF && ch != '\n'); 220 } 221 (void)putchar('\n'); 222 } 223 } 224 225 void 226 f_cut(fp, fname) 227 FILE *fp; 228 char *fname; 229 { 230 int ch, field, isdelim; 231 char *pos, *p, sep; 232 int output; 233 char lbuf[_POSIX2_LINE_MAX + 1]; 234 235 for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) { 236 output = 0; 237 for (isdelim = 0, p = lbuf;; ++p) { 238 if (!(ch = *p)) 239 errx(1, "%s: line too long.", fname); 240 /* this should work if newline is delimiter */ 241 if (ch == sep) 242 isdelim = 1; 243 if (ch == '\n') { 244 if (!isdelim && !sflag) 245 (void)printf("%s", lbuf); 246 break; 247 } 248 } 249 if (!isdelim) 250 continue; 251 252 pos = positions + 1; 253 for (field = maxval, p = lbuf; field; --field, ++pos) { 254 if (*pos) { 255 if (output++) 256 (void)putchar(sep); 257 while ((ch = *p++) != '\n' && ch != sep) 258 (void)putchar(ch); 259 } else { 260 while ((ch = *p++) != '\n' && ch != sep) 261 continue; 262 } 263 if (ch == '\n') 264 break; 265 } 266 if (ch != '\n') { 267 if (autostop) { 268 if (output) 269 (void)putchar(sep); 270 for (; (ch = *p) != '\n'; ++p) 271 (void)putchar(ch); 272 } else 273 for (; (ch = *p) != '\n'; ++p); 274 } 275 (void)putchar('\n'); 276 } 277 } 278 279 void 280 usage() 281 { 282 (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" 283 "\tcut -c list [file1 ...]\n" 284 "\tcut -f list [-d delim] [-s] [file ...]\n"); 285 exit(1); 286 } 287