1 /* $NetBSD: cut.c,v 1.17 2005/02/17 17:35:47 xtraeme Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ 38 The Regents of the University of California. All rights reserved.\n"); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: cut.c,v 1.17 2005/02/17 17:35:47 xtraeme Exp $"); 46 #endif /* not lint */ 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <limits.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 int cflag; 59 char dchar; 60 int dflag; 61 int fflag; 62 int sflag; 63 64 void c_cut(FILE *, const char *); 65 void f_cut(FILE *, const char *); 66 void get_list(char *); 67 void usage(void); 68 69 int 70 main(int argc, char *argv[]) 71 { 72 FILE *fp; 73 void (*fcn)(FILE *, const char *); 74 int ch; 75 76 fcn = NULL; 77 setlocale (LC_ALL, ""); 78 79 dchar = '\t'; /* default delimiter is \t */ 80 81 /* Since we don't support multi-byte characters, the -c and -b 82 options are equivalent, and the -n option is meaningless. */ 83 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 84 switch(ch) { 85 case 'b': 86 case 'c': 87 fcn = c_cut; 88 get_list(optarg); 89 cflag = 1; 90 break; 91 case 'd': 92 dchar = *optarg; 93 dflag = 1; 94 break; 95 case 'f': 96 get_list(optarg); 97 fcn = f_cut; 98 fflag = 1; 99 break; 100 case 's': 101 sflag = 1; 102 break; 103 case 'n': 104 break; 105 case '?': 106 default: 107 usage(); 108 } 109 argc -= optind; 110 argv += optind; 111 112 if (fflag) { 113 if (cflag) 114 usage(); 115 } else if (!cflag || dflag || sflag) 116 usage(); 117 118 if (*argv) 119 for (; *argv; ++argv) { 120 if (!(fp = fopen(*argv, "r"))) 121 err(1, "%s", *argv); 122 fcn(fp, *argv); 123 (void)fclose(fp); 124 } 125 else 126 fcn(stdin, "stdin"); 127 exit(0); 128 } 129 130 int autostart, autostop, maxval; 131 132 char positions[_POSIX2_LINE_MAX + 1]; 133 134 void 135 get_list(char *list) 136 { 137 int setautostart, start, stop; 138 char *pos; 139 char *p; 140 141 /* 142 * set a byte in the positions array to indicate if a field or 143 * column is to be selected; use +1, it's 1-based, not 0-based. 144 * This parser is less restrictive than the Draft 9 POSIX spec. 145 * POSIX doesn't allow lists that aren't in increasing order or 146 * overlapping lists. We also handle "-3-5" although there's no 147 * real reason too. 148 */ 149 for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 150 setautostart = start = stop = 0; 151 if (*p == '-') { 152 ++p; 153 setautostart = 1; 154 } 155 if (isdigit((unsigned char)*p)) { 156 start = stop = strtol(p, &p, 10); 157 if (setautostart && start > autostart) 158 autostart = start; 159 } 160 if (*p == '-') { 161 if (isdigit((unsigned char)p[1])) 162 stop = strtol(p + 1, &p, 10); 163 if (*p == '-') { 164 ++p; 165 if (!autostop || autostop > stop) 166 autostop = stop; 167 } 168 } 169 if (*p) 170 errx(1, "[-cf] list: illegal list value"); 171 if (!stop || !start) 172 errx(1, "[-cf] list: values may not include zero"); 173 if (stop > _POSIX2_LINE_MAX) 174 errx(1, "[-cf] list: %d too large (max %d)", 175 stop, _POSIX2_LINE_MAX); 176 if (maxval < stop) 177 maxval = stop; 178 for (pos = positions + start; start++ <= stop; *pos++ = 1); 179 } 180 181 /* overlapping ranges */ 182 if (autostop && maxval > autostop) 183 maxval = autostop; 184 185 /* set autostart */ 186 if (autostart) 187 memset(positions + 1, '1', autostart); 188 } 189 190 /* ARGSUSED */ 191 void 192 c_cut(FILE *fp, const char *fname) 193 { 194 int ch, col; 195 char *pos; 196 197 ch = 0; 198 for (;;) { 199 pos = positions + 1; 200 for (col = maxval; col; --col) { 201 if ((ch = getc(fp)) == EOF) 202 return; 203 if (ch == '\n') 204 break; 205 if (*pos++) 206 (void)putchar(ch); 207 } 208 if (ch != '\n') { 209 if (autostop) 210 while ((ch = getc(fp)) != EOF && ch != '\n') 211 (void)putchar(ch); 212 else 213 while ((ch = getc(fp)) != EOF && ch != '\n'); 214 } 215 (void)putchar('\n'); 216 } 217 } 218 219 void 220 f_cut(FILE *fp, const char *fname) 221 { 222 int ch, field, isdelim; 223 char *pos, *p, sep; 224 int output; 225 char lbuf[_POSIX2_LINE_MAX + 1]; 226 227 for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) { 228 output = 0; 229 for (isdelim = 0, p = lbuf;; ++p) { 230 if (!(ch = *p)) 231 errx(1, "%s: line too long.", fname); 232 /* this should work if newline is delimiter */ 233 if (ch == sep) 234 isdelim = 1; 235 if (ch == '\n') { 236 if (!isdelim && !sflag) 237 (void)printf("%s", lbuf); 238 break; 239 } 240 } 241 if (!isdelim) 242 continue; 243 244 pos = positions + 1; 245 for (field = maxval, p = lbuf; field; --field, ++pos) { 246 if (*pos) { 247 if (output++) 248 (void)putchar(sep); 249 while ((ch = *p++) != '\n' && ch != sep) 250 (void)putchar(ch); 251 } else { 252 while ((ch = *p++) != '\n' && ch != sep) 253 continue; 254 } 255 if (ch == '\n') 256 break; 257 } 258 if (ch != '\n') { 259 if (autostop) { 260 if (output) 261 (void)putchar(sep); 262 for (; (ch = *p) != '\n'; ++p) 263 (void)putchar(ch); 264 } else 265 for (; (ch = *p) != '\n'; ++p); 266 } 267 (void)putchar('\n'); 268 } 269 } 270 271 void 272 usage(void) 273 { 274 (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" 275 "\tcut -c list [file1 ...]\n" 276 "\tcut -f list [-d delim] [-s] [file ...]\n"); 277 exit(1); 278 } 279