1 /* $NetBSD: cut.c,v 1.20 2006/04/25 19:34:42 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ 38 The Regents of the University of California. All rights reserved.\n"); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: cut.c,v 1.20 2006/04/25 19:34:42 christos Exp $"); 46 #endif /* not lint */ 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <limits.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 int cflag; 59 char dchar; 60 int dflag; 61 int fflag; 62 int sflag; 63 64 void c_cut(FILE *, const char *); 65 void f_cut(FILE *, const char *); 66 void get_list(char *); 67 void usage(void); 68 69 int 70 main(int argc, char *argv[]) 71 { 72 FILE *fp; 73 void (*fcn)(FILE *, const char *); 74 int ch; 75 76 fcn = NULL; 77 setlocale (LC_ALL, ""); 78 79 dchar = '\t'; /* default delimiter is \t */ 80 81 /* Since we don't support multi-byte characters, the -c and -b 82 options are equivalent, and the -n option is meaningless. */ 83 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 84 switch(ch) { 85 case 'b': 86 case 'c': 87 fcn = c_cut; 88 get_list(optarg); 89 cflag = 1; 90 break; 91 case 'd': 92 dchar = *optarg; 93 dflag = 1; 94 break; 95 case 'f': 96 get_list(optarg); 97 fcn = f_cut; 98 fflag = 1; 99 break; 100 case 's': 101 sflag = 1; 102 break; 103 case 'n': 104 break; 105 case '?': 106 default: 107 usage(); 108 } 109 argc -= optind; 110 argv += optind; 111 112 if (fflag) { 113 if (cflag) 114 usage(); 115 } else if (!cflag || dflag || sflag) 116 usage(); 117 118 if (*argv) 119 for (; *argv; ++argv) { 120 if (!(fp = fopen(*argv, "r"))) 121 err(1, "%s", *argv); 122 fcn(fp, *argv); 123 (void)fclose(fp); 124 } 125 else 126 fcn(stdin, "stdin"); 127 exit(0); 128 } 129 130 int autostart, autostop, maxval; 131 132 char positions[_POSIX2_LINE_MAX + 1]; 133 134 void 135 get_list(char *list) 136 { 137 int setautostart, start, stop; 138 char *pos; 139 char *p; 140 141 /* 142 * set a byte in the positions array to indicate if a field or 143 * column is to be selected; use +1, it's 1-based, not 0-based. 144 * This parser is less restrictive than the Draft 9 POSIX spec. 145 * POSIX doesn't allow lists that aren't in increasing order or 146 * overlapping lists. We also handle "-3-5" although there's no 147 * real reason too. 148 */ 149 for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 150 setautostart = start = stop = 0; 151 if (*p == '-') { 152 ++p; 153 setautostart = 1; 154 } 155 if (isdigit((unsigned char)*p)) { 156 start = stop = strtol(p, &p, 10); 157 if (setautostart && start > autostart) 158 autostart = start; 159 } 160 if (*p == '-') { 161 if (isdigit((unsigned char)p[1])) 162 stop = strtol(p + 1, &p, 10); 163 if (*p == '-') { 164 ++p; 165 if (!autostop || autostop > stop) 166 autostop = stop; 167 } 168 } 169 if (*p) 170 errx(1, "[-cf] list: illegal list value"); 171 if (!stop || !start) 172 errx(1, "[-cf] list: values may not include zero"); 173 if (stop > _POSIX2_LINE_MAX) 174 errx(1, "[-cf] list: %d too large (max %d)", 175 stop, _POSIX2_LINE_MAX); 176 if (maxval < stop) 177 maxval = stop; 178 for (pos = positions + start; start++ <= stop; *pos++ = 1); 179 } 180 181 /* overlapping ranges */ 182 if (autostop && maxval > autostop) 183 maxval = autostop; 184 185 /* set autostart */ 186 if (autostart) 187 memset(positions + 1, '1', autostart); 188 } 189 190 /* ARGSUSED */ 191 void 192 c_cut(FILE *fp, const char *fname) 193 { 194 int ch, col; 195 char *pos; 196 197 ch = 0; 198 for (;;) { 199 pos = positions + 1; 200 for (col = maxval; col; --col) { 201 if ((ch = getc(fp)) == EOF) 202 return; 203 if (ch == '\n') 204 break; 205 if (*pos++) 206 (void)putchar(ch); 207 } 208 if (ch != '\n') { 209 if (autostop) 210 while ((ch = getc(fp)) != EOF && ch != '\n') 211 (void)putchar(ch); 212 else 213 while ((ch = getc(fp)) != EOF && ch != '\n'); 214 } 215 (void)putchar('\n'); 216 } 217 } 218 219 void 220 f_cut(FILE *fp, const char *fname) 221 { 222 int ch, field, isdelim; 223 char *pos, *p, sep; 224 int output; 225 size_t len; 226 char *lbuf, *tbuf; 227 228 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len));) { 229 output = 0; 230 if (lbuf[len - 1] != '\n') { 231 /* no newline at the end of the last line so add one */ 232 if ((tbuf = (char *)malloc(len + 1)) == NULL) 233 err(1, NULL); 234 memcpy(tbuf, lbuf, len); 235 tbuf[len++] = '\n'; 236 lbuf = tbuf; 237 } 238 for (isdelim = 0, p = lbuf;; ++p) { 239 ch = *p; 240 /* this should work if newline is delimiter */ 241 if (ch == sep) 242 isdelim = 1; 243 if (ch == '\n') { 244 if (!isdelim && !sflag) 245 (void)fwrite(lbuf, len, 1, stdout); 246 break; 247 } 248 } 249 if (!isdelim) 250 continue; 251 252 pos = positions + 1; 253 for (field = maxval, p = lbuf; field; --field, ++pos) { 254 if (*pos) { 255 if (output++) 256 (void)putchar(sep); 257 while ((ch = *p++) != '\n' && ch != sep) 258 (void)putchar(ch); 259 } else { 260 while ((ch = *p++) != '\n' && ch != sep) 261 continue; 262 } 263 if (ch == '\n') 264 break; 265 } 266 if (ch != '\n') { 267 if (autostop) { 268 if (output) 269 (void)putchar(sep); 270 for (; (ch = *p) != '\n'; ++p) 271 (void)putchar(ch); 272 } else 273 for (; (ch = *p) != '\n'; ++p); 274 } 275 (void)putchar('\n'); 276 if (tbuf) { 277 free(tbuf); 278 tbuf = NULL; 279 } 280 } 281 if (tbuf) 282 free(tbuf); 283 } 284 285 void 286 usage(void) 287 { 288 (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" 289 "\tcut -c list [file1 ...]\n" 290 "\tcut -f list [-d delim] [-s] [file ...]\n"); 291 exit(1); 292 } 293