1 /* $OpenBSD: cut.c,v 1.18 2014/02/02 11:44:01 sobrado Exp $ */ 2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <locale.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 46 int cflag; 47 char dchar; 48 int dflag; 49 int fflag; 50 int sflag; 51 52 void c_cut(FILE *, char *); 53 void f_cut(FILE *, char *); 54 void get_list(char *); 55 void usage(void); 56 57 int 58 main(int argc, char *argv[]) 59 { 60 FILE *fp; 61 void (*fcn)(FILE *, char *); 62 int ch, rval; 63 64 setlocale (LC_ALL, ""); 65 66 dchar = '\t'; /* default delimiter is \t */ 67 68 /* Since we don't support multi-byte characters, the -c and -b 69 options are equivalent, and the -n option is meaningless. */ 70 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 71 switch(ch) { 72 case 'b': 73 case 'c': 74 fcn = c_cut; 75 get_list(optarg); 76 cflag = 1; 77 break; 78 case 'd': 79 dchar = *optarg; 80 dflag = 1; 81 break; 82 case 'f': 83 get_list(optarg); 84 fcn = f_cut; 85 fflag = 1; 86 break; 87 case 's': 88 sflag = 1; 89 break; 90 case 'n': 91 break; 92 case '?': 93 default: 94 usage(); 95 } 96 argc -= optind; 97 argv += optind; 98 99 if (fflag) { 100 if (cflag) 101 usage(); 102 } else if (!cflag || dflag || sflag) 103 usage(); 104 105 rval = 0; 106 if (*argv) 107 for (; *argv; ++argv) { 108 if (strcmp(*argv, "-") == 0) 109 fcn(stdin, "stdin"); 110 else { 111 if ((fp = fopen(*argv, "r"))) { 112 fcn(fp, *argv); 113 (void)fclose(fp); 114 } else { 115 rval = 1; 116 warn("%s", *argv); 117 } 118 } 119 } 120 else 121 fcn(stdin, "stdin"); 122 exit(rval); 123 } 124 125 int autostart, autostop, maxval; 126 127 char positions[_POSIX2_LINE_MAX + 1]; 128 129 void 130 get_list(char *list) 131 { 132 int setautostart, start, stop; 133 char *pos; 134 char *p; 135 136 /* 137 * set a byte in the positions array to indicate if a field or 138 * column is to be selected; use +1, it's 1-based, not 0-based. 139 * This parser is less restrictive than the Draft 9 POSIX spec. 140 * POSIX doesn't allow lists that aren't in increasing order or 141 * overlapping lists. We also handle "-3-5" although there's no 142 * real reason too. 143 */ 144 while ((p = strsep(&list, ", \t"))) { 145 setautostart = start = stop = 0; 146 if (*p == '-') { 147 ++p; 148 setautostart = 1; 149 } 150 if (isdigit((unsigned char)*p)) { 151 start = stop = strtol(p, &p, 10); 152 if (setautostart && start > autostart) 153 autostart = start; 154 } 155 if (*p == '-') { 156 if (isdigit((unsigned char)p[1])) 157 stop = strtol(p + 1, &p, 10); 158 if (*p == '-') { 159 ++p; 160 if (!autostop || autostop > stop) 161 autostop = stop; 162 } 163 } 164 if (*p) 165 errx(1, "[-bcf] list: illegal list value"); 166 if (!stop || !start) 167 errx(1, "[-bcf] list: values may not include zero"); 168 if (stop > _POSIX2_LINE_MAX) 169 errx(1, "[-bcf] list: %d too large (max %d)", 170 stop, _POSIX2_LINE_MAX); 171 if (maxval < stop) 172 maxval = stop; 173 for (pos = positions + start; start++ <= stop; *pos++ = 1) 174 ; 175 } 176 177 /* overlapping ranges */ 178 if (autostop && maxval > autostop) 179 maxval = autostop; 180 181 /* set autostart */ 182 if (autostart) 183 memset(positions + 1, '1', autostart); 184 } 185 186 /* ARGSUSED */ 187 void 188 c_cut(FILE *fp, char *fname) 189 { 190 int ch, col; 191 char *pos; 192 193 for (;;) { 194 pos = positions + 1; 195 for (col = maxval; col; --col) { 196 if ((ch = getc(fp)) == EOF) 197 return; 198 if (ch == '\n') 199 break; 200 if (*pos++) 201 (void)putchar(ch); 202 } 203 if (ch != '\n') { 204 if (autostop) 205 while ((ch = getc(fp)) != EOF && ch != '\n') 206 (void)putchar(ch); 207 else 208 while ((ch = getc(fp)) != EOF && ch != '\n') 209 ; 210 } 211 (void)putchar('\n'); 212 } 213 } 214 215 void 216 f_cut(FILE *fp, char *fname) 217 { 218 int ch, field, isdelim; 219 char *pos, *p, sep; 220 int output; 221 size_t len; 222 char *lbuf, *tbuf; 223 224 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len));) { 225 output = 0; 226 if (lbuf[len - 1] != '\n') { 227 /* no newline at the end of the last line so add one */ 228 if ((tbuf = (char *)malloc(len + 1)) == NULL) 229 err(1, NULL); 230 memcpy(tbuf, lbuf, len); 231 tbuf[len] = '\n'; 232 lbuf = tbuf; 233 } 234 for (isdelim = 0, p = lbuf;; ++p) { 235 ch = *p; 236 /* this should work if newline is delimiter */ 237 if (ch == sep) 238 isdelim = 1; 239 if (ch == '\n') { 240 if (!isdelim && !sflag) 241 (void)fwrite(lbuf, len, 1, stdout); 242 break; 243 } 244 } 245 if (!isdelim) 246 continue; 247 248 pos = positions + 1; 249 for (field = maxval, p = lbuf; field; --field, ++pos) { 250 if (*pos) { 251 if (output++) 252 (void)putchar(sep); 253 while ((ch = *p++) != '\n' && ch != sep) 254 (void)putchar(ch); 255 } else 256 while ((ch = *p++) != '\n' && ch != sep) 257 ; 258 if (ch == '\n') 259 break; 260 } 261 if (ch != '\n') { 262 if (autostop) { 263 if (output) 264 (void)putchar(sep); 265 for (; (ch = *p) != '\n'; ++p) 266 (void)putchar(ch); 267 } else 268 for (; (ch = *p) != '\n'; ++p) 269 ; 270 } 271 (void)putchar('\n'); 272 } 273 if (tbuf) 274 free(tbuf); 275 } 276 277 void 278 usage(void) 279 { 280 (void)fprintf(stderr, 281 "usage: cut -b list [-n] [file ...]\n" 282 " cut -c list [file ...]\n" 283 " cut -f list [-s] [-d delim] [file ...]\n"); 284 exit(1); 285 } 286