1 /* $OpenBSD: cut.c,v 1.27 2022/12/04 23:50:47 cheloha Exp $ */ 2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <assert.h> 37 #include <ctype.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <limits.h> 41 #include <locale.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 char dchar[5]; 48 int dlen; 49 50 int bflag; 51 int cflag; 52 int dflag; 53 int fflag; 54 int nflag; 55 int sflag; 56 57 void b_cut(FILE *, char *); 58 void c_cut(FILE *, char *); 59 void f_cut(FILE *, char *); 60 void get_list(char *); 61 void usage(void); 62 63 int 64 main(int argc, char *argv[]) 65 { 66 FILE *fp; 67 void (*fcn)(FILE *, char *); 68 int ch, rval; 69 70 setlocale(LC_CTYPE, ""); 71 72 if (pledge("stdio rpath", NULL) == -1) 73 err(1, "pledge"); 74 75 dchar[0] = '\t'; /* default delimiter */ 76 dchar[1] = '\0'; 77 dlen = 1; 78 79 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 80 switch(ch) { 81 case 'b': 82 get_list(optarg); 83 bflag = 1; 84 break; 85 case 'c': 86 get_list(optarg); 87 cflag = 1; 88 break; 89 case 'd': 90 if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1) 91 usage(); 92 assert(dlen < sizeof(dchar)); 93 (void)memcpy(dchar, optarg, dlen); 94 dchar[dlen] = '\0'; 95 dflag = 1; 96 break; 97 case 'f': 98 get_list(optarg); 99 fflag = 1; 100 break; 101 case 'n': 102 nflag = 1; 103 break; 104 case 's': 105 sflag = 1; 106 break; 107 default: 108 usage(); 109 } 110 argc -= optind; 111 argv += optind; 112 113 if (bflag + cflag + fflag != 1 || 114 (nflag && !bflag) || 115 ((dflag || sflag) && !fflag)) 116 usage(); 117 118 if (MB_CUR_MAX == 1) { 119 nflag = 0; 120 if (cflag) { 121 bflag = 1; 122 cflag = 0; 123 } 124 } 125 126 fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut; 127 128 rval = 0; 129 if (*argv) 130 for (; *argv; ++argv) { 131 if (strcmp(*argv, "-") == 0) 132 fcn(stdin, "stdin"); 133 else { 134 if ((fp = fopen(*argv, "r"))) { 135 fcn(fp, *argv); 136 (void)fclose(fp); 137 } else { 138 rval = 1; 139 warn("%s", *argv); 140 } 141 } 142 } 143 else { 144 if (pledge("stdio", NULL) == -1) 145 err(1, "pledge"); 146 147 fcn(stdin, "stdin"); 148 } 149 exit(rval); 150 } 151 152 int autostart, autostop, maxval; 153 154 char positions[_POSIX2_LINE_MAX + 1]; 155 156 int 157 read_number(char **p) 158 { 159 int dash, n; 160 const char *errstr; 161 char *q; 162 163 q = *p + strcspn(*p, "-"); 164 dash = *q == '-'; 165 *q = '\0'; 166 n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr); 167 if (errstr != NULL) 168 errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr, 169 _POSIX2_LINE_MAX); 170 if (dash) 171 *q = '-'; 172 *p = q; 173 174 return n; 175 } 176 177 void 178 get_list(char *list) 179 { 180 int setautostart, start, stop; 181 char *p; 182 183 /* 184 * set a byte in the positions array to indicate if a field or 185 * column is to be selected; use +1, it's 1-based, not 0-based. 186 * This parser is less restrictive than the Draft 9 POSIX spec. 187 * POSIX doesn't allow lists that aren't in increasing order or 188 * overlapping lists. We also handle "-3-5" although there's no 189 * real reason too. 190 */ 191 while ((p = strsep(&list, ", \t"))) { 192 setautostart = start = stop = 0; 193 if (*p == '-') { 194 ++p; 195 setautostart = 1; 196 } 197 if (isdigit((unsigned char)*p)) { 198 start = stop = read_number(&p); 199 if (setautostart && start > autostart) 200 autostart = start; 201 } 202 if (*p == '-') { 203 if (isdigit((unsigned char)p[1])) { 204 ++p; 205 stop = read_number(&p); 206 } 207 if (*p == '-') { 208 ++p; 209 if (!autostop || autostop > stop) 210 autostop = stop; 211 } 212 } 213 if (*p != '\0' || !stop || !start) 214 errx(1, "[-bcf] list: illegal list value"); 215 if (maxval < stop) 216 maxval = stop; 217 if (start <= stop) 218 memset(positions + start, 1, stop - start + 1); 219 } 220 221 /* overlapping ranges */ 222 if (autostop && maxval > autostop) 223 maxval = autostop; 224 225 /* set autostart */ 226 if (autostart) 227 memset(positions + 1, '1', autostart); 228 } 229 230 /* ARGSUSED */ 231 void 232 b_cut(FILE *fp, char *fname) 233 { 234 int ch, col; 235 char *pos; 236 237 for (;;) { 238 pos = positions + 1; 239 for (col = maxval; col; --col) { 240 if ((ch = getc(fp)) == EOF) 241 return; 242 if (ch == '\n') 243 break; 244 if (*pos++) 245 (void)putchar(ch); 246 } 247 if (ch != '\n') { 248 if (autostop) 249 while ((ch = getc(fp)) != EOF && ch != '\n') 250 (void)putchar(ch); 251 else 252 while ((ch = getc(fp)) != EOF && ch != '\n') 253 ; 254 } 255 (void)putchar('\n'); 256 } 257 } 258 259 void 260 c_cut(FILE *fp, char *fname) 261 { 262 static char *line = NULL; 263 static size_t linesz = 0; 264 ssize_t linelen; 265 char *cp, *pos, *maxpos; 266 int len; 267 268 while ((linelen = getline(&line, &linesz, fp)) != -1) { 269 if (line[linelen - 1] == '\n') 270 line[linelen - 1] = '\0'; 271 272 cp = line; 273 pos = positions + 1; 274 maxpos = pos + maxval; 275 while(pos < maxpos && *cp != '\0') { 276 len = mblen(cp, MB_CUR_MAX); 277 if (len == -1) 278 len = 1; 279 pos += nflag ? len : 1; 280 if (pos[-1] == '\0') 281 cp += len; 282 else 283 while (len--) 284 putchar(*cp++); 285 } 286 if (autostop) 287 puts(cp); 288 else 289 putchar('\n'); 290 } 291 } 292 293 void 294 f_cut(FILE *fp, char *fname) 295 { 296 static char *line = NULL; 297 static size_t linesz = 0; 298 ssize_t linelen; 299 char *sp, *ep, *pos, *maxpos; 300 int output; 301 302 while ((linelen = getline(&line, &linesz, fp)) != -1) { 303 if (line[linelen - 1] == '\n') 304 line[linelen - 1] = '\0'; 305 306 if ((ep = strstr(line, dchar)) == NULL) { 307 if (!sflag) 308 puts(line); 309 continue; 310 } 311 312 pos = positions + 1; 313 maxpos = pos + maxval; 314 output = 0; 315 sp = line; 316 for (;;) { 317 if (*pos++) { 318 if (output) 319 fputs(dchar, stdout); 320 while (sp < ep) 321 putchar(*sp++); 322 output = 1; 323 } else 324 sp = ep; 325 if (*sp == '\0' || pos == maxpos) 326 break; 327 sp += dlen; 328 if ((ep = strstr(sp, dchar)) == NULL) 329 ep = strchr(sp, '\0'); 330 } 331 if (autostop) 332 puts(sp); 333 else 334 putchar('\n'); 335 } 336 } 337 338 void 339 usage(void) 340 { 341 (void)fprintf(stderr, 342 "usage: cut -b list [-n] [file ...]\n" 343 " cut -c list [file ...]\n" 344 " cut -f list [-s] [-d delim] [file ...]\n"); 345 exit(1); 346 } 347