1*97cd0ff4Swiz /* $NetBSD: cut.c,v 1.29 2014/02/03 20:22:19 wiz Exp $ */
2843a1dd6Sglass
361f28255Scgd /*
4843a1dd6Sglass * Copyright (c) 1989, 1993
5843a1dd6Sglass * The Regents of the University of California. All rights reserved.
661f28255Scgd *
761f28255Scgd * This code is derived from software contributed to Berkeley by
861f28255Scgd * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
961f28255Scgd *
1061f28255Scgd * Redistribution and use in source and binary forms, with or without
1161f28255Scgd * modification, are permitted provided that the following conditions
1261f28255Scgd * are met:
1361f28255Scgd * 1. Redistributions of source code must retain the above copyright
1461f28255Scgd * notice, this list of conditions and the following disclaimer.
1561f28255Scgd * 2. Redistributions in binary form must reproduce the above copyright
1661f28255Scgd * notice, this list of conditions and the following disclaimer in the
1761f28255Scgd * documentation and/or other materials provided with the distribution.
1889aaa1bbSagc * 3. Neither the name of the University nor the names of its contributors
1961f28255Scgd * may be used to endorse or promote products derived from this software
2061f28255Scgd * without specific prior written permission.
2161f28255Scgd *
2261f28255Scgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2361f28255Scgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2461f28255Scgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2561f28255Scgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2661f28255Scgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2761f28255Scgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2861f28255Scgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2961f28255Scgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3061f28255Scgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3161f28255Scgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3261f28255Scgd * SUCH DAMAGE.
3361f28255Scgd */
3461f28255Scgd
35263324f1Slukem #include <sys/cdefs.h>
3661f28255Scgd #ifndef lint
3798e5374cSlukem __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
3898e5374cSlukem The Regents of the University of California. All rights reserved.");
3961f28255Scgd #endif /* not lint */
4061f28255Scgd
4161f28255Scgd #ifndef lint
42843a1dd6Sglass #if 0
4371bb6ddaSjtc static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
44843a1dd6Sglass #endif
45*97cd0ff4Swiz __RCSID("$NetBSD: cut.c,v 1.29 2014/02/03 20:22:19 wiz Exp $");
4661f28255Scgd #endif /* not lint */
4761f28255Scgd
48843a1dd6Sglass #include <ctype.h>
49843a1dd6Sglass #include <err.h>
50843a1dd6Sglass #include <errno.h>
51843a1dd6Sglass #include <limits.h>
52843a1dd6Sglass #include <locale.h>
5361f28255Scgd #include <stdio.h>
54f7c6bf57Sjtc #include <stdlib.h>
55f7c6bf57Sjtc #include <string.h>
5671bb6ddaSjtc #include <unistd.h>
57b3af16c2Schristos #include <util.h>
5894c137d5Shubertf #include <wchar.h>
59b3af16c2Schristos #include <sys/param.h>
6061f28255Scgd
61b3af16c2Schristos static int bflag;
62b3af16c2Schristos static int cflag;
63b3af16c2Schristos static char dchar;
64b3af16c2Schristos static int dflag;
65b3af16c2Schristos static int fflag;
66b3af16c2Schristos static int sflag;
6761f28255Scgd
68b3af16c2Schristos static void b_cut(FILE *, const char *);
69b3af16c2Schristos static void c_cut(FILE *, const char *);
70b3af16c2Schristos static void f_cut(FILE *, const char *);
71b3af16c2Schristos static void get_list(char *);
728b0f9554Sperry static void usage(void) __dead;
73843a1dd6Sglass
74f7c6bf57Sjtc int
main(int argc,char * argv[])756dca62f2Sxtraeme main(int argc, char *argv[])
7661f28255Scgd {
7761f28255Scgd FILE *fp;
786dca62f2Sxtraeme void (*fcn)(FILE *, const char *);
79*97cd0ff4Swiz int ch, rval;
80105b403aSjtc
81263324f1Slukem fcn = NULL;
82b3af16c2Schristos (void)setlocale(LC_ALL, "");
8361f28255Scgd
8461f28255Scgd dchar = '\t'; /* default delimiter is \t */
8561f28255Scgd
862ea901d3Sjtc /* Since we don't support multi-byte characters, the -c and -b
872ea901d3Sjtc options are equivalent, and the -n option is meaningless. */
88954a0f16Smrg while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
8961f28255Scgd switch(ch) {
902ea901d3Sjtc case 'b':
9194c137d5Shubertf fcn = b_cut;
9294c137d5Shubertf get_list(optarg);
9394c137d5Shubertf bflag = 1;
9494c137d5Shubertf break;
9561f28255Scgd case 'c':
9661f28255Scgd fcn = c_cut;
9761f28255Scgd get_list(optarg);
9861f28255Scgd cflag = 1;
9961f28255Scgd break;
10061f28255Scgd case 'd':
10161f28255Scgd dchar = *optarg;
10261f28255Scgd dflag = 1;
10361f28255Scgd break;
10461f28255Scgd case 'f':
10561f28255Scgd get_list(optarg);
10661f28255Scgd fcn = f_cut;
10761f28255Scgd fflag = 1;
10861f28255Scgd break;
10961f28255Scgd case 's':
11061f28255Scgd sflag = 1;
11161f28255Scgd break;
1122ea901d3Sjtc case 'n':
1132ea901d3Sjtc break;
11461f28255Scgd case '?':
11561f28255Scgd default:
11661f28255Scgd usage();
11761f28255Scgd }
11861f28255Scgd argc -= optind;
11961f28255Scgd argv += optind;
12061f28255Scgd
12161f28255Scgd if (fflag) {
12294c137d5Shubertf if (cflag || bflag)
12361f28255Scgd usage();
12494c137d5Shubertf } else if ((!cflag && !bflag) || dflag || sflag)
12594c137d5Shubertf usage();
12694c137d5Shubertf else if (bflag && cflag)
12761f28255Scgd usage();
12861f28255Scgd
129*97cd0ff4Swiz rval = 0;
13061f28255Scgd if (*argv)
13161f28255Scgd for (; *argv; ++argv) {
132a8488a76Sjnemeth if (strcmp(*argv, "-") == 0)
133a8488a76Sjnemeth fcn(stdin, "stdin");
134a8488a76Sjnemeth else {
135*97cd0ff4Swiz if ((fp = fopen(*argv, "r"))) {
13661f28255Scgd fcn(fp, *argv);
137843a1dd6Sglass (void)fclose(fp);
138*97cd0ff4Swiz } else {
139*97cd0ff4Swiz rval = 1;
140*97cd0ff4Swiz warn("%s", *argv);
141*97cd0ff4Swiz }
14261f28255Scgd }
143a8488a76Sjnemeth }
14461f28255Scgd else
14561f28255Scgd fcn(stdin, "stdin");
146*97cd0ff4Swiz return(rval);
14761f28255Scgd }
14861f28255Scgd
/*
 * Selection state built by get_list() from the -b/-c/-f list argument.
 */
static size_t autostart;	/* largest N from a "-N" entry; 0 if none */
static size_t autostop;		/* smallest end of an "N-" entry; 0 if none */
static size_t maxval;		/* highest position that needs examining */

/* positions[i] is non-zero when 1-based position i is selected. */
static char *positions = NULL;
/* Number of bytes currently allocated for positions. */
static size_t numpositions = 0;
#define ALLOC_CHUNK	_POSIX2_LINE_MAX	/* malloc granularity */
15461f28255Scgd
155b3af16c2Schristos static void
get_list(char * list)1566dca62f2Sxtraeme get_list(char *list)
15761f28255Scgd {
158b3af16c2Schristos size_t setautostart, start, stop;
159263324f1Slukem char *pos;
160105b403aSjtc char *p;
16161f28255Scgd
162b3af16c2Schristos if (positions == NULL) {
163b3af16c2Schristos numpositions = ALLOC_CHUNK;
164b3af16c2Schristos positions = ecalloc(numpositions, sizeof(*positions));
165b3af16c2Schristos }
166b3af16c2Schristos
16761f28255Scgd /*
16861f28255Scgd * set a byte in the positions array to indicate if a field or
16961f28255Scgd * column is to be selected; use +1, it's 1-based, not 0-based.
17061f28255Scgd * This parser is less restrictive than the Draft 9 POSIX spec.
17161f28255Scgd * POSIX doesn't allow lists that aren't in increasing order or
17261f28255Scgd * overlapping lists. We also handle "-3-5" although there's no
17359a4ab1aSwiz * real reason to.
17461f28255Scgd */
175263324f1Slukem for (; (p = strtok(list, ", \t")) != NULL; list = NULL) {
17661f28255Scgd setautostart = start = stop = 0;
17761f28255Scgd if (*p == '-') {
17861f28255Scgd ++p;
17961f28255Scgd setautostart = 1;
18061f28255Scgd }
181c3203395Schristos if (isdigit((unsigned char)*p)) {
18261f28255Scgd start = stop = strtol(p, &p, 10);
18361f28255Scgd if (setautostart && start > autostart)
18461f28255Scgd autostart = start;
18561f28255Scgd }
18661f28255Scgd if (*p == '-') {
187c3203395Schristos if (isdigit((unsigned char)p[1]))
18861f28255Scgd stop = strtol(p + 1, &p, 10);
18961f28255Scgd if (*p == '-') {
19061f28255Scgd ++p;
19161f28255Scgd if (!autostop || autostop > stop)
19261f28255Scgd autostop = stop;
19361f28255Scgd }
19461f28255Scgd }
19561f28255Scgd if (*p)
19659a4ab1aSwiz errx(1, "[-bcf] list: illegal list value");
19761f28255Scgd if (!stop || !start)
19859a4ab1aSwiz errx(1, "[-bcf] list: values may not include zero");
199b3af16c2Schristos if (stop + 1 > numpositions) {
200b3af16c2Schristos size_t newsize;
201b3af16c2Schristos newsize = roundup(stop + 1, ALLOC_CHUNK);
202b3af16c2Schristos positions = erealloc(positions, newsize);
203b3af16c2Schristos (void)memset(positions + numpositions, 0,
204b3af16c2Schristos newsize - numpositions);
205b3af16c2Schristos numpositions = newsize;
206b3af16c2Schristos }
20761f28255Scgd if (maxval < stop)
20861f28255Scgd maxval = stop;
209b3af16c2Schristos for (pos = positions + start; start++ <= stop; pos++)
210b3af16c2Schristos *pos = 1;
21161f28255Scgd }
21261f28255Scgd
21361f28255Scgd /* overlapping ranges */
21461f28255Scgd if (autostop && maxval > autostop)
21561f28255Scgd maxval = autostop;
21661f28255Scgd
21761f28255Scgd /* set autostart */
21861f28255Scgd if (autostart)
219b3af16c2Schristos (void)memset(positions + 1, '1', autostart);
22061f28255Scgd }
22161f28255Scgd
222b3af16c2Schristos static void
223b3af16c2Schristos /*ARGSUSED*/
f_cut(FILE * fp,const char * fname __unused)224b3af16c2Schristos f_cut(FILE *fp, const char *fname __unused)
22561f28255Scgd {
226263324f1Slukem int ch, field, isdelim;
227263324f1Slukem char *pos, *p, sep;
22861f28255Scgd int output;
229656cf52aSyamt size_t len;
230656cf52aSyamt char *lbuf, *tbuf;
23161f28255Scgd
232b3af16c2Schristos for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) {
23375cf3d05Smycroft output = 0;
234656cf52aSyamt if (lbuf[len - 1] != '\n') {
235656cf52aSyamt /* no newline at the end of the last line so add one */
236656cf52aSyamt if ((tbuf = (char *)malloc(len + 1)) == NULL)
237656cf52aSyamt err(1, NULL);
238b3af16c2Schristos (void)memcpy(tbuf, lbuf, len);
2394c18b91eSdsl tbuf[len++] = '\n';
240656cf52aSyamt lbuf = tbuf;
241656cf52aSyamt }
24261f28255Scgd for (isdelim = 0, p = lbuf;; ++p) {
243656cf52aSyamt ch = *p;
24461f28255Scgd /* this should work if newline is delimiter */
24561f28255Scgd if (ch == sep)
24661f28255Scgd isdelim = 1;
24761f28255Scgd if (ch == '\n') {
24861f28255Scgd if (!isdelim && !sflag)
249656cf52aSyamt (void)fwrite(lbuf, len, 1, stdout);
25061f28255Scgd break;
25161f28255Scgd }
25261f28255Scgd }
25361f28255Scgd if (!isdelim)
25461f28255Scgd continue;
25561f28255Scgd
25661f28255Scgd pos = positions + 1;
25761f28255Scgd for (field = maxval, p = lbuf; field; --field, ++pos) {
25861f28255Scgd if (*pos) {
25961f28255Scgd if (output++)
260843a1dd6Sglass (void)putchar(sep);
26161f28255Scgd while ((ch = *p++) != '\n' && ch != sep)
262843a1dd6Sglass (void)putchar(ch);
263f670fa10Sross } else {
264f670fa10Sross while ((ch = *p++) != '\n' && ch != sep)
265f670fa10Sross continue;
266f670fa10Sross }
26761f28255Scgd if (ch == '\n')
26861f28255Scgd break;
26961f28255Scgd }
270f670fa10Sross if (ch != '\n') {
27161f28255Scgd if (autostop) {
27261f28255Scgd if (output)
273843a1dd6Sglass (void)putchar(sep);
27461f28255Scgd for (; (ch = *p) != '\n'; ++p)
275843a1dd6Sglass (void)putchar(ch);
27661f28255Scgd } else
27761f28255Scgd for (; (ch = *p) != '\n'; ++p);
278f670fa10Sross }
279843a1dd6Sglass (void)putchar('\n');
2804c18b91eSdsl if (tbuf) {
281656cf52aSyamt free(tbuf);
2824c18b91eSdsl tbuf = NULL;
2834c18b91eSdsl }
2844c18b91eSdsl }
28563b0d1abSchristos if (tbuf)
28663b0d1abSchristos free(tbuf);
28761f28255Scgd }
28861f28255Scgd
/*
 * Print the three accepted command-line forms on standard error and
 * terminate with exit status 1.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage:\tcut -b list [-n] [file ...]\n"
	    "\tcut -c list [file ...]\n"
	    "\tcut -f list [-d string] [-s] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
29794c137d5Shubertf
/* make b_cut(): */
29994c137d5Shubertf #define CUT_BYTE 1
30094c137d5Shubertf #include "x_cut.c"
30194c137d5Shubertf #undef CUT_BYTE
30294c137d5Shubertf
/* make c_cut(): */
30494c137d5Shubertf #define CUT_BYTE 0
30594c137d5Shubertf #include "x_cut.c"
30694c137d5Shubertf #undef CUT_BYTE
307