xref: /openbsd-src/usr.bin/cut/cut.c (revision 0b7734b3d77bb9b21afec6f4621cae6c805dbd45)
1 /*	$OpenBSD: cut.c,v 1.23 2015/12/02 00:56:46 schwarze Exp $	*/
2 /*	$NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <assert.h>
37 #include <ctype.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
/*
 * Field delimiter used by -f: stored as a NUL-terminated byte string
 * (it may be a multibyte character) together with its length in bytes.
 */
char	dchar[5];
int	dlen;

/* Option flags; each is set non-zero by main() when the option is seen. */
int	bflag, cflag, dflag, fflag, nflag, sflag;

/* Per-mode line processors, the list parser and the usage message. */
void	b_cut(FILE *fp, char *fname);
void	c_cut(FILE *fp, char *fname);
void	f_cut(FILE *fp, char *fname);
void	get_list(char *list);
void	usage(void);
62 
63 int
64 main(int argc, char *argv[])
65 {
66 	FILE *fp;
67 	void (*fcn)(FILE *, char *);
68 	int ch, rval;
69 
70 	setlocale(LC_CTYPE, "");
71 
72 	if (pledge("stdio rpath", NULL) == -1)
73 		err(1, "pledge");
74 
75 	dchar[0] = '\t';		/* default delimiter */
76 	dchar[1] = '\0';
77 	dlen = 1;
78 
79 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
80 		switch(ch) {
81 		case 'b':
82 			get_list(optarg);
83 			bflag = 1;
84 			break;
85 		case 'c':
86 			get_list(optarg);
87 			cflag = 1;
88 			break;
89 		case 'd':
90 			if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1)
91 				usage();
92 			assert(dlen < sizeof(dchar));
93 			(void)memcpy(dchar, optarg, dlen);
94 			dchar[dlen] = '\0';
95 			dflag = 1;
96 			break;
97 		case 'f':
98 			get_list(optarg);
99 			fflag = 1;
100 			break;
101 		case 'n':
102 			nflag = 1;
103 			break;
104 		case 's':
105 			sflag = 1;
106 			break;
107 		case '?':
108 		default:
109 			usage();
110 		}
111 	argc -= optind;
112 	argv += optind;
113 
114 	if (bflag + cflag + fflag != 1 ||
115 	    (nflag && !bflag) ||
116 	    ((dflag || sflag) && !fflag))
117 		usage();
118 
119 	if (MB_CUR_MAX == 1) {
120 		nflag = 0;
121 		if (cflag) {
122 			bflag = 1;
123 			cflag = 0;
124 		}
125 	}
126 
127 	fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut;
128 
129 	rval = 0;
130 	if (*argv)
131 		for (; *argv; ++argv) {
132 			if (strcmp(*argv, "-") == 0)
133 				fcn(stdin, "stdin");
134 			else {
135 				if ((fp = fopen(*argv, "r"))) {
136 					fcn(fp, *argv);
137 					(void)fclose(fp);
138 				} else {
139 					rval = 1;
140 					warn("%s", *argv);
141 				}
142 			}
143 		}
144 	else {
145 		if (pledge("stdio rpath", NULL) == -1)
146 			err(1, "pledge");
147 
148 		fcn(stdin, "stdin");
149 	}
150 	exit(rval);
151 }
152 
153 int autostart, autostop, maxval;
154 
155 char positions[_POSIX2_LINE_MAX + 1];
156 
157 void
158 get_list(char *list)
159 {
160 	int setautostart, start, stop;
161 	char *pos;
162 	char *p;
163 
164 	/*
165 	 * set a byte in the positions array to indicate if a field or
166 	 * column is to be selected; use +1, it's 1-based, not 0-based.
167 	 * This parser is less restrictive than the Draft 9 POSIX spec.
168 	 * POSIX doesn't allow lists that aren't in increasing order or
169 	 * overlapping lists.  We also handle "-3-5" although there's no
170 	 * real reason too.
171 	 */
172 	while ((p = strsep(&list, ", \t"))) {
173 		setautostart = start = stop = 0;
174 		if (*p == '-') {
175 			++p;
176 			setautostart = 1;
177 		}
178 		if (isdigit((unsigned char)*p)) {
179 			start = stop = strtol(p, &p, 10);
180 			if (setautostart && start > autostart)
181 				autostart = start;
182 		}
183 		if (*p == '-') {
184 			if (isdigit((unsigned char)p[1]))
185 				stop = strtol(p + 1, &p, 10);
186 			if (*p == '-') {
187 				++p;
188 				if (!autostop || autostop > stop)
189 					autostop = stop;
190 			}
191 		}
192 		if (*p)
193 			errx(1, "[-bcf] list: illegal list value");
194 		if (!stop || !start)
195 			errx(1, "[-bcf] list: values may not include zero");
196 		if (stop > _POSIX2_LINE_MAX)
197 			errx(1, "[-bcf] list: %d too large (max %d)",
198 			    stop, _POSIX2_LINE_MAX);
199 		if (maxval < stop)
200 			maxval = stop;
201 		for (pos = positions + start; start++ <= stop; *pos++ = 1)
202 			;
203 	}
204 
205 	/* overlapping ranges */
206 	if (autostop && maxval > autostop)
207 		maxval = autostop;
208 
209 	/* set autostart */
210 	if (autostart)
211 		memset(positions + 1, '1', autostart);
212 }
213 
214 /* ARGSUSED */
215 void
216 b_cut(FILE *fp, char *fname)
217 {
218 	int ch, col;
219 	char *pos;
220 
221 	for (;;) {
222 		pos = positions + 1;
223 		for (col = maxval; col; --col) {
224 			if ((ch = getc(fp)) == EOF)
225 				return;
226 			if (ch == '\n')
227 				break;
228 			if (*pos++)
229 				(void)putchar(ch);
230 		}
231 		if (ch != '\n') {
232 			if (autostop)
233 				while ((ch = getc(fp)) != EOF && ch != '\n')
234 					(void)putchar(ch);
235 			else
236 				while ((ch = getc(fp)) != EOF && ch != '\n')
237 					;
238 		}
239 		(void)putchar('\n');
240 	}
241 }
242 
243 void
244 c_cut(FILE *fp, char *fname)
245 {
246 	static char	*line = NULL;
247 	static size_t	 linesz = 0;
248 	ssize_t		 linelen;
249 	char		*cp, *pos, *maxpos;
250 	int		 len;
251 
252 	while ((linelen = getline(&line, &linesz, fp)) != -1) {
253 		if (line[linelen - 1] == '\n')
254 			line[linelen - 1] = '\0';
255 
256 		cp = line;
257 		pos = positions + 1;
258 		maxpos = pos + maxval;
259 		while(pos < maxpos && *cp != '\0') {
260 			len = mblen(cp, MB_CUR_MAX);
261 			if (len == -1)
262 				len = 1;
263 			pos += nflag ? len : 1;
264 			if (pos[-1] == '\0')
265 				cp += len;
266 			else
267 				while (len--)
268 					putchar(*cp++);
269 		}
270 		if (autostop)
271 			puts(cp);
272 		else
273 			putchar('\n');
274 	}
275 }
276 
277 void
278 f_cut(FILE *fp, char *fname)
279 {
280 	static char	*line = NULL;
281 	static size_t	 linesz = 0;
282 	ssize_t		 linelen;
283 	char		*sp, *ep, *pos, *maxpos;
284 	int		 output;
285 
286 	while ((linelen = getline(&line, &linesz, fp)) != -1) {
287 		if (line[linelen - 1] == '\n')
288 			line[linelen - 1] = '\0';
289 
290 		if ((ep = strstr(line, dchar)) == NULL) {
291 			if (!sflag)
292 				puts(line);
293 			continue;
294 		}
295 
296 		pos = positions + 1;
297 		maxpos = pos + maxval;
298 		output = 0;
299 		sp = line;
300 		for (;;) {
301 			if (*pos++) {
302 				if (output)
303 					fputs(dchar, stdout);
304 				while (sp < ep)
305 					putchar(*sp++);
306 				output = 1;
307 			} else
308 				sp = ep;
309 			if (*sp == '\0' || pos == maxpos)
310 				break;
311 			sp += dlen;
312 			if ((ep = strstr(sp, dchar)) == NULL)
313 				ep = strchr(sp, '\0');
314 		}
315 		if (autostop)
316 			puts(sp);
317 		else
318 			putchar('\n');
319 	}
320 }
321 
/*
 * Write the command synopsis to stderr and terminate with status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: cut -b list [-n] [file ...]\n"
	    "       cut -c list [file ...]\n"
	    "       cut -f list [-s] [-d delim] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
331