xref: /openbsd-src/usr.bin/split/split.c (revision daf88648c0e349d5c02e1504293082072c981640)
1 /*	$OpenBSD: split.c,v 1.13 2006/08/10 22:44:17 millert Exp $	*/
2 /*	$NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1987, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef lint
34 static char copyright[] =
35 "@(#) Copyright (c) 1987, 1993, 1994\n\
36 	The Regents of the University of California.  All rights reserved.\n";
37 #endif /* not lint */
38 
39 #ifndef lint
40 #if 0
41 static char sccsid[] = "@(#)split.c	8.3 (Berkeley) 4/25/94";
42 #else
43 static char rcsid[] = "$OpenBSD: split.c,v 1.13 2006/08/10 22:44:17 millert Exp $";
44 #endif
45 #endif /* not lint */
46 
47 #include <sys/param.h>
48 #include <sys/types.h>
49 
50 #include <ctype.h>
51 #include <err.h>
52 #include <fcntl.h>
53 #include <limits.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 #include <regex.h>
59 #include <sysexits.h>
60 
61 #define DEFLINE	1000			/* Default num lines per file. */
62 
63 ssize_t	 bytecnt;			/* Byte count to split on (-b); 0 if not splitting by bytes. */
64 long	 numlines;			/* Line count to split on (-l or -N); 0 until set. */
65 int	 file_open;			/* Nonzero while the current output file may still receive data. */
66 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors; -1 until assigned. */
67 char	 bfr[MAXBSIZE];			/* I/O buffer shared by split1() and split2(). */
68 char	 fname[MAXPATHLEN];		/* File name prefix; newfile() appends the suffix in place. */
69 regex_t	 rgx;				/* Compiled -p pattern; valid only when pflag is set. */
70 int	 pflag;				/* Set to 1 when -p pattern was given. */
71 int	 sufflen = 2;			/* File name suffix length (-a). */
72 
73 void newfile(void);			/* Advance to the next output file name and open it. */
74 void split1(void);			/* Split the input by byte count. */
75 void split2(void);			/* Split the input by line count or pattern. */
76 __dead void usage(void);		/* Print the synopsis and exit with EX_USAGE. */
77 
78 int
79 main(int argc, char *argv[])
80 {
81 	int ch, scale;
82 	char *ep, *p;
83 	const char *errstr;
84 
85 	while ((ch = getopt(argc, argv, "0123456789a:b:l:p:-")) != -1)
86 		switch (ch) {
87 		case '0': case '1': case '2': case '3': case '4':
88 		case '5': case '6': case '7': case '8': case '9':
89 			/*
90 			 * Undocumented kludge: split was originally designed
91 			 * to take a number after a dash.
92 			 */
93 			if (numlines == 0) {
94 				p = argv[optind - 1];
95 				if (p[0] == '-' && p[1] == ch && !p[2])
96 					numlines = strtol(++p, &ep, 10);
97 				else
98 					numlines =
99 					    strtol(argv[optind] + 1, &ep, 10);
100 				if (numlines <= 0 || *ep)
101 					errx(EX_USAGE,
102 					    "%s: illegal line count", optarg);
103 			}
104 			break;
105 		case '-':		/* Undocumented: historic stdin flag. */
106 			if (ifd != -1)
107 				usage();
108 			ifd = 0;
109 			break;
110 		case 'a':		/* suffix length. */
111 			sufflen = strtonum(optarg, 1, NAME_MAX, &errstr);
112 			if (errstr)
113 				errx(EX_USAGE, "%s: %s", optarg, errstr);
114 			break;
115 		case 'b':		/* Byte count. */
116 			if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
117 			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
118 				errx(EX_USAGE,
119 				    "%s: illegal byte count", optarg);
120 			if (*ep == 'k')
121 				scale = 1024;
122 			else if (*ep == 'm')
123 				scale = 1048576;
124 			else
125 				scale = 1;
126 			if (bytecnt > SSIZE_MAX / scale)
127 				errx(EX_USAGE, "%s: byte count too large",
128 				    optarg);
129 			bytecnt *= scale;
130 			break;
131 		case 'p' :      /* pattern matching. */
132 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
133 				errx(EX_USAGE, "%s: illegal regexp", optarg);
134 			pflag = 1;
135 			break;
136 		case 'l':		/* Line count. */
137 			if (numlines != 0)
138 				usage();
139 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
140 				errx(EX_USAGE,
141 				    "%s: illegal line count", optarg);
142 			break;
143 		default:
144 			usage();
145 		}
146 	argv += optind;
147 	argc -= optind;
148 
149 	if (*argv != NULL)
150 		if (ifd == -1) {		/* Input file. */
151 			if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
152 				err(EX_NOINPUT, "%s", *argv);
153 			++argv;
154 		}
155 	if (*argv != NULL)			/* File name prefix. */
156 		(void)strlcpy(fname, *argv++, sizeof(fname));
157 	if (*argv != NULL)
158 		usage();
159 
160 	if (strlen(fname) + sufflen >= sizeof(fname))
161 		errx(EX_USAGE, "suffix is too long");
162 	if (pflag && (numlines != 0 || bytecnt != 0))
163 		usage();
164 
165 	if (numlines == 0)
166 		numlines = DEFLINE;
167 	else if (bytecnt != 0)
168 		usage();
169 
170 	if (ifd == -1)				/* Stdin by default. */
171 		ifd = 0;
172 
173 	if (bytecnt) {
174 		split1();
175 		exit (0);
176 	}
177 	split2();
178 	if (pflag)
179 		regfree(&rgx);
180 	exit(0);
181 }
182 
183 /*
184  * split1 --
185  *	Split the input by bytes.
186  */
187 void
188 split1(void)
189 {
190 	ssize_t bcnt, dist, len;
191 	char *C;
192 
193 	for (bcnt = 0;;)
194 		switch ((len = read(ifd, bfr, MAXBSIZE))) {
195 		case 0:
196 			exit(0);
197 		case -1:
198 			err(EX_IOERR, "read");
199 			/* NOTREACHED */
200 		default:
201 			if (!file_open)
202 				newfile();
203 			if (bcnt + len >= bytecnt) {
204 				dist = bytecnt - bcnt;
205 				if (write(ofd, bfr, dist) != dist)
206 					err(EX_IOERR, "write");
207 				len -= dist;
208 				for (C = bfr + dist; len >= bytecnt;
209 				    len -= bytecnt, C += bytecnt) {
210 					newfile();
211 					if (write(ofd, C, bytecnt) != bytecnt)
212 						err(EX_IOERR, "write");
213 				}
214 				if (len != 0) {
215 					newfile();
216 					if (write(ofd, C, len) != len)
217 						err(EX_IOERR, "write");
218 				} else
219 					file_open = 0;
220 				bcnt = len;
221 			} else {
222 				bcnt += len;
223 				if (write(ofd, bfr, len) != len)
224 					err(EX_IOERR, "write");
225 			}
226 		}
227 }
228 
229 /*
230  * split2 --
231  *	Split the input by lines.
232  */
233 void
234 split2(void)
235 {
236 	long lcnt = 0;
237 	FILE *infp;
238 
239 	/* Stick a stream on top of input file descriptor */
240 	if ((infp = fdopen(ifd, "r")) == NULL)
241 		err(EX_NOINPUT, "fdopen");
242 
243 	/* Process input one line at a time */
244 	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
245 		const int len = strlen(bfr);
246 
247 		/* If line is too long to deal with, just write it out */
248 		if (bfr[len - 1] != '\n')
249 			goto writeit;
250 
251 		/* Check if we need to start a new file */
252 		if (pflag) {
253 			regmatch_t pmatch;
254 
255 			pmatch.rm_so = 0;
256 			pmatch.rm_eo = len - 1;
257 			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
258 				newfile();
259 		} else if (lcnt++ == numlines) {
260 			newfile();
261 			lcnt = 1;
262 		}
263 
264 writeit:
265 		/* Open output file if needed */
266 		if (!file_open)
267 			newfile();
268 
269 		/* Write out line */
270 		if (write(ofd, bfr, len) != len)
271 			err(EX_IOERR, "write");
272 	}
273 
274 	/* EOF or error? */
275 	if (ferror(infp))
276 		err(EX_IOERR, "read");
277 	else
278 		exit(0);
279 }
280 
281 /*
282  * newfile --
283  *	Open a new output file.
284  */
285 void
286 newfile(void)
287 {
288 	static char *suffix, *sufftail;
289 	static int defname;
290 
291 	if (ofd == -1) {
292 		if (fname[0] == '\0') {
293 			fname[0] = 'x';
294 			suffix = fname + 1;
295 			defname = 1;
296 		} else {
297 			suffix = fname + strlen(fname);
298 			defname = 0;
299 		}
300 		memset(suffix, 'a', sufflen);
301 		suffix[sufflen] = '\0';
302 		sufftail = suffix + sufflen - 1;
303 		--sufftail[0];		/* incremented later */
304 		ofd = fileno(stdout);
305 	}
306 
307 	if (sufftail[0] == 'z') {
308 		int i;
309 
310 		/* Increment the non-tail portion of the suffix. */
311 		for (i = sufflen - 2; i >= 0; i--) {
312 			if (suffix[i] != 'z') {
313 				suffix[i]++;
314 				break;
315 			}
316 		}
317 		if (i < 0) {
318 			/* Hack to support y and z prefix if no name spec'd. */
319 			if (!defname || fname[0] == 'z')
320 				errx(EX_DATAERR, "too many files");
321 			++fname[0];
322 			memset(suffix, 'a', sufflen);
323 		} else
324 			sufftail[0] = 'a';	/* reset tail */
325 	} else
326 		++sufftail[0];
327 
328 	if (!freopen(fname, "w", stdout))
329 		err(EX_IOERR, "%s", fname);
330 	file_open = 1;
331 }
332 
333 __dead void
334 usage(void)
335 {
336 	extern char *__progname;
337 
338 	(void)fprintf(stderr, "usage: %s [-a suffix_length] "
339 	    "[-b byte_count[k|m] | -l line_count | -p pattern] [file [name]]\n",
340 	    __progname);
341 	exit(EX_USAGE);
342 }
343