xref: /openbsd-src/usr.bin/split/split.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: split.c,v 1.16 2007/10/20 18:08:57 sobrado Exp $	*/
2 /*	$NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1987, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef lint
34 static char copyright[] =
35 "@(#) Copyright (c) 1987, 1993, 1994\n\
36 	The Regents of the University of California.  All rights reserved.\n";
37 #endif /* not lint */
38 
39 #ifndef lint
40 #if 0
41 static char sccsid[] = "@(#)split.c	8.3 (Berkeley) 4/25/94";
42 #else
43 static char rcsid[] = "$OpenBSD: split.c,v 1.16 2007/10/20 18:08:57 sobrado Exp $";
44 #endif
45 #endif /* not lint */
46 
47 #include <sys/param.h>
48 #include <sys/types.h>
49 
50 #include <ctype.h>
51 #include <err.h>
52 #include <fcntl.h>
53 #include <limits.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 #include <regex.h>
59 #include <sysexits.h>
60 
61 #define DEFLINE	1000			/* Default num lines per file. */
62 
63 ssize_t	 bytecnt;			/* Byte count to split on. */
64 long	 numlines;			/* Line count to split on. */
65 int	 file_open;			/* If a file open. */
66 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
67 char	 bfr[MAXBSIZE];			/* I/O buffer. */
68 char	 fname[MAXPATHLEN];		/* File name prefix. */
69 regex_t	 rgx;
70 int	 pflag;
71 int	 sufflen = 2;			/* File name suffix length. */
72 
73 void newfile(void);
74 void split1(void);
75 void split2(void);
76 __dead void usage(void);
77 
78 int
79 main(int argc, char *argv[])
80 {
81 	int ch, scale;
82 	char *ep, *p;
83 	const char *errstr;
84 
85 	while ((ch = getopt(argc, argv, "0123456789a:b:l:p:-")) != -1)
86 		switch (ch) {
87 		case '0': case '1': case '2': case '3': case '4':
88 		case '5': case '6': case '7': case '8': case '9':
89 			/*
90 			 * Undocumented kludge: split was originally designed
91 			 * to take a number after a dash.
92 			 */
93 			if (numlines == 0) {
94 				p = argv[optind - 1];
95 				if (p[0] == '-' && p[1] == ch && !p[2])
96 					numlines = strtol(++p, &ep, 10);
97 				else
98 					numlines =
99 					    strtol(argv[optind] + 1, &ep, 10);
100 				if (numlines <= 0 || *ep)
101 					errx(EX_USAGE,
102 					    "%s: illegal line count", optarg);
103 			}
104 			break;
105 		case '-':		/* Undocumented: historic stdin flag. */
106 			if (ifd != -1)
107 				usage();
108 			ifd = 0;
109 			break;
110 		case 'a':		/* suffix length. */
111 			sufflen = strtonum(optarg, 1, NAME_MAX, &errstr);
112 			if (errstr)
113 				errx(EX_USAGE, "%s: %s", optarg, errstr);
114 			break;
115 		case 'b':		/* Byte count. */
116 			if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
117 			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
118 				errx(EX_USAGE,
119 				    "%s: illegal byte count", optarg);
120 			if (*ep == 'k')
121 				scale = 1024;
122 			else if (*ep == 'm')
123 				scale = 1048576;
124 			else
125 				scale = 1;
126 			if (bytecnt > SSIZE_MAX / scale)
127 				errx(EX_USAGE, "%s: byte count too large",
128 				    optarg);
129 			bytecnt *= scale;
130 			break;
131 		case 'p' :      /* pattern matching. */
132 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
133 				errx(EX_USAGE, "%s: illegal regexp", optarg);
134 			pflag = 1;
135 			break;
136 		case 'l':		/* Line count. */
137 			if (numlines != 0)
138 				usage();
139 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
140 				errx(EX_USAGE,
141 				    "%s: illegal line count", optarg);
142 			break;
143 		default:
144 			usage();
145 		}
146 	argv += optind;
147 	argc -= optind;
148 
149 	if (*argv != NULL)
150 		if (ifd == -1) {		/* Input file. */
151 			if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
152 				err(EX_NOINPUT, "%s", *argv);
153 			++argv;
154 		}
155 	if (*argv != NULL)			/* File name prefix. */
156 		(void)strlcpy(fname, *argv++, sizeof(fname));
157 	if (*argv != NULL)
158 		usage();
159 
160 	if (strlen(fname) + sufflen >= sizeof(fname))
161 		errx(EX_USAGE, "suffix is too long");
162 	if (pflag && (numlines != 0 || bytecnt != 0))
163 		usage();
164 
165 	if (numlines == 0)
166 		numlines = DEFLINE;
167 	else if (bytecnt != 0)
168 		usage();
169 
170 	if (ifd == -1)				/* Stdin by default. */
171 		ifd = 0;
172 
173 	if (bytecnt) {
174 		split1();
175 		exit (0);
176 	}
177 	split2();
178 	if (pflag)
179 		regfree(&rgx);
180 	exit(0);
181 }
182 
183 /*
184  * split1 --
185  *	Split the input by bytes.
186  */
187 void
188 split1(void)
189 {
190 	ssize_t bcnt, dist, len;
191 	char *C;
192 
193 	for (bcnt = 0;;)
194 		switch ((len = read(ifd, bfr, MAXBSIZE))) {
195 		case 0:
196 			exit(0);
197 		case -1:
198 			err(EX_IOERR, "read");
199 			/* NOTREACHED */
200 		default:
201 			if (!file_open)
202 				newfile();
203 			if (bcnt + len >= bytecnt) {
204 				dist = bytecnt - bcnt;
205 				if (write(ofd, bfr, dist) != dist)
206 					err(EX_IOERR, "write");
207 				len -= dist;
208 				for (C = bfr + dist; len >= bytecnt;
209 				    len -= bytecnt, C += bytecnt) {
210 					newfile();
211 					if (write(ofd, C, bytecnt) != bytecnt)
212 						err(EX_IOERR, "write");
213 				}
214 				if (len != 0) {
215 					newfile();
216 					if (write(ofd, C, len) != len)
217 						err(EX_IOERR, "write");
218 				} else
219 					file_open = 0;
220 				bcnt = len;
221 			} else {
222 				bcnt += len;
223 				if (write(ofd, bfr, len) != len)
224 					err(EX_IOERR, "write");
225 			}
226 		}
227 }
228 
229 /*
230  * split2 --
231  *	Split the input by lines.
232  */
233 void
234 split2(void)
235 {
236 	long lcnt = 0;
237 	FILE *infp;
238 
239 	/* Stick a stream on top of input file descriptor */
240 	if ((infp = fdopen(ifd, "r")) == NULL)
241 		err(EX_NOINPUT, "fdopen");
242 
243 	/* Process input one line at a time */
244 	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
245 		const int len = strlen(bfr);
246 
247 		if (len == 0)
248 			continue;
249 
250 		/* If line is too long to deal with, just write it out */
251 		if (bfr[len - 1] != '\n')
252 			goto writeit;
253 
254 		/* Check if we need to start a new file */
255 		if (pflag) {
256 			regmatch_t pmatch;
257 
258 			pmatch.rm_so = 0;
259 			pmatch.rm_eo = len - 1;
260 			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
261 				newfile();
262 		} else if (lcnt++ == numlines) {
263 			newfile();
264 			lcnt = 1;
265 		}
266 
267 writeit:
268 		/* Open output file if needed */
269 		if (!file_open)
270 			newfile();
271 
272 		/* Write out line */
273 		if (write(ofd, bfr, len) != len)
274 			err(EX_IOERR, "write");
275 	}
276 
277 	/* EOF or error? */
278 	if (ferror(infp))
279 		err(EX_IOERR, "read");
280 	else
281 		exit(0);
282 }
283 
284 /*
285  * newfile --
286  *	Open a new output file.
287  */
288 void
289 newfile(void)
290 {
291 	static char *suffix, *sufftail;
292 	char *sptr;
293 
294 	if (ofd == -1) {
295 		ofd = fileno(stdout);
296 		if (*fname == '\0') {
297 			*fname = 'x';	/* no name specified, use 'x' */
298 			memset(fname + 1, 'a', sufflen);
299 			suffix = fname;
300 			sufflen++;	/* treat 'x' as part of suffix */
301 		} else {
302 			suffix = fname + strlen(fname);
303 			memset(suffix, 'a', sufflen);
304 		}
305 		suffix[sufflen] = '\0';
306 		sufftail = suffix + sufflen - 1;
307 	} else {
308 		for (sptr = sufftail; sptr >= suffix; sptr--) {
309 			if (*sptr != 'z') {
310 				(*sptr)++;
311 				break;
312 			} else
313 				*sptr = 'a';
314 		}
315 		if (sptr < suffix)
316 			errx(EX_DATAERR, "too many files");
317 	}
318 
319 	if (!freopen(fname, "w", stdout))
320 		err(EX_IOERR, "%s", fname);
321 	file_open = 1;
322 }
323 
324 __dead void
325 usage(void)
326 {
327 	extern char *__progname;
328 
329 	(void)fprintf(stderr, "usage: %s [-a suffix_length]\n"
330 	    "             [-b byte_count[k|m] | -l line_count | -p pattern] "
331 	    "[file [name]]\n", __progname);
332 	exit(EX_USAGE);
333 }
334