xref: /openbsd-src/usr.bin/split/split.c (revision 62a742911104f98b9185b2c6b6007d9b1c36396c)
1 /*	$OpenBSD: split.c,v 1.4 1999/02/04 03:53:48 millert Exp $	*/
2 /*	$NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1987, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static char copyright[] =
39 "@(#) Copyright (c) 1987, 1993, 1994\n\
40 	The Regents of the University of California.  All rights reserved.\n";
41 #endif /* not lint */
42 
43 #ifndef lint
44 #if 0
45 static char sccsid[] = "@(#)split.c	8.3 (Berkeley) 4/25/94";
46 #else
47 static char rcsid[] = "$OpenBSD: split.c,v 1.4 1999/02/04 03:53:48 millert Exp $";
48 #endif
49 #endif /* not lint */
50 
#include <sys/param.h>
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
63 
64 #define DEFLINE	1000			/* Default num lines per file. */
65 
66 long	 bytecnt;			/* Byte count to split on. */
67 long	 numlines;			/* Line count to split on. */
68 int	 file_open;			/* If a file open. */
69 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
70 char	 bfr[MAXBSIZE];			/* I/O buffer. */
71 char	 fname[MAXPATHLEN];		/* File name prefix. */
72 regex_t	 rgx;
73 int	 pflag;
74 
75 void newfile __P((void));
76 void split1 __P((void));
77 void split2 __P((void));
78 void usage __P((void));
79 
80 int
81 main(argc, argv)
82 	int argc;
83 	char *argv[];
84 {
85 	int ch;
86 	char *ep, *p;
87 
88 	while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
89 		switch (ch) {
90 		case '0': case '1': case '2': case '3': case '4':
91 		case '5': case '6': case '7': case '8': case '9':
92 			/*
93 			 * Undocumented kludge: split was originally designed
94 			 * to take a number after a dash.
95 			 */
96 			if (numlines == 0) {
97 				p = argv[optind - 1];
98 				if (p[0] == '-' && p[1] == ch && !p[2])
99 					numlines = strtol(++p, &ep, 10);
100 				else
101 					numlines =
102 					    strtol(argv[optind] + 1, &ep, 10);
103 				if (numlines <= 0 || *ep)
104 					errx(EX_USAGE,
105 					    "%s: illegal line count", optarg);
106 			}
107 			break;
108 		case '-':		/* Undocumented: historic stdin flag. */
109 			if (ifd != -1)
110 				usage();
111 			ifd = 0;
112 			break;
113 		case 'b':		/* Byte count. */
114 			if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
115 			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
116 				errx(EX_USAGE,
117 				    "%s: illegal byte count", optarg);
118 			if (*ep == 'k')
119 				bytecnt *= 1024;
120 			else if (*ep == 'm')
121 				bytecnt *= 1048576;
122 			break;
123 		case 'p' :      /* pattern matching. */
124 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
125 				errx(EX_USAGE, "%s: illegal regexp", optarg);
126 			pflag = 1;
127 			break;
128 		case 'l':		/* Line count. */
129 			if (numlines != 0)
130 				usage();
131 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
132 				errx(EX_USAGE,
133 				    "%s: illegal line count", optarg);
134 			break;
135 		default:
136 			usage();
137 		}
138 	argv += optind;
139 	argc -= optind;
140 
141 	if (*argv != NULL)
142 		if (ifd == -1) {		/* Input file. */
143 			if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
144 				err(EX_NOINPUT, "%s", *argv);
145 			++argv;
146 		}
147 	if (*argv != NULL)			/* File name prefix. */
148 		(void)strcpy(fname, *argv++);
149 	if (*argv != NULL)
150 		usage();
151 
152 	if (pflag && (numlines != 0 || bytecnt != 0))
153 		usage();
154 
155 	if (numlines == 0)
156 		numlines = DEFLINE;
157 	else if (bytecnt != 0)
158 		usage();
159 
160 	if (ifd == -1)				/* Stdin by default. */
161 		ifd = 0;
162 
163 	if (bytecnt) {
164 		split1();
165 		exit (0);
166 	}
167 	split2();
168 	if (pflag)
169 		regfree(&rgx);
170 	exit(0);
171 }
172 
173 /*
174  * split1 --
175  *	Split the input by bytes.
176  */
177 void
178 split1()
179 {
180 	long bcnt;
181 	int dist, len;
182 	char *C;
183 
184 	for (bcnt = 0;;)
185 		switch ((len = read(ifd, bfr, MAXBSIZE))) {
186 		case 0:
187 			exit(0);
188 		case -1:
189 			err(EX_IOERR, "read");
190 			/* NOTREACHED */
191 		default:
192 			if (!file_open)
193 				newfile();
194 			if (bcnt + len >= bytecnt) {
195 				dist = bytecnt - bcnt;
196 				if (write(ofd, bfr, dist) != dist)
197 					err(EX_IOERR, "write");
198 				len -= dist;
199 				for (C = bfr + dist; len >= bytecnt;
200 				    len -= bytecnt, C += bytecnt) {
201 					newfile();
202 					if (write(ofd,
203 					    C, (int)bytecnt) != bytecnt)
204 						err(EX_IOERR, "write");
205 				}
206 				if (len != 0) {
207 					newfile();
208 					if (write(ofd, C, len) != len)
209 						err(EX_IOERR, "write");
210 				} else
211 					file_open = 0;
212 				bcnt = len;
213 			} else {
214 				bcnt += len;
215 				if (write(ofd, bfr, len) != len)
216 					err(EX_IOERR, "write");
217 			}
218 		}
219 }
220 
221 /*
222  * split2 --
223  *	Split the input by lines.
224  */
225 void
226 split2()
227 {
228 	long lcnt = 0;
229 	FILE *infp;
230 
231 	/* Stick a stream on top of input file descriptor */
232 	if ((infp = fdopen(ifd, "r")) == NULL)
233 		err(EX_NOINPUT, "fdopen");
234 
235 	/* Process input one line at a time */
236 	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
237 		const int len = strlen(bfr);
238 
239 		/* If line is too long to deal with, just write it out */
240 		if (bfr[len - 1] != '\n')
241 			goto writeit;
242 
243 		/* Check if we need to start a new file */
244 		if (pflag) {
245 			regmatch_t pmatch;
246 
247 			pmatch.rm_so = 0;
248 			pmatch.rm_eo = len - 1;
249 			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
250 				newfile();
251 		} else if (lcnt++ == numlines) {
252 			newfile();
253 			lcnt = 1;
254 		}
255 
256 writeit:
257 		/* Open output file if needed */
258 		if (!file_open)
259 			newfile();
260 
261 		/* Write out line */
262 		if (write(ofd, bfr, len) != len)
263 			err(EX_IOERR, "write");
264 	}
265 
266 	/* EOF or error? */
267 	if (ferror(infp))
268 		err(EX_IOERR, "read");
269 	else
270 		exit(0);
271 }
272 
273 /*
274  * newfile --
275  *	Open a new output file.
276  */
277 void
278 newfile()
279 {
280 	static long fnum;
281 	static int defname;
282 	static char *fpnt;
283 
284 	if (ofd == -1) {
285 		if (fname[0] == '\0') {
286 			fname[0] = 'x';
287 			fpnt = fname + 1;
288 			defname = 1;
289 		} else {
290 			fpnt = fname + strlen(fname);
291 			defname = 0;
292 		}
293 		ofd = fileno(stdout);
294 	}
295 	/*
296 	 * Hack to increase max files; original code wandered through
297 	 * magic characters.  Maximum files is 3 * 26 * 26 == 2028
298 	 */
299 #define MAXFILES	676
300 	if (fnum == MAXFILES) {
301 		if (!defname || fname[0] == 'z')
302 			errx(EX_DATAERR, "too many files");
303 		++fname[0];
304 		fnum = 0;
305 	}
306 	fpnt[0] = fnum / 26 + 'a';
307 	fpnt[1] = fnum % 26 + 'a';
308 	++fnum;
309 	if (!freopen(fname, "w", stdout))
310 		err(EX_IOERR, "%s", fname);
311 	file_open = 1;
312 }
313 
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
void
usage()
{
	extern char *__progname;
	static const char synopsis[] =
"usage: %s [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n";

	(void)fprintf(stderr, synopsis, __progname);
	exit(EX_USAGE);
}
324