xref: /netbsd-src/usr.bin/split/split.c (revision d37a996a49db27ca1b1e656cc3e261bfe3295a71)
1 /*	$NetBSD: split.c,v 1.19 2003/07/10 21:30:16 bjh21 Exp $	*/
2 
3 /*
4  * Copyright (c) 1987, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)split.c	8.3 (Berkeley) 4/25/94";
45 #endif
46 __RCSID("$NetBSD: split.c,v 1.19 2003/07/10 21:30:16 bjh21 Exp $");
47 #endif /* not lint */
48 
49 #include <sys/param.h>
50 
51 #include <ctype.h>
52 #include <err.h>
53 #include <errno.h>
54 #include <fcntl.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 
#define DEFLINE	1000		/* Lines per output file when no count is given. */

/*
 * State shared between the splitters and newfile().
 */

/* Nonzero while the current output file may still receive data. */
static int file_open;

/* Input descriptor; standard input unless a file operand is opened. */
static int ifd = STDIN_FILENO;

/* Output descriptor; stays -1 until newfile() creates the first file. */
static int ofd = -1;

/* Output file name: the prefix followed by room for the suffix. */
static char *fname;

/* Number of suffix letters appended to the prefix (-a option). */
static size_t sfxlen = 2;
66 
/* Program entry point. */
int main(int argc, char **argv);

/* Output helpers. */
static size_t bigwrite(int fd, void const *buf, size_t len);
static void newfile(void);

/* Splitters: by byte count and by line count. */
static void split1(off_t bytecnt);
static void split2(off_t numlines);

/* Prints the synopsis and exits. */
static void usage(void) __attribute__((__noreturn__));
73 
74 int
75 main(int argc, char *argv[])
76 {
77 	int ch;
78 	char *ep, *p;
79 	char const *base;
80 	off_t bytecnt = 0;	/* Byte count to split on. */
81 	off_t numlines = 0;	/* Line count to split on. */
82 
83 	while ((ch = getopt(argc, argv, "0123456789b:l:a:")) != -1)
84 		switch (ch) {
85 		case '0': case '1': case '2': case '3': case '4':
86 		case '5': case '6': case '7': case '8': case '9':
87 			/*
88 			 * Undocumented kludge: split was originally designed
89 			 * to take a number after a dash.
90 			 */
91 			if (numlines == 0) {
92 				p = argv[optind - 1];
93 				if (p[0] == '-' && p[1] == ch && !p[2])
94 					p++;
95 				else
96 					p = argv[optind] + 1;
97 				numlines = strtoull(p, &ep, 10);
98 				if (numlines == 0 || *ep != '\0')
99 					errx(1, "%s: illegal line count.", p);
100 			}
101 			break;
102 		case 'b':		/* Byte count. */
103 			if (!isdigit((unsigned char)optarg[0]) ||
104 			    (bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
105 			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
106 				errx(1, "%s: illegal byte count.", optarg);
107 			if (*ep == 'k')
108 				bytecnt *= 1024;
109 			else if (*ep == 'm')
110 				bytecnt *= 1024 * 1024;
111 			break;
112 		case 'l':		/* Line count. */
113 			if (numlines != 0)
114 				usage();
115 			if (!isdigit((unsigned char)optarg[0]) ||
116 			    (numlines = strtoull(optarg, &ep, 10)) == 0 ||
117 			    *ep != '\0')
118 				errx(1, "%s: illegal line count.", optarg);
119 			break;
120 		case 'a':		/* Suffix length. */
121 			if (!isdigit((unsigned char)optarg[0]) ||
122 			    (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
123 			    *ep != '\0')
124 				errx(1, "%s: illegal suffix length.", optarg);
125 			break;
126 		default:
127 			usage();
128 		}
129 	argv += optind;
130 	argc -= optind;
131 
132 	if (*argv != NULL) {
133 		if (strcmp(*argv, "-") != 0 &&
134 		    (ifd = open(*argv, O_RDONLY, 0)) < 0)
135 			err(1, "%s", *argv);
136 		++argv;
137 	}
138 
139 
140 	base = (*argv != NULL) ? *argv++ : "x";
141 	if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
142 		err(EXIT_FAILURE, NULL);
143 	(void)strcpy(fname, base);		/* File name prefix. */
144 
145 	if (*argv != NULL)
146 		usage();
147 
148 	if (numlines == 0)
149 		numlines = DEFLINE;
150 	else if (bytecnt)
151 		usage();
152 
153 	if (bytecnt)
154 		split1(bytecnt);
155 	else
156 		split2(numlines);
157 
158 	return 0;
159 }
160 
161 /*
162  * split1 --
163  *	Split the input by bytes.
164  */
165 static void
166 split1(off_t bytecnt)
167 {
168 	off_t bcnt;
169 	ssize_t dist, len;
170 	char *C;
171 	char bfr[MAXBSIZE];
172 
173 	for (bcnt = 0;;)
174 		switch (len = read(ifd, bfr, MAXBSIZE)) {
175 		case 0:
176 			exit(0);
177 			/* NOTREACHED */
178 		case -1:
179 			err(1, "read");
180 			/* NOTREACHED */
181 		default:
182 			if (!file_open) {
183 				newfile();
184 				file_open = 1;
185 			}
186 			if (bcnt + len >= bytecnt) {
187 				/* LINTED: bytecnt - bcnt <= len */
188 				dist = bytecnt - bcnt;
189 				if (bigwrite(ofd, bfr, dist) != dist)
190 					err(1, "write");
191 				len -= dist;
192 				for (C = bfr + dist; len >= bytecnt;
193 				    /* LINTED: bytecnt <= len */
194 				    len -= bytecnt, C += bytecnt) {
195 					newfile();
196 					/* LINTED: as above */
197 					if (bigwrite(ofd,
198 					    C, bytecnt) != bytecnt)
199 						err(1, "write");
200 				}
201 				if (len) {
202 					newfile();
203 					/* LINTED: len >= 0 */
204 					if (bigwrite(ofd, C, len) != len)
205 						err(1, "write");
206 				} else
207 					file_open = 0;
208 				bcnt = len;
209 			} else {
210 				bcnt += len;
211 				/* LINTED: len >= 0 */
212 				if (bigwrite(ofd, bfr, len) != len)
213 					err(1, "write");
214 			}
215 		}
216 }
217 
218 /*
219  * split2 --
220  *	Split the input by lines.
221  */
222 static void
223 split2(off_t numlines)
224 {
225 	off_t lcnt;
226 	size_t bcnt;
227 	ssize_t len;
228 	char *Ce, *Cs;
229 	char bfr[MAXBSIZE];
230 
231 	for (lcnt = 0;;)
232 		switch (len = read(ifd, bfr, MAXBSIZE)) {
233 		case 0:
234 			exit(0);
235 			/* NOTREACHED */
236 		case -1:
237 			err(1, "read");
238 			/* NOTREACHED */
239 		default:
240 			if (!file_open) {
241 				newfile();
242 				file_open = 1;
243 			}
244 			for (Cs = Ce = bfr; len--; Ce++)
245 				if (*Ce == '\n' && ++lcnt == numlines) {
246 					bcnt = Ce - Cs + 1;
247 					if (bigwrite(ofd, Cs, bcnt) != bcnt)
248 						err(1, "write");
249 					lcnt = 0;
250 					Cs = Ce + 1;
251 					if (len)
252 						newfile();
253 					else
254 						file_open = 0;
255 				}
256 			if (Cs < Ce) {
257 				bcnt = Ce - Cs;
258 				if (bigwrite(ofd, Cs, bcnt) != bcnt)
259 					err(1, "write");
260 			}
261 		}
262 }
263 
264 /*
265  * newfile --
266  *	Open a new output file.
267  */
268 static void
269 newfile(void)
270 {
271 	static int fnum;
272 	static char *fpnt;
273 	int quot, i;
274 
275 	if (ofd == -1) {
276 		fpnt = fname + strlen(fname);
277 		fpnt[sfxlen] = '\0';
278 	} else if (close(ofd) != 0)
279 		err(1, "%s", fname);
280 
281 	quot = fnum;
282 	for (i = sfxlen - 1; i >= 0; i--) {
283 		fpnt[i] = quot % 26 + 'a';
284 		quot = quot / 26;
285 	}
286 	if (quot > 0)
287 		errx(1, "too many files.");
288 	++fnum;
289 	if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
290 		err(1, "%s", fname);
291 }
292 
/*
 * bigwrite --
 *	Write `len' bytes from `buf' to `fd', calling write(2) again after
 *	every short write.  Returns the number of bytes written; the result
 *	is smaller than `len' only when a write(2) call returned -1.
 */
static size_t
bigwrite(int fd, const void *buf, size_t len)
{
	const char *base = buf;
	size_t done;
	ssize_t n;

	for (done = 0; done < len; done += (size_t)n) {
		n = write(fd, base + done, len - done);
		if (n == -1)
			break;
	}
	return done;
}
309 
310 
311 static void
312 usage(void)
313 {
314 	(void)fprintf(stderr,
315 "Usage: %s [-b byte_count] [-l line_count] [-a suffix_length] "
316 "[file [prefix]]\n", getprogname());
317 	exit(1);
318 }
319