1*30eeed2cSThomas Cort /* $NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $ */
2*30eeed2cSThomas Cort
3*30eeed2cSThomas Cort /*
4*30eeed2cSThomas Cort * Copyright (c) 1987, 1993, 1994
5*30eeed2cSThomas Cort * The Regents of the University of California. All rights reserved.
6*30eeed2cSThomas Cort *
7*30eeed2cSThomas Cort * Redistribution and use in source and binary forms, with or without
8*30eeed2cSThomas Cort * modification, are permitted provided that the following conditions
9*30eeed2cSThomas Cort * are met:
10*30eeed2cSThomas Cort * 1. Redistributions of source code must retain the above copyright
11*30eeed2cSThomas Cort * notice, this list of conditions and the following disclaimer.
12*30eeed2cSThomas Cort * 2. Redistributions in binary form must reproduce the above copyright
13*30eeed2cSThomas Cort * notice, this list of conditions and the following disclaimer in the
14*30eeed2cSThomas Cort * documentation and/or other materials provided with the distribution.
15*30eeed2cSThomas Cort * 3. Neither the name of the University nor the names of its contributors
16*30eeed2cSThomas Cort * may be used to endorse or promote products derived from this software
17*30eeed2cSThomas Cort * without specific prior written permission.
18*30eeed2cSThomas Cort *
19*30eeed2cSThomas Cort * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20*30eeed2cSThomas Cort * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21*30eeed2cSThomas Cort * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22*30eeed2cSThomas Cort * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23*30eeed2cSThomas Cort * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24*30eeed2cSThomas Cort * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25*30eeed2cSThomas Cort * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26*30eeed2cSThomas Cort * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27*30eeed2cSThomas Cort * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28*30eeed2cSThomas Cort * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29*30eeed2cSThomas Cort * SUCH DAMAGE.
30*30eeed2cSThomas Cort */
31*30eeed2cSThomas Cort
32*30eeed2cSThomas Cort #include <sys/cdefs.h>
33*30eeed2cSThomas Cort #ifndef lint
34*30eeed2cSThomas Cort __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
35*30eeed2cSThomas Cort The Regents of the University of California. All rights reserved.");
36*30eeed2cSThomas Cort #endif /* not lint */
37*30eeed2cSThomas Cort
38*30eeed2cSThomas Cort #ifndef lint
39*30eeed2cSThomas Cort #if 0
40*30eeed2cSThomas Cort static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
41*30eeed2cSThomas Cort #endif
42*30eeed2cSThomas Cort __RCSID("$NetBSD: split.c,v 1.26 2011/09/16 15:39:29 joerg Exp $");
43*30eeed2cSThomas Cort #endif /* not lint */
44*30eeed2cSThomas Cort
45*30eeed2cSThomas Cort #include <sys/param.h>
46*30eeed2cSThomas Cort
47*30eeed2cSThomas Cort #include <ctype.h>
48*30eeed2cSThomas Cort #include <err.h>
49*30eeed2cSThomas Cort #include <errno.h>
50*30eeed2cSThomas Cort #include <fcntl.h>
51*30eeed2cSThomas Cort #include <stdio.h>
52*30eeed2cSThomas Cort #include <stdlib.h>
53*30eeed2cSThomas Cort #include <string.h>
54*30eeed2cSThomas Cort #include <unistd.h>
55*30eeed2cSThomas Cort
#define DEFLINE	1000			/* Default number of lines per output file. */

/*
 * Nonzero while the current output file can still take more data; cleared
 * when a chunk ends exactly at the end of a read buffer, so that the next
 * read starts a new file.
 */
static int file_open;
/* Input descriptor (stdin by default); ofd stays -1 until the first output file is opened. */
static int ifd = STDIN_FILENO, ofd = -1;
/* Output file name buffer: the prefix, followed by room for the suffix written by newfile(). */
static char *fname;
static size_t sfxlen = 2;		/* Suffix length in characters (-a). */

static void newfile(void);
static void split1(off_t, int) __dead;
static void split2(off_t) __dead;
static void split3(off_t) __dead;
static void usage(void) __dead;
static size_t bigwrite(int, void const *, size_t);
69*30eeed2cSThomas Cort
70*30eeed2cSThomas Cort int
main(int argc,char * argv[])71*30eeed2cSThomas Cort main(int argc, char *argv[])
72*30eeed2cSThomas Cort {
73*30eeed2cSThomas Cort int ch;
74*30eeed2cSThomas Cort char *ep, *p;
75*30eeed2cSThomas Cort char const *base;
76*30eeed2cSThomas Cort off_t bytecnt = 0; /* Byte count to split on. */
77*30eeed2cSThomas Cort off_t numlines = 0; /* Line count to split on. */
78*30eeed2cSThomas Cort off_t chunks = 0; /* Number of chunks to split into. */
79*30eeed2cSThomas Cort
80*30eeed2cSThomas Cort while ((ch = getopt(argc, argv, "0123456789b:l:a:n:")) != -1)
81*30eeed2cSThomas Cort switch (ch) {
82*30eeed2cSThomas Cort case '0': case '1': case '2': case '3': case '4':
83*30eeed2cSThomas Cort case '5': case '6': case '7': case '8': case '9':
84*30eeed2cSThomas Cort /*
85*30eeed2cSThomas Cort * Undocumented kludge: split was originally designed
86*30eeed2cSThomas Cort * to take a number after a dash.
87*30eeed2cSThomas Cort */
88*30eeed2cSThomas Cort if (numlines == 0) {
89*30eeed2cSThomas Cort p = argv[optind - 1];
90*30eeed2cSThomas Cort if (p[0] == '-' && p[1] == ch && !p[2])
91*30eeed2cSThomas Cort p++;
92*30eeed2cSThomas Cort else
93*30eeed2cSThomas Cort p = argv[optind] + 1;
94*30eeed2cSThomas Cort numlines = strtoull(p, &ep, 10);
95*30eeed2cSThomas Cort if (numlines == 0 || *ep != '\0')
96*30eeed2cSThomas Cort errx(1, "%s: illegal line count.", p);
97*30eeed2cSThomas Cort }
98*30eeed2cSThomas Cort break;
99*30eeed2cSThomas Cort case 'b': /* Byte count. */
100*30eeed2cSThomas Cort if (!isdigit((unsigned char)optarg[0]) ||
101*30eeed2cSThomas Cort (bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
102*30eeed2cSThomas Cort (*ep != '\0' && *ep != 'k' && *ep != 'm'))
103*30eeed2cSThomas Cort errx(1, "%s: illegal byte count.", optarg);
104*30eeed2cSThomas Cort if (*ep == 'k')
105*30eeed2cSThomas Cort bytecnt *= 1024;
106*30eeed2cSThomas Cort else if (*ep == 'm')
107*30eeed2cSThomas Cort bytecnt *= 1024 * 1024;
108*30eeed2cSThomas Cort break;
109*30eeed2cSThomas Cort case 'l': /* Line count. */
110*30eeed2cSThomas Cort if (numlines != 0)
111*30eeed2cSThomas Cort usage();
112*30eeed2cSThomas Cort if (!isdigit((unsigned char)optarg[0]) ||
113*30eeed2cSThomas Cort (numlines = strtoull(optarg, &ep, 10)) == 0 ||
114*30eeed2cSThomas Cort *ep != '\0')
115*30eeed2cSThomas Cort errx(1, "%s: illegal line count.", optarg);
116*30eeed2cSThomas Cort break;
117*30eeed2cSThomas Cort case 'a': /* Suffix length. */
118*30eeed2cSThomas Cort if (!isdigit((unsigned char)optarg[0]) ||
119*30eeed2cSThomas Cort (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
120*30eeed2cSThomas Cort *ep != '\0')
121*30eeed2cSThomas Cort errx(1, "%s: illegal suffix length.", optarg);
122*30eeed2cSThomas Cort break;
123*30eeed2cSThomas Cort case 'n': /* Chunks. */
124*30eeed2cSThomas Cort if (!isdigit((unsigned char)optarg[0]) ||
125*30eeed2cSThomas Cort (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
126*30eeed2cSThomas Cort *ep != '\0')
127*30eeed2cSThomas Cort errx(1, "%s: illegal number of chunks.", optarg);
128*30eeed2cSThomas Cort break;
129*30eeed2cSThomas Cort default:
130*30eeed2cSThomas Cort usage();
131*30eeed2cSThomas Cort }
132*30eeed2cSThomas Cort argv += optind;
133*30eeed2cSThomas Cort argc -= optind;
134*30eeed2cSThomas Cort
135*30eeed2cSThomas Cort if (*argv != NULL) {
136*30eeed2cSThomas Cort if (strcmp(*argv, "-") != 0 &&
137*30eeed2cSThomas Cort (ifd = open(*argv, O_RDONLY, 0)) < 0)
138*30eeed2cSThomas Cort err(1, "%s", *argv);
139*30eeed2cSThomas Cort ++argv;
140*30eeed2cSThomas Cort }
141*30eeed2cSThomas Cort
142*30eeed2cSThomas Cort
143*30eeed2cSThomas Cort base = (*argv != NULL) ? *argv++ : "x";
144*30eeed2cSThomas Cort if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
145*30eeed2cSThomas Cort err(EXIT_FAILURE, NULL);
146*30eeed2cSThomas Cort (void)strcpy(fname, base); /* File name prefix. */
147*30eeed2cSThomas Cort
148*30eeed2cSThomas Cort if (*argv != NULL)
149*30eeed2cSThomas Cort usage();
150*30eeed2cSThomas Cort
151*30eeed2cSThomas Cort if (numlines == 0)
152*30eeed2cSThomas Cort numlines = DEFLINE;
153*30eeed2cSThomas Cort else if (bytecnt || chunks)
154*30eeed2cSThomas Cort usage();
155*30eeed2cSThomas Cort
156*30eeed2cSThomas Cort if (bytecnt && chunks)
157*30eeed2cSThomas Cort usage();
158*30eeed2cSThomas Cort
159*30eeed2cSThomas Cort if (bytecnt)
160*30eeed2cSThomas Cort split1(bytecnt, 0);
161*30eeed2cSThomas Cort else if (chunks)
162*30eeed2cSThomas Cort split3(chunks);
163*30eeed2cSThomas Cort else
164*30eeed2cSThomas Cort split2(numlines);
165*30eeed2cSThomas Cort
166*30eeed2cSThomas Cort return 0;
167*30eeed2cSThomas Cort }
168*30eeed2cSThomas Cort
169*30eeed2cSThomas Cort /*
170*30eeed2cSThomas Cort * split1 --
171*30eeed2cSThomas Cort * Split the input by bytes.
172*30eeed2cSThomas Cort */
173*30eeed2cSThomas Cort static void
split1(off_t bytecnt,int maxcnt)174*30eeed2cSThomas Cort split1(off_t bytecnt, int maxcnt)
175*30eeed2cSThomas Cort {
176*30eeed2cSThomas Cort off_t bcnt;
177*30eeed2cSThomas Cort ssize_t dist, len;
178*30eeed2cSThomas Cort char *C;
179*30eeed2cSThomas Cort char bfr[MAXBSIZE];
180*30eeed2cSThomas Cort int nfiles;
181*30eeed2cSThomas Cort
182*30eeed2cSThomas Cort nfiles = 0;
183*30eeed2cSThomas Cort
184*30eeed2cSThomas Cort for (bcnt = 0;;)
185*30eeed2cSThomas Cort switch (len = read(ifd, bfr, MAXBSIZE)) {
186*30eeed2cSThomas Cort case 0:
187*30eeed2cSThomas Cort exit(0);
188*30eeed2cSThomas Cort /* NOTREACHED */
189*30eeed2cSThomas Cort case -1:
190*30eeed2cSThomas Cort err(1, "read");
191*30eeed2cSThomas Cort /* NOTREACHED */
192*30eeed2cSThomas Cort default:
193*30eeed2cSThomas Cort if (!file_open) {
194*30eeed2cSThomas Cort if (!maxcnt || (nfiles < maxcnt)) {
195*30eeed2cSThomas Cort newfile();
196*30eeed2cSThomas Cort nfiles++;
197*30eeed2cSThomas Cort file_open = 1;
198*30eeed2cSThomas Cort }
199*30eeed2cSThomas Cort }
200*30eeed2cSThomas Cort if (bcnt + len >= bytecnt) {
201*30eeed2cSThomas Cort /* LINTED: bytecnt - bcnt <= len */
202*30eeed2cSThomas Cort dist = bytecnt - bcnt;
203*30eeed2cSThomas Cort if (bigwrite(ofd, bfr, dist) != (size_t)dist)
204*30eeed2cSThomas Cort err(1, "write");
205*30eeed2cSThomas Cort len -= dist;
206*30eeed2cSThomas Cort for (C = bfr + dist; len >= bytecnt;
207*30eeed2cSThomas Cort /* LINTED: bytecnt <= len */
208*30eeed2cSThomas Cort len -= bytecnt, C += bytecnt) {
209*30eeed2cSThomas Cort if (!maxcnt || (nfiles < maxcnt)) {
210*30eeed2cSThomas Cort newfile();
211*30eeed2cSThomas Cort nfiles++;
212*30eeed2cSThomas Cort }
213*30eeed2cSThomas Cort /* LINTED: as above */
214*30eeed2cSThomas Cort if (bigwrite(ofd,
215*30eeed2cSThomas Cort C, bytecnt) != (size_t)bytecnt)
216*30eeed2cSThomas Cort err(1, "write");
217*30eeed2cSThomas Cort }
218*30eeed2cSThomas Cort if (len) {
219*30eeed2cSThomas Cort if (!maxcnt || (nfiles < maxcnt)) {
220*30eeed2cSThomas Cort newfile();
221*30eeed2cSThomas Cort nfiles++;
222*30eeed2cSThomas Cort }
223*30eeed2cSThomas Cort /* LINTED: len >= 0 */
224*30eeed2cSThomas Cort if (bigwrite(ofd, C, len) != (size_t)len)
225*30eeed2cSThomas Cort err(1, "write");
226*30eeed2cSThomas Cort } else
227*30eeed2cSThomas Cort file_open = 0;
228*30eeed2cSThomas Cort bcnt = len;
229*30eeed2cSThomas Cort } else {
230*30eeed2cSThomas Cort bcnt += len;
231*30eeed2cSThomas Cort /* LINTED: len >= 0 */
232*30eeed2cSThomas Cort if (bigwrite(ofd, bfr, len) != (size_t)len)
233*30eeed2cSThomas Cort err(1, "write");
234*30eeed2cSThomas Cort }
235*30eeed2cSThomas Cort }
236*30eeed2cSThomas Cort }
237*30eeed2cSThomas Cort
238*30eeed2cSThomas Cort /*
239*30eeed2cSThomas Cort * split2 --
240*30eeed2cSThomas Cort * Split the input by lines.
241*30eeed2cSThomas Cort */
242*30eeed2cSThomas Cort static void
split2(off_t numlines)243*30eeed2cSThomas Cort split2(off_t numlines)
244*30eeed2cSThomas Cort {
245*30eeed2cSThomas Cort off_t lcnt;
246*30eeed2cSThomas Cort size_t bcnt;
247*30eeed2cSThomas Cort ssize_t len;
248*30eeed2cSThomas Cort char *Ce, *Cs;
249*30eeed2cSThomas Cort char bfr[MAXBSIZE];
250*30eeed2cSThomas Cort
251*30eeed2cSThomas Cort for (lcnt = 0;;)
252*30eeed2cSThomas Cort switch (len = read(ifd, bfr, MAXBSIZE)) {
253*30eeed2cSThomas Cort case 0:
254*30eeed2cSThomas Cort exit(0);
255*30eeed2cSThomas Cort /* NOTREACHED */
256*30eeed2cSThomas Cort case -1:
257*30eeed2cSThomas Cort err(1, "read");
258*30eeed2cSThomas Cort /* NOTREACHED */
259*30eeed2cSThomas Cort default:
260*30eeed2cSThomas Cort if (!file_open) {
261*30eeed2cSThomas Cort newfile();
262*30eeed2cSThomas Cort file_open = 1;
263*30eeed2cSThomas Cort }
264*30eeed2cSThomas Cort for (Cs = Ce = bfr; len--; Ce++)
265*30eeed2cSThomas Cort if (*Ce == '\n' && ++lcnt == numlines) {
266*30eeed2cSThomas Cort bcnt = Ce - Cs + 1;
267*30eeed2cSThomas Cort if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
268*30eeed2cSThomas Cort err(1, "write");
269*30eeed2cSThomas Cort lcnt = 0;
270*30eeed2cSThomas Cort Cs = Ce + 1;
271*30eeed2cSThomas Cort if (len)
272*30eeed2cSThomas Cort newfile();
273*30eeed2cSThomas Cort else
274*30eeed2cSThomas Cort file_open = 0;
275*30eeed2cSThomas Cort }
276*30eeed2cSThomas Cort if (Cs < Ce) {
277*30eeed2cSThomas Cort bcnt = Ce - Cs;
278*30eeed2cSThomas Cort if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
279*30eeed2cSThomas Cort err(1, "write");
280*30eeed2cSThomas Cort }
281*30eeed2cSThomas Cort }
282*30eeed2cSThomas Cort }
283*30eeed2cSThomas Cort
284*30eeed2cSThomas Cort /*
285*30eeed2cSThomas Cort * split3 --
286*30eeed2cSThomas Cort * Split the input into specified number of chunks
287*30eeed2cSThomas Cort */
288*30eeed2cSThomas Cort static void
split3(off_t chunks)289*30eeed2cSThomas Cort split3(off_t chunks)
290*30eeed2cSThomas Cort {
291*30eeed2cSThomas Cort struct stat sb;
292*30eeed2cSThomas Cort
293*30eeed2cSThomas Cort if (fstat(ifd, &sb) == -1) {
294*30eeed2cSThomas Cort err(1, "stat");
295*30eeed2cSThomas Cort /* NOTREACHED */
296*30eeed2cSThomas Cort }
297*30eeed2cSThomas Cort
298*30eeed2cSThomas Cort if (chunks > sb.st_size) {
299*30eeed2cSThomas Cort errx(1, "can't split into more than %d files",
300*30eeed2cSThomas Cort (int)sb.st_size);
301*30eeed2cSThomas Cort /* NOTREACHED */
302*30eeed2cSThomas Cort }
303*30eeed2cSThomas Cort
304*30eeed2cSThomas Cort split1(sb.st_size/chunks, chunks);
305*30eeed2cSThomas Cort }
306*30eeed2cSThomas Cort
307*30eeed2cSThomas Cort /*
308*30eeed2cSThomas Cort * newfile --
309*30eeed2cSThomas Cort * Open a new output file.
310*30eeed2cSThomas Cort */
311*30eeed2cSThomas Cort static void
newfile(void)312*30eeed2cSThomas Cort newfile(void)
313*30eeed2cSThomas Cort {
314*30eeed2cSThomas Cort static int fnum;
315*30eeed2cSThomas Cort static char *fpnt;
316*30eeed2cSThomas Cort int quot, i;
317*30eeed2cSThomas Cort
318*30eeed2cSThomas Cort if (ofd == -1) {
319*30eeed2cSThomas Cort fpnt = fname + strlen(fname);
320*30eeed2cSThomas Cort fpnt[sfxlen] = '\0';
321*30eeed2cSThomas Cort } else if (close(ofd) != 0)
322*30eeed2cSThomas Cort err(1, "%s", fname);
323*30eeed2cSThomas Cort
324*30eeed2cSThomas Cort quot = fnum;
325*30eeed2cSThomas Cort for (i = sfxlen - 1; i >= 0; i--) {
326*30eeed2cSThomas Cort fpnt[i] = quot % 26 + 'a';
327*30eeed2cSThomas Cort quot = quot / 26;
328*30eeed2cSThomas Cort }
329*30eeed2cSThomas Cort if (quot > 0)
330*30eeed2cSThomas Cort errx(1, "too many files.");
331*30eeed2cSThomas Cort ++fnum;
332*30eeed2cSThomas Cort if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
333*30eeed2cSThomas Cort err(1, "%s", fname);
334*30eeed2cSThomas Cort }
335*30eeed2cSThomas Cort
/*
 * bigwrite --
 *	Write len bytes from buf to fd, calling write(2) again after short
 *	writes until everything has been written.
 *
 *	Returns the number of bytes written.  A result smaller than len
 *	means the write failed; errno then describes the failure.
 */
static size_t
bigwrite(int fd, const void *buf, size_t len)
{
	const char *ptr = buf;
	size_t sofar = 0;
	ssize_t w;

	while (sofar < len) {
		w = write(fd, ptr + sofar, len - sofar);
		if (w == -1) {
			/* An interrupted call wrote nothing; just retry. */
			if (errno == EINTR)
				continue;
			break;
		}
		if (w == 0) {
			/*
			 * No progress and no error: give up rather than
			 * spin, and leave callers a meaningful errno.
			 */
			errno = EIO;
			break;
		}
		sofar += (size_t)w;
	}
	return sofar;
}
352*30eeed2cSThomas Cort
353*30eeed2cSThomas Cort
/*
 * usage --
 *	Print the command synopsis on standard error and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr, "usage: %s "
	    "[-b byte_count] [-l line_count] [-n chunk_count] "
	    "[-a suffix_length] [file [prefix]]\n", prog);
	exit(1);
}
362