1 /* $NetBSD: split.c,v 1.33 2024/02/09 22:08:38 andvar Exp $ */
2
3 /*
4 * Copyright (c) 1987, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
41 #endif
42 __RCSID("$NetBSD: split.c,v 1.33 2024/02/09 22:08:38 andvar Exp $");
43 #endif /* not lint */
44
45 #include <sys/param.h>
46 #include <sys/stat.h>
47
48 #include <ctype.h>
49 #include <err.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <unistd.h>
56
#define DEFLINE	1000	/* Lines per output file when no count is given. */

/* State shared by the splitters and newfile(). */
static int file_open;		/* Nonzero while an output file is open. */
static int ifd = STDIN_FILENO;	/* Input file descriptor. */
static int ofd = -1;		/* Output file descriptor; -1 before the first open. */
static char *fname;		/* Output file name, starting with the prefix. */
static size_t sfxlen = 2;	/* Suffix length. */
static int autosfx = 1;		/* Nonzero to auto-extend the suffix length. */

static void split1(off_t bytecnt, int maxcnt) __dead;
static void split2(off_t numlines) __dead;
static void split3(off_t chunks) __dead;
static void usage(void) __dead;
static void newfile(void);
static size_t bigwrite(int fd, void const *buf, size_t len);
71
72 int
main(int argc,char * argv[])73 main(int argc, char *argv[])
74 {
75 int ch;
76 char *ep, *p;
77 char const *base;
78 off_t bytecnt = 0; /* Byte count to split on. */
79 off_t numlines = 0; /* Line count to split on. */
80 off_t chunks = 0; /* Number of chunks to split into. */
81
82 while ((ch = getopt(argc, argv, "0123456789a:b:l:n:")) != -1)
83 switch (ch) {
84 case '0': case '1': case '2': case '3': case '4':
85 case '5': case '6': case '7': case '8': case '9':
86 /*
87 * Undocumented kludge: split was originally designed
88 * to take a number after a dash.
89 */
90 if (numlines == 0) {
91 p = argv[optind - 1];
92 if (p[0] == '-' && p[1] == ch && !p[2])
93 p++;
94 else
95 p = argv[optind] + 1;
96 numlines = strtoull(p, &ep, 10);
97 if (numlines == 0 || *ep != '\0')
98 errx(EXIT_FAILURE, "%s: illegal line count.", p);
99 }
100 break;
101 case 'a': /* Suffix length. */
102 if (!isdigit((unsigned char)optarg[0]) ||
103 (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
104 *ep != '\0')
105 errx(EXIT_FAILURE, "%s: illegal suffix length.", optarg);
106 autosfx = 0;
107 break;
108 case 'b': /* Byte count. */
109 if (!isdigit((unsigned char)optarg[0]) ||
110 (bytecnt = strtoull(optarg, &ep, 10)) == 0 ||
111 (*ep != '\0' && *ep != 'k' && *ep != 'm'))
112 errx(EXIT_FAILURE, "%s: illegal byte count.", optarg);
113 if (*ep == 'k')
114 bytecnt *= 1024;
115 else if (*ep == 'm')
116 bytecnt *= 1024 * 1024;
117 break;
118 case 'l': /* Line count. */
119 if (numlines != 0)
120 usage();
121 if (!isdigit((unsigned char)optarg[0]) ||
122 (numlines = strtoull(optarg, &ep, 10)) == 0 ||
123 *ep != '\0')
124 errx(EXIT_FAILURE, "%s: illegal line count.", optarg);
125 break;
126 case 'n': /* Chunks. */
127 if (!isdigit((unsigned char)optarg[0]) ||
128 (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
129 *ep != '\0')
130 errx(EXIT_FAILURE, "%s: illegal number of chunks.", optarg);
131 break;
132 default:
133 usage();
134 }
135 argv += optind;
136 argc -= optind;
137
138 if (*argv != NULL) {
139 if (strcmp(*argv, "-") != 0 &&
140 (ifd = open(*argv, O_RDONLY, 0)) < 0)
141 err(EXIT_FAILURE, "%s", *argv);
142 ++argv;
143 }
144
145
146 base = (*argv != NULL) ? *argv++ : "x";
147 if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL)
148 err(EXIT_FAILURE, NULL);
149 (void)strcpy(fname, base); /* File name prefix. */
150
151 if (*argv != NULL)
152 usage();
153
154 if (numlines == 0)
155 numlines = DEFLINE;
156 else if (bytecnt || chunks)
157 usage();
158
159 if (bytecnt && chunks)
160 usage();
161
162 if (bytecnt)
163 split1(bytecnt, 0);
164 else if (chunks)
165 split3(chunks);
166 else
167 split2(numlines);
168
169 return 0;
170 }
171
172 /*
173 * split1 --
174 * Split the input by bytes.
175 */
176 static void
split1(off_t bytecnt,int maxcnt)177 split1(off_t bytecnt, int maxcnt)
178 {
179 off_t bcnt;
180 ssize_t dist, len;
181 char *C;
182 char bfr[MAXBSIZE];
183 int nfiles;
184
185 nfiles = 0;
186
187 for (bcnt = 0;;)
188 switch (len = read(ifd, bfr, MAXBSIZE)) {
189 case 0:
190 exit(EXIT_SUCCESS);
191 /* NOTREACHED */
192 case -1:
193 err(EXIT_FAILURE, "read");
194 /* NOTREACHED */
195 default:
196 if (!file_open) {
197 if (!maxcnt || (nfiles < maxcnt)) {
198 newfile();
199 nfiles++;
200 file_open = 1;
201 }
202 }
203 if (bcnt + len >= bytecnt) {
204 /* LINTED: bytecnt - bcnt <= len */
205 dist = bytecnt - bcnt;
206 if (bigwrite(ofd, bfr, dist) != (size_t)dist)
207 err(EXIT_FAILURE, "write");
208 len -= dist;
209 for (C = bfr + dist; len >= bytecnt;
210 /* LINTED: bytecnt <= len */
211 len -= bytecnt, C += bytecnt) {
212 if (!maxcnt || (nfiles < maxcnt)) {
213 newfile();
214 nfiles++;
215 }
216 /* LINTED: as above */
217 if (bigwrite(ofd,
218 C, bytecnt) != (size_t)bytecnt)
219 err(EXIT_FAILURE, "write");
220 }
221 if (len) {
222 if (!maxcnt || (nfiles < maxcnt)) {
223 newfile();
224 nfiles++;
225 }
226 /* LINTED: len >= 0 */
227 if (bigwrite(ofd, C, len) != (size_t)len)
228 err(EXIT_FAILURE, "write");
229 } else
230 file_open = 0;
231 bcnt = len;
232 } else {
233 bcnt += len;
234 /* LINTED: len >= 0 */
235 if (bigwrite(ofd, bfr, len) != (size_t)len)
236 err(EXIT_FAILURE, "write");
237 }
238 }
239 }
240
241 /*
242 * split2 --
243 * Split the input by lines.
244 */
245 static void
split2(off_t numlines)246 split2(off_t numlines)
247 {
248 off_t lcnt;
249 size_t bcnt;
250 ssize_t len;
251 char *Ce, *Cs;
252 char bfr[MAXBSIZE];
253
254 for (lcnt = 0;;)
255 switch (len = read(ifd, bfr, MAXBSIZE)) {
256 case 0:
257 exit(EXIT_SUCCESS);
258 /* NOTREACHED */
259 case -1:
260 err(EXIT_FAILURE, "read");
261 /* NOTREACHED */
262 default:
263 if (!file_open) {
264 newfile();
265 file_open = 1;
266 }
267 for (Cs = Ce = bfr; len--; Ce++)
268 if (*Ce == '\n' && ++lcnt == numlines) {
269 bcnt = Ce - Cs + 1;
270 if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
271 err(EXIT_FAILURE, "write");
272 lcnt = 0;
273 Cs = Ce + 1;
274 if (len)
275 newfile();
276 else
277 file_open = 0;
278 }
279 if (Cs < Ce) {
280 bcnt = Ce - Cs;
281 if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt)
282 err(EXIT_FAILURE, "write");
283 }
284 }
285 }
286
287 /*
288 * split3 --
289 * Split the input into specified number of chunks
290 */
291 static void
split3(off_t chunks)292 split3(off_t chunks)
293 {
294 struct stat sb;
295
296 if (fstat(ifd, &sb) == -1) {
297 err(EXIT_FAILURE, "stat");
298 /* NOTREACHED */
299 }
300
301 if (chunks > sb.st_size) {
302 errx(EXIT_FAILURE, "can't split into more than %d files",
303 (int)sb.st_size);
304 /* NOTREACHED */
305 }
306
307 split1(sb.st_size/chunks, chunks);
308 }
309
310 /*
311 * newfile --
312 * Open a new output file.
313 */
314 static void
newfile(void)315 newfile(void)
316 {
317 static int fnum;
318 static char *fpnt;
319 int quot, i;
320
321 if (ofd == -1) {
322 fpnt = fname + strlen(fname);
323 fpnt[sfxlen] = '\0';
324 } else if (close(ofd) != 0)
325 err(EXIT_FAILURE, "%s", fname);
326
327 quot = fnum;
328
329 /* If '-a' is not specified, then we automatically expand the
330 * suffix length to accommodate splitting all input. We do this
331 * by moving the suffix pointer (fpnt) forward and incrementing
332 * sfxlen by one, thereby yielding an additional two characters
333 * and allowing all output files to sort such that 'cat *' yields
334 * the input in order. I.e., the order is '... xyy xyz xzaaa
335 * xzaab ... xzyzy, xzyzz, xzzaaaa, xzzaaab' and so on. */
336 if (autosfx && (fpnt[0] == 'y') && (strspn(fpnt+1, "z") == strlen(fpnt+1))) {
337 if ((fname = realloc(fname, strlen(fname) + sfxlen + 2 + 1)) == NULL)
338 err(EXIT_FAILURE, NULL);
339 /* NOTREACHED */
340
341 fpnt = fname + strlen(fname) - sfxlen;
342 fpnt[sfxlen + 2] = '\0';
343
344 fpnt[0] = 'z';
345 fpnt[1] = 'a';
346
347 /* Basename | Suffix
348 * before:
349 * x | yz
350 * after:
351 * xz | a.. */
352 fpnt++;
353 sfxlen++;
354
355 /* Reset so we start back at all 'a's in our extended suffix. */
356 quot = 0;
357 fnum = 0;
358 }
359
360 for (i = sfxlen - 1; i >= 0; i--) {
361 fpnt[i] = quot % 26 + 'a';
362 quot = quot / 26;
363 }
364 if (quot > 0)
365 errx(EXIT_FAILURE, "too many files.");
366 ++fnum;
367 if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0)
368 err(EXIT_FAILURE, "%s", fname);
369 }
370
/*
 * bigwrite --
 *	Write len bytes from buf to fd, calling write() again after a
 *	short write until everything is out or write() fails.  Returns
 *	the number of bytes written, which is less than len only if
 *	write() returned -1.
 */
static size_t
bigwrite(int fd, const void *buf, size_t len)
{
	const char *data = buf;
	size_t done;
	ssize_t n;

	for (done = 0; done < len; done += n) {
		n = write(fd, data + done, len - done);
		if (n == -1)
			break;
	}
	return done;
}
387
388
/*
 * usage --
 *	Print the synopsis to standard error and exit with failure.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_length] "
	    "[file [prefix]]\n", prog);
	exit(EXIT_FAILURE);
}
397