1 /* $NetBSD: split.c,v 1.13 2003/06/24 18:45:08 bjh21 Exp $ */ 2 3 /* 4 * Copyright (c) 1987, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\n\ 39 The Regents of the University of California. All rights reserved.\n"); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94"; 45 #endif 46 __RCSID("$NetBSD: split.c,v 1.13 2003/06/24 18:45:08 bjh21 Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/param.h> 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <errno.h> 54 #include <fcntl.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 60 #define DEFLINE 1000 /* Default num lines per file. */ 61 62 static int file_open; /* If a file open. */ 63 static int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 64 static char *fname; /* File name prefix. */ 65 static int sfxlen = 2; /* suffix length. */ 66 67 int main(int, char **); 68 static void newfile(void); 69 static void split1(unsigned long long); 70 static void split2(unsigned long long); 71 static void usage(void) __attribute__((__noreturn__)); 72 static unsigned long long bigwrite __P((int, const void *, unsigned long long)); 73 74 int 75 main(int argc, char *argv[]) 76 { 77 int ch; 78 char *ep, *p; 79 unsigned long long bytecnt = 0; /* Byte count to split on. */ 80 unsigned long long numlines = 0;/* Line count to split on. */ 81 size_t namelen; 82 long name_max; 83 84 while ((ch = getopt(argc, argv, "-0123456789b:l:a:")) != -1) 85 switch (ch) { 86 case '0': case '1': case '2': case '3': case '4': 87 case '5': case '6': case '7': case '8': case '9': 88 /* 89 * Undocumented kludge: split was originally designed 90 * to take a number after a dash. 91 */ 92 if (numlines == 0) { 93 p = argv[optind - 1]; 94 if (p[0] == '-' && p[1] == ch && !p[2]) 95 numlines = strtol(++p, &ep, 10); 96 else 97 numlines = 98 strtol(argv[optind] + 1, &ep, 10); 99 if (numlines <= 0 || *ep) 100 errx(1, 101 "%s: illegal line count.", optarg); 102 } 103 break; 104 case '-': /* stdin flag. */ 105 if (ifd != -1) 106 usage(); 107 ifd = 0; 108 break; 109 case 'b': /* Byte count. */ 110 if ((bytecnt = strtoull(optarg, &ep, 10)) <= 0 || 111 (*ep != '\0' && *ep != 'k' && *ep != 'm')) 112 errx(1, "%s: illegal byte count.", optarg); 113 if (*ep == 'k') 114 bytecnt *= 1024; 115 else if (*ep == 'm') 116 bytecnt *= 1024 * 1024; 117 break; 118 case 'l': /* Line count. */ 119 if (numlines != 0) 120 usage(); 121 if ((numlines = strtoull(optarg, &ep, 10)) <= 0 || *ep) 122 errx(1, "%s: illegal line count.", optarg); 123 break; 124 case 'a': /* Suffix length. */ 125 if ((sfxlen = strtol(optarg, &ep, 10)) <= 0 || *ep) 126 errx(1, "%s: illegal suffix length.", optarg); 127 break; 128 default: 129 usage(); 130 } 131 argv += optind; 132 argc -= optind; 133 134 if (*argv != NULL) 135 if (ifd == -1) { /* Input file. */ 136 if (strcmp(*argv, "-") == 0) 137 ifd = STDIN_FILENO; 138 else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 139 err(1, "%s", *argv); 140 ++argv; 141 } 142 143 errno = 0; 144 if ((name_max = pathconf(".", _PC_NAME_MAX)) == -1 && 145 errno != 0) 146 err(EXIT_FAILURE, "pathconf"); 147 if (*argv != NULL) { 148 namelen = strlen(*argv) + sfxlen; 149 if (name_max != -1 && namelen > name_max) 150 errx(EXIT_FAILURE, "Output file name too long"); 151 if ((fname = malloc(namelen + 1)) == NULL) 152 err(EXIT_FAILURE, NULL); 153 (void)strcpy(fname, *argv++); /* File name prefix. */ 154 } else { 155 if (name_max != -1 && 1 + sfxlen > name_max) 156 errx(EXIT_FAILURE, "Output file name too long"); 157 if ((fname = malloc(sfxlen + 2)) == NULL) 158 err(EXIT_FAILURE, NULL); 159 fname[0] = '\0'; 160 } 161 162 if (*argv != NULL) 163 usage(); 164 165 if (numlines == 0) 166 numlines = DEFLINE; 167 else if (bytecnt) 168 usage(); 169 170 if (ifd == -1) /* Stdin by default. */ 171 ifd = 0; 172 173 if (bytecnt) 174 split1(bytecnt); 175 else 176 split2(numlines); 177 178 return 0; 179 } 180 181 /* 182 * split1 -- 183 * Split the input by bytes. 184 */ 185 static void 186 split1(unsigned long long bytecnt) 187 { 188 unsigned long long bcnt, dist; 189 ssize_t len; 190 char *C; 191 char bfr[MAXBSIZE]; 192 193 for (bcnt = 0;;) 194 switch (len = read(ifd, bfr, MAXBSIZE)) { 195 case 0: 196 exit(0); 197 /* NOTREACHED */ 198 case -1: 199 err(1, "read"); 200 /* NOTREACHED */ 201 default: 202 if (!file_open) { 203 newfile(); 204 file_open = 1; 205 } 206 if (bcnt + len >= bytecnt) { 207 dist = bytecnt - bcnt; 208 if (bigwrite(ofd, bfr, dist) != dist) 209 err(1, "write"); 210 len -= dist; 211 for (C = bfr + dist; len >= bytecnt; 212 len -= bytecnt, C += bytecnt) { 213 newfile(); 214 if (bigwrite(ofd, 215 C, (int)bytecnt) != bytecnt) 216 err(1, "write"); 217 } 218 if (len) { 219 newfile(); 220 if (bigwrite(ofd, C, len) != len) 221 err(1, "write"); 222 } else 223 file_open = 0; 224 bcnt = len; 225 } else { 226 bcnt += len; 227 if (bigwrite(ofd, bfr, len) != len) 228 err(1, "write"); 229 } 230 } 231 } 232 233 /* 234 * split2 -- 235 * Split the input by lines. 236 */ 237 static void 238 split2(unsigned long long numlines) 239 { 240 unsigned long long lcnt, bcnt; 241 ssize_t len; 242 char *Ce, *Cs; 243 char bfr[MAXBSIZE]; 244 245 for (lcnt = 0;;) 246 switch (len = read(ifd, bfr, MAXBSIZE)) { 247 case 0: 248 exit(0); 249 /* NOTREACHED */ 250 case -1: 251 err(1, "read"); 252 /* NOTREACHED */ 253 default: 254 if (!file_open) { 255 newfile(); 256 file_open = 1; 257 } 258 for (Cs = Ce = bfr; len--; Ce++) 259 if (*Ce == '\n' && ++lcnt == numlines) { 260 bcnt = Ce - Cs + 1; 261 if (bigwrite(ofd, Cs, bcnt) != bcnt) 262 err(1, "write"); 263 lcnt = 0; 264 Cs = Ce + 1; 265 if (len) 266 newfile(); 267 else 268 file_open = 0; 269 } 270 if (Cs < Ce) { 271 bcnt = Ce - Cs; 272 if (bigwrite(ofd, Cs, bcnt) != bcnt) 273 err(1, "write"); 274 } 275 } 276 } 277 278 /* 279 * newfile -- 280 * Open a new output file. 281 */ 282 static void 283 newfile(void) 284 { 285 static int fnum; 286 static int defname; 287 static char *fpnt; 288 int quot, i; 289 290 if (ofd == -1) { 291 if (fname[0] == '\0') { 292 fname[0] = 'x'; 293 fpnt = fname + 1; 294 defname = 1; 295 } else { 296 fpnt = fname + strlen(fname); 297 defname = 0; 298 } 299 ofd = fileno(stdout); 300 } 301 /* 302 * Hack to increase max files; original code wandered through 303 * magic characters. Maximum files is 3 * 26 * 26 == 2028 304 */ 305 fpnt[sfxlen] = '\0'; 306 quot = fnum; 307 for (i = sfxlen - 1; i >= 0; i--) { 308 fpnt[i] = quot % 26 + 'a'; 309 quot = quot / 26; 310 } 311 if (quot > 0) { 312 if (!defname || fname[0] == 'z') 313 errx(1, "too many files."); 314 ++fname[0]; 315 fnum = 0; 316 } 317 ++fnum; 318 if (!freopen(fname, "w", stdout)) 319 err(1, "%s", fname); 320 } 321 322 static unsigned long long 323 bigwrite(int fd, const void *buf, unsigned long long len) 324 { 325 const char *ptr = buf; 326 unsigned long long sofar = 0; 327 328 while (len != 0) { 329 ssize_t w, nw = (len > INT_MAX) ? INT_MAX : (ssize_t)len; 330 if ((w = write(fd, ptr, nw)) == -1) 331 return sofar; 332 len -= w; 333 ptr += w; 334 sofar += w; 335 } 336 return sofar; 337 } 338 339 340 static void 341 usage(void) 342 { 343 (void)fprintf(stderr, 344 "Usage: %s [-b byte_count] [-l line_count] [-a suffix_length] " 345 "[file [prefix]]\n", getprogname()); 346 exit(1); 347 } 348