/*	$NetBSD: split.c,v 1.9 2003/03/20 14:12:50 christos Exp $	*/

/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\n\ 39 The Regents of the University of California. All rights reserved.\n"); 40 #endif /* not lint */ 41 42 #ifndef lint 43 #if 0 44 static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94"; 45 #endif 46 __RCSID("$NetBSD: split.c,v 1.9 2003/03/20 14:12:50 christos Exp $"); 47 #endif /* not lint */ 48 49 #include <sys/param.h> 50 51 #include <ctype.h> 52 #include <err.h> 53 #include <fcntl.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <unistd.h> 58 59 #define DEFLINE 1000 /* Default num lines per file. */ 60 61 static int file_open; /* If a file open. */ 62 static int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 63 static char fname[MAXPATHLEN]; /* File name prefix. */ 64 65 int main(int, char **); 66 static void newfile(void); 67 static void split1(unsigned long long); 68 static void split2(unsigned long long); 69 static void usage(void) __attribute__((__noreturn__)); 70 static unsigned long long bigwrite __P((int, const void *, unsigned long long)); 71 72 int 73 main(int argc, char *argv[]) 74 { 75 int ch; 76 char *ep, *p; 77 unsigned long long bytecnt = 0; /* Byte count to split on. */ 78 unsigned long long numlines = 0;/* Line count to split on. */ 79 80 while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1) 81 switch (ch) { 82 case '0': case '1': case '2': case '3': case '4': 83 case '5': case '6': case '7': case '8': case '9': 84 /* 85 * Undocumented kludge: split was originally designed 86 * to take a number after a dash. 87 */ 88 if (numlines == 0) { 89 p = argv[optind - 1]; 90 if (p[0] == '-' && p[1] == ch && !p[2]) 91 numlines = strtol(++p, &ep, 10); 92 else 93 numlines = 94 strtol(argv[optind] + 1, &ep, 10); 95 if (numlines <= 0 || *ep) 96 errx(1, 97 "%s: illegal line count.", optarg); 98 } 99 break; 100 case '-': /* Undocumented: historic stdin flag. 
*/ 101 if (ifd != -1) 102 usage(); 103 ifd = 0; 104 break; 105 case 'b': /* Byte count. */ 106 if ((bytecnt = strtoull(optarg, &ep, 10)) <= 0 || 107 (*ep != '\0' && *ep != 'k' && *ep != 'm')) 108 errx(1, "%s: illegal byte count.", optarg); 109 if (*ep == 'k') 110 bytecnt *= 1024; 111 else if (*ep == 'm') 112 bytecnt *= 1024 * 1024; 113 break; 114 case 'l': /* Line count. */ 115 if (numlines != 0) 116 usage(); 117 if ((numlines = strtoull(optarg, &ep, 10)) <= 0 || *ep) 118 errx(1, "%s: illegal line count.", optarg); 119 break; 120 default: 121 usage(); 122 } 123 argv += optind; 124 argc -= optind; 125 126 if (*argv != NULL) 127 if (ifd == -1) { /* Input file. */ 128 if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 129 err(1, "%s", *argv); 130 ++argv; 131 } 132 if (*argv != NULL) /* File name prefix. */ 133 (void)strcpy(fname, *argv++); 134 if (*argv != NULL) 135 usage(); 136 137 if (numlines == 0) 138 numlines = DEFLINE; 139 else if (bytecnt) 140 usage(); 141 142 if (ifd == -1) /* Stdin by default. */ 143 ifd = 0; 144 145 if (bytecnt) { 146 split1(bytecnt); 147 } else { 148 split2(numlines); 149 } 150 return 0; 151 } 152 153 /* 154 * split1 -- 155 * Split the input by bytes. 
156 */ 157 static void 158 split1(unsigned long long bytecnt) 159 { 160 unsigned long long bcnt, dist; 161 ssize_t len; 162 char *C; 163 char bfr[MAXBSIZE]; 164 165 for (bcnt = 0;;) 166 switch (len = read(ifd, bfr, MAXBSIZE)) { 167 case 0: 168 exit(0); 169 case -1: 170 err(1, "read"); 171 /* NOTREACHED */ 172 default: 173 if (!file_open) { 174 newfile(); 175 file_open = 1; 176 } 177 if (bcnt + len >= bytecnt) { 178 dist = bytecnt - bcnt; 179 if (bigwrite(ofd, bfr, dist) != dist) 180 err(1, "write"); 181 len -= dist; 182 for (C = bfr + dist; len >= bytecnt; 183 len -= bytecnt, C += bytecnt) { 184 newfile(); 185 if (bigwrite(ofd, 186 C, (int)bytecnt) != bytecnt) 187 err(1, "write"); 188 } 189 if (len) { 190 newfile(); 191 if (bigwrite(ofd, C, len) != len) 192 err(1, "write"); 193 } else 194 file_open = 0; 195 bcnt = len; 196 } else { 197 bcnt += len; 198 if (bigwrite(ofd, bfr, len) != len) 199 err(1, "write"); 200 } 201 } 202 } 203 204 /* 205 * split2 -- 206 * Split the input by lines. 207 */ 208 static void 209 split2(unsigned long long numlines) 210 { 211 unsigned long long lcnt, bcnt; 212 ssize_t len; 213 char *Ce, *Cs; 214 char bfr[MAXBSIZE]; 215 216 for (lcnt = 0;;) 217 switch (len = read(ifd, bfr, MAXBSIZE)) { 218 case 0: 219 exit(0); 220 case -1: 221 err(1, "read"); 222 /* NOTREACHED */ 223 default: 224 if (!file_open) { 225 newfile(); 226 file_open = 1; 227 } 228 for (Cs = Ce = bfr; len--; Ce++) 229 if (*Ce == '\n' && ++lcnt == numlines) { 230 bcnt = Ce - Cs + 1; 231 if (bigwrite(ofd, Cs, bcnt) != bcnt) 232 err(1, "write"); 233 lcnt = 0; 234 Cs = Ce + 1; 235 if (len) 236 newfile(); 237 else 238 file_open = 0; 239 } 240 if (Cs < Ce) { 241 bcnt = Ce - Cs; 242 if (bigwrite(ofd, Cs, bcnt) != bcnt) 243 err(1, "write"); 244 } 245 } 246 } 247 248 /* 249 * newfile -- 250 * Open a new output file. 
251 */ 252 static void 253 newfile(void) 254 { 255 static int fnum; 256 static int defname; 257 static char *fpnt; 258 259 if (ofd == -1) { 260 if (fname[0] == '\0') { 261 fname[0] = 'x'; 262 fpnt = fname + 1; 263 defname = 1; 264 } else { 265 fpnt = fname + strlen(fname); 266 defname = 0; 267 } 268 ofd = fileno(stdout); 269 } 270 /* 271 * Hack to increase max files; original code wandered through 272 * magic characters. Maximum files is 3 * 26 * 26 == 2028 273 */ 274 #define MAXFILES 676 275 if (fnum == MAXFILES) { 276 if (!defname || fname[0] == 'z') 277 errx(1, "too many files."); 278 ++fname[0]; 279 fnum = 0; 280 } 281 fpnt[0] = fnum / 26 + 'a'; 282 fpnt[1] = fnum % 26 + 'a'; 283 ++fnum; 284 if (!freopen(fname, "w", stdout)) 285 err(1, "%s", fname); 286 } 287 288 static unsigned long long 289 bigwrite(int fd, const void *buf, unsigned long long len) 290 { 291 const char *ptr = buf; 292 unsigned long long sofar = 0; 293 294 while (len != 0) { 295 ssize_t w, nw = (len > INT_MAX) ? INT_MAX : (ssize_t)len; 296 if ((w = write(fd, ptr, nw)) == -1) 297 return sofar; 298 len -= w; 299 ptr += w; 300 sofar += w; 301 } 302 return sofar; 303 } 304 305 306 static void 307 usage(void) 308 { 309 (void)fprintf(stderr, 310 "Usage: %s [-b byte_count] [-l line_count] [file [prefix]]\n", getprogname()); 311 exit(1); 312 } 313