1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <regexp.h> 5 6 char digit[] = "0123456789"; 7 char *suffix = ""; 8 char *stem = "x"; 9 char suff[] = "aa"; 10 char name[200]; 11 Biobuf bout; 12 Biobuf *output = &bout; 13 14 extern int nextfile(void); 15 extern int matchfile(Resub*); 16 extern void openf(void); 17 extern char *fold(char*,int); 18 extern void usage(void); 19 extern void badexp(void); 20 21 void 22 main(int argc, char *argv[]) 23 { 24 Reprog *exp; 25 char *pattern = 0; 26 int n = 1000; 27 char *line; 28 int xflag = 0; 29 int iflag = 0; 30 Biobuf bin; 31 Biobuf *b = &bin; 32 char buf[256]; 33 34 ARGBEGIN { 35 case 'n': 36 n=atoi(ARGF()); 37 break; 38 case 'e': 39 pattern = strdup(ARGF()); 40 break; 41 case 'f': 42 stem = strdup(ARGF()); 43 break; 44 case 's': 45 suffix = strdup(ARGF()); 46 break; 47 case 'x': 48 xflag++; 49 break; 50 case 'i': 51 iflag++; 52 break; 53 default: 54 usage(); 55 break; 56 57 } ARGEND; 58 59 if(argc < 0 || argc > 1) 60 usage(); 61 62 if(argc != 0) { 63 b = Bopen(argv[0], OREAD); 64 if(b == nil) { 65 fprint(2, "split: can't open %s: %r\n", argv[0]); 66 exits("open"); 67 } 68 } else 69 Binit(b, 0, OREAD); 70 71 if(pattern) { 72 if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern))) 73 badexp(); 74 while((line=Brdline(b,'\n')) != 0) { 75 Resub match[2]; 76 line[Blinelen(b)-1] = 0; 77 if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) { 78 if(matchfile(match) && xflag) 79 continue; 80 } else if(output == 0) 81 nextfile(); /* at most once */ 82 Bwrite(output, line, Blinelen(b)-1); 83 Bputc(output, '\n'); 84 } 85 } else { 86 int linecnt = n; 87 88 while((line=Brdline(b,'\n')) != 0) { 89 if(++linecnt > n) { 90 nextfile(); 91 linecnt = 1; 92 } 93 Bwrite(output, line, Blinelen(b)); 94 } 95 96 /* 97 * in case we didn't end with a newline, tack whatever's 98 * left onto the last file 99 */ 100 while((n = Bread(b, buf, sizeof(buf))) > 0) 101 Bwrite(output, buf, n); 102 } 103 if(b != nil) 104 Bterm(b); 105 exits(0); 106 } 107 108 int 109 nextfile(void) 110 { 111 static canopen = 1; 112 if(suff[0] > 'z') { 113 if(canopen) 114 fprint(2, "split: file %szz not split\n",stem); 115 canopen = 0; 116 } else { 117 strcpy(name, stem); 118 strcat(name, suff); 119 if(++suff[1] > 'z') 120 suff[1] = 'a', ++suff[0]; 121 openf(); 122 } 123 return canopen; 124 } 125 126 int 127 matchfile(Resub *match) 128 { 129 if(match[1].sp) { 130 int len = match[1].ep - match[1].sp; 131 strncpy(name, match[1].sp, len); 132 strcpy(name+len, suffix); 133 openf(); 134 return 1; 135 } 136 return nextfile(); 137 } 138 139 void 140 openf(void) 141 { 142 int fd; 143 Bflush(output); 144 Bterm(output); 145 fd = create(name,OWRITE,0666); 146 if(fd < 0) { 147 fprint(2, "grep: can't create %s: %r\n", name); 148 exits("create"); 149 } 150 Binit(output, fd, OWRITE); 151 } 152 153 char * 154 fold(char *s, int n) 155 { 156 static char *fline; 157 static int linesize = 0; 158 char *t; 159 160 if(linesize < n+1){ 161 fline = realloc(fline,n+1); 162 linesize = n+1; 163 } 164 for(t=fline; *t++ = tolower(*s++); ) 165 continue; 166 /* we assume the 'A'-'Z' only appear as themselves 167 * in a utf encoding. 168 */ 169 return fline; 170 } 171 172 void 173 usage(void) 174 { 175 fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); 176 exits("usage"); 177 } 178 179 void 180 badexp(void) 181 { 182 fprint(2, "split: bad regular expression\n"); 183 exits("bad regular expression"); 184 } 185