1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <regexp.h> 5 6 char digit[] = "0123456789"; 7 char *suffix = ""; 8 char *stem = "x"; 9 char suff[] = "aa"; 10 char name[200]; 11 Biobuf bout; 12 Biobuf *output = &bout; 13 14 extern int nextfile(void); 15 extern int matchfile(Resub*); 16 extern void openf(void); 17 extern char *fold(char*,int); 18 extern void usage(void); 19 extern void badexp(void); 20 21 void 22 main(int argc, char *argv[]) 23 { 24 Reprog *exp; 25 char *pattern = 0; 26 int n = 1000; 27 char *line; 28 int xflag = 0; 29 int iflag = 0; 30 Biobuf bin; 31 Biobuf *b = &bin; 32 char buf[256]; 33 34 ARGBEGIN { 35 case 'n': 36 n=atoi(EARGF(usage())); 37 break; 38 case 'e': 39 pattern = strdup(EARGF(usage())); 40 break; 41 case 'f': 42 stem = strdup(EARGF(usage())); 43 break; 44 case 's': 45 suffix = strdup(EARGF(usage())); 46 break; 47 case 'x': 48 xflag++; 49 break; 50 case 'i': 51 iflag++; 52 break; 53 default: 54 usage(); 55 break; 56 57 } ARGEND; 58 59 if(argc < 0 || argc > 1) 60 usage(); 61 62 if(argc != 0) { 63 b = Bopen(argv[0], OREAD); 64 if(b == nil) { 65 fprint(2, "split: can't open %s: %r\n", argv[0]); 66 exits("open"); 67 } 68 } else 69 Binit(b, 0, OREAD); 70 71 if(pattern) { 72 if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern))) 73 badexp(); 74 while((line=Brdline(b,'\n')) != 0) { 75 Resub match[2]; 76 memset(match, 0, sizeof match); 77 line[Blinelen(b)-1] = 0; 78 if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) { 79 if(matchfile(match) && xflag) 80 continue; 81 } else if(output == 0) 82 nextfile(); /* at most once */ 83 Bwrite(output, line, Blinelen(b)-1); 84 Bputc(output, '\n'); 85 } 86 } else { 87 int linecnt = n; 88 89 while((line=Brdline(b,'\n')) != 0) { 90 if(++linecnt > n) { 91 nextfile(); 92 linecnt = 1; 93 } 94 Bwrite(output, line, Blinelen(b)); 95 } 96 97 /* 98 * in case we didn't end with a newline, tack whatever's 99 * left onto the last file 100 */ 101 while((n = Bread(b, buf, sizeof(buf))) > 0) 102 Bwrite(output, buf, n); 103 } 104 if(b != nil) 105 Bterm(b); 106 exits(0); 107 } 108 109 int 110 nextfile(void) 111 { 112 static canopen = 1; 113 if(suff[0] > 'z') { 114 if(canopen) 115 fprint(2, "split: file %szz not split\n",stem); 116 canopen = 0; 117 } else { 118 strcpy(name, stem); 119 strcat(name, suff); 120 if(++suff[1] > 'z') 121 suff[1] = 'a', ++suff[0]; 122 openf(); 123 } 124 return canopen; 125 } 126 127 int 128 matchfile(Resub *match) 129 { 130 if(match[1].sp) { 131 int len = match[1].ep - match[1].sp; 132 strncpy(name, match[1].sp, len); 133 strcpy(name+len, suffix); 134 openf(); 135 return 1; 136 } 137 return nextfile(); 138 } 139 140 void 141 openf(void) 142 { 143 static int fd = 0; 144 Bflush(output); 145 Bterm(output); 146 if(fd > 0) 147 close(fd); 148 fd = create(name,OWRITE,0666); 149 if(fd < 0) { 150 fprint(2, "grep: can't create %s: %r\n", name); 151 exits("create"); 152 } 153 Binit(output, fd, OWRITE); 154 } 155 156 char * 157 fold(char *s, int n) 158 { 159 static char *fline; 160 static int linesize = 0; 161 char *t; 162 163 if(linesize < n+1){ 164 fline = realloc(fline,n+1); 165 linesize = n+1; 166 } 167 for(t=fline; *t++ = tolower(*s++); ) 168 continue; 169 /* we assume the 'A'-'Z' only appear as themselves 170 * in a utf encoding. 171 */ 172 return fline; 173 } 174 175 void 176 usage(void) 177 { 178 fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); 179 exits("usage"); 180 } 181 182 void 183 badexp(void) 184 { 185 fprint(2, "split: bad regular expression\n"); 186 exits("bad regular expression"); 187 } 188