1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <regexp.h> 5 6 char digit[] = "0123456789"; 7 char *suffix = ""; 8 char *stem = "x"; 9 char suff[] = "aa"; 10 char name[200]; 11 Biobuf bout; 12 Biobuf *output = &bout; 13 14 extern int nextfile(void); 15 extern int matchfile(Resub*); 16 extern void openf(void); 17 extern char *fold(char*,int); 18 extern void usage(void); 19 extern void badexp(void); 20 21 void 22 main(int argc, char *argv[]) 23 { 24 Reprog *exp; 25 char *pattern = 0; 26 register n = 1000; 27 char *line; 28 int xflag = 0; 29 int iflag = 0; 30 Biobuf bin; 31 Biobuf *b = &bin; 32 char buf[256]; 33 34 ARGBEGIN{ 35 case 'n': 36 n=atoi(ARGF()); 37 break; 38 case 'e': 39 pattern = strdup(ARGF()); 40 break; 41 case 'f': 42 stem = strdup(ARGF()); 43 break; 44 case 's': 45 suffix = strdup(ARGF()); 46 break; 47 case 'x': 48 xflag++; 49 break; 50 case 'i': 51 iflag++; 52 break; 53 default: 54 goto Usage; 55 }ARGEND; 56 if(argc < 0 || argc > 1) 57 Usage: usage(); 58 59 if(argc == 0){ 60 Binit(b, 0, OREAD); 61 }else{ 62 b = Bopen(argv[0], OREAD); 63 if(b == 0) 64 fprint(2, "split: can't open %s: %r\n", argv[0]); 65 } 66 if(pattern) { 67 if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern))) 68 badexp(); 69 while((line=Brdline(b,'\n')) != 0) { 70 Resub match[2]; 71 line[BLINELEN(b)-1] = 0; 72 if(regexec(exp,iflag?fold(line,BLINELEN(b)-1):line,match,2)) { 73 if(matchfile(match) && xflag) 74 continue; 75 } else if(output == 0) 76 nextfile(); /* at most once */ 77 Bwrite(output, line, BLINELEN(b)-1); 78 Bputc(output, '\n'); 79 } 80 } else { 81 register linecnt = n; 82 while((line=Brdline(b,'\n')) != 0) { 83 if(++linecnt > n) { 84 nextfile(); 85 linecnt = 1; 86 } 87 Bwrite(output, line, BLINELEN(b)); 88 } 89 90 /* 91 * in case we didn't end with a newline, tack whatever's 92 * left onto the last file 93 */ 94 while((n = Bread(b, buf, sizeof(buf))) > 0) 95 Bwrite(output, buf, n); 96 } 97 if(b != 0) 98 Bterm(b); 99 exits(0); 100 } 101 102 int 103 nextfile(void) 104 { 105 static canopen = 1; 106 if(suff[0] > 'z') { 107 if(canopen) 108 fprint(2, "split: file %szz not split\n",stem); 109 canopen = 0; 110 } else { 111 strcpy(name, stem); 112 strcat(name, suff); 113 if(++suff[1] > 'z') 114 suff[1] = 'a', ++suff[0]; 115 openf(); 116 } 117 return canopen; 118 } 119 120 int 121 matchfile(Resub *match) 122 { 123 if(match[1].sp) { 124 int len = match[1].ep - match[1].sp; 125 strncpy(name, match[1].sp, len); 126 strcpy(name+len, suffix); 127 openf(); 128 return 1; 129 } 130 return nextfile(); 131 } 132 133 void 134 openf(void) 135 { 136 int fd; 137 Bflush(output); 138 Bterm(output); 139 if((fd=create(name,OWRITE,0666)) == -1) { 140 fprint(2, "grep: can't open %s: %r\n", name); 141 exits("open failed"); 142 } 143 Binit(output,fd,OWRITE); 144 } 145 146 char * 147 fold(char *s, int n) 148 { 149 static char *fline; 150 static int linesize = 0; 151 char *t; 152 if(linesize < n+1){ 153 fline = realloc(fline,n+1); 154 linesize = n+1; 155 } 156 for(t=fline; *t++=tolower(*s++); ) 157 continue; 158 /* we assume the 'A'-'Z' only appear as themselves 159 * in a utf encoding. 160 */ 161 return fline; 162 } 163 164 void 165 usage(void) 166 { 167 fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); 168 exits("split usage"); 169 } 170 171 void 172 badexp(void) 173 { 174 fprint(2, "split: bad regular expression\n"); 175 exits("bad regular expression"); 176 } 177