1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <regexp.h> 5 6 char digit[] = "0123456789"; 7 char *suffix = ""; 8 char *stem = "x"; 9 char suff[] = "aa"; 10 char name[200]; 11 Biobuf bout; 12 Biobuf *output = &bout; 13 14 extern int nextfile(void); 15 extern int matchfile(Resub*); 16 extern void openf(void); 17 extern char *fold(char*,int); 18 extern void usage(void); 19 extern void badexp(void); 20 21 void 22 main(int argc, char *argv[]) 23 { 24 Reprog *exp; 25 char *pattern = 0; 26 int n = 1000; 27 char *line; 28 int xflag = 0; 29 int iflag = 0; 30 Biobuf bin; 31 Biobuf *b = &bin; 32 char buf[256]; 33 34 ARGBEGIN { 35 case 'l': 36 case 'n': 37 n=atoi(EARGF(usage())); 38 break; 39 case 'e': 40 pattern = strdup(EARGF(usage())); 41 break; 42 case 'f': 43 stem = strdup(EARGF(usage())); 44 break; 45 case 's': 46 suffix = strdup(EARGF(usage())); 47 break; 48 case 'x': 49 xflag++; 50 break; 51 case 'i': 52 iflag++; 53 break; 54 default: 55 usage(); 56 break; 57 58 } ARGEND; 59 60 if(argc < 0 || argc > 1) 61 usage(); 62 63 if(argc != 0) { 64 b = Bopen(argv[0], OREAD); 65 if(b == nil) { 66 fprint(2, "split: can't open %s: %r\n", argv[0]); 67 exits("open"); 68 } 69 } else 70 Binit(b, 0, OREAD); 71 72 if(pattern) { 73 Resub match[2]; 74 75 if(!(exp = regcomp(iflag? fold(pattern, strlen(pattern)): 76 pattern))) 77 badexp(); 78 memset(match, 0, sizeof match); 79 matchfile(match); 80 while((line=Brdline(b,'\n')) != 0) { 81 memset(match, 0, sizeof match); 82 line[Blinelen(b)-1] = 0; 83 if(regexec(exp, iflag? fold(line, Blinelen(b)-1): line, 84 match, 2)) { 85 if(matchfile(match) && xflag) 86 continue; 87 } else if(output == 0) 88 nextfile(); /* at most once */ 89 Bwrite(output, line, Blinelen(b)-1); 90 Bputc(output, '\n'); 91 } 92 } else { 93 int linecnt = n; 94 95 while((line=Brdline(b,'\n')) != 0) { 96 if(++linecnt > n) { 97 nextfile(); 98 linecnt = 1; 99 } 100 Bwrite(output, line, Blinelen(b)); 101 } 102 103 /* 104 * in case we didn't end with a newline, tack whatever's 105 * left onto the last file 106 */ 107 while((n = Bread(b, buf, sizeof(buf))) > 0) 108 Bwrite(output, buf, n); 109 } 110 if(b != nil) 111 Bterm(b); 112 exits(0); 113 } 114 115 int 116 nextfile(void) 117 { 118 static int canopen = 1; 119 120 if(suff[0] > 'z') { 121 if(canopen) 122 fprint(2, "split: file %szz not split\n",stem); 123 canopen = 0; 124 } else { 125 snprint(name, sizeof name, "%s%s", stem, suff); 126 if(++suff[1] > 'z') 127 suff[1] = 'a', ++suff[0]; 128 openf(); 129 } 130 return canopen; 131 } 132 133 int 134 matchfile(Resub *match) 135 { 136 if(match[1].sp) { 137 int len = match[1].ep - match[1].sp; 138 139 strncpy(name, match[1].sp, len); 140 strcpy(name+len, suffix); 141 openf(); 142 return 1; 143 } 144 return nextfile(); 145 } 146 147 void 148 openf(void) 149 { 150 static int fd = 0; 151 152 Bflush(output); 153 Bterm(output); 154 if(fd > 0) 155 close(fd); 156 fd = create(name,OWRITE,0666); 157 if(fd < 0) { 158 fprint(2, "grep: can't create %s: %r\n", name); 159 exits("create"); 160 } 161 Binit(output, fd, OWRITE); 162 } 163 164 char * 165 fold(char *s, int n) 166 { 167 static char *fline; 168 static int linesize = 0; 169 char *t; 170 171 if(linesize < n+1){ 172 fline = realloc(fline,n+1); 173 linesize = n+1; 174 } 175 for(t=fline; *t++ = tolower(*s++); ) 176 continue; 177 /* we assume the 'A'-'Z' only appear as themselves 178 * in a utf encoding. 179 */ 180 return fline; 181 } 182 183 void 184 usage(void) 185 { 186 fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); 187 exits("usage"); 188 } 189 190 void 191 badexp(void) 192 { 193 fprint(2, "split: bad regular expression\n"); 194 exits("bad regular expression"); 195 } 196