1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <regexp.h> 5 6 char digit[] = "0123456789"; 7 char *suffix = ""; 8 char *stem = "x"; 9 char suff[] = "aa"; 10 char name[200]; 11 Biobuf bout; 12 Biobuf *output = &bout; 13 14 extern int nextfile(void); 15 extern int matchfile(Resub*); 16 extern void openf(void); 17 extern char *fold(char*,int); 18 extern void usage(void); 19 extern void badexp(void); 20 21 void 22 main(int argc, char *argv[]) 23 { 24 Reprog *exp; 25 char *pattern = 0; 26 int n = 1000; 27 char *line; 28 int xflag = 0; 29 int iflag = 0; 30 Biobuf bin; 31 Biobuf *b = &bin; 32 char buf[256]; 33 34 ARGBEGIN { 35 case 'l': 36 case 'n': 37 n=atoi(EARGF(usage())); 38 break; 39 case 'e': 40 pattern = strdup(EARGF(usage())); 41 break; 42 case 'f': 43 stem = strdup(EARGF(usage())); 44 break; 45 case 's': 46 suffix = strdup(EARGF(usage())); 47 break; 48 case 'x': 49 xflag++; 50 break; 51 case 'i': 52 iflag++; 53 break; 54 default: 55 usage(); 56 break; 57 58 } ARGEND; 59 60 if(argc < 0 || argc > 1) 61 usage(); 62 63 if(argc != 0) { 64 b = Bopen(argv[0], OREAD); 65 if(b == nil) { 66 fprint(2, "split: can't open %s: %r\n", argv[0]); 67 exits("open"); 68 } 69 } else 70 Binit(b, 0, OREAD); 71 72 if(pattern) { 73 if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern))) 74 badexp(); 75 while((line=Brdline(b,'\n')) != 0) { 76 Resub match[2]; 77 memset(match, 0, sizeof match); 78 line[Blinelen(b)-1] = 0; 79 if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) { 80 if(matchfile(match) && xflag) 81 continue; 82 } else if(output == 0) 83 nextfile(); /* at most once */ 84 Bwrite(output, line, Blinelen(b)-1); 85 Bputc(output, '\n'); 86 } 87 } else { 88 int linecnt = n; 89 90 while((line=Brdline(b,'\n')) != 0) { 91 if(++linecnt > n) { 92 nextfile(); 93 linecnt = 1; 94 } 95 Bwrite(output, line, Blinelen(b)); 96 } 97 98 /* 99 * in case we didn't end with a newline, tack whatever's 100 * left onto the last file 101 */ 102 while((n = Bread(b, buf, sizeof(buf))) > 0) 103 Bwrite(output, buf, n); 104 } 105 if(b != nil) 106 Bterm(b); 107 exits(0); 108 } 109 110 int 111 nextfile(void) 112 { 113 static canopen = 1; 114 if(suff[0] > 'z') { 115 if(canopen) 116 fprint(2, "split: file %szz not split\n",stem); 117 canopen = 0; 118 } else { 119 strcpy(name, stem); 120 strcat(name, suff); 121 if(++suff[1] > 'z') 122 suff[1] = 'a', ++suff[0]; 123 openf(); 124 } 125 return canopen; 126 } 127 128 int 129 matchfile(Resub *match) 130 { 131 if(match[1].sp) { 132 int len = match[1].ep - match[1].sp; 133 strncpy(name, match[1].sp, len); 134 strcpy(name+len, suffix); 135 openf(); 136 return 1; 137 } 138 return nextfile(); 139 } 140 141 void 142 openf(void) 143 { 144 static int fd = 0; 145 Bflush(output); 146 Bterm(output); 147 if(fd > 0) 148 close(fd); 149 fd = create(name,OWRITE,0666); 150 if(fd < 0) { 151 fprint(2, "grep: can't create %s: %r\n", name); 152 exits("create"); 153 } 154 Binit(output, fd, OWRITE); 155 } 156 157 char * 158 fold(char *s, int n) 159 { 160 static char *fline; 161 static int linesize = 0; 162 char *t; 163 164 if(linesize < n+1){ 165 fline = realloc(fline,n+1); 166 linesize = n+1; 167 } 168 for(t=fline; *t++ = tolower(*s++); ) 169 continue; 170 /* we assume the 'A'-'Z' only appear as themselves 171 * in a utf encoding. 172 */ 173 return fline; 174 } 175 176 void 177 usage(void) 178 { 179 fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); 180 exits("usage"); 181 } 182 183 void 184 badexp(void) 185 { 186 fprint(2, "split: bad regular expression\n"); 187 exits("bad regular expression"); 188 } 189