1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <regexp.h>
5
6 char digit[] = "0123456789";
7 char *suffix = "";
8 char *stem = "x";
9 char suff[] = "aa";
10 char name[200];
11 Biobuf bout;
12 Biobuf *output = &bout;
13
14 extern int nextfile(void);
15 extern int matchfile(Resub*);
16 extern void openf(void);
17 extern char *fold(char*,int);
18 extern void usage(void);
19 extern void badexp(void);
20
21 void
main(int argc,char * argv[])22 main(int argc, char *argv[])
23 {
24 Reprog *exp;
25 char *pattern = 0;
26 int n = 1000;
27 char *line;
28 int xflag = 0;
29 int iflag = 0;
30 Biobuf bin;
31 Biobuf *b = &bin;
32 char buf[256];
33
34 ARGBEGIN {
35 case 'l':
36 case 'n':
37 n=atoi(EARGF(usage()));
38 break;
39 case 'e':
40 pattern = strdup(EARGF(usage()));
41 break;
42 case 'f':
43 stem = strdup(EARGF(usage()));
44 break;
45 case 's':
46 suffix = strdup(EARGF(usage()));
47 break;
48 case 'x':
49 xflag++;
50 break;
51 case 'i':
52 iflag++;
53 break;
54 default:
55 usage();
56 break;
57
58 } ARGEND;
59
60 if(argc < 0 || argc > 1)
61 usage();
62
63 if(argc != 0) {
64 b = Bopen(argv[0], OREAD);
65 if(b == nil) {
66 fprint(2, "split: can't open %s: %r\n", argv[0]);
67 exits("open");
68 }
69 } else
70 Binit(b, 0, OREAD);
71
72 if(pattern) {
73 Resub match[2];
74
75 if(!(exp = regcomp(iflag? fold(pattern, strlen(pattern)):
76 pattern)))
77 badexp();
78 memset(match, 0, sizeof match);
79 matchfile(match);
80 while((line=Brdline(b,'\n')) != 0) {
81 memset(match, 0, sizeof match);
82 line[Blinelen(b)-1] = 0;
83 if(regexec(exp, iflag? fold(line, Blinelen(b)-1): line,
84 match, 2)) {
85 if(matchfile(match) && xflag)
86 continue;
87 } else if(output == 0)
88 nextfile(); /* at most once */
89 Bwrite(output, line, Blinelen(b)-1);
90 Bputc(output, '\n');
91 }
92 } else {
93 int linecnt = n;
94
95 while((line=Brdline(b,'\n')) != 0) {
96 if(++linecnt > n) {
97 nextfile();
98 linecnt = 1;
99 }
100 Bwrite(output, line, Blinelen(b));
101 }
102
103 /*
104 * in case we didn't end with a newline, tack whatever's
105 * left onto the last file
106 */
107 while((n = Bread(b, buf, sizeof(buf))) > 0)
108 Bwrite(output, buf, n);
109 }
110 if(b != nil)
111 Bterm(b);
112 exits(0);
113 }
114
115 int
nextfile(void)116 nextfile(void)
117 {
118 static int canopen = 1;
119
120 if(suff[0] > 'z') {
121 if(canopen)
122 fprint(2, "split: file %szz not split\n",stem);
123 canopen = 0;
124 } else {
125 snprint(name, sizeof name, "%s%s", stem, suff);
126 if(++suff[1] > 'z')
127 suff[1] = 'a', ++suff[0];
128 openf();
129 }
130 return canopen;
131 }
132
133 int
matchfile(Resub * match)134 matchfile(Resub *match)
135 {
136 if(match[1].sp) {
137 int len = match[1].ep - match[1].sp;
138
139 strncpy(name, match[1].sp, len);
140 strcpy(name+len, suffix);
141 openf();
142 return 1;
143 }
144 return nextfile();
145 }
146
147 void
openf(void)148 openf(void)
149 {
150 static int fd = 0;
151
152 Bflush(output);
153 Bterm(output);
154 if(fd > 0)
155 close(fd);
156 fd = create(name,OWRITE,0666);
157 if(fd < 0) {
158 fprint(2, "grep: can't create %s: %r\n", name);
159 exits("create");
160 }
161 Binit(output, fd, OWRITE);
162 }
163
/*
 * Return a lower-cased copy of the first n bytes of s (stopping
 * early at a NUL), always NUL-terminated.
 *
 * The copy lives in a single static buffer that grows as needed and
 * is overwritten by the next call, so the caller must not free it
 * and must be finished with one result before asking for another.
 * Running out of memory aborts the program.
 */
char *
fold(char *s, int n)
{
	static char *fline;
	static int linesize = 0;
	char *grown;
	int i;

	if(n < 0)
		n = 0;
	if(linesize < n+1){
		/* keep the old pointer until the new one is known to be good */
		grown = realloc(fline, n+1);
		if(grown == 0)
			abort();
		fline = grown;
		linesize = n+1;
	}
	/* we assume the 'A'-'Z' only appear as themselves
	 * in a utf encoding.
	 */
	for(i = 0; i < n && s[i] != '\0'; i++)
		fline[i] = tolower((unsigned char)s[i]);
	fline[i] = '\0';
	return fline;
}
182
/*
 * Print the command synopsis on standard error and exit with
 * status "usage".
 */
void
usage(void)
{
	static char msg[] = "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", msg);
	exits("usage");
}
189
/*
 * Report that the -e pattern did not compile and exit with
 * status "bad regular expression".
 */
void
badexp(void)
{
	static char status[] = "bad regular expression";

	fprint(2, "split: bad regular expression\n");
	exits(status);
}
196