xref: /plan9/sys/src/cmd/split.c (revision 25fc69938fdecc61cd09e795cbe2d2f72f1082b1)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <regexp.h>
5 
6 char	digit[] = "0123456789";
7 char	*suffix = "";
8 char	*stem = "x";
9 char	suff[] = "aa";
10 char	name[200];
11 Biobuf	bout;
12 Biobuf	*output = &bout;
13 
14 extern int nextfile(void);
15 extern int matchfile(Resub*);
16 extern void openf(void);
17 extern char *fold(char*,int);
18 extern void usage(void);
19 extern void badexp(void);
20 
21 void
main(int argc,char * argv[])22 main(int argc, char *argv[])
23 {
24 	Reprog *exp;
25 	char *pattern = 0;
26 	int n = 1000;
27 	char *line;
28 	int xflag = 0;
29 	int iflag = 0;
30 	Biobuf bin;
31 	Biobuf *b = &bin;
32 	char buf[256];
33 
34 	ARGBEGIN {
35 	case 'l':
36 	case 'n':
37 		n=atoi(EARGF(usage()));
38 		break;
39 	case 'e':
40 		pattern = strdup(EARGF(usage()));
41 		break;
42 	case 'f':
43 		stem = strdup(EARGF(usage()));
44 		break;
45 	case 's':
46 		suffix = strdup(EARGF(usage()));
47 		break;
48 	case 'x':
49 		xflag++;
50 		break;
51 	case 'i':
52 		iflag++;
53 		break;
54 	default:
55 		usage();
56 		break;
57 
58 	} ARGEND;
59 
60 	if(argc < 0 || argc > 1)
61 		usage();
62 
63 	if(argc != 0) {
64 		b = Bopen(argv[0], OREAD);
65 		if(b == nil) {
66 			fprint(2, "split: can't open %s: %r\n", argv[0]);
67 			exits("open");
68 		}
69 	} else
70 		Binit(b, 0, OREAD);
71 
72 	if(pattern) {
73 		Resub match[2];
74 
75 		if(!(exp = regcomp(iflag? fold(pattern, strlen(pattern)):
76 		    pattern)))
77 			badexp();
78 		memset(match, 0, sizeof match);
79 		matchfile(match);
80 		while((line=Brdline(b,'\n')) != 0) {
81 			memset(match, 0, sizeof match);
82 			line[Blinelen(b)-1] = 0;
83 			if(regexec(exp, iflag? fold(line, Blinelen(b)-1): line,
84 			    match, 2)) {
85 				if(matchfile(match) && xflag)
86 					continue;
87 			} else if(output == 0)
88 				nextfile();	/* at most once */
89 			Bwrite(output, line, Blinelen(b)-1);
90 			Bputc(output, '\n');
91 		}
92 	} else {
93 		int linecnt = n;
94 
95 		while((line=Brdline(b,'\n')) != 0) {
96 			if(++linecnt > n) {
97 				nextfile();
98 				linecnt = 1;
99 			}
100 			Bwrite(output, line, Blinelen(b));
101 		}
102 
103 		/*
104 		 * in case we didn't end with a newline, tack whatever's
105 		 * left onto the last file
106 		 */
107 		while((n = Bread(b, buf, sizeof(buf))) > 0)
108 			Bwrite(output, buf, n);
109 	}
110 	if(b != nil)
111 		Bterm(b);
112 	exits(0);
113 }
114 
115 int
nextfile(void)116 nextfile(void)
117 {
118 	static int canopen = 1;
119 
120 	if(suff[0] > 'z') {
121 		if(canopen)
122 			fprint(2, "split: file %szz not split\n",stem);
123 		canopen = 0;
124 	} else {
125 		snprint(name, sizeof name, "%s%s", stem, suff);
126 		if(++suff[1] > 'z')
127 			suff[1] = 'a', ++suff[0];
128 		openf();
129 	}
130 	return canopen;
131 }
132 
133 int
matchfile(Resub * match)134 matchfile(Resub *match)
135 {
136 	if(match[1].sp) {
137 		int len = match[1].ep - match[1].sp;
138 
139 		strncpy(name, match[1].sp, len);
140 		strcpy(name+len, suffix);
141 		openf();
142 		return 1;
143 	}
144 	return nextfile();
145 }
146 
147 void
openf(void)148 openf(void)
149 {
150 	static int fd = 0;
151 
152 	Bflush(output);
153 	Bterm(output);
154 	if(fd > 0)
155 		close(fd);
156 	fd = create(name,OWRITE,0666);
157 	if(fd < 0) {
158 		fprint(2, "grep: can't create %s: %r\n", name);
159 		exits("create");
160 	}
161 	Binit(output, fd, OWRITE);
162 }
163 
/*
 * fold: return a lower-case copy of the first n bytes of s (or of
 * all of s, if a NUL appears sooner).
 *
 * The copy lives in a single static buffer that grows as needed and
 * is reused by every call, so the result is only valid until the
 * next call.  The copy is always NUL-terminated.
 *
 * We assume the 'A'-'Z' only appear as themselves in a utf encoding,
 * so folding byte by byte is safe.
 */
char *
fold(char *s, int n)
{
	static char *fline;
	static int linesize = 0;
	char *t;
	int i;

	if(n < 0)
		n = 0;
	if(linesize < n+1){
		/* keep the old buffer pointer until the new one is known good */
		t = realloc(fline, n+1);
		if(t == 0)
			abort();
		fline = t;
		linesize = n+1;
	}
	/* never copy more than n bytes: the buffer holds only n+1 */
	t = fline;
	for(i = 0; i < n && s[i] != 0; i++)
		*t++ = tolower((unsigned char)s[i]);
	*t = 0;
	return fline;
}
182 
/*
 * usage: print the command synopsis on file descriptor 2 and exit
 * with status "usage".
 */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
189 
/*
 * badexp: report that the -e pattern could not be compiled and exit
 * with status "bad regular expression".
 */
void
badexp(void)
{
	static char complaint[] = "split: bad regular expression\n";

	fprint(2, "%s", complaint);
	exits("bad regular expression");
}
196