xref: /plan9-contrib/sys/src/cmd/split.c (revision 9a747e4fd48b9f4522c70c07e8f882a15030f964)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <regexp.h>
5 
6 char	digit[] = "0123456789";
7 char	*suffix = "";
8 char	*stem = "x";
9 char	suff[] = "aa";
10 char	name[200];
11 Biobuf	bout;
12 Biobuf	*output = &bout;
13 
14 extern int nextfile(void);
15 extern int matchfile(Resub*);
16 extern void openf(void);
17 extern char *fold(char*,int);
18 extern void usage(void);
19 extern void badexp(void);
20 
21 void
22 main(int argc, char *argv[])
23 {
24 	Reprog *exp;
25 	char *pattern = 0;
26 	int n = 1000;
27 	char *line;
28 	int xflag = 0;
29 	int iflag = 0;
30 	Biobuf bin;
31 	Biobuf *b = &bin;
32 	char buf[256];
33 
34 	ARGBEGIN {
35 	case 'n':
36 		n=atoi(EARGF(usage()));
37 		break;
38 	case 'e':
39 		pattern = strdup(EARGF(usage()));
40 		break;
41 	case 'f':
42 		stem = strdup(EARGF(usage()));
43 		break;
44 	case 's':
45 		suffix = strdup(EARGF(usage()));
46 		break;
47 	case 'x':
48 		xflag++;
49 		break;
50 	case 'i':
51 		iflag++;
52 		break;
53 	default:
54 		usage();
55 		break;
56 
57 	} ARGEND;
58 
59 	if(argc < 0 || argc > 1)
60 		usage();
61 
62 	if(argc != 0) {
63 		b = Bopen(argv[0], OREAD);
64 		if(b == nil) {
65 			fprint(2, "split: can't open %s: %r\n", argv[0]);
66 			exits("open");
67 		}
68 	} else
69 		Binit(b, 0, OREAD);
70 
71 	if(pattern) {
72 		if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
73 			badexp();
74 		while((line=Brdline(b,'\n')) != 0) {
75 			Resub match[2];
76 			memset(match, 0, sizeof match);
77 			line[Blinelen(b)-1] = 0;
78 			if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
79 				if(matchfile(match) && xflag)
80 					continue;
81 			} else if(output == 0)
82 				nextfile();	/* at most once */
83 			Bwrite(output, line, Blinelen(b)-1);
84 			Bputc(output, '\n');
85 		}
86 	} else {
87 		int linecnt = n;
88 
89 		while((line=Brdline(b,'\n')) != 0) {
90 			if(++linecnt > n) {
91 				nextfile();
92 				linecnt = 1;
93 			}
94 			Bwrite(output, line, Blinelen(b));
95 		}
96 
97 		/*
98 		 * in case we didn't end with a newline, tack whatever's
99 		 * left onto the last file
100 		 */
101 		while((n = Bread(b, buf, sizeof(buf))) > 0)
102 			Bwrite(output, buf, n);
103 	}
104 	if(b != nil)
105 		Bterm(b);
106 	exits(0);
107 }
108 
109 int
110 nextfile(void)
111 {
112 	static canopen = 1;
113 	if(suff[0] > 'z') {
114 		if(canopen)
115 			fprint(2, "split: file %szz not split\n",stem);
116 		canopen = 0;
117 	} else {
118 		strcpy(name, stem);
119 		strcat(name, suff);
120 		if(++suff[1] > 'z')
121 			suff[1] = 'a', ++suff[0];
122 		openf();
123 	}
124 	return canopen;
125 }
126 
127 int
128 matchfile(Resub *match)
129 {
130 	if(match[1].sp) {
131 		int len = match[1].ep - match[1].sp;
132 		strncpy(name, match[1].sp, len);
133 		strcpy(name+len, suffix);
134 		openf();
135 		return 1;
136 	}
137 	return nextfile();
138 }
139 
140 void
141 openf(void)
142 {
143 	static int fd = 0;
144 	Bflush(output);
145 	Bterm(output);
146 	if(fd > 0)
147 		close(fd);
148 	fd = create(name,OWRITE,0666);
149 	if(fd < 0) {
150 		fprint(2, "grep: can't create %s: %r\n", name);
151 		exits("create");
152 	}
153 	Binit(output, fd, OWRITE);
154 }
155 
/*
 * Return a lower-cased copy of the first n bytes of s (or of all of
 * s, if it is shorter than n).
 *
 * Only the ASCII letters 'A'-'Z' are changed; every other byte is
 * copied as is, on the assumption that 'A'-'Z' only appear as
 * themselves in a UTF encoding.
 *
 * The result lives in a static buffer that is grown as needed and
 * reused, so it is only valid until the next call.  Aborts if the
 * buffer cannot be grown.
 */
char *
fold(char *s, int n)
{
	static char *fline;
	static int linesize = 0;
	char *grown;
	int i, c;

	if(n < 0)
		n = 0;
	if(linesize < n+1){
		/* keep the old buffer reachable until realloc has succeeded */
		grown = realloc(fline, n+1);
		if(grown == 0)
			abort();
		fline = grown;
		linesize = n+1;
	}
	for(i = 0; i < n && s[i] != '\0'; i++){
		c = s[i];
		if(c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		fline[i] = c;
	}
	fline[i] = '\0';
	return fline;
}
174 
/*
 * Print the command synopsis on file descriptor 2 and exit with
 * status "usage".
 */
void
usage(void)
{
	fprint(2,
		"usage: split [-n num] [-e exp] [-f stem] [-s suff]"
		" [-x] [-i] [file]\n");
	exits("usage");
}
181 
/*
 * Report that the -e expression could not be compiled and exit with
 * status "bad regular expression".
 */
void
badexp(void)
{
	static char status[] = "bad regular expression";

	fprint(2, "split: bad regular expression\n");
	exits(status);
}
188