xref: /plan9-contrib/sys/src/cmd/split.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include <regexp.h>
5 
6 char	digit[] = "0123456789";
7 char	*suffix = "";
8 char	*stem = "x";
9 char	suff[] = "aa";
10 char	name[200];
11 Biobuf	bout;
12 Biobuf	*output = &bout;
13 
14 extern int nextfile(void);
15 extern int matchfile(Resub*);
16 extern void openf(void);
17 extern char *fold(char*,int);
18 extern void usage(void);
19 extern void badexp(void);
20 
21 void
22 main(int argc, char *argv[])
23 {
24 	Reprog *exp;
25 	char *pattern = 0;
26 	register n = 1000;
27 	char *line;
28 	int xflag = 0;
29 	int iflag = 0;
30 	Biobuf bin;
31 	Biobuf *b = &bin;
32 	char buf[256];
33 
34 	ARGBEGIN{
35 	case 'n':
36 		n=atoi(ARGF());
37 		break;
38 	case 'e':
39 		pattern = strdup(ARGF());
40 		break;
41 	case 'f':
42 		stem = strdup(ARGF());
43 		break;
44 	case 's':
45 		suffix = strdup(ARGF());
46 		break;
47 	case 'x':
48 		xflag++;
49 		break;
50 	case 'i':
51 		iflag++;
52 		break;
53 	default:
54 		goto Usage;
55 	}ARGEND;
56 	if(argc < 0 || argc > 1)
57     Usage:	usage();
58 
59 	if(argc == 0){
60 		Binit(b, 0, OREAD);
61 	}else{
62 		b = Bopen(argv[0], OREAD);
63 		if(b == 0)
64 			fprint(2, "split: can't open %s: %r\n", argv[0]);
65 	}
66 	if(pattern) {
67 		if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
68 			badexp();
69 		while((line=Brdline(b,'\n')) != 0) {
70 			Resub match[2];
71 			line[BLINELEN(b)-1] = 0;
72 			if(regexec(exp,iflag?fold(line,BLINELEN(b)-1):line,match,2)) {
73 				if(matchfile(match) && xflag)
74 					continue;
75 			} else if(output == 0)
76 				nextfile();	/* at most once */
77 			Bwrite(output, line, BLINELEN(b)-1);
78 			Bputc(output, '\n');
79 		}
80 	} else {
81 		register linecnt = n;
82 		while((line=Brdline(b,'\n')) != 0) {
83 			if(++linecnt > n) {
84 				nextfile();
85 				linecnt = 1;
86 			}
87 			Bwrite(output, line, BLINELEN(b));
88 		}
89 
90 		/*
91 		 * in case we didn't end with a newline, tack whatever's
92 		 * left onto the last file
93 		 */
94 		while((n = Bread(b, buf, sizeof(buf))) > 0)
95 			Bwrite(output, buf, n);
96 	}
97 	if(b != 0)
98 		Bterm(b);
99 	exits(0);
100 }
101 
102 int
103 nextfile(void)
104 {
105 	static canopen = 1;
106 	if(suff[0] > 'z') {
107 		if(canopen)
108 			fprint(2, "split: file %szz not split\n",stem);
109 		canopen = 0;
110 	} else {
111 		strcpy(name, stem);
112 		strcat(name, suff);
113 		if(++suff[1] > 'z')
114 			suff[1] = 'a', ++suff[0];
115 		openf();
116 	}
117 	return canopen;
118 }
119 
120 int
121 matchfile(Resub *match)
122 {
123 	if(match[1].sp) {
124 		int len = match[1].ep - match[1].sp;
125 		strncpy(name, match[1].sp, len);
126 		strcpy(name+len, suffix);
127 		openf();
128 		return 1;
129 	}
130 	return nextfile();
131 }
132 
133 void
134 openf(void)
135 {
136 	int fd;
137 	Bflush(output);
138 	Bterm(output);
139 	if((fd=create(name,OWRITE,0666)) == -1) {
140 		fprint(2, "grep: can't open %s: %r\n", name);
141 		exits("open failed");
142 	}
143 	Binit(output,fd,OWRITE);
144 }
145 
/*
 * Return a lower-cased copy of the first n bytes of s (stopping early
 * at a NUL), as a NUL-terminated string.
 *
 * The result lives in a single static buffer that is grown on demand
 * and overwritten by the next call; callers must not free it.
 * Only bytes that tolower changes are altered, so we assume 'A'-'Z'
 * appear only as themselves in a UTF encoding.
 */
char *
fold(char *s, int n)
{
	static char *fline;
	static int linesize = 0;
	char *grown, *t;
	int i;

	if(n < 0)
		n = 0;
	if(linesize < n+1){
		/* keep the old buffer pointer until the new one is known good */
		grown = realloc(fline, n+1);
		if(grown == 0)
			abort();	/* out of memory; callers have no error path */
		fline = grown;
		linesize = n+1;
	}
	/* copy at most n bytes so a short n can never overrun the buffer */
	t = fline;
	for(i = 0; i < n && s[i] != 0; i++)
		*t++ = tolower((unsigned char)s[i]);
	*t = 0;
	return fline;
}
163 
/*
 * Write the command synopsis to standard error and exit with
 * status "split usage".
 */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("split usage");
}
170 
/*
 * Report that the -e pattern did not compile and exit with
 * status "bad regular expression".
 */
void
badexp(void)
{
	static char status[] = "bad regular expression";

	fprint(2, "split: %s\n", status);
	exits(status);
}
177