xref: /inferno-os/utils/tr/tr.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a)
1 #include 	<lib9.h>
2 
/*
 * Parse control block: the cursor state used while walking one
 * character-set specification (a command-line argument).
 */
typedef struct PCB
{
	char	*base;		/* first byte of the specification string */
	char	*current;	/* next byte to be parsed */
	long	last;		/* most recent rune handed out, -1 if none yet */
	long	final;		/* upper end of the range being expanded, -1 if no range is open */
} Pcb;
10 
11 uchar	bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
12 
13 #define	SETBIT(a, c)		((a)[(c)/8] |= bits[(c)&07])
14 #define	CLEARBIT(a,c)		((a)[(c)/8] &= ~bits[(c)&07])
15 #define	BITSET(a,c)		((a)[(c)/8] & bits[(c)&07])
16 
17 #define	MAXRUNE	0xFFFF
18 
19 uchar	f[(MAXRUNE+1)/8];
20 uchar	t[(MAXRUNE+1)/8];
21 char 	wbuf[4096];
22 char	*wptr;
23 
24 Pcb pfrom, pto;
25 
26 int cflag;
27 int dflag;
28 int sflag;
29 
30 void	complement(void);
31 void	delete(void);
32 void	squeeze(void);
33 void	translit(void);
34 void	error(char*);
35 long	canon(Pcb*);
36 char	*getrune(char*, Rune*);
37 void	Pinit(Pcb*, char*);
38 void	Prewind(Pcb *p);
39 int	readrune(int, long*);
40 void	wflush(int);
41 void	writerune(int, Rune);
42 
43 void
main(int argc,char ** argv)44 main(int argc, char **argv)
45 {
46 	ARGBEGIN{
47 	case 's':	sflag++; break;
48 	case 'd':	dflag++; break;
49 	case 'c':	cflag++; break;
50 	default:	error("bad option");
51 	}ARGEND
52 	if(argc>0)
53 		Pinit(&pfrom, argv[0]);
54 	if(argc>1)
55 		Pinit(&pto, argv[1]);
56 	if(argc>2)
57 		error("arg count");
58 	if(dflag) {
59 		if ((sflag && argc != 2) || (!sflag && argc != 1))
60 			error("arg count");
61 		delete();
62 	} else {
63 		if (argc != 2)
64 			error("arg count");
65 		if (cflag)
66 			complement();
67 		else translit();
68 	}
69 	exits(0);
70 }
71 
72 void
delete(void)73 delete(void)
74 {
75 	long c, last;
76 
77 	if (cflag) {
78 		memset((char *) f, 0xff, sizeof f);
79 		while ((c = canon(&pfrom)) >= 0)
80 			CLEARBIT(f, c);
81 	} else {
82 		while ((c = canon(&pfrom)) >= 0)
83 			SETBIT(f, c);
84 	}
85 	if (sflag) {
86 		while ((c = canon(&pto)) >= 0)
87 			SETBIT(t, c);
88 	}
89 
90 	last = 0x10000;
91 	while (readrune(0, &c) > 0) {
92 		if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
93 			last = c;
94 			writerune(1, (Rune) c);
95 		}
96 	}
97 	wflush(1);
98 }
99 
100 void
complement(void)101 complement(void)
102 {
103 	Rune *p;
104 	int i;
105 	long from, to, lastc, high;
106 
107 	lastc = 0;
108 	high = 0;
109 	while ((from = canon(&pfrom)) >= 0) {
110 		if (from > high) high = from;
111 		SETBIT(f, from);
112 	}
113 	while ((to = canon(&pto)) > 0) {
114 		if (to > high) high = to;
115 		SETBIT(t,to);
116 	}
117 	Prewind(&pto);
118 	if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
119 		error("can't allocate memory");
120 	for (i = 0; i <= high; i++){
121 		if (!BITSET(f,i)) {
122 			if ((to = canon(&pto)) < 0)
123 				to = lastc;
124 			else lastc = to;
125 			p[i] = to;
126 		}
127 		else p[i] = i;
128 	}
129 	if (sflag){
130 		lastc = 0x10000;
131 		while (readrune(0, &from) > 0) {
132 			if (from > high)
133 				from = to;
134 			else
135 				from = p[from];
136 			if (from != lastc || !BITSET(t,from)) {
137 				lastc = from;
138 				writerune(1, (Rune) from);
139 			}
140 		}
141 
142 	} else {
143 		while (readrune(0, &from) > 0){
144 			if (from > high)
145 				from = to;
146 			else
147 				from = p[from];
148 			writerune(1, (Rune) from);
149 		}
150 	}
151 	wflush(1);
152 }
153 
154 void
translit(void)155 translit(void)
156 {
157 	Rune *p;
158 	int i;
159 	long from, to, lastc, high;
160 
161 	lastc = 0;
162 	high = 0;
163 	while ((from = canon(&pfrom)) >= 0)
164 		if (from > high) high = from;
165 	Prewind(&pfrom);
166 	if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
167 		error("can't allocate memory");
168 	for (i = 0; i <= high; i++)
169 		p[i] = i;
170 	while ((from = canon(&pfrom)) >= 0) {
171 		if ((to = canon(&pto)) < 0)
172 			to = lastc;
173 		else lastc = to;
174 		if (BITSET(f,from) && p[from] != to)
175 			error("ambiguous translation");
176 		SETBIT(f,from);
177 		p[from] = to;
178 		SETBIT(t,to);
179 	}
180 	while ((to = canon(&pto)) >= 0) {
181 		SETBIT(t,to);
182 	}
183 	if (sflag){
184 		lastc = 0x10000;
185 		while (readrune(0, &from) > 0) {
186 			if (from <= high)
187 				from = p[from];
188 			if (from != lastc || !BITSET(t,from)) {
189 				lastc = from;
190 				writerune(1, (Rune) from);
191 			}
192 		}
193 
194 	} else {
195 		while (readrune(0, &from) > 0) {
196 			if (from <= high)
197 				from = p[from];
198 			writerune(1, (Rune) from);
199 		}
200 	}
201 	wflush(1);
202 }
203 
204 int
readrune(int fd,long * rp)205 readrune(int fd, long *rp)
206 {
207 	Rune r;
208 	int j;
209 	static int i, n;
210 	static char buf[4096];
211 
212 	j = i;
213 	for (;;) {
214 		if (i >= n) {
215 			wflush(1);
216 			if (j != i)
217 				memcpy(buf, buf+j, n-j);
218 			i = n-j;
219 			n = read(fd, &buf[i], sizeof(buf)-i);
220 			if (n < 0)
221 				error("read error");
222 			if (n == 0)
223 				return 0;
224 			j = 0;
225 			n += i;
226 		}
227 		i++;
228 		if (fullrune(&buf[j], i-j))
229 			break;
230 	}
231 	chartorune(&r, &buf[j]);
232 	*rp = r;
233 	return 1;
234 }
235 
236 void
writerune(int fd,Rune r)237 writerune(int fd, Rune r)
238 {
239 	char buf[UTFmax];
240 	int n;
241 
242 	if (!wptr)
243 		wptr = wbuf;
244 	n = runetochar(buf, (Rune*)&r);
245 	if (wptr+n >= wbuf+sizeof(wbuf))
246 		wflush(fd);
247 	memcpy(wptr, buf, n);
248 	wptr += n;
249 }
250 
251 void
wflush(int fd)252 wflush(int fd)
253 {
254 	if (wptr && wptr > wbuf)
255 		if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
256 			error("write error");
257 	wptr = wbuf;
258 }
259 
260 char *
getrune(char * s,Rune * rp)261 getrune(char *s, Rune *rp)
262 {
263 	Rune r;
264 	char *save;
265 	int i, n;
266 
267 	s += chartorune(rp, s);
268 	if((r = *rp) == '\\' && *s){
269 		n = 0;
270 		if (*s == 'x') {
271 			s++;
272 			for (i = 0; i < 4; i++) {
273 				save = s;
274 				s += chartorune(&r, s);
275 				if ('0' <= r && r <= '9')
276 					n = 16*n + r - '0';
277 				else if ('a' <= r && r <= 'f')
278 					n = 16*n + r - 'a' + 10;
279 				else if ('A' <= r && r <= 'F')
280 					n = 16*n + r - 'A' + 10;
281 				else {
282 					if (i == 0)
283 						*rp = 'x';
284 					else *rp = n;
285 					return save;
286 				}
287 			}
288 		} else {
289 			for(i = 0; i < 3; i++) {
290 				save = s;
291 				s += chartorune(&r, s);
292 				if('0' <= r && r <= '7')
293 					n = 8*n + r - '0';
294 				else {
295 					if (i == 0)
296 					{
297 						*rp = r;
298 						return s;
299 					}
300 					*rp = n;
301 					return save;
302 				}
303 			}
304 			if(n > 0377)
305 				error("char>0377");
306 		}
307 		*rp = n;
308 	}
309 	return s;
310 }
311 
312 long
canon(Pcb * p)313 canon(Pcb *p)
314 {
315 	Rune r;
316 
317 	if (p->final >= 0) {
318 		if (p->last < p->final)
319 			return ++p->last;
320 		p->final = -1;
321 	}
322 	if (*p->current == '\0')
323 		return -1;
324 	if(*p->current == '-' && p->last >= 0 && p->current[1]){
325 		p->current = getrune(p->current+1, &r);
326 		if (r < p->last)
327 			error ("Invalid range specification");
328 		if (r > p->last) {
329 			p->final = r;
330 			return ++p->last;
331 		}
332 	}
333 	p->current = getrune(p->current, &r);
334 	p->last = r;
335 	return p->last;
336 }
337 
338 void
Pinit(Pcb * p,char * cp)339 Pinit(Pcb *p, char *cp)
340 {
341 	p->current = p->base = cp;
342 	p->last = p->final = -1;
343 }
344 void
Prewind(Pcb * p)345 Prewind(Pcb *p)
346 {
347 	p->current = p->base;
348 	p->last = p->final = -1;
349 }
/*
 * Print the message s on fd 2, prefixed with "tr: ", then call
 * exits() with s as the exit status string.
 */
void
error(char *s)
{
	fprint(2, "tr: %s\n", s);
	exits(s);
}
356