1 #include <u.h>
2 #include <libc.h>
3
/*
 * Parse control block: tracks the position inside one character-set
 * specification while it is being expanded rune by rune.
 */
typedef struct PCB
{
	char	*base;		/* first byte of the specification text */
	char	*current;	/* byte at which parsing resumes */
	long	last;		/* most recent Rune handed out; -1 when none */
	long	final;		/* upper end of the range being expanded; -1 when none */
} Pcb;
11
12 uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
13
14 #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])
15 #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
16 #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
17
18 uchar f[(Runemax+1)/8];
19 uchar t[(Runemax+1)/8];
20 char wbuf[4096];
21 char *wptr;
22
23 Pcb pfrom, pto;
24
25 int cflag;
26 int dflag;
27 int sflag;
28
29 void complement(void);
30 void delete(void);
31 void squeeze(void);
32 void translit(void);
33 long canon(Pcb*);
34 char *getrune(char*, Rune*);
35 void Pinit(Pcb*, char*);
36 void Prewind(Pcb *p);
37 int readrune(int, long*);
38 void wflush(int);
39 void writerune(int, Rune);
40
41 static void
usage(void)42 usage(void)
43 {
44 fprint(2, "usage: %s [-cds] [string1 [string2]]\n", argv0);
45 exits("usage");
46 }
47
48 void
main(int argc,char ** argv)49 main(int argc, char **argv)
50 {
51 ARGBEGIN{
52 case 's': sflag++; break;
53 case 'd': dflag++; break;
54 case 'c': cflag++; break;
55 default: usage();
56 }ARGEND
57 if(argc>0)
58 Pinit(&pfrom, argv[0]);
59 if(argc>1)
60 Pinit(&pto, argv[1]);
61 if(argc>2)
62 usage();
63 if(dflag) {
64 if ((sflag && argc != 2) || (!sflag && argc != 1))
65 usage();
66 delete();
67 } else {
68 if (argc != 2)
69 usage();
70 if (cflag)
71 complement();
72 else translit();
73 }
74 exits(0);
75 }
76
77 void
delete(void)78 delete(void)
79 {
80 long c, last;
81
82 if (cflag) {
83 memset((char *) f, 0xff, sizeof f);
84 while ((c = canon(&pfrom)) >= 0)
85 CLEARBIT(f, c);
86 } else {
87 while ((c = canon(&pfrom)) >= 0)
88 SETBIT(f, c);
89 }
90 if (sflag) {
91 while ((c = canon(&pto)) >= 0)
92 SETBIT(t, c);
93 }
94
95 last = 0x10000;
96 while (readrune(0, &c) > 0) {
97 if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
98 last = c;
99 writerune(1, (Rune) c);
100 }
101 }
102 wflush(1);
103 }
104
105 void
complement(void)106 complement(void)
107 {
108 Rune *p;
109 int i;
110 long from, to, lastc, high;
111
112 lastc = 0;
113 high = 0;
114 while ((from = canon(&pfrom)) >= 0) {
115 if (from > high) high = from;
116 SETBIT(f, from);
117 }
118 while ((to = canon(&pto)) > 0) {
119 if (to > high) high = to;
120 SETBIT(t,to);
121 }
122 Prewind(&pto);
123 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
124 sysfatal("no memory");
125 for (i = 0; i <= high; i++){
126 if (!BITSET(f,i)) {
127 if ((to = canon(&pto)) < 0)
128 to = lastc;
129 else lastc = to;
130 p[i] = to;
131 }
132 else p[i] = i;
133 }
134 if (sflag){
135 lastc = 0x10000;
136 while (readrune(0, &from) > 0) {
137 if (from > high)
138 from = to;
139 else
140 from = p[from];
141 if (from != lastc || !BITSET(t,from)) {
142 lastc = from;
143 writerune(1, (Rune) from);
144 }
145 }
146
147 } else {
148 while (readrune(0, &from) > 0){
149 if (from > high)
150 from = to;
151 else
152 from = p[from];
153 writerune(1, (Rune) from);
154 }
155 }
156 wflush(1);
157 }
158
159 void
translit(void)160 translit(void)
161 {
162 Rune *p;
163 int i;
164 long from, to, lastc, high;
165
166 lastc = 0;
167 high = 0;
168 while ((from = canon(&pfrom)) >= 0)
169 if (from > high) high = from;
170 Prewind(&pfrom);
171 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
172 sysfatal("no memory");
173 for (i = 0; i <= high; i++)
174 p[i] = i;
175 while ((from = canon(&pfrom)) >= 0) {
176 if ((to = canon(&pto)) < 0)
177 to = lastc;
178 else lastc = to;
179 if (BITSET(f,from) && p[from] != to)
180 sysfatal("ambiguous translation");
181 SETBIT(f,from);
182 p[from] = to;
183 SETBIT(t,to);
184 }
185 while ((to = canon(&pto)) >= 0) {
186 SETBIT(t,to);
187 }
188 if (sflag){
189 lastc = 0x10000;
190 while (readrune(0, &from) > 0) {
191 if (from <= high)
192 from = p[from];
193 if (from != lastc || !BITSET(t,from)) {
194 lastc = from;
195 writerune(1, (Rune) from);
196 }
197 }
198
199 } else {
200 while (readrune(0, &from) > 0) {
201 if (from <= high)
202 from = p[from];
203 writerune(1, (Rune) from);
204 }
205 }
206 wflush(1);
207 }
208
209 int
readrune(int fd,long * rp)210 readrune(int fd, long *rp)
211 {
212 Rune r;
213 int j;
214 static int i, n;
215 static char buf[4096];
216
217 j = i;
218 for (;;) {
219 if (i >= n) {
220 wflush(1);
221 if (j != i)
222 memcpy(buf, buf+j, n-j);
223 i = n-j;
224 n = read(fd, &buf[i], sizeof(buf)-i);
225 if (n < 0)
226 sysfatal("read error: %r");
227 if (n == 0)
228 return 0;
229 j = 0;
230 n += i;
231 }
232 i++;
233 if (fullrune(&buf[j], i-j))
234 break;
235 }
236 chartorune(&r, &buf[j]);
237 *rp = r;
238 return 1;
239 }
240
241 void
writerune(int fd,Rune r)242 writerune(int fd, Rune r)
243 {
244 char buf[UTFmax];
245 int n;
246
247 if (!wptr)
248 wptr = wbuf;
249 n = runetochar(buf, (Rune*)&r);
250 if (wptr+n >= wbuf+sizeof(wbuf))
251 wflush(fd);
252 memcpy(wptr, buf, n);
253 wptr += n;
254 }
255
256 void
wflush(int fd)257 wflush(int fd)
258 {
259 if (wptr && wptr > wbuf)
260 if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
261 sysfatal("write error: %r");
262 wptr = wbuf;
263 }
264
265 char *
getrune(char * s,Rune * rp)266 getrune(char *s, Rune *rp)
267 {
268 Rune r;
269 char *save;
270 int i, n;
271
272 s += chartorune(rp, s);
273 if((r = *rp) == '\\' && *s){
274 n = 0;
275 if (*s == 'x') {
276 s++;
277 for (i = 0; i < 4; i++) {
278 save = s;
279 s += chartorune(&r, s);
280 if ('0' <= r && r <= '9')
281 n = 16*n + r - '0';
282 else if ('a' <= r && r <= 'f')
283 n = 16*n + r - 'a' + 10;
284 else if ('A' <= r && r <= 'F')
285 n = 16*n + r - 'A' + 10;
286 else {
287 if (i == 0)
288 *rp = 'x';
289 else *rp = n;
290 return save;
291 }
292 }
293 } else {
294 for(i = 0; i < 3; i++) {
295 save = s;
296 s += chartorune(&r, s);
297 if('0' <= r && r <= '7')
298 n = 8*n + r - '0';
299 else {
300 if (i == 0)
301 {
302 *rp = r;
303 return s;
304 }
305 *rp = n;
306 return save;
307 }
308 }
309 if(n > 0377)
310 sysfatal("character > 0377");
311 }
312 *rp = n;
313 }
314 return s;
315 }
316
317 long
canon(Pcb * p)318 canon(Pcb *p)
319 {
320 Rune r;
321
322 if (p->final >= 0) {
323 if (p->last < p->final)
324 return ++p->last;
325 p->final = -1;
326 }
327 if (*p->current == '\0')
328 return -1;
329 if(*p->current == '-' && p->last >= 0 && p->current[1]){
330 p->current = getrune(p->current+1, &r);
331 if (r < p->last)
332 sysfatal("invalid range specification");
333 if (r > p->last) {
334 p->final = r;
335 return ++p->last;
336 }
337 }
338 p->current = getrune(p->current, &r);
339 p->last = r;
340 return p->last;
341 }
342
343 void
Pinit(Pcb * p,char * cp)344 Pinit(Pcb *p, char *cp)
345 {
346 p->current = p->base = cp;
347 p->last = p->final = -1;
348 }
349 void
Prewind(Pcb * p)350 Prewind(Pcb *p)
351 {
352 p->current = p->base;
353 p->last = p->final = -1;
354 }
355