1 #include <u.h> 2 #include <libc.h> 3 4 typedef struct PCB /* Control block controlling specification parse */ 5 { 6 char *base; /* start of specification */ 7 char *current; /* current parse point */ 8 long last; /* last Rune returned */ 9 long final; /* final Rune in a span */ 10 } Pcb; 11 12 uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 13 14 #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07]) 15 #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07]) 16 #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07]) 17 18 #define MAXRUNE 0xFFFF 19 20 uchar f[(MAXRUNE+1)/8]; 21 uchar t[(MAXRUNE+1)/8]; 22 char wbuf[4096]; 23 char *wptr; 24 25 Pcb pfrom, pto; 26 27 int cflag; 28 int dflag; 29 int sflag; 30 31 void complement(void); 32 void delete(void); 33 void squeeze(void); 34 void translit(void); 35 long canon(Pcb*); 36 char *getrune(char*, Rune*); 37 void Pinit(Pcb*, char*); 38 void Prewind(Pcb *p); 39 int readrune(int, long*); 40 void wflush(int); 41 void writerune(int, Rune); 42 43 static void 44 usage(void) 45 { 46 fprint(2, "usage: %s [-cds] [string1 [string2]]\n", argv0); 47 exits("usage"); 48 } 49 50 void 51 main(int argc, char **argv) 52 { 53 ARGBEGIN{ 54 case 's': sflag++; break; 55 case 'd': dflag++; break; 56 case 'c': cflag++; break; 57 default: usage(); 58 }ARGEND 59 if(argc>0) 60 Pinit(&pfrom, argv[0]); 61 if(argc>1) 62 Pinit(&pto, argv[1]); 63 if(argc>2) 64 usage(); 65 if(dflag) { 66 if ((sflag && argc != 2) || (!sflag && argc != 1)) 67 usage(); 68 delete(); 69 } else { 70 if (argc != 2) 71 usage(); 72 if (cflag) 73 complement(); 74 else translit(); 75 } 76 exits(0); 77 } 78 79 void 80 delete(void) 81 { 82 long c, last; 83 84 if (cflag) { 85 memset((char *) f, 0xff, sizeof f); 86 while ((c = canon(&pfrom)) >= 0) 87 CLEARBIT(f, c); 88 } else { 89 while ((c = canon(&pfrom)) >= 0) 90 SETBIT(f, c); 91 } 92 if (sflag) { 93 while ((c = canon(&pto)) >= 0) 94 SETBIT(t, c); 95 } 96 97 last = 0x10000; 98 while (readrune(0, &c) > 0) { 99 if(!BITSET(f, c) && (c != last || !BITSET(t,c))) { 100 last = c; 101 writerune(1, (Rune) c); 102 } 103 } 104 wflush(1); 105 } 106 107 void 108 complement(void) 109 { 110 Rune *p; 111 int i; 112 long from, to, lastc, high; 113 114 lastc = 0; 115 high = 0; 116 while ((from = canon(&pfrom)) >= 0) { 117 if (from > high) high = from; 118 SETBIT(f, from); 119 } 120 while ((to = canon(&pto)) > 0) { 121 if (to > high) high = to; 122 SETBIT(t,to); 123 } 124 Prewind(&pto); 125 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0) 126 sysfatal("no memory"); 127 for (i = 0; i <= high; i++){ 128 if (!BITSET(f,i)) { 129 if ((to = canon(&pto)) < 0) 130 to = lastc; 131 else lastc = to; 132 p[i] = to; 133 } 134 else p[i] = i; 135 } 136 if (sflag){ 137 lastc = 0x10000; 138 while (readrune(0, &from) > 0) { 139 if (from > high) 140 from = to; 141 else 142 from = p[from]; 143 if (from != lastc || !BITSET(t,from)) { 144 lastc = from; 145 writerune(1, (Rune) from); 146 } 147 } 148 149 } else { 150 while (readrune(0, &from) > 0){ 151 if (from > high) 152 from = to; 153 else 154 from = p[from]; 155 writerune(1, (Rune) from); 156 } 157 } 158 wflush(1); 159 } 160 161 void 162 translit(void) 163 { 164 Rune *p; 165 int i; 166 long from, to, lastc, high; 167 168 lastc = 0; 169 high = 0; 170 while ((from = canon(&pfrom)) >= 0) 171 if (from > high) high = from; 172 Prewind(&pfrom); 173 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0) 174 sysfatal("no memory"); 175 for (i = 0; i <= high; i++) 176 p[i] = i; 177 while ((from = canon(&pfrom)) >= 0) { 178 if ((to = canon(&pto)) < 0) 179 to = lastc; 180 else lastc = to; 181 if (BITSET(f,from) && p[from] != to) 182 sysfatal("ambiguous translation"); 183 SETBIT(f,from); 184 p[from] = to; 185 SETBIT(t,to); 186 } 187 while ((to = canon(&pto)) >= 0) { 188 SETBIT(t,to); 189 } 190 if (sflag){ 191 lastc = 0x10000; 192 while (readrune(0, &from) > 0) { 193 if (from <= high) 194 from = p[from]; 195 if (from != lastc || !BITSET(t,from)) { 196 lastc = from; 197 writerune(1, (Rune) from); 198 } 199 } 200 201 } else { 202 while (readrune(0, &from) > 0) { 203 if (from <= high) 204 from = p[from]; 205 writerune(1, (Rune) from); 206 } 207 } 208 wflush(1); 209 } 210 211 int 212 readrune(int fd, long *rp) 213 { 214 Rune r; 215 int j; 216 static int i, n; 217 static char buf[4096]; 218 219 j = i; 220 for (;;) { 221 if (i >= n) { 222 wflush(1); 223 if (j != i) 224 memcpy(buf, buf+j, n-j); 225 i = n-j; 226 n = read(fd, &buf[i], sizeof(buf)-i); 227 if (n < 0) 228 sysfatal("read error: %r"); 229 if (n == 0) 230 return 0; 231 j = 0; 232 n += i; 233 } 234 i++; 235 if (fullrune(&buf[j], i-j)) 236 break; 237 } 238 chartorune(&r, &buf[j]); 239 *rp = r; 240 return 1; 241 } 242 243 void 244 writerune(int fd, Rune r) 245 { 246 char buf[UTFmax]; 247 int n; 248 249 if (!wptr) 250 wptr = wbuf; 251 n = runetochar(buf, (Rune*)&r); 252 if (wptr+n >= wbuf+sizeof(wbuf)) 253 wflush(fd); 254 memcpy(wptr, buf, n); 255 wptr += n; 256 } 257 258 void 259 wflush(int fd) 260 { 261 if (wptr && wptr > wbuf) 262 if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf) 263 sysfatal("write error: %r"); 264 wptr = wbuf; 265 } 266 267 char * 268 getrune(char *s, Rune *rp) 269 { 270 Rune r; 271 char *save; 272 int i, n; 273 274 s += chartorune(rp, s); 275 if((r = *rp) == '\\' && *s){ 276 n = 0; 277 if (*s == 'x') { 278 s++; 279 for (i = 0; i < 4; i++) { 280 save = s; 281 s += chartorune(&r, s); 282 if ('0' <= r && r <= '9') 283 n = 16*n + r - '0'; 284 else if ('a' <= r && r <= 'f') 285 n = 16*n + r - 'a' + 10; 286 else if ('A' <= r && r <= 'F') 287 n = 16*n + r - 'A' + 10; 288 else { 289 if (i == 0) 290 *rp = 'x'; 291 else *rp = n; 292 return save; 293 } 294 } 295 } else { 296 for(i = 0; i < 3; i++) { 297 save = s; 298 s += chartorune(&r, s); 299 if('0' <= r && r <= '7') 300 n = 8*n + r - '0'; 301 else { 302 if (i == 0) 303 { 304 *rp = r; 305 return s; 306 } 307 *rp = n; 308 return save; 309 } 310 } 311 if(n > 0377) 312 sysfatal("character > 0377"); 313 } 314 *rp = n; 315 } 316 return s; 317 } 318 319 long 320 canon(Pcb *p) 321 { 322 Rune r; 323 324 if (p->final >= 0) { 325 if (p->last < p->final) 326 return ++p->last; 327 p->final = -1; 328 } 329 if (*p->current == '\0') 330 return -1; 331 if(*p->current == '-' && p->last >= 0 && p->current[1]){ 332 p->current = getrune(p->current+1, &r); 333 if (r < p->last) 334 sysfatal("invalid range specification"); 335 if (r > p->last) { 336 p->final = r; 337 return ++p->last; 338 } 339 } 340 p->current = getrune(p->current, &r); 341 p->last = r; 342 return p->last; 343 } 344 345 void 346 Pinit(Pcb *p, char *cp) 347 { 348 p->current = p->base = cp; 349 p->last = p->final = -1; 350 } 351 void 352 Prewind(Pcb *p) 353 { 354 p->current = p->base; 355 p->last = p->final = -1; 356 } 357