1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <stdarg.h> 5 #include "dict.h" 6 7 Dict dicts[] = { 8 {"oed", "Oxford English Dictionary, 2nd Ed.", 9 "/lib/dict/oed2", "/lib/dict/oed2index", 10 oednextoff, oedprintentry, oedprintkey}, 11 {"ahd", "American Heritage Dictionary, 2nd College Ed.", 12 "/lib/ahd/DICT.DB", "/lib/ahd/index", 13 ahdnextoff, ahdprintentry, ahdprintkey}, 14 {"thesaurus", "Collins Thesaurus", 15 "/lib/dict/thesaurus", "/lib/dict/thesindex", 16 thesnextoff, thesprintentry, thesprintkey}, 17 18 {"ce", "Gendai Chinese->English", 19 "/lib/dict/world/sansdata/sandic24.dat", 20 "/lib/dict/world/sansdata/ceindex", 21 worldnextoff, worldprintentry, worldprintkey}, 22 {"ceh", "Gendai Chinese->English (Hanzi index)", 23 "/lib/dict/world/sansdata/sandic24.dat", 24 "/lib/dict/world/sansdata/cehindex", 25 worldnextoff, worldprintentry, worldprintkey}, 26 {"ec", "Gendai English->Chinese", 27 "/lib/dict/world/sansdata/sandic24.dat", 28 "/lib/dict/world/sansdata/ecindex", 29 worldnextoff, worldprintentry, worldprintkey}, 30 31 {"dae", "Gyldendal Danish->English", 32 "/lib/dict/world/gylddata/sandic30.dat", 33 "/lib/dict/world/gylddata/daeindex", 34 worldnextoff, worldprintentry, worldprintkey}, 35 {"eda", "Gyldendal English->Danish", 36 "/lib/dict/world/gylddata/sandic29.dat", 37 "/lib/dict/world/gylddata/edaindex", 38 worldnextoff, worldprintentry, worldprintkey}, 39 40 {"due", "Wolters-Noordhoff Dutch->English", 41 "/lib/dict/world/woltdata/sandic07.dat", 42 "/lib/dict/world/woltdata/deindex", 43 worldnextoff, worldprintentry, worldprintkey}, 44 {"edu", "Wolters-Noordhoff English->Dutch", 45 "/lib/dict/world/woltdata/sandic06.dat", 46 "/lib/dict/world/woltdata/edindex", 47 worldnextoff, worldprintentry, worldprintkey}, 48 49 {"fie", "WSOY Finnish->English", 50 "/lib/dict/world/werndata/sandic32.dat", 51 "/lib/dict/world/werndata/fieindex", 52 worldnextoff, worldprintentry, worldprintkey}, 53 {"efi", "WSOY English->Finnish", 54 "/lib/dict/world/werndata/sandic31.dat", 55 "/lib/dict/world/werndata/efiindex", 56 worldnextoff, worldprintentry, worldprintkey}, 57 58 {"fe", "Collins French->English", 59 "/lib/dict/fe", "/lib/dict/feindex", 60 pcollnextoff, pcollprintentry, pcollprintkey}, 61 {"ef", "Collins English->French", 62 "/lib/dict/ef", "/lib/dict/efindex", 63 pcollnextoff, pcollprintentry, pcollprintkey}, 64 65 {"ge", "Collins German->English", 66 "/lib/dict/ge", "/lib/dict/geindex", 67 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 68 {"eg", "Collins English->German", 69 "/lib/dict/eg", "/lib/dict/egindex", 70 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 71 72 {"ie", "Collins Italian->English", 73 "/lib/dict/ie", "/lib/dict/ieindex", 74 pcollnextoff, pcollprintentry, pcollprintkey}, 75 {"ei", "Collins English->Italian", 76 "/lib/dict/ei", "/lib/dict/eiindex", 77 pcollnextoff, pcollprintentry, pcollprintkey}, 78 79 {"je", "Sanshusha Japanese->English", 80 "/lib/dict/world/sansdata/sandic18.dat", 81 "/lib/dict/world/sansdata/jeindex", 82 worldnextoff, worldprintentry, worldprintkey}, 83 {"jek", "Sanshusha Japanese->English (Kanji index)", 84 "/lib/dict/world/sansdata/sandic18.dat", 85 "/lib/dict/world/sansdata/jekindex", 86 worldnextoff, worldprintentry, worldprintkey}, 87 {"ej", "Sanshusha English->Japanese", 88 "/lib/dict/world/sansdata/sandic18.dat", 89 "/lib/dict/world/sansdata/ejindex", 90 worldnextoff, worldprintentry, worldprintkey}, 91 92 {"tjeg", "Sanshusha technical Japanese->English,German", 93 "/lib/dict/world/sansdata/sandic16.dat", 94 "/lib/dict/world/sansdata/tjegindex", 95 worldnextoff, worldprintentry, worldprintkey}, 96 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)", 97 "/lib/dict/world/sansdata/sandic16.dat", 98 "/lib/dict/world/sansdata/tjegkindex", 99 worldnextoff, worldprintentry, worldprintkey}, 100 {"tegj", "Sanshusha technical English->German,Japanese", 101 "/lib/dict/world/sansdata/sandic16.dat", 102 "/lib/dict/world/sansdata/tegjindex", 103 worldnextoff, worldprintentry, worldprintkey}, 104 {"tgje", "Sanshusha technical German->Japanese,English", 105 "/lib/dict/world/sansdata/sandic16.dat", 106 "/lib/dict/world/sansdata/tgjeindex", 107 worldnextoff, worldprintentry, worldprintkey}, 108 109 {"ne", "Kunnskapforlaget Norwegian->English", 110 "/lib/dict/world/kunndata/sandic28.dat", 111 "/lib/dict/world/kunndata/neindex", 112 worldnextoff, worldprintentry, worldprintkey}, 113 {"en", "Kunnskapforlaget English->Norwegian", 114 "/lib/dict/world/kunndata/sandic27.dat", 115 "/lib/dict/world/kunndata/enindex", 116 worldnextoff, worldprintentry, worldprintkey}, 117 118 {"re", "Leon Ungier Russian->English", 119 "/lib/dict/re", "/lib/dict/reindex", 120 simplenextoff, simpleprintentry, simpleprintkey}, 121 {"er", "Leon Ungier English->Russian", 122 "/lib/dict/re", "/lib/dict/erindex", 123 simplenextoff, simpleprintentry, simpleprintkey}, 124 125 {"se", "Collins Spanish->English", 126 "/lib/dict/se", "/lib/dict/seindex", 127 pcollnextoff, pcollprintentry, pcollprintkey}, 128 {"es", "Collins English->Spanish", 129 "/lib/dict/es", "/lib/dict/esindex", 130 pcollnextoff, pcollprintentry, pcollprintkey}, 131 132 {"swe", "Esselte Studium Swedish->English", 133 "/lib/dict/world/essedata/sandic34.dat", 134 "/lib/dict/world/essedata/sweindex", 135 worldnextoff, worldprintentry, worldprintkey}, 136 {"esw", "Esselte Studium English->Swedish", 137 "/lib/dict/world/essedata/sandic33.dat", 138 "/lib/dict/world/essedata/eswindex", 139 worldnextoff, worldprintentry, worldprintkey}, 140 141 {"movie", "Movies -- by title", 142 "/lib/movie/data", "/lib/dict/movtindex", 143 movienextoff, movieprintentry, movieprintkey}, 144 {"moviea", "Movies -- by actor", 145 "/lib/movie/data", "/lib/dict/movaindex", 146 movienextoff, movieprintentry, movieprintkey}, 147 {"movied", "Movies -- by director", 148 "/lib/movie/data", "/lib/dict/movdindex", 149 movienextoff, movieprintentry, movieprintkey}, 150 151 {"slang", "English Slang", 152 "/lib/dict/slang", "/lib/dict/slangindex", 153 slangnextoff, slangprintentry, slangprintkey}, 154 155 {"robert", "Robert Électronique", 156 "/lib/dict/robert/_pointers", "/lib/dict/robert/_index", 157 robertnextoff, robertindexentry, robertprintkey}, 158 {"robertv", "Robert Électronique - formes des verbes", 159 "/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex", 160 robertnextflex, robertflexentry, robertprintkey}, 161 162 {0, 0, 0, 0, 0} 163 }; 164 165 typedef struct Lig Lig; 166 struct Lig { 167 Rune start; /* accent rune */ 168 Rune *pairs; /* <char,accented version> pairs */ 169 }; 170 171 static Lig ligtab[Nligs] = { 172 [LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"}, 173 [LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"}, 174 [LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"}, 175 [LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"}, 176 [LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"}, 177 [LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"}, 178 [LRNG-LIGS] {L'˚', L"AÅaåUŮuů"}, 179 [LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"}, 180 [LDTB-LIGS] {L'.', L""}, 181 [LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"}, 182 [LFRB-LIGS] {L'̯', L""}, 183 [LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"}, 184 [LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"}, 185 [LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"}, 186 [LASP-LIGS] {L'ʽ', L""}, 187 [LLEN-LIGS] {L'ʼ', L""}, 188 [LBRB-LIGS] {L'̮', L""} 189 }; 190 191 Rune *multitab[Nmulti] = { 192 [MAAS-MULTI] L"ʽα", 193 [MALN-MULTI] L"ʼα", 194 [MAND-MULTI] L"and", 195 [MAOQ-MULTI] L"a/q", 196 [MBRA-MULTI] L"<|", 197 [MDD-MULTI] L"..", 198 [MDDD-MULTI] L"...", 199 [MEAS-MULTI] L"ʽε", 200 [MELN-MULTI] L"ʼε", 201 [MEMM-MULTI] L"——", 202 [MHAS-MULTI] L"ʽη", 203 [MHLN-MULTI] L"ʼη", 204 [MIAS-MULTI] L"ʽι", 205 [MILN-MULTI] L"ʼι", 206 [MLCT-MULTI] L"ct", 207 [MLFF-MULTI] L"ff", 208 [MLFFI-MULTI] L"ffi", 209 [MLFFL-MULTI] L"ffl", 210 [MLFL-MULTI] L"fl", 211 [MLFI-MULTI] L"fi", 212 [MLLS-MULTI] L"ɫɫ", 213 [MLST-MULTI] L"st", 214 [MOAS-MULTI] L"ʽο", 215 [MOLN-MULTI] L"ʼο", 216 [MOR-MULTI] L"or", 217 [MRAS-MULTI] L"ʽρ", 218 [MRLN-MULTI] L"ʼρ", 219 [MTT-MULTI] L"~~", 220 [MUAS-MULTI] L"ʽυ", 221 [MULN-MULTI] L"ʼυ", 222 [MWAS-MULTI] L"ʽω", 223 [MWLN-MULTI] L"ʼω", 224 [MOE-MULTI] L"oe", 225 [MES-MULTI] L" ", 226 }; 227 228 #define risupper(r) (L'A' <= (r) && (r) <= L'Z') 229 #define rislatin1(r) (0xC0 <= (r) && (r) <= 0xFF) 230 #define rtolower(r) ((r)-'A'+'a') 231 232 static Rune latin_fold_tab[] = 233 { 234 /* Table to fold latin 1 characters to ASCII equivalents 235 based at Rune value 0xc0 236 237 À Á Â Ã Ä Å Æ Ç 238 È É Ê Ë Ì Í Î Ï 239 Ð Ñ Ò Ó Ô Õ Ö × 240 Ø Ù Ú Û Ü Ý Þ ß 241 à á â ã ä å æ ç 242 è é ê ë ì í î ï 243 ð ñ ò ó ô õ ö ÷ 244 ø ù ú û ü ý þ ÿ 245 */ 246 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 247 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 248 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 249 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , 250 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 251 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 252 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 253 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', 254 }; 255 256 static Rune *ttabstack[20]; 257 static int ntt; 258 259 /* 260 * tab is an array of n Assoc's, sorted by key. 261 * Look for key in tab, and return corresponding val 262 * or -1 if not there 263 */ 264 long 265 lookassoc(Assoc *tab, int n, char *key) 266 { 267 Assoc *q; 268 long i, low, high; 269 int r; 270 271 for(low = -1, high = n; high > low+1; ){ 272 i = (high+low)/2; 273 q = &tab[i]; 274 if((r=strcmp(key, q->key))<0) 275 high = i; 276 else if(r == 0) 277 return q->val; 278 else 279 low=i; 280 } 281 return -1; 282 } 283 284 long 285 looknassoc(Nassoc *tab, int n, long key) 286 { 287 Nassoc *q; 288 long i, low, high; 289 290 for(low = -1, high = n; high > low+1; ){ 291 i = (high+low)/2; 292 q = &tab[i]; 293 if(key < q->key) 294 high = i; 295 else if(key == q->key) 296 return q->val; 297 else 298 low=i; 299 } 300 return -1; 301 } 302 303 void 304 err(char *fmt, ...) 305 { 306 char buf[1000]; 307 va_list v; 308 309 va_start(v, fmt); 310 doprint(buf, &buf[1000], fmt, v); 311 va_end(v); 312 fprint(2, "%s: %s\n", argv0, buf); 313 } 314 315 /* 316 * Write the rune r to bout, keeping track of line length 317 * and breaking the lines (at blanks) when they get too long 318 */ 319 void 320 outrune(long r) 321 { 322 if(outinhibit) 323 return; 324 if(++linelen > breaklen && r == L' ') { 325 Bputc(bout, '\n'); 326 linelen = 0; 327 } else 328 Bputrune(bout, r); 329 } 330 331 void 332 outrunes(Rune *rp) 333 { 334 Rune r; 335 336 while((r = *rp++) != 0) 337 outrune(r); 338 } 339 340 /* like outrune, but when arg is know to be a char */ 341 void 342 outchar(int c) 343 { 344 if(outinhibit) 345 return; 346 if(++linelen > breaklen && c == ' ') { 347 c ='\n'; 348 linelen = 0; 349 } 350 BPUTC(bout, c); 351 } 352 353 void 354 outchars(char *s) 355 { 356 char c; 357 358 while((c = *s++) != 0) 359 outchar(c); 360 } 361 362 void 363 outprint(char *fmt, ...) 364 { 365 char buf[1000]; 366 va_list v; 367 368 va_start(v, fmt); 369 doprint(buf, &buf[1000], fmt, v); 370 va_end(v); 371 outchars(buf); 372 } 373 374 void 375 outpiece(char *b, char *e) 376 { 377 int c, lastc; 378 379 lastc = 0; 380 while(b < e) { 381 c = *b++; 382 if(c == '\n') 383 c = ' '; 384 if(!(c == ' ' && lastc == ' ')) 385 outchar(c); 386 lastc = c; 387 } 388 } 389 390 /* 391 * Go to new line if not already there; indent if ind != 0. 392 * If ind > 1, leave a blank line too. 393 * Slight hack: assume if current line is only one or two 394 * characters long, then they were spaces. 395 */ 396 void 397 outnl(int ind) 398 { 399 if(outinhibit) 400 return; 401 if(ind) { 402 if(ind > 1) { 403 if(linelen > 2) 404 Bputc(bout, '\n'); 405 Bprint(bout, "\n "); 406 } else if(linelen == 0) 407 Bprint(bout, " "); 408 else if(linelen == 1) 409 Bputc(bout, ' '); 410 else if(linelen != 2) 411 Bprint(bout, "\n "); 412 linelen = 2; 413 } else { 414 if(linelen) { 415 Bputc(bout, '\n'); 416 linelen = 0; 417 } 418 } 419 } 420 421 /* 422 * Fold the runes in null-terminated rp. 423 * Use the sort(1) definition of folding (uppercase to lowercase, 424 * latin1-accented characters to corresponding unaccented chars) 425 */ 426 void 427 fold(Rune *rp) 428 { 429 Rune r; 430 431 while((r = *rp) != 0) { 432 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 433 r = latin_fold_tab[r-0xc0]; 434 if(risupper(r)) 435 r = rtolower(r); 436 *rp++ = r; 437 } 438 } 439 440 /* 441 * Like fold, but put folded result into new 442 * (assumed to have enough space). 443 * old is a regular expression, but we know that 444 * metacharacters aren't affected 445 */ 446 void 447 foldre(char *new, char *old) 448 { 449 Rune r; 450 451 while(*old) { 452 old += chartorune(&r, old); 453 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 454 r = latin_fold_tab[r-0xc0]; 455 if(risupper(r)) 456 r = rtolower(r); 457 new += runetochar(new, &r); 458 } 459 *new = 0; 460 } 461 462 /* 463 * acomp(s, t) returns: 464 * -2 if s strictly precedes t 465 * -1 if s is a prefix of t 466 * 0 if s is the same as t 467 * 1 if t is a prefix of s 468 * 2 if t strictly precedes s 469 */ 470 471 int 472 acomp(Rune *s, Rune *t) 473 { 474 int cs, ct; 475 476 for(;;) { 477 cs = *s; 478 ct = *t; 479 if(cs != ct) 480 break; 481 if(cs == 0) 482 return 0; 483 s++; 484 t++; 485 } 486 if(cs == 0) 487 return -1; 488 if(ct == 0) 489 return 1; 490 if(cs < ct) 491 return -2; 492 return 2; 493 } 494 495 /* 496 * Copy null terminated Runes from 'from' to 'to'. 497 */ 498 void 499 runescpy(Rune *to, Rune *from) 500 { 501 while((*to++ = *from++) != 0) 502 continue; 503 } 504 505 /* 506 * Conversion of unsigned number to long, no overflow detection 507 */ 508 long 509 runetol(Rune *r) 510 { 511 int c; 512 long n; 513 514 n = 0; 515 for(;; r++){ 516 c = *r; 517 if(L'0'<=c && c<=L'9') 518 c -= '0'; 519 else 520 break; 521 n = n*10 + c; 522 } 523 return n; 524 } 525 526 /* 527 * See if there is a rune corresponding to the accented 528 * version of r with accent acc (acc in [LIGS..LIGE-1]), 529 * and return it if so, else return NONE. 530 */ 531 Rune 532 liglookup(Rune acc, Rune r) 533 { 534 Rune *p; 535 536 if(acc < LIGS || acc >= LIGE) 537 return NONE; 538 for(p = ligtab[acc-LIGS].pairs; *p; p += 2) 539 if(*p == r) 540 return *(p+1); 541 return NONE; 542 } 543 544 /* 545 * Maintain a translation table stack (a translation table 546 * is an array of Runes indexed by bytes or 7-bit bytes). 547 * If starting is true, push the curtab onto the stack 548 * and return newtab; else pop the top of the stack and 549 * return it. 550 * If curtab is 0, initialize the stack and return. 551 */ 552 Rune * 553 changett(Rune *curtab, Rune *newtab, int starting) 554 { 555 if(curtab == 0) { 556 ntt = 0; 557 return 0; 558 } 559 if(starting) { 560 if(ntt >= asize(ttabstack)) { 561 if(debug) 562 err("translation stack overflow"); 563 return curtab; 564 } 565 ttabstack[ntt++] = curtab; 566 return newtab; 567 } else { 568 if(ntt == 0) { 569 if(debug) 570 err("translation stack underflow"); 571 return curtab; 572 } 573 return ttabstack[--ntt]; 574 } 575 } 576