#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/*
 * Table of the dictionaries this program knows about.
 * The list ends with an all-zero entry.
 * Each initializer supplies, in order: a short name, a one-line
 * description, two file paths (data file, then index file) and
 * three routines named *nextoff, *printentry/*indexentry and
 * *printkey.
 * NOTE(review): the field meanings are read off the initializer
 * values; confirm against the declaration of Dict in dict.h.
 */
Dict dicts[] = {
	{"oed", "Oxford English Dictionary, 2nd Ed.",
	 "/lib/dict/oed2", "/lib/dict/oed2index",
	 oednextoff, oedprintentry, oedprintkey},
	{"ahd", "American Heritage Dictionary, 2nd College Ed.",
	 "/lib/ahd/DICT.DB", "/lib/ahd/index",
	 ahdnextoff, ahdprintentry, ahdprintkey},
	{"pgw", "Project Gutenberg Webster Dictionary",
	 "/lib/dict/pgw", "/lib/dict/pgwindex",
	 pgwnextoff, pgwprintentry, pgwprintkey},
	{"thesaurus", "Collins Thesaurus",
	 "/lib/dict/thesaurus", "/lib/dict/thesindex",
	 thesnextoff, thesprintentry, thesprintkey},

	{"ce", "Gendai Chinese->English",
	 "/lib/dict/world/sansdata/sandic24.dat",
	 "/lib/dict/world/sansdata/ceindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"ceh", "Gendai Chinese->English (Hanzi index)",
	 "/lib/dict/world/sansdata/sandic24.dat",
	 "/lib/dict/world/sansdata/cehindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"ec", "Gendai English->Chinese",
	 "/lib/dict/world/sansdata/sandic24.dat",
	 "/lib/dict/world/sansdata/ecindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"dae", "Gyldendal Danish->English",
	 "/lib/dict/world/gylddata/sandic30.dat",
	 "/lib/dict/world/gylddata/daeindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"eda", "Gyldendal English->Danish",
	 "/lib/dict/world/gylddata/sandic29.dat",
	 "/lib/dict/world/gylddata/edaindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"due", "Wolters-Noordhoff Dutch->English",
	 "/lib/dict/world/woltdata/sandic07.dat",
	 "/lib/dict/world/woltdata/deindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"edu", "Wolters-Noordhoff English->Dutch",
	 "/lib/dict/world/woltdata/sandic06.dat",
	 "/lib/dict/world/woltdata/edindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"fie", "WSOY Finnish->English",
	 "/lib/dict/world/werndata/sandic32.dat",
	 "/lib/dict/world/werndata/fieindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"efi", "WSOY English->Finnish",
	 "/lib/dict/world/werndata/sandic31.dat",
	 "/lib/dict/world/werndata/efiindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"fe", "Collins French->English",
	 "/lib/dict/fe", "/lib/dict/feindex",
	 pcollnextoff, pcollprintentry, pcollprintkey},
	{"ef", "Collins English->French",
	 "/lib/dict/ef", "/lib/dict/efindex",
	 pcollnextoff, pcollprintentry, pcollprintkey},

	{"ge", "Collins German->English",
	 "/lib/dict/ge", "/lib/dict/geindex",
	 pcollgnextoff, pcollgprintentry, pcollgprintkey},
	{"eg", "Collins English->German",
	 "/lib/dict/eg", "/lib/dict/egindex",
	 pcollgnextoff, pcollgprintentry, pcollgprintkey},

	{"ie", "Collins Italian->English",
	 "/lib/dict/ie", "/lib/dict/ieindex",
	 pcollnextoff, pcollprintentry, pcollprintkey},
	{"ei", "Collins English->Italian",
	 "/lib/dict/ei", "/lib/dict/eiindex",
	 pcollnextoff, pcollprintentry, pcollprintkey},

	{"je", "Sanshusha Japanese->English",
	 "/lib/dict/world/sansdata/sandic18.dat",
	 "/lib/dict/world/sansdata/jeindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"jek", "Sanshusha Japanese->English (Kanji index)",
	 "/lib/dict/world/sansdata/sandic18.dat",
	 "/lib/dict/world/sansdata/jekindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"ej", "Sanshusha English->Japanese",
	 "/lib/dict/world/sansdata/sandic18.dat",
	 "/lib/dict/world/sansdata/ejindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"tjeg", "Sanshusha technical Japanese->English,German",
	 "/lib/dict/world/sansdata/sandic16.dat",
	 "/lib/dict/world/sansdata/tjegindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
	 "/lib/dict/world/sansdata/sandic16.dat",
	 "/lib/dict/world/sansdata/tjegkindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"tegj", "Sanshusha technical English->German,Japanese",
	 "/lib/dict/world/sansdata/sandic16.dat",
	 "/lib/dict/world/sansdata/tegjindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"tgje", "Sanshusha technical German->Japanese,English",
	 "/lib/dict/world/sansdata/sandic16.dat",
	 "/lib/dict/world/sansdata/tgjeindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"ne", "Kunnskapforlaget Norwegian->English",
	 "/lib/dict/world/kunndata/sandic28.dat",
	 "/lib/dict/world/kunndata/neindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"en", "Kunnskapforlaget English->Norwegian",
	 "/lib/dict/world/kunndata/sandic27.dat",
	 "/lib/dict/world/kunndata/enindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"re", "Leon Ungier Russian->English",
	 "/lib/dict/re", "/lib/dict/reindex",
	 simplenextoff, simpleprintentry, simpleprintkey},
	{"er", "Leon Ungier English->Russian",
	 "/lib/dict/re", "/lib/dict/erindex",
	 simplenextoff, simpleprintentry, simpleprintkey},

	{"se", "Collins Spanish->English",
	 "/lib/dict/se", "/lib/dict/seindex",
	 pcollnextoff, pcollprintentry, pcollprintkey},
	{"es", "Collins English->Spanish",
	 "/lib/dict/es", "/lib/dict/esindex",
	 pcollnextoff, pcollprintentry, pcollprintkey},

	{"swe", "Esselte Studium Swedish->English",
	 "/lib/dict/world/essedata/sandic34.dat",
	 "/lib/dict/world/essedata/sweindex",
	 worldnextoff, worldprintentry, worldprintkey},
	{"esw", "Esselte Studium English->Swedish",
	 "/lib/dict/world/essedata/sandic33.dat",
	 "/lib/dict/world/essedata/eswindex",
	 worldnextoff, worldprintentry, worldprintkey},

	{"movie", "Movies -- by title",
	 "/lib/movie/data", "/lib/dict/movtindex",
	 movienextoff, movieprintentry, movieprintkey},
	{"moviea", "Movies -- by actor",
	 "/lib/movie/data", "/lib/dict/movaindex",
	 movienextoff, movieprintentry, movieprintkey},
	{"movied", "Movies -- by director",
	 "/lib/movie/data", "/lib/dict/movdindex",
	 movienextoff, movieprintentry, movieprintkey},

	{"slang", "English Slang",
	 "/lib/dict/slang", "/lib/dict/slangindex",
	 slangnextoff, slangprintentry, slangprintkey},

	{"robert", "Robert Électronique",
	 "/lib/dict/robert/_pointers", "/lib/dict/robert/_index",
	 robertnextoff, robertindexentry, robertprintkey},
	{"robertv", "Robert Électronique - formes des verbes",
	 "/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex",
	 robertnextflex, robertflexentry, robertprintkey},

	{0, 0, 0, 0, 0}
};

/*
 * One accent and the precomposed characters available for it.
 */
typedef struct Lig Lig;
struct Lig {
	Rune	start;		/* accent rune */
	Rune	*pairs;		/* <char,accented version> pairs */
};

/*
 * Accent table, indexed by (accent code - LIGS).
 * pairs is a null-terminated Rune string read two runes at a
 * time: a base character followed by its accented form.
 * An empty string means no precomposed forms are listed
 * for that accent.
 */
static Lig ligtab[Nligs] = {
[LACU-LIGS]	{L'´',	L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
[LGRV-LIGS]	{L'ˋ',	L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
[LUML-LIGS]	{L'¨',	L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
[LCED-LIGS]	{L'¸',	L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
[LTIL-LIGS]	{L'˜',	L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
[LBRV-LIGS]	{L'˘',	L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
[LRNG-LIGS]	{L'˚',	L"AÅaåUŮuů"},
[LDOT-LIGS]	{L'˙',	L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
[LDTB-LIGS]	{L'.',	L""},
[LFRN-LIGS]	{L'⌢',	L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
[LFRB-LIGS]	{L'̯',	L""},
[LOGO-LIGS]	{L'˛',	L"AĄaąEĘeęIĮiįıįUŲuų"},
[LMAC-LIGS]	{L'¯',	L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
[LHCK-LIGS]	{L'ˇ',	L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
[LASP-LIGS]	{L'ʽ',	L""},
[LLEN-LIGS]	{L'ʼ',	L""},
[LBRB-LIGS]	{L'̮',	L""}
};

/*
 * Rune strings indexed by (code - MULTI).
 * NOTE(review): presumably the multi-rune expansions of the
 * special character codes declared in dict.h -- confirm there.
 */
Rune *multitab[Nmulti] = {
[MAAS-MULTI]	L"ʽα",
[MALN-MULTI]	L"ʼα",
[MAND-MULTI]	L"and",
[MAOQ-MULTI]	L"a/q",
[MBRA-MULTI]	L"<|",
[MDD-MULTI]	L"..",
[MDDD-MULTI]	L"...",
[MEAS-MULTI]	L"ʽε",
[MELN-MULTI]	L"ʼε",
[MEMM-MULTI]	L"——",
[MHAS-MULTI]	L"ʽη",
[MHLN-MULTI]	L"ʼη",
[MIAS-MULTI]	L"ʽι",
[MILN-MULTI]	L"ʼι",
[MLCT-MULTI]	L"ct",
[MLFF-MULTI]	L"ff",
[MLFFI-MULTI]	L"ffi",
[MLFFL-MULTI]	L"ffl",
[MLFL-MULTI]	L"fl",
[MLFI-MULTI]	L"fi",
[MLLS-MULTI]	L"ɫɫ",
[MLST-MULTI]	L"st",
[MOAS-MULTI]	L"ʽο",
[MOLN-MULTI]	L"ʼο",
[MOR-MULTI]	L"or",
[MRAS-MULTI]	L"ʽρ",
[MRLN-MULTI]	L"ʼρ",
[MTT-MULTI]	L"~~",
[MUAS-MULTI]	L"ʽυ",
[MULN-MULTI]	L"ʼυ",
[MWAS-MULTI]	L"ʽω",
[MWLN-MULTI]	L"ʼω",
[MOE-MULTI]	L"oe",
[MES-MULTI]	L" ",
};

/*
 * Rune classification helpers used by fold and foldre.
 * risupper: r is an ASCII capital letter.
 * rislatin1: r lies in 0xC0..0xFF, the range latin_fold_tab covers.
 * rtolower: ASCII lower-case form; only meaningful when risupper(r).
 * risupper and rislatin1 evaluate their argument twice, so pass
 * expressions without side effects.
 */
#define	risupper(r)	(L'A' <= (r) && (r) <= L'Z')
#define	rislatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)
#define	rtolower(r)	((r)-'A'+'a')

/*
 * Indexed by (rune - 0xc0).  A 0 entry means the rune has no
 * ASCII equivalent; fold and foldre leave such runes unchanged.
 */
static Rune latin_fold_tab[] =
{
/*	Table to fold latin 1 characters to ASCII equivalents
			based at Rune value 0xc0

	 À    Á    Â    Ã    Ä    Å    Æ    Ç
	 È    É    Ê    Ë    Ì    Í    Î    Ï
	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
	 à    á    â    ã    ä    å    æ    ç
	 è    é    ê    ë    ì    í    î    ï
	 ð    ñ    ò    ó    ô    õ    ö    ÷
	 ø    ù    ú    û    ü    ý    þ    ÿ
*/
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
};

/* translation tables saved by changett; ntt is the number in use */
static Rune 	*ttabstack[20];
static int	ntt;

/*
 * tab is an array of n Assoc's, sorted by key.
263 * Look for key in tab, and return corresponding val 264 * or -1 if not there 265 */ 266 long 267 lookassoc(Assoc *tab, int n, char *key) 268 { 269 Assoc *q; 270 long i, low, high; 271 int r; 272 273 for(low = -1, high = n; high > low+1; ){ 274 i = (high+low)/2; 275 q = &tab[i]; 276 if((r=strcmp(key, q->key))<0) 277 high = i; 278 else if(r == 0) 279 return q->val; 280 else 281 low=i; 282 } 283 return -1; 284 } 285 286 long 287 looknassoc(Nassoc *tab, int n, long key) 288 { 289 Nassoc *q; 290 long i, low, high; 291 292 for(low = -1, high = n; high > low+1; ){ 293 i = (high+low)/2; 294 q = &tab[i]; 295 if(key < q->key) 296 high = i; 297 else if(key == q->key) 298 return q->val; 299 else 300 low=i; 301 } 302 return -1; 303 } 304 305 void 306 err(char *fmt, ...) 307 { 308 char buf[1000]; 309 va_list v; 310 311 va_start(v, fmt); 312 vsnprint(buf, sizeof(buf), fmt, v); 313 va_end(v); 314 fprint(2, "%s: %s\n", argv0, buf); 315 } 316 317 /* 318 * Write the rune r to bout, keeping track of line length 319 * and breaking the lines (at blanks) when they get too long 320 */ 321 void 322 outrune(long r) 323 { 324 if(outinhibit) 325 return; 326 if(++linelen > breaklen && r == L' ') { 327 Bputc(bout, '\n'); 328 linelen = 0; 329 } else 330 Bputrune(bout, r); 331 } 332 333 void 334 outrunes(Rune *rp) 335 { 336 Rune r; 337 338 while((r = *rp++) != 0) 339 outrune(r); 340 } 341 342 /* like outrune, but when arg is know to be a char */ 343 void 344 outchar(int c) 345 { 346 if(outinhibit) 347 return; 348 if(++linelen > breaklen && c == ' ') { 349 c ='\n'; 350 linelen = 0; 351 } 352 Bputc(bout, c); 353 } 354 355 void 356 outchars(char *s) 357 { 358 char c; 359 360 while((c = *s++) != 0) 361 outchar(c); 362 } 363 364 void 365 outprint(char *fmt, ...) 
366 { 367 char buf[1000]; 368 va_list v; 369 370 va_start(v, fmt); 371 vsnprint(buf, sizeof(buf), fmt, v); 372 va_end(v); 373 outchars(buf); 374 } 375 376 void 377 outpiece(char *b, char *e) 378 { 379 int c, lastc; 380 381 lastc = 0; 382 while(b < e) { 383 c = *b++; 384 if(c == '\n') 385 c = ' '; 386 if(!(c == ' ' && lastc == ' ')) 387 outchar(c); 388 lastc = c; 389 } 390 } 391 392 /* 393 * Go to new line if not already there; indent if ind != 0. 394 * If ind > 1, leave a blank line too. 395 * Slight hack: assume if current line is only one or two 396 * characters long, then they were spaces. 397 */ 398 void 399 outnl(int ind) 400 { 401 if(outinhibit) 402 return; 403 if(ind) { 404 if(ind > 1) { 405 if(linelen > 2) 406 Bputc(bout, '\n'); 407 Bprint(bout, "\n "); 408 } else if(linelen == 0) 409 Bprint(bout, " "); 410 else if(linelen == 1) 411 Bputc(bout, ' '); 412 else if(linelen != 2) 413 Bprint(bout, "\n "); 414 linelen = 2; 415 } else { 416 if(linelen) { 417 Bputc(bout, '\n'); 418 linelen = 0; 419 } 420 } 421 } 422 423 /* 424 * Fold the runes in null-terminated rp. 425 * Use the sort(1) definition of folding (uppercase to lowercase, 426 * latin1-accented characters to corresponding unaccented chars) 427 */ 428 void 429 fold(Rune *rp) 430 { 431 Rune r; 432 433 while((r = *rp) != 0) { 434 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 435 r = latin_fold_tab[r-0xc0]; 436 if(risupper(r)) 437 r = rtolower(r); 438 *rp++ = r; 439 } 440 } 441 442 /* 443 * Like fold, but put folded result into new 444 * (assumed to have enough space). 
445 * old is a regular expression, but we know that 446 * metacharacters aren't affected 447 */ 448 void 449 foldre(char *new, char *old) 450 { 451 Rune r; 452 453 while(*old) { 454 old += chartorune(&r, old); 455 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 456 r = latin_fold_tab[r-0xc0]; 457 if(risupper(r)) 458 r = rtolower(r); 459 new += runetochar(new, &r); 460 } 461 *new = 0; 462 } 463 464 /* 465 * acomp(s, t) returns: 466 * -2 if s strictly precedes t 467 * -1 if s is a prefix of t 468 * 0 if s is the same as t 469 * 1 if t is a prefix of s 470 * 2 if t strictly precedes s 471 */ 472 473 int 474 acomp(Rune *s, Rune *t) 475 { 476 int cs, ct; 477 478 for(;;) { 479 cs = *s; 480 ct = *t; 481 if(cs != ct) 482 break; 483 if(cs == 0) 484 return 0; 485 s++; 486 t++; 487 } 488 if(cs == 0) 489 return -1; 490 if(ct == 0) 491 return 1; 492 if(cs < ct) 493 return -2; 494 return 2; 495 } 496 497 /* 498 * Copy null terminated Runes from 'from' to 'to'. 499 */ 500 void 501 runescpy(Rune *to, Rune *from) 502 { 503 while((*to++ = *from++) != 0) 504 continue; 505 } 506 507 /* 508 * Conversion of unsigned number to long, no overflow detection 509 */ 510 long 511 runetol(Rune *r) 512 { 513 int c; 514 long n; 515 516 n = 0; 517 for(;; r++){ 518 c = *r; 519 if(L'0'<=c && c<=L'9') 520 c -= '0'; 521 else 522 break; 523 n = n*10 + c; 524 } 525 return n; 526 } 527 528 /* 529 * See if there is a rune corresponding to the accented 530 * version of r with accent acc (acc in [LIGS..LIGE-1]), 531 * and return it if so, else return NONE. 532 */ 533 Rune 534 liglookup(Rune acc, Rune r) 535 { 536 Rune *p; 537 538 if(acc < LIGS || acc >= LIGE) 539 return NONE; 540 for(p = ligtab[acc-LIGS].pairs; *p; p += 2) 541 if(*p == r) 542 return *(p+1); 543 return NONE; 544 } 545 546 /* 547 * Maintain a translation table stack (a translation table 548 * is an array of Runes indexed by bytes or 7-bit bytes). 
549 * If starting is true, push the curtab onto the stack 550 * and return newtab; else pop the top of the stack and 551 * return it. 552 * If curtab is 0, initialize the stack and return. 553 */ 554 Rune * 555 changett(Rune *curtab, Rune *newtab, int starting) 556 { 557 if(curtab == 0) { 558 ntt = 0; 559 return 0; 560 } 561 if(starting) { 562 if(ntt >= asize(ttabstack)) { 563 if(debug) 564 err("translation stack overflow"); 565 return curtab; 566 } 567 ttabstack[ntt++] = curtab; 568 return newtab; 569 } else { 570 if(ntt == 0) { 571 if(debug) 572 err("translation stack underflow"); 573 return curtab; 574 } 575 return ttabstack[--ntt]; 576 } 577 } 578