1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "dict.h" 5 6 Dict dicts[] = { 7 {"oed", "Oxford English Dictionary, 2nd Ed.", 8 "/lib/dict/oed2", "/lib/dict/oed2index", 9 oednextoff, oedprintentry, oedprintkey}, 10 {"ahd", "American Heritage Dictionary, 2nd College Ed.", 11 "/lib/ahd/DICT.DB", "/lib/ahd/index", 12 ahdnextoff, ahdprintentry, ahdprintkey}, 13 {"pgw", "Project Gutenberg Webster Dictionary", 14 "/lib/dict/pgw", "/lib/dict/pgwindex", 15 pgwnextoff, pgwprintentry, pgwprintkey}, 16 {"thesaurus", "Collins Thesaurus", 17 "/lib/dict/thesaurus", "/lib/dict/thesindex", 18 thesnextoff, thesprintentry, thesprintkey}, 19 {"roget", "Project Gutenberg Roget's Thesaurus", 20 "/lib/dict/roget", "/lib/dict/rogetindex", 21 rogetnextoff, rogetprintentry, rogetprintkey}, 22 23 {"ce", "Gendai Chinese->English", 24 "/lib/dict/world/sansdata/sandic24.dat", 25 "/lib/dict/world/sansdata/ceindex", 26 worldnextoff, worldprintentry, worldprintkey}, 27 {"ceh", "Gendai Chinese->English (Hanzi index)", 28 "/lib/dict/world/sansdata/sandic24.dat", 29 "/lib/dict/world/sansdata/cehindex", 30 worldnextoff, worldprintentry, worldprintkey}, 31 {"ec", "Gendai English->Chinese", 32 "/lib/dict/world/sansdata/sandic24.dat", 33 "/lib/dict/world/sansdata/ecindex", 34 worldnextoff, worldprintentry, worldprintkey}, 35 36 {"dae", "Gyldendal Danish->English", 37 "/lib/dict/world/gylddata/sandic30.dat", 38 "/lib/dict/world/gylddata/daeindex", 39 worldnextoff, worldprintentry, worldprintkey}, 40 {"eda", "Gyldendal English->Danish", 41 "/lib/dict/world/gylddata/sandic29.dat", 42 "/lib/dict/world/gylddata/edaindex", 43 worldnextoff, worldprintentry, worldprintkey}, 44 45 {"due", "Wolters-Noordhoff Dutch->English", 46 "/lib/dict/world/woltdata/sandic07.dat", 47 "/lib/dict/world/woltdata/deindex", 48 worldnextoff, worldprintentry, worldprintkey}, 49 {"edu", "Wolters-Noordhoff English->Dutch", 50 "/lib/dict/world/woltdata/sandic06.dat", 51 "/lib/dict/world/woltdata/edindex", 52 worldnextoff, worldprintentry, worldprintkey}, 53 54 {"fie", "WSOY Finnish->English", 55 "/lib/dict/world/werndata/sandic32.dat", 56 "/lib/dict/world/werndata/fieindex", 57 worldnextoff, worldprintentry, worldprintkey}, 58 {"efi", "WSOY English->Finnish", 59 "/lib/dict/world/werndata/sandic31.dat", 60 "/lib/dict/world/werndata/efiindex", 61 worldnextoff, worldprintentry, worldprintkey}, 62 63 {"fe", "Collins French->English", 64 "/lib/dict/fe", "/lib/dict/feindex", 65 pcollnextoff, pcollprintentry, pcollprintkey}, 66 {"ef", "Collins English->French", 67 "/lib/dict/ef", "/lib/dict/efindex", 68 pcollnextoff, pcollprintentry, pcollprintkey}, 69 70 {"ge", "Collins German->English", 71 "/lib/dict/ge", "/lib/dict/geindex", 72 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 73 {"eg", "Collins English->German", 74 "/lib/dict/eg", "/lib/dict/egindex", 75 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 76 77 {"ie", "Collins Italian->English", 78 "/lib/dict/ie", "/lib/dict/ieindex", 79 pcollnextoff, pcollprintentry, pcollprintkey}, 80 {"ei", "Collins English->Italian", 81 "/lib/dict/ei", "/lib/dict/eiindex", 82 pcollnextoff, pcollprintentry, pcollprintkey}, 83 84 {"je", "Sanshusha Japanese->English", 85 "/lib/dict/world/sansdata/sandic18.dat", 86 "/lib/dict/world/sansdata/jeindex", 87 worldnextoff, worldprintentry, worldprintkey}, 88 {"jek", "Sanshusha Japanese->English (Kanji index)", 89 "/lib/dict/world/sansdata/sandic18.dat", 90 "/lib/dict/world/sansdata/jekindex", 91 worldnextoff, worldprintentry, worldprintkey}, 92 {"ej", "Sanshusha English->Japanese", 93 "/lib/dict/world/sansdata/sandic18.dat", 94 "/lib/dict/world/sansdata/ejindex", 95 worldnextoff, worldprintentry, worldprintkey}, 96 97 {"tjeg", "Sanshusha technical Japanese->English,German", 98 "/lib/dict/world/sansdata/sandic16.dat", 99 "/lib/dict/world/sansdata/tjegindex", 100 worldnextoff, worldprintentry, worldprintkey}, 101 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)", 102 "/lib/dict/world/sansdata/sandic16.dat", 103 "/lib/dict/world/sansdata/tjegkindex", 104 worldnextoff, worldprintentry, worldprintkey}, 105 {"tegj", "Sanshusha technical English->German,Japanese", 106 "/lib/dict/world/sansdata/sandic16.dat", 107 "/lib/dict/world/sansdata/tegjindex", 108 worldnextoff, worldprintentry, worldprintkey}, 109 {"tgje", "Sanshusha technical German->Japanese,English", 110 "/lib/dict/world/sansdata/sandic16.dat", 111 "/lib/dict/world/sansdata/tgjeindex", 112 worldnextoff, worldprintentry, worldprintkey}, 113 114 {"ne", "Kunnskapforlaget Norwegian->English", 115 "/lib/dict/world/kunndata/sandic28.dat", 116 "/lib/dict/world/kunndata/neindex", 117 worldnextoff, worldprintentry, worldprintkey}, 118 {"en", "Kunnskapforlaget English->Norwegian", 119 "/lib/dict/world/kunndata/sandic27.dat", 120 "/lib/dict/world/kunndata/enindex", 121 worldnextoff, worldprintentry, worldprintkey}, 122 123 {"re", "Leon Ungier Russian->English", 124 "/lib/dict/re", "/lib/dict/reindex", 125 simplenextoff, simpleprintentry, simpleprintkey}, 126 {"er", "Leon Ungier English->Russian", 127 "/lib/dict/re", "/lib/dict/erindex", 128 simplenextoff, simpleprintentry, simpleprintkey}, 129 130 {"se", "Collins Spanish->English", 131 "/lib/dict/se", "/lib/dict/seindex", 132 pcollnextoff, pcollprintentry, pcollprintkey}, 133 {"es", "Collins English->Spanish", 134 "/lib/dict/es", "/lib/dict/esindex", 135 pcollnextoff, pcollprintentry, pcollprintkey}, 136 137 {"swe", "Esselte Studium Swedish->English", 138 "/lib/dict/world/essedata/sandic34.dat", 139 "/lib/dict/world/essedata/sweindex", 140 worldnextoff, worldprintentry, worldprintkey}, 141 {"esw", "Esselte Studium English->Swedish", 142 "/lib/dict/world/essedata/sandic33.dat", 143 "/lib/dict/world/essedata/eswindex", 144 worldnextoff, worldprintentry, worldprintkey}, 145 146 {"movie", "Movies -- by title", 147 "/lib/movie/data", "/lib/dict/movtindex", 148 movienextoff, movieprintentry, movieprintkey}, 149 {"moviea", "Movies -- by actor", 150 "/lib/movie/data", "/lib/dict/movaindex", 151 movienextoff, movieprintentry, movieprintkey}, 152 {"movied", "Movies -- by director", 153 "/lib/movie/data", "/lib/dict/movdindex", 154 movienextoff, movieprintentry, movieprintkey}, 155 156 {"slang", "English Slang", 157 "/lib/dict/slang", "/lib/dict/slangindex", 158 slangnextoff, slangprintentry, slangprintkey}, 159 160 {"robert", "Robert Électronique", 161 "/lib/dict/robert/_pointers", "/lib/dict/robert/_index", 162 robertnextoff, robertindexentry, robertprintkey}, 163 {"robertv", "Robert Électronique - formes des verbes", 164 "/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex", 165 robertnextflex, robertflexentry, robertprintkey}, 166 167 {0, 0, 0, 0, 0} 168 }; 169 170 typedef struct Lig Lig; 171 struct Lig { 172 Rune start; /* accent rune */ 173 Rune *pairs; /* <char,accented version> pairs */ 174 }; 175 176 static Lig ligtab[Nligs] = { 177 [LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"}, 178 [LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"}, 179 [LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"}, 180 [LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"}, 181 [LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"}, 182 [LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"}, 183 [LRNG-LIGS] {L'˚', L"AÅaåUŮuů"}, 184 [LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"}, 185 [LDTB-LIGS] {L'.', L""}, 186 [LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"}, 187 [LFRB-LIGS] {L'̯', L""}, 188 [LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"}, 189 [LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"}, 190 [LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"}, 191 [LASP-LIGS] {L'ʽ', L""}, 192 [LLEN-LIGS] {L'ʼ', L""}, 193 [LBRB-LIGS] {L'̮', L""} 194 }; 195 196 Rune *multitab[Nmulti] = { 197 [MAAS-MULTI] L"ʽα", 198 [MALN-MULTI] L"ʼα", 199 [MAND-MULTI] L"and", 200 [MAOQ-MULTI] L"a/q", 201 [MBRA-MULTI] L"<|", 202 [MDD-MULTI] L"..", 203 [MDDD-MULTI] L"...", 204 [MEAS-MULTI] L"ʽε", 205 [MELN-MULTI] L"ʼε", 206 [MEMM-MULTI] L"——", 207 [MHAS-MULTI] L"ʽη", 208 [MHLN-MULTI] L"ʼη", 209 [MIAS-MULTI] L"ʽι", 210 [MILN-MULTI] L"ʼι", 211 [MLCT-MULTI] L"ct", 212 [MLFF-MULTI] L"ff", 213 [MLFFI-MULTI] L"ffi", 214 [MLFFL-MULTI] L"ffl", 215 [MLFL-MULTI] L"fl", 216 [MLFI-MULTI] L"fi", 217 [MLLS-MULTI] L"ɫɫ", 218 [MLST-MULTI] L"st", 219 [MOAS-MULTI] L"ʽο", 220 [MOLN-MULTI] L"ʼο", 221 [MOR-MULTI] L"or", 222 [MRAS-MULTI] L"ʽρ", 223 [MRLN-MULTI] L"ʼρ", 224 [MTT-MULTI] L"~~", 225 [MUAS-MULTI] L"ʽυ", 226 [MULN-MULTI] L"ʼυ", 227 [MWAS-MULTI] L"ʽω", 228 [MWLN-MULTI] L"ʼω", 229 [MOE-MULTI] L"oe", 230 [MES-MULTI] L" ", 231 }; 232 233 #define risupper(r) (L'A' <= (r) && (r) <= L'Z') 234 #define rislatin1(r) (0xC0 <= (r) && (r) <= 0xFF) 235 #define rtolower(r) ((r)-'A'+'a') 236 237 static Rune latin_fold_tab[] = 238 { 239 /* Table to fold latin 1 characters to ASCII equivalents 240 based at Rune value 0xc0 241 242 À Á Â Ã Ä Å Æ Ç 243 È É Ê Ë Ì Í Î Ï 244 Ð Ñ Ò Ó Ô Õ Ö × 245 Ø Ù Ú Û Ü Ý Þ ß 246 à á â ã ä å æ ç 247 è é ê ë ì í î ï 248 ð ñ ò ó ô õ ö ÷ 249 ø ù ú û ü ý þ ÿ 250 */ 251 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 252 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 253 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 254 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , 255 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 256 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 257 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 258 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', 259 }; 260 261 static Rune *ttabstack[20]; 262 static int ntt; 263 264 /* 265 * tab is an array of n Assoc's, sorted by key. 266 * Look for key in tab, and return corresponding val 267 * or -1 if not there 268 */ 269 long 270 lookassoc(Assoc *tab, int n, char *key) 271 { 272 Assoc *q; 273 long i, low, high; 274 int r; 275 276 for(low = -1, high = n; high > low+1; ){ 277 i = (high+low)/2; 278 q = &tab[i]; 279 if((r=strcmp(key, q->key))<0) 280 high = i; 281 else if(r == 0) 282 return q->val; 283 else 284 low=i; 285 } 286 return -1; 287 } 288 289 long 290 looknassoc(Nassoc *tab, int n, long key) 291 { 292 Nassoc *q; 293 long i, low, high; 294 295 for(low = -1, high = n; high > low+1; ){ 296 i = (high+low)/2; 297 q = &tab[i]; 298 if(key < q->key) 299 high = i; 300 else if(key == q->key) 301 return q->val; 302 else 303 low=i; 304 } 305 return -1; 306 } 307 308 void 309 err(char *fmt, ...) 310 { 311 char buf[1000]; 312 va_list v; 313 314 va_start(v, fmt); 315 vsnprint(buf, sizeof(buf), fmt, v); 316 va_end(v); 317 fprint(2, "%s: %s\n", argv0, buf); 318 } 319 320 /* 321 * Write the rune r to bout, keeping track of line length 322 * and breaking the lines (at blanks) when they get too long 323 */ 324 void 325 outrune(long r) 326 { 327 if(outinhibit) 328 return; 329 if(++linelen > breaklen && r == L' ') { 330 Bputc(bout, '\n'); 331 linelen = 0; 332 } else 333 Bputrune(bout, r); 334 } 335 336 void 337 outrunes(Rune *rp) 338 { 339 Rune r; 340 341 while((r = *rp++) != 0) 342 outrune(r); 343 } 344 345 /* like outrune, but when arg is know to be a char */ 346 void 347 outchar(int c) 348 { 349 if(outinhibit) 350 return; 351 if(++linelen > breaklen && c == ' ') { 352 c ='\n'; 353 linelen = 0; 354 } 355 Bputc(bout, c); 356 } 357 358 void 359 outchars(char *s) 360 { 361 char c; 362 363 while((c = *s++) != 0) 364 outchar(c); 365 } 366 367 void 368 outprint(char *fmt, ...) 369 { 370 char buf[1000]; 371 va_list v; 372 373 va_start(v, fmt); 374 vsnprint(buf, sizeof(buf), fmt, v); 375 va_end(v); 376 outchars(buf); 377 } 378 379 void 380 outpiece(char *b, char *e) 381 { 382 int c, lastc; 383 384 lastc = 0; 385 while(b < e) { 386 c = *b++; 387 if(c == '\n') 388 c = ' '; 389 if(!(c == ' ' && lastc == ' ')) 390 outchar(c); 391 lastc = c; 392 } 393 } 394 395 /* 396 * Go to new line if not already there; indent if ind != 0. 397 * If ind > 1, leave a blank line too. 398 * Slight hack: assume if current line is only one or two 399 * characters long, then they were spaces. 400 */ 401 void 402 outnl(int ind) 403 { 404 if(outinhibit) 405 return; 406 if(ind) { 407 if(ind > 1) { 408 if(linelen > 2) 409 Bputc(bout, '\n'); 410 Bprint(bout, "\n "); 411 } else if(linelen == 0) 412 Bprint(bout, " "); 413 else if(linelen == 1) 414 Bputc(bout, ' '); 415 else if(linelen != 2) 416 Bprint(bout, "\n "); 417 linelen = 2; 418 } else { 419 if(linelen) { 420 Bputc(bout, '\n'); 421 linelen = 0; 422 } 423 } 424 } 425 426 /* 427 * Fold the runes in null-terminated rp. 428 * Use the sort(1) definition of folding (uppercase to lowercase, 429 * latin1-accented characters to corresponding unaccented chars) 430 */ 431 void 432 fold(Rune *rp) 433 { 434 Rune r; 435 436 while((r = *rp) != 0) { 437 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 438 r = latin_fold_tab[r-0xc0]; 439 if(risupper(r)) 440 r = rtolower(r); 441 *rp++ = r; 442 } 443 } 444 445 /* 446 * Like fold, but put folded result into new 447 * (assumed to have enough space). 448 * old is a regular expression, but we know that 449 * metacharacters aren't affected 450 */ 451 void 452 foldre(char *new, char *old) 453 { 454 Rune r; 455 456 while(*old) { 457 old += chartorune(&r, old); 458 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 459 r = latin_fold_tab[r-0xc0]; 460 if(risupper(r)) 461 r = rtolower(r); 462 new += runetochar(new, &r); 463 } 464 *new = 0; 465 } 466 467 /* 468 * acomp(s, t) returns: 469 * -2 if s strictly precedes t 470 * -1 if s is a prefix of t 471 * 0 if s is the same as t 472 * 1 if t is a prefix of s 473 * 2 if t strictly precedes s 474 */ 475 476 int 477 acomp(Rune *s, Rune *t) 478 { 479 int cs, ct; 480 481 for(;;) { 482 cs = *s; 483 ct = *t; 484 if(cs != ct) 485 break; 486 if(cs == 0) 487 return 0; 488 s++; 489 t++; 490 } 491 if(cs == 0) 492 return -1; 493 if(ct == 0) 494 return 1; 495 if(cs < ct) 496 return -2; 497 return 2; 498 } 499 500 /* 501 * Copy null terminated Runes from 'from' to 'to'. 502 */ 503 void 504 runescpy(Rune *to, Rune *from) 505 { 506 while((*to++ = *from++) != 0) 507 continue; 508 } 509 510 /* 511 * Conversion of unsigned number to long, no overflow detection 512 */ 513 long 514 runetol(Rune *r) 515 { 516 int c; 517 long n; 518 519 n = 0; 520 for(;; r++){ 521 c = *r; 522 if(L'0'<=c && c<=L'9') 523 c -= '0'; 524 else 525 break; 526 n = n*10 + c; 527 } 528 return n; 529 } 530 531 /* 532 * See if there is a rune corresponding to the accented 533 * version of r with accent acc (acc in [LIGS..LIGE-1]), 534 * and return it if so, else return NONE. 535 */ 536 Rune 537 liglookup(Rune acc, Rune r) 538 { 539 Rune *p; 540 541 if(acc < LIGS || acc >= LIGE) 542 return NONE; 543 for(p = ligtab[acc-LIGS].pairs; *p; p += 2) 544 if(*p == r) 545 return *(p+1); 546 return NONE; 547 } 548 549 /* 550 * Maintain a translation table stack (a translation table 551 * is an array of Runes indexed by bytes or 7-bit bytes). 552 * If starting is true, push the curtab onto the stack 553 * and return newtab; else pop the top of the stack and 554 * return it. 555 * If curtab is 0, initialize the stack and return. 556 */ 557 Rune * 558 changett(Rune *curtab, Rune *newtab, int starting) 559 { 560 if(curtab == 0) { 561 ntt = 0; 562 return 0; 563 } 564 if(starting) { 565 if(ntt >= asize(ttabstack)) { 566 if(debug) 567 err("translation stack overflow"); 568 return curtab; 569 } 570 ttabstack[ntt++] = curtab; 571 return newtab; 572 } else { 573 if(ntt == 0) { 574 if(debug) 575 err("translation stack underflow"); 576 return curtab; 577 } 578 return ttabstack[--ntt]; 579 } 580 } 581