1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "dict.h" 5 6 Dict dicts[] = { 7 {"oed", "Oxford English Dictionary, 2nd Ed.", 8 "/lib/dict/oed2", "/lib/dict/oed2index", 9 oednextoff, oedprintentry, oedprintkey}, 10 {"ahd", "American Heritage Dictionary, 2nd College Ed.", 11 "/lib/ahd/DICT.DB", "/lib/ahd/index", 12 ahdnextoff, ahdprintentry, ahdprintkey}, 13 {"thesaurus", "Collins Thesaurus", 14 "/lib/dict/thesaurus", "/lib/dict/thesindex", 15 thesnextoff, thesprintentry, thesprintkey}, 16 17 {"ce", "Gendai Chinese->English", 18 "/lib/dict/world/sansdata/sandic24.dat", 19 "/lib/dict/world/sansdata/ceindex", 20 worldnextoff, worldprintentry, worldprintkey}, 21 {"ceh", "Gendai Chinese->English (Hanzi index)", 22 "/lib/dict/world/sansdata/sandic24.dat", 23 "/lib/dict/world/sansdata/cehindex", 24 worldnextoff, worldprintentry, worldprintkey}, 25 {"ec", "Gendai English->Chinese", 26 "/lib/dict/world/sansdata/sandic24.dat", 27 "/lib/dict/world/sansdata/ecindex", 28 worldnextoff, worldprintentry, worldprintkey}, 29 30 {"dae", "Gyldendal Danish->English", 31 "/lib/dict/world/gylddata/sandic30.dat", 32 "/lib/dict/world/gylddata/daeindex", 33 worldnextoff, worldprintentry, worldprintkey}, 34 {"eda", "Gyldendal English->Danish", 35 "/lib/dict/world/gylddata/sandic29.dat", 36 "/lib/dict/world/gylddata/edaindex", 37 worldnextoff, worldprintentry, worldprintkey}, 38 39 {"due", "Wolters-Noordhoff Dutch->English", 40 "/lib/dict/world/woltdata/sandic07.dat", 41 "/lib/dict/world/woltdata/deindex", 42 worldnextoff, worldprintentry, worldprintkey}, 43 {"edu", "Wolters-Noordhoff English->Dutch", 44 "/lib/dict/world/woltdata/sandic06.dat", 45 "/lib/dict/world/woltdata/edindex", 46 worldnextoff, worldprintentry, worldprintkey}, 47 48 {"fie", "WSOY Finnish->English", 49 "/lib/dict/world/werndata/sandic32.dat", 50 "/lib/dict/world/werndata/fieindex", 51 worldnextoff, worldprintentry, worldprintkey}, 52 {"efi", "WSOY English->Finnish", 53 "/lib/dict/world/werndata/sandic31.dat", 54 "/lib/dict/world/werndata/efiindex", 55 worldnextoff, worldprintentry, worldprintkey}, 56 57 {"fe", "Collins French->English", 58 "/lib/dict/fe", "/lib/dict/feindex", 59 pcollnextoff, pcollprintentry, pcollprintkey}, 60 {"ef", "Collins English->French", 61 "/lib/dict/ef", "/lib/dict/efindex", 62 pcollnextoff, pcollprintentry, pcollprintkey}, 63 64 {"ge", "Collins German->English", 65 "/lib/dict/ge", "/lib/dict/geindex", 66 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 67 {"eg", "Collins English->German", 68 "/lib/dict/eg", "/lib/dict/egindex", 69 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 70 71 {"ie", "Collins Italian->English", 72 "/lib/dict/ie", "/lib/dict/ieindex", 73 pcollnextoff, pcollprintentry, pcollprintkey}, 74 {"ei", "Collins English->Italian", 75 "/lib/dict/ei", "/lib/dict/eiindex", 76 pcollnextoff, pcollprintentry, pcollprintkey}, 77 78 {"je", "Sanshusha Japanese->English", 79 "/lib/dict/world/sansdata/sandic18.dat", 80 "/lib/dict/world/sansdata/jeindex", 81 worldnextoff, worldprintentry, worldprintkey}, 82 {"jek", "Sanshusha Japanese->English (Kanji index)", 83 "/lib/dict/world/sansdata/sandic18.dat", 84 "/lib/dict/world/sansdata/jekindex", 85 worldnextoff, worldprintentry, worldprintkey}, 86 {"ej", "Sanshusha English->Japanese", 87 "/lib/dict/world/sansdata/sandic18.dat", 88 "/lib/dict/world/sansdata/ejindex", 89 worldnextoff, worldprintentry, worldprintkey}, 90 91 {"tjeg", "Sanshusha technical Japanese->English,German", 92 "/lib/dict/world/sansdata/sandic16.dat", 93 "/lib/dict/world/sansdata/tjegindex", 94 worldnextoff, worldprintentry, worldprintkey}, 95 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)", 96 "/lib/dict/world/sansdata/sandic16.dat", 97 "/lib/dict/world/sansdata/tjegkindex", 98 worldnextoff, worldprintentry, worldprintkey}, 99 {"tegj", "Sanshusha technical English->German,Japanese", 100 "/lib/dict/world/sansdata/sandic16.dat", 101 "/lib/dict/world/sansdata/tegjindex", 102 worldnextoff, worldprintentry, worldprintkey}, 103 {"tgje", "Sanshusha technical German->Japanese,English", 104 "/lib/dict/world/sansdata/sandic16.dat", 105 "/lib/dict/world/sansdata/tgjeindex", 106 worldnextoff, worldprintentry, worldprintkey}, 107 108 {"ne", "Kunnskapforlaget Norwegian->English", 109 "/lib/dict/world/kunndata/sandic28.dat", 110 "/lib/dict/world/kunndata/neindex", 111 worldnextoff, worldprintentry, worldprintkey}, 112 {"en", "Kunnskapforlaget English->Norwegian", 113 "/lib/dict/world/kunndata/sandic27.dat", 114 "/lib/dict/world/kunndata/enindex", 115 worldnextoff, worldprintentry, worldprintkey}, 116 117 {"re", "Leon Ungier Russian->English", 118 "/lib/dict/re", "/lib/dict/reindex", 119 simplenextoff, simpleprintentry, simpleprintkey}, 120 {"er", "Leon Ungier English->Russian", 121 "/lib/dict/re", "/lib/dict/erindex", 122 simplenextoff, simpleprintentry, simpleprintkey}, 123 124 {"se", "Collins Spanish->English", 125 "/lib/dict/se", "/lib/dict/seindex", 126 pcollnextoff, pcollprintentry, pcollprintkey}, 127 {"es", "Collins English->Spanish", 128 "/lib/dict/es", "/lib/dict/esindex", 129 pcollnextoff, pcollprintentry, pcollprintkey}, 130 131 {"swe", "Esselte Studium Swedish->English", 132 "/lib/dict/world/essedata/sandic34.dat", 133 "/lib/dict/world/essedata/sweindex", 134 worldnextoff, worldprintentry, worldprintkey}, 135 {"esw", "Esselte Studium English->Swedish", 136 "/lib/dict/world/essedata/sandic33.dat", 137 "/lib/dict/world/essedata/eswindex", 138 worldnextoff, worldprintentry, worldprintkey}, 139 140 {"movie", "Movies -- by title", 141 "/lib/movie/data", "/lib/dict/movtindex", 142 movienextoff, movieprintentry, movieprintkey}, 143 {"moviea", "Movies -- by actor", 144 "/lib/movie/data", "/lib/dict/movaindex", 145 movienextoff, movieprintentry, movieprintkey}, 146 {"movied", "Movies -- by director", 147 "/lib/movie/data", "/lib/dict/movdindex", 148 movienextoff, movieprintentry, movieprintkey}, 149 150 {"slang", "English Slang", 151 "/lib/dict/slang", "/lib/dict/slangindex", 152 slangnextoff, slangprintentry, slangprintkey}, 153 154 {"robert", "Robert Électronique", 155 "/lib/dict/robert/_pointers", "/lib/dict/robert/_index", 156 robertnextoff, robertindexentry, robertprintkey}, 157 {"robertv", "Robert Électronique - formes des verbes", 158 "/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex", 159 robertnextflex, robertflexentry, robertprintkey}, 160 161 {0, 0, 0, 0, 0} 162 }; 163 164 typedef struct Lig Lig; 165 struct Lig { 166 Rune start; /* accent rune */ 167 Rune *pairs; /* <char,accented version> pairs */ 168 }; 169 170 static Lig ligtab[Nligs] = { 171 [LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"}, 172 [LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"}, 173 [LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"}, 174 [LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"}, 175 [LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"}, 176 [LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"}, 177 [LRNG-LIGS] {L'˚', L"AÅaåUŮuů"}, 178 [LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"}, 179 [LDTB-LIGS] {L'.', L""}, 180 [LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"}, 181 [LFRB-LIGS] {L'̯', L""}, 182 [LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"}, 183 [LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"}, 184 [LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"}, 185 [LASP-LIGS] {L'ʽ', L""}, 186 [LLEN-LIGS] {L'ʼ', L""}, 187 [LBRB-LIGS] {L'̮', L""} 188 }; 189 190 Rune *multitab[Nmulti] = { 191 [MAAS-MULTI] L"ʽα", 192 [MALN-MULTI] L"ʼα", 193 [MAND-MULTI] L"and", 194 [MAOQ-MULTI] L"a/q", 195 [MBRA-MULTI] L"<|", 196 [MDD-MULTI] L"..", 197 [MDDD-MULTI] L"...", 198 [MEAS-MULTI] L"ʽε", 199 [MELN-MULTI] L"ʼε", 200 [MEMM-MULTI] L"——", 201 [MHAS-MULTI] L"ʽη", 202 [MHLN-MULTI] L"ʼη", 203 [MIAS-MULTI] L"ʽι", 204 [MILN-MULTI] L"ʼι", 205 [MLCT-MULTI] L"ct", 206 [MLFF-MULTI] L"ff", 207 [MLFFI-MULTI] L"ffi", 208 [MLFFL-MULTI] L"ffl", 209 [MLFL-MULTI] L"fl", 210 [MLFI-MULTI] L"fi", 211 [MLLS-MULTI] L"ɫɫ", 212 [MLST-MULTI] L"st", 213 [MOAS-MULTI] L"ʽο", 214 [MOLN-MULTI] L"ʼο", 215 [MOR-MULTI] L"or", 216 [MRAS-MULTI] L"ʽρ", 217 [MRLN-MULTI] L"ʼρ", 218 [MTT-MULTI] L"~~", 219 [MUAS-MULTI] L"ʽυ", 220 [MULN-MULTI] L"ʼυ", 221 [MWAS-MULTI] L"ʽω", 222 [MWLN-MULTI] L"ʼω", 223 [MOE-MULTI] L"oe", 224 [MES-MULTI] L" ", 225 }; 226 227 #define risupper(r) (L'A' <= (r) && (r) <= L'Z') 228 #define rislatin1(r) (0xC0 <= (r) && (r) <= 0xFF) 229 #define rtolower(r) ((r)-'A'+'a') 230 231 static Rune latin_fold_tab[] = 232 { 233 /* Table to fold latin 1 characters to ASCII equivalents 234 based at Rune value 0xc0 235 236 À Á Â Ã Ä Å Æ Ç 237 È É Ê Ë Ì Í Î Ï 238 Ð Ñ Ò Ó Ô Õ Ö × 239 Ø Ù Ú Û Ü Ý Þ ß 240 à á â ã ä å æ ç 241 è é ê ë ì í î ï 242 ð ñ ò ó ô õ ö ÷ 243 ø ù ú û ü ý þ ÿ 244 */ 245 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 246 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 247 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 248 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , 249 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 250 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 251 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 252 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', 253 }; 254 255 static Rune *ttabstack[20]; 256 static int ntt; 257 258 /* 259 * tab is an array of n Assoc's, sorted by key. 260 * Look for key in tab, and return corresponding val 261 * or -1 if not there 262 */ 263 long 264 lookassoc(Assoc *tab, int n, char *key) 265 { 266 Assoc *q; 267 long i, low, high; 268 int r; 269 270 for(low = -1, high = n; high > low+1; ){ 271 i = (high+low)/2; 272 q = &tab[i]; 273 if((r=strcmp(key, q->key))<0) 274 high = i; 275 else if(r == 0) 276 return q->val; 277 else 278 low=i; 279 } 280 return -1; 281 } 282 283 long 284 looknassoc(Nassoc *tab, int n, long key) 285 { 286 Nassoc *q; 287 long i, low, high; 288 289 for(low = -1, high = n; high > low+1; ){ 290 i = (high+low)/2; 291 q = &tab[i]; 292 if(key < q->key) 293 high = i; 294 else if(key == q->key) 295 return q->val; 296 else 297 low=i; 298 } 299 return -1; 300 } 301 302 void 303 err(char *fmt, ...) 304 { 305 char buf[1000]; 306 va_list v; 307 308 va_start(v, fmt); 309 vsnprint(buf, sizeof(buf), fmt, v); 310 va_end(v); 311 fprint(2, "%s: %s\n", argv0, buf); 312 } 313 314 /* 315 * Write the rune r to bout, keeping track of line length 316 * and breaking the lines (at blanks) when they get too long 317 */ 318 void 319 outrune(long r) 320 { 321 if(outinhibit) 322 return; 323 if(++linelen > breaklen && r == L' ') { 324 Bputc(bout, '\n'); 325 linelen = 0; 326 } else 327 Bputrune(bout, r); 328 } 329 330 void 331 outrunes(Rune *rp) 332 { 333 Rune r; 334 335 while((r = *rp++) != 0) 336 outrune(r); 337 } 338 339 /* like outrune, but when arg is know to be a char */ 340 void 341 outchar(int c) 342 { 343 if(outinhibit) 344 return; 345 if(++linelen > breaklen && c == ' ') { 346 c ='\n'; 347 linelen = 0; 348 } 349 Bputc(bout, c); 350 } 351 352 void 353 outchars(char *s) 354 { 355 char c; 356 357 while((c = *s++) != 0) 358 outchar(c); 359 } 360 361 void 362 outprint(char *fmt, ...) 363 { 364 char buf[1000]; 365 va_list v; 366 367 va_start(v, fmt); 368 vsnprint(buf, sizeof(buf), fmt, v); 369 va_end(v); 370 outchars(buf); 371 } 372 373 void 374 outpiece(char *b, char *e) 375 { 376 int c, lastc; 377 378 lastc = 0; 379 while(b < e) { 380 c = *b++; 381 if(c == '\n') 382 c = ' '; 383 if(!(c == ' ' && lastc == ' ')) 384 outchar(c); 385 lastc = c; 386 } 387 } 388 389 /* 390 * Go to new line if not already there; indent if ind != 0. 391 * If ind > 1, leave a blank line too. 392 * Slight hack: assume if current line is only one or two 393 * characters long, then they were spaces. 394 */ 395 void 396 outnl(int ind) 397 { 398 if(outinhibit) 399 return; 400 if(ind) { 401 if(ind > 1) { 402 if(linelen > 2) 403 Bputc(bout, '\n'); 404 Bprint(bout, "\n "); 405 } else if(linelen == 0) 406 Bprint(bout, " "); 407 else if(linelen == 1) 408 Bputc(bout, ' '); 409 else if(linelen != 2) 410 Bprint(bout, "\n "); 411 linelen = 2; 412 } else { 413 if(linelen) { 414 Bputc(bout, '\n'); 415 linelen = 0; 416 } 417 } 418 } 419 420 /* 421 * Fold the runes in null-terminated rp. 422 * Use the sort(1) definition of folding (uppercase to lowercase, 423 * latin1-accented characters to corresponding unaccented chars) 424 */ 425 void 426 fold(Rune *rp) 427 { 428 Rune r; 429 430 while((r = *rp) != 0) { 431 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 432 r = latin_fold_tab[r-0xc0]; 433 if(risupper(r)) 434 r = rtolower(r); 435 *rp++ = r; 436 } 437 } 438 439 /* 440 * Like fold, but put folded result into new 441 * (assumed to have enough space). 442 * old is a regular expression, but we know that 443 * metacharacters aren't affected 444 */ 445 void 446 foldre(char *new, char *old) 447 { 448 Rune r; 449 450 while(*old) { 451 old += chartorune(&r, old); 452 if (rislatin1(r) && latin_fold_tab[r-0xc0]) 453 r = latin_fold_tab[r-0xc0]; 454 if(risupper(r)) 455 r = rtolower(r); 456 new += runetochar(new, &r); 457 } 458 *new = 0; 459 } 460 461 /* 462 * acomp(s, t) returns: 463 * -2 if s strictly precedes t 464 * -1 if s is a prefix of t 465 * 0 if s is the same as t 466 * 1 if t is a prefix of s 467 * 2 if t strictly precedes s 468 */ 469 470 int 471 acomp(Rune *s, Rune *t) 472 { 473 int cs, ct; 474 475 for(;;) { 476 cs = *s; 477 ct = *t; 478 if(cs != ct) 479 break; 480 if(cs == 0) 481 return 0; 482 s++; 483 t++; 484 } 485 if(cs == 0) 486 return -1; 487 if(ct == 0) 488 return 1; 489 if(cs < ct) 490 return -2; 491 return 2; 492 } 493 494 /* 495 * Copy null terminated Runes from 'from' to 'to'. 496 */ 497 void 498 runescpy(Rune *to, Rune *from) 499 { 500 while((*to++ = *from++) != 0) 501 continue; 502 } 503 504 /* 505 * Conversion of unsigned number to long, no overflow detection 506 */ 507 long 508 runetol(Rune *r) 509 { 510 int c; 511 long n; 512 513 n = 0; 514 for(;; r++){ 515 c = *r; 516 if(L'0'<=c && c<=L'9') 517 c -= '0'; 518 else 519 break; 520 n = n*10 + c; 521 } 522 return n; 523 } 524 525 /* 526 * See if there is a rune corresponding to the accented 527 * version of r with accent acc (acc in [LIGS..LIGE-1]), 528 * and return it if so, else return NONE. 529 */ 530 Rune 531 liglookup(Rune acc, Rune r) 532 { 533 Rune *p; 534 535 if(acc < LIGS || acc >= LIGE) 536 return NONE; 537 for(p = ligtab[acc-LIGS].pairs; *p; p += 2) 538 if(*p == r) 539 return *(p+1); 540 return NONE; 541 } 542 543 /* 544 * Maintain a translation table stack (a translation table 545 * is an array of Runes indexed by bytes or 7-bit bytes). 546 * If starting is true, push the curtab onto the stack 547 * and return newtab; else pop the top of the stack and 548 * return it. 549 * If curtab is 0, initialize the stack and return. 550 */ 551 Rune * 552 changett(Rune *curtab, Rune *newtab, int starting) 553 { 554 if(curtab == 0) { 555 ntt = 0; 556 return 0; 557 } 558 if(starting) { 559 if(ntt >= asize(ttabstack)) { 560 if(debug) 561 err("translation stack overflow"); 562 return curtab; 563 } 564 ttabstack[ntt++] = curtab; 565 return newtab; 566 } else { 567 if(ntt == 0) { 568 if(debug) 569 err("translation stack underflow"); 570 return curtab; 571 } 572 return ttabstack[--ntt]; 573 } 574 } 575