1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include "dict.h" 5 6 Dict dicts[] = { 7 {"oed", "Oxford English Dictionary, 2nd Ed.", 8 "/lib/dict/oed2", "/lib/dict/oed2index", 9 oednextoff, oedprintentry, oedprintkey}, 10 {"ahd", "American Heritage Dictionary, 2nd College Ed.", 11 "/lib/ahd/DICT.DB", "/lib/ahd/index", 12 ahdnextoff, ahdprintentry, ahdprintkey}, 13 {"pgw", "Project Gutenberg Webster Dictionary", 14 "/lib/dict/pgw", "/lib/dict/pgwindex", 15 pgwnextoff, pgwprintentry, pgwprintkey}, 16 {"thesaurus", "Collins Thesaurus", 17 "/lib/dict/thesaurus", "/lib/dict/thesindex", 18 thesnextoff, thesprintentry, thesprintkey}, 19 {"roget", "Project Gutenberg Roget's Thesaurus", 20 "/lib/dict/roget", "/lib/dict/rogetindex", 21 rogetnextoff, rogetprintentry, rogetprintkey}, 22 23 {"ce", "Gendai Chinese->English", 24 "/lib/dict/world/sansdata/sandic24.dat", 25 "/lib/dict/world/sansdata/ceindex", 26 worldnextoff, worldprintentry, worldprintkey}, 27 {"ceh", "Gendai Chinese->English (Hanzi index)", 28 "/lib/dict/world/sansdata/sandic24.dat", 29 "/lib/dict/world/sansdata/cehindex", 30 worldnextoff, worldprintentry, worldprintkey}, 31 {"ec", "Gendai English->Chinese", 32 "/lib/dict/world/sansdata/sandic24.dat", 33 "/lib/dict/world/sansdata/ecindex", 34 worldnextoff, worldprintentry, worldprintkey}, 35 36 {"dae", "Gyldendal Danish->English", 37 "/lib/dict/world/gylddata/sandic30.dat", 38 "/lib/dict/world/gylddata/daeindex", 39 worldnextoff, worldprintentry, worldprintkey}, 40 {"eda", "Gyldendal English->Danish", 41 "/lib/dict/world/gylddata/sandic29.dat", 42 "/lib/dict/world/gylddata/edaindex", 43 worldnextoff, worldprintentry, worldprintkey}, 44 45 {"due", "Wolters-Noordhoff Dutch->English", 46 "/lib/dict/world/woltdata/sandic07.dat", 47 "/lib/dict/world/woltdata/deindex", 48 worldnextoff, worldprintentry, worldprintkey}, 49 {"edu", "Wolters-Noordhoff English->Dutch", 50 "/lib/dict/world/woltdata/sandic06.dat", 51 "/lib/dict/world/woltdata/edindex", 52 worldnextoff, worldprintentry, worldprintkey}, 53 54 {"fie", "WSOY Finnish->English", 55 "/lib/dict/world/werndata/sandic32.dat", 56 "/lib/dict/world/werndata/fieindex", 57 worldnextoff, worldprintentry, worldprintkey}, 58 {"efi", "WSOY English->Finnish", 59 "/lib/dict/world/werndata/sandic31.dat", 60 "/lib/dict/world/werndata/efiindex", 61 worldnextoff, worldprintentry, worldprintkey}, 62 63 {"fe", "Collins French->English", 64 "/lib/dict/fe", "/lib/dict/feindex", 65 pcollnextoff, pcollprintentry, pcollprintkey}, 66 {"ef", "Collins English->French", 67 "/lib/dict/ef", "/lib/dict/efindex", 68 pcollnextoff, pcollprintentry, pcollprintkey}, 69 70 {"ge", "Collins German->English", 71 "/lib/dict/ge", "/lib/dict/geindex", 72 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 73 {"eg", "Collins English->German", 74 "/lib/dict/eg", "/lib/dict/egindex", 75 pcollgnextoff, pcollgprintentry, pcollgprintkey}, 76 77 {"ie", "Collins Italian->English", 78 "/lib/dict/ie", "/lib/dict/ieindex", 79 pcollnextoff, pcollprintentry, pcollprintkey}, 80 {"ei", "Collins English->Italian", 81 "/lib/dict/ei", "/lib/dict/eiindex", 82 pcollnextoff, pcollprintentry, pcollprintkey}, 83 84 {"je", "Sanshusha Japanese->English", 85 "/lib/dict/world/sansdata/sandic18.dat", 86 "/lib/dict/world/sansdata/jeindex", 87 worldnextoff, worldprintentry, worldprintkey}, 88 {"jek", "Sanshusha Japanese->English (Kanji index)", 89 "/lib/dict/world/sansdata/sandic18.dat", 90 "/lib/dict/world/sansdata/jekindex", 91 worldnextoff, worldprintentry, worldprintkey}, 92 {"ej", "Sanshusha English->Japanese", 93 "/lib/dict/world/sansdata/sandic18.dat", 94 "/lib/dict/world/sansdata/ejindex", 95 worldnextoff, worldprintentry, worldprintkey}, 96 97 {"tjeg", "Sanshusha technical Japanese->English,German", 98 "/lib/dict/world/sansdata/sandic16.dat", 99 "/lib/dict/world/sansdata/tjegindex", 100 worldnextoff, worldprintentry, worldprintkey}, 101 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)", 102 "/lib/dict/world/sansdata/sandic16.dat", 103 "/lib/dict/world/sansdata/tjegkindex", 104 worldnextoff, worldprintentry, worldprintkey}, 105 {"tegj", "Sanshusha technical English->German,Japanese", 106 "/lib/dict/world/sansdata/sandic16.dat", 107 "/lib/dict/world/sansdata/tegjindex", 108 worldnextoff, worldprintentry, worldprintkey}, 109 {"tgje", "Sanshusha technical German->Japanese,English", 110 "/lib/dict/world/sansdata/sandic16.dat", 111 "/lib/dict/world/sansdata/tgjeindex", 112 worldnextoff, worldprintentry, worldprintkey}, 113 114 {"ne", "Kunnskapforlaget Norwegian->English", 115 "/lib/dict/world/kunndata/sandic28.dat", 116 "/lib/dict/world/kunndata/neindex", 117 worldnextoff, worldprintentry, worldprintkey}, 118 {"en", "Kunnskapforlaget English->Norwegian", 119 "/lib/dict/world/kunndata/sandic27.dat", 120 "/lib/dict/world/kunndata/enindex", 121 worldnextoff, worldprintentry, worldprintkey}, 122 123 {"re", "Leon Ungier Russian->English", 124 "/lib/dict/re", "/lib/dict/reindex", 125 simplenextoff, simpleprintentry, simpleprintkey}, 126 {"er", "Leon Ungier English->Russian", 127 "/lib/dict/re", "/lib/dict/erindex", 128 simplenextoff, simpleprintentry, simpleprintkey}, 129 130 {"se", "Collins Spanish->English", 131 "/lib/dict/se", "/lib/dict/seindex", 132 pcollnextoff, pcollprintentry, pcollprintkey}, 133 {"es", "Collins English->Spanish", 134 "/lib/dict/es", "/lib/dict/esindex", 135 pcollnextoff, pcollprintentry, pcollprintkey}, 136 137 {"swe", "Esselte Studium Swedish->English", 138 "/lib/dict/world/essedata/sandic34.dat", 139 "/lib/dict/world/essedata/sweindex", 140 worldnextoff, worldprintentry, worldprintkey}, 141 {"esw", "Esselte Studium English->Swedish", 142 "/lib/dict/world/essedata/sandic33.dat", 143 "/lib/dict/world/essedata/eswindex", 144 worldnextoff, worldprintentry, worldprintkey}, 145 146 {"movie", "Movies -- by title", 147 "/lib/movie/data", "/lib/dict/movtindex", 148 movienextoff, movieprintentry, movieprintkey}, 149 {"moviea", "Movies -- by actor", 150 "/lib/movie/data", "/lib/dict/movaindex", 151 movienextoff, movieprintentry, movieprintkey}, 152 {"movied", "Movies -- by director", 153 "/lib/movie/data", "/lib/dict/movdindex", 154 movienextoff, movieprintentry, movieprintkey}, 155 156 {"slang", "English Slang", 157 "/lib/dict/slang", "/lib/dict/slangindex", 158 slangnextoff, slangprintentry, slangprintkey}, 159 160 {"robert", "Robert Électronique", 161 "/lib/dict/robert/_pointers", "/lib/dict/robert/_index", 162 robertnextoff, robertindexentry, robertprintkey}, 163 {"robertv", "Robert Électronique - formes des verbes", 164 "/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex", 165 robertnextflex, robertflexentry, robertprintkey}, 166 167 {0, 0, 0, 0, 0} 168 }; 169 170 typedef struct Lig Lig; 171 struct Lig { 172 Rune start; /* accent rune */ 173 Rune *pairs; /* <char,accented version> pairs */ 174 }; 175 176 static Lig ligtab[Nligs] = { 177 [LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"}, 178 [LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"}, 179 [LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"}, 180 [LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"}, 181 [LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"}, 182 [LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"}, 183 [LRNG-LIGS] {L'˚', L"AÅaåUŮuů"}, 184 [LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"}, 185 [LDTB-LIGS] {L'.', L""}, 186 [LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"}, 187 [LFRB-LIGS] {L'̯', L""}, 188 [LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"}, 189 [LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"}, 190 [LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"}, 191 [LASP-LIGS] {L'ʽ', L""}, 192 [LLEN-LIGS] {L'ʼ', L""}, 193 [LBRB-LIGS] {L'̮', L""} 194 }; 195 196 Rune *multitab[Nmulti] = { 197 [MAAS-MULTI] L"ʽα", 198 [MALN-MULTI] L"ʼα", 199 [MAND-MULTI] L"and", 200 [MAOQ-MULTI] L"a/q", 201 [MBRA-MULTI] L"<|", 202 [MDD-MULTI] L"..", 203 [MDDD-MULTI] L"...", 204 [MEAS-MULTI] L"ʽε", 205 [MELN-MULTI] L"ʼε", 206 [MEMM-MULTI] L"——", 207 [MHAS-MULTI] L"ʽη", 208 [MHLN-MULTI] L"ʼη", 209 [MIAS-MULTI] L"ʽι", 210 [MILN-MULTI] L"ʼι", 211 [MLCT-MULTI] L"ct", 212 [MLFF-MULTI] L"ff", 213 [MLFFI-MULTI] L"ffi", 214 [MLFFL-MULTI] L"ffl", 215 [MLFL-MULTI] L"fl", 216 [MLFI-MULTI] L"fi", 217 [MLLS-MULTI] L"ɫɫ", 218 [MLST-MULTI] L"st", 219 [MOAS-MULTI] L"ʽο", 220 [MOLN-MULTI] L"ʼο", 221 [MOR-MULTI] L"or", 222 [MRAS-MULTI] L"ʽρ", 223 [MRLN-MULTI] L"ʼρ", 224 [MTT-MULTI] L"~~", 225 [MUAS-MULTI] L"ʽυ", 226 [MULN-MULTI] L"ʼυ", 227 [MWAS-MULTI] L"ʽω", 228 [MWLN-MULTI] L"ʼω", 229 [MOE-MULTI] L"oe", 230 [MES-MULTI] L" ", 231 }; 232 233 static Rune *ttabstack[20]; 234 static int ntt; 235 236 /* 237 * tab is an array of n Assoc's, sorted by key. 238 * Look for key in tab, and return corresponding val 239 * or -1 if not there 240 */ 241 long 242 lookassoc(Assoc *tab, int n, char *key) 243 { 244 Assoc *q; 245 long i, low, high; 246 int r; 247 248 for(low = -1, high = n; high > low+1; ){ 249 i = (high+low)/2; 250 q = &tab[i]; 251 if((r=strcmp(key, q->key))<0) 252 high = i; 253 else if(r == 0) 254 return q->val; 255 else 256 low=i; 257 } 258 return -1; 259 } 260 261 long 262 looknassoc(Nassoc *tab, int n, long key) 263 { 264 Nassoc *q; 265 long i, low, high; 266 267 for(low = -1, high = n; high > low+1; ){ 268 i = (high+low)/2; 269 q = &tab[i]; 270 if(key < q->key) 271 high = i; 272 else if(key == q->key) 273 return q->val; 274 else 275 low=i; 276 } 277 return -1; 278 } 279 280 void 281 err(char *fmt, ...) 282 { 283 char buf[1000]; 284 va_list v; 285 286 va_start(v, fmt); 287 vsnprint(buf, sizeof(buf), fmt, v); 288 va_end(v); 289 fprint(2, "%s: %s\n", argv0, buf); 290 } 291 292 /* 293 * Write the rune r to bout, keeping track of line length 294 * and breaking the lines (at blanks) when they get too long 295 */ 296 void 297 outrune(long r) 298 { 299 if(outinhibit) 300 return; 301 if(++linelen > breaklen && r == L' ') { 302 Bputc(bout, '\n'); 303 linelen = 0; 304 } else 305 Bputrune(bout, r); 306 } 307 308 void 309 outrunes(Rune *rp) 310 { 311 Rune r; 312 313 while((r = *rp++) != 0) 314 outrune(r); 315 } 316 317 /* like outrune, but when arg is know to be a char */ 318 void 319 outchar(int c) 320 { 321 if(outinhibit) 322 return; 323 if(++linelen > breaklen && c == ' ') { 324 c ='\n'; 325 linelen = 0; 326 } 327 Bputc(bout, c); 328 } 329 330 void 331 outchars(char *s) 332 { 333 char c; 334 335 while((c = *s++) != 0) 336 outchar(c); 337 } 338 339 void 340 outprint(char *fmt, ...) 341 { 342 char buf[1000]; 343 va_list v; 344 345 va_start(v, fmt); 346 vsnprint(buf, sizeof(buf), fmt, v); 347 va_end(v); 348 outchars(buf); 349 } 350 351 void 352 outpiece(char *b, char *e) 353 { 354 int c, lastc; 355 356 lastc = 0; 357 while(b < e) { 358 c = *b++; 359 if(c == '\n') 360 c = ' '; 361 if(!(c == ' ' && lastc == ' ')) 362 outchar(c); 363 lastc = c; 364 } 365 } 366 367 /* 368 * Go to new line if not already there; indent if ind != 0. 369 * If ind > 1, leave a blank line too. 370 * Slight hack: assume if current line is only one or two 371 * characters long, then they were spaces. 372 */ 373 void 374 outnl(int ind) 375 { 376 if(outinhibit) 377 return; 378 if(ind) { 379 if(ind > 1) { 380 if(linelen > 2) 381 Bputc(bout, '\n'); 382 Bprint(bout, "\n "); 383 } else if(linelen == 0) 384 Bprint(bout, " "); 385 else if(linelen == 1) 386 Bputc(bout, ' '); 387 else if(linelen != 2) 388 Bprint(bout, "\n "); 389 linelen = 2; 390 } else { 391 if(linelen) { 392 Bputc(bout, '\n'); 393 linelen = 0; 394 } 395 } 396 } 397 398 /* 399 * Fold the runes in null-terminated rp. 400 * Use the sort(1) definition of folding (uppercase to lowercase, 401 * accented characters to corresponding unaccented chars) 402 */ 403 void 404 fold(Rune *rp) 405 { 406 Rune r; 407 408 while((r = *rp) != 0) { 409 r = tobaserune(r); 410 if(isupperrune(r)) 411 r = tolowerrune(r); 412 *rp++ = r; 413 } 414 } 415 416 /* 417 * Like fold, but put folded result into new 418 * (assumed to have enough space). 419 * old is a regular expression, but we know that 420 * metacharacters aren't affected 421 */ 422 void 423 foldre(char *new, char *old) 424 { 425 Rune r; 426 427 while(*old) { 428 old += chartorune(&r, old); 429 r = tobaserune(r); 430 if(isupperrune(r)) 431 r = tolowerrune(r); 432 new += runetochar(new, &r); 433 } 434 *new = 0; 435 } 436 437 /* 438 * acomp(s, t) returns: 439 * -2 if s strictly precedes t 440 * -1 if s is a prefix of t 441 * 0 if s is the same as t 442 * 1 if t is a prefix of s 443 * 2 if t strictly precedes s 444 */ 445 446 int 447 acomp(Rune *s, Rune *t) 448 { 449 int cs, ct; 450 451 for(;;) { 452 cs = *s; 453 ct = *t; 454 if(cs != ct) 455 break; 456 if(cs == 0) 457 return 0; 458 s++; 459 t++; 460 } 461 if(cs == 0) 462 return -1; 463 if(ct == 0) 464 return 1; 465 if(cs < ct) 466 return -2; 467 return 2; 468 } 469 470 /* 471 * Copy null terminated Runes from 'from' to 'to'. 472 */ 473 void 474 runescpy(Rune *to, Rune *from) 475 { 476 while((*to++ = *from++) != 0) 477 continue; 478 } 479 480 /* 481 * Conversion of unsigned number to long, no overflow detection 482 */ 483 long 484 runetol(Rune *r) 485 { 486 int c; 487 long n; 488 489 n = 0; 490 for(;; r++){ 491 c = *r; 492 if(L'0'<=c && c<=L'9') 493 c -= '0'; 494 else 495 break; 496 n = n*10 + c; 497 } 498 return n; 499 } 500 501 /* 502 * See if there is a rune corresponding to the accented 503 * version of r with accent acc (acc in [LIGS..LIGE-1]), 504 * and return it if so, else return NONE. 505 */ 506 Rune 507 liglookup(Rune acc, Rune r) 508 { 509 Rune *p; 510 511 if(acc < LIGS || acc >= LIGE) 512 return NONE; 513 for(p = ligtab[acc-LIGS].pairs; *p; p += 2) 514 if(*p == r) 515 return *(p+1); 516 return NONE; 517 } 518 519 /* 520 * Maintain a translation table stack (a translation table 521 * is an array of Runes indexed by bytes or 7-bit bytes). 522 * If starting is true, push the curtab onto the stack 523 * and return newtab; else pop the top of the stack and 524 * return it. 525 * If curtab is 0, initialize the stack and return. 526 */ 527 Rune * 528 changett(Rune *curtab, Rune *newtab, int starting) 529 { 530 if(curtab == 0) { 531 ntt = 0; 532 return 0; 533 } 534 if(starting) { 535 if(ntt >= asize(ttabstack)) { 536 if(debug) 537 err("translation stack overflow"); 538 return curtab; 539 } 540 ttabstack[ntt++] = curtab; 541 return newtab; 542 } else { 543 if(ntt == 0) { 544 if(debug) 545 err("translation stack underflow"); 546 return curtab; 547 } 548 return ttabstack[--ntt]; 549 } 550 } 551