1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5
/*
 * Table of the dictionaries this program knows about, ended by an
 * all-zero entry (members not listed in that last initializer are
 * zeroed implicitly).
 * Each initializer gives, in order: a short name, a descriptive
 * title, two file paths (judging by the path names, the dictionary
 * data and its index -- confirm against Dict in dict.h), and three
 * format-specific routines.
 * Several entries share one data file and differ only in the index.
 */
Dict dicts[] = {
	{"oed", "Oxford English Dictionary, 2nd Ed.",
	"/lib/dict/oed2", "/lib/dict/oed2index",
	oednextoff, oedprintentry, oedprintkey},
	{"ahd", "American Heritage Dictionary, 2nd College Ed.",
	"/lib/ahd/DICT.DB", "/lib/ahd/index",
	ahdnextoff, ahdprintentry, ahdprintkey},
	{"pgw", "Project Gutenberg Webster Dictionary",
	"/lib/dict/pgw", "/lib/dict/pgwindex",
	pgwnextoff, pgwprintentry, pgwprintkey},
	{"thesaurus", "Collins Thesaurus",
	"/lib/dict/thesaurus", "/lib/dict/thesindex",
	thesnextoff, thesprintentry, thesprintkey},
	{"roget", "Project Gutenberg Roget's Thesaurus",
	"/lib/dict/roget", "/lib/dict/rogetindex",
	rogetnextoff, rogetprintentry, rogetprintkey},

	{"ce", "Gendai Chinese->English",
	"/lib/dict/world/sansdata/sandic24.dat",
	"/lib/dict/world/sansdata/ceindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ceh", "Gendai Chinese->English (Hanzi index)",
	"/lib/dict/world/sansdata/sandic24.dat",
	"/lib/dict/world/sansdata/cehindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ec", "Gendai English->Chinese",
	"/lib/dict/world/sansdata/sandic24.dat",
	"/lib/dict/world/sansdata/ecindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"dae", "Gyldendal Danish->English",
	"/lib/dict/world/gylddata/sandic30.dat",
	"/lib/dict/world/gylddata/daeindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"eda", "Gyldendal English->Danish",
	"/lib/dict/world/gylddata/sandic29.dat",
	"/lib/dict/world/gylddata/edaindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"due", "Wolters-Noordhoff Dutch->English",
	"/lib/dict/world/woltdata/sandic07.dat",
	"/lib/dict/world/woltdata/deindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"edu", "Wolters-Noordhoff English->Dutch",
	"/lib/dict/world/woltdata/sandic06.dat",
	"/lib/dict/world/woltdata/edindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"fie", "WSOY Finnish->English",
	"/lib/dict/world/werndata/sandic32.dat",
	"/lib/dict/world/werndata/fieindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"efi", "WSOY English->Finnish",
	"/lib/dict/world/werndata/sandic31.dat",
	"/lib/dict/world/werndata/efiindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"fe", "Collins French->English",
	"/lib/dict/fe", "/lib/dict/feindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"ef", "Collins English->French",
	"/lib/dict/ef", "/lib/dict/efindex",
	pcollnextoff, pcollprintentry, pcollprintkey},

	{"ge", "Collins German->English",
	"/lib/dict/ge", "/lib/dict/geindex",
	pcollgnextoff, pcollgprintentry, pcollgprintkey},
	{"eg", "Collins English->German",
	"/lib/dict/eg", "/lib/dict/egindex",
	pcollgnextoff, pcollgprintentry, pcollgprintkey},

	{"ie", "Collins Italian->English",
	"/lib/dict/ie", "/lib/dict/ieindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"ei", "Collins English->Italian",
	"/lib/dict/ei", "/lib/dict/eiindex",
	pcollnextoff, pcollprintentry, pcollprintkey},

	{"je", "Sanshusha Japanese->English",
	"/lib/dict/world/sansdata/sandic18.dat",
	"/lib/dict/world/sansdata/jeindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"jek", "Sanshusha Japanese->English (Kanji index)",
	"/lib/dict/world/sansdata/sandic18.dat",
	"/lib/dict/world/sansdata/jekindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"ej", "Sanshusha English->Japanese",
	"/lib/dict/world/sansdata/sandic18.dat",
	"/lib/dict/world/sansdata/ejindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"tjeg", "Sanshusha technical Japanese->English,German",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tjegindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tjegkindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tegj", "Sanshusha technical English->German,Japanese",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tegjindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"tgje", "Sanshusha technical German->Japanese,English",
	"/lib/dict/world/sansdata/sandic16.dat",
	"/lib/dict/world/sansdata/tgjeindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"ne", "Kunnskapforlaget Norwegian->English",
	"/lib/dict/world/kunndata/sandic28.dat",
	"/lib/dict/world/kunndata/neindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"en", "Kunnskapforlaget English->Norwegian",
	"/lib/dict/world/kunndata/sandic27.dat",
	"/lib/dict/world/kunndata/enindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"re", "Leon Ungier Russian->English",
	"/lib/dict/re", "/lib/dict/reindex",
	simplenextoff, simpleprintentry, simpleprintkey},
	{"er", "Leon Ungier English->Russian",
	"/lib/dict/re", "/lib/dict/erindex",
	simplenextoff, simpleprintentry, simpleprintkey},

	{"se", "Collins Spanish->English",
	"/lib/dict/se", "/lib/dict/seindex",
	pcollnextoff, pcollprintentry, pcollprintkey},
	{"es", "Collins English->Spanish",
	"/lib/dict/es", "/lib/dict/esindex",
	pcollnextoff, pcollprintentry, pcollprintkey},

	{"swe", "Esselte Studium Swedish->English",
	"/lib/dict/world/essedata/sandic34.dat",
	"/lib/dict/world/essedata/sweindex",
	worldnextoff, worldprintentry, worldprintkey},
	{"esw", "Esselte Studium English->Swedish",
	"/lib/dict/world/essedata/sandic33.dat",
	"/lib/dict/world/essedata/eswindex",
	worldnextoff, worldprintentry, worldprintkey},

	{"movie", "Movies -- by title",
	"/lib/movie/data", "/lib/dict/movtindex",
	movienextoff, movieprintentry, movieprintkey},
	{"moviea", "Movies -- by actor",
	"/lib/movie/data", "/lib/dict/movaindex",
	movienextoff, movieprintentry, movieprintkey},
	{"movied", "Movies -- by director",
	"/lib/movie/data", "/lib/dict/movdindex",
	movienextoff, movieprintentry, movieprintkey},

	{"slang", "English Slang",
	"/lib/dict/slang", "/lib/dict/slangindex",
	slangnextoff, slangprintentry, slangprintkey},

	{"robert", "Robert Électronique",
	"/lib/dict/robert/_pointers", "/lib/dict/robert/_index",
	robertnextoff, robertindexentry, robertprintkey},
	{"robertv", "Robert Électronique - formes des verbes",
	"/lib/dict/robert/flex.rob", "/lib/dict/robert/_flexindex",
	robertnextflex, robertflexentry, robertprintkey},

	{0, 0, 0, 0, 0}
};
169
/*
 * Description of one accent: the rune for the accent itself and a
 * 0-terminated array of runes read two at a time, an unaccented
 * character followed by its accented form.
 */
typedef struct Lig Lig;
struct Lig {
	Rune start; /* accent rune */
	Rune *pairs; /* <char,accented version> pairs */
};
175
/*
 * Accent table, indexed by (accent code - LIGS).
 * An empty pairs string means no accented forms are listed
 * for that accent.
 */
static Lig ligtab[Nligs] = {
	[LACU-LIGS] {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
	[LGRV-LIGS] {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
	[LUML-LIGS] {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
	[LCED-LIGS] {L'¸', L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
	[LTIL-LIGS] {L'˜', L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
	[LBRV-LIGS] {L'˘', L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
	[LRNG-LIGS] {L'˚', L"AÅaåUŮuů"},
	[LDOT-LIGS] {L'˙', L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
	[LDTB-LIGS] {L'.', L""},
	[LFRN-LIGS] {L'⌢', L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
	[LFRB-LIGS] {L'̯', L""},
	[LOGO-LIGS] {L'˛', L"AĄaąEĘeęIĮiįıįUŲuų"},
	[LMAC-LIGS] {L'¯', L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
	[LHCK-LIGS] {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
	[LASP-LIGS] {L'ʽ', L""},
	[LLEN-LIGS] {L'ʼ', L""},
	[LBRB-LIGS] {L'̮', L""}
};
195
/*
 * Replacement strings for multi-rune codes, indexed by
 * (code - MULTI); each is a 0-terminated array of Runes.
 */
Rune *multitab[Nmulti] = {
	[MAAS-MULTI] L"ʽα",
	[MALN-MULTI] L"ʼα",
	[MAND-MULTI] L"and",
	[MAOQ-MULTI] L"a/q",
	[MBRA-MULTI] L"<|",
	[MDD-MULTI] L"..",
	[MDDD-MULTI] L"...",
	[MEAS-MULTI] L"ʽε",
	[MELN-MULTI] L"ʼε",
	[MEMM-MULTI] L"——",
	[MHAS-MULTI] L"ʽη",
	[MHLN-MULTI] L"ʼη",
	[MIAS-MULTI] L"ʽι",
	[MILN-MULTI] L"ʼι",
	[MLCT-MULTI] L"ct",
	[MLFF-MULTI] L"ff",
	[MLFFI-MULTI] L"ffi",
	[MLFFL-MULTI] L"ffl",
	[MLFL-MULTI] L"fl",
	[MLFI-MULTI] L"fi",
	[MLLS-MULTI] L"ɫɫ",
	[MLST-MULTI] L"st",
	[MOAS-MULTI] L"ʽο",
	[MOLN-MULTI] L"ʼο",
	[MOR-MULTI] L"or",
	[MRAS-MULTI] L"ʽρ",
	[MRLN-MULTI] L"ʼρ",
	[MTT-MULTI] L"~~",
	[MUAS-MULTI] L"ʽυ",
	[MULN-MULTI] L"ʼυ",
	[MWAS-MULTI] L"ʽω",
	[MWLN-MULTI] L"ʼω",
	[MOE-MULTI] L"oe",
	[MES-MULTI] L" ",
};
232
/*
 * Stack of saved translation tables maintained by changett;
 * ntt is the number of entries currently on the stack.
 */
static Rune *ttabstack[20];
static int ntt;
235
236 /*
237 * tab is an array of n Assoc's, sorted by key.
238 * Look for key in tab, and return corresponding val
239 * or -1 if not there
240 */
241 long
lookassoc(Assoc * tab,int n,char * key)242 lookassoc(Assoc *tab, int n, char *key)
243 {
244 Assoc *q;
245 long i, low, high;
246 int r;
247
248 for(low = -1, high = n; high > low+1; ){
249 i = (high+low)/2;
250 q = &tab[i];
251 if((r=strcmp(key, q->key))<0)
252 high = i;
253 else if(r == 0)
254 return q->val;
255 else
256 low=i;
257 }
258 return -1;
259 }
260
261 long
looknassoc(Nassoc * tab,int n,long key)262 looknassoc(Nassoc *tab, int n, long key)
263 {
264 Nassoc *q;
265 long i, low, high;
266
267 for(low = -1, high = n; high > low+1; ){
268 i = (high+low)/2;
269 q = &tab[i];
270 if(key < q->key)
271 high = i;
272 else if(key == q->key)
273 return q->val;
274 else
275 low=i;
276 }
277 return -1;
278 }
279
280 void
err(char * fmt,...)281 err(char *fmt, ...)
282 {
283 char buf[1000];
284 va_list v;
285
286 va_start(v, fmt);
287 vsnprint(buf, sizeof(buf), fmt, v);
288 va_end(v);
289 fprint(2, "%s: %s\n", argv0, buf);
290 }
291
292 /*
293 * Write the rune r to bout, keeping track of line length
294 * and breaking the lines (at blanks) when they get too long
295 */
296 void
outrune(long r)297 outrune(long r)
298 {
299 if(outinhibit)
300 return;
301 if(++linelen > breaklen && r == L' ') {
302 Bputc(bout, '\n');
303 linelen = 0;
304 } else
305 Bputrune(bout, r);
306 }
307
308 void
outrunes(Rune * rp)309 outrunes(Rune *rp)
310 {
311 Rune r;
312
313 while((r = *rp++) != 0)
314 outrune(r);
315 }
316
317 /* like outrune, but when arg is know to be a char */
318 void
outchar(int c)319 outchar(int c)
320 {
321 if(outinhibit)
322 return;
323 if(++linelen > breaklen && c == ' ') {
324 c ='\n';
325 linelen = 0;
326 }
327 Bputc(bout, c);
328 }
329
/*
 * Pass each byte of the 0-terminated string s to outchar.
 */
void
outchars(char *s)
{
	for(; *s != 0; s++)
		outchar(*s);
}
338
/*
 * Format print-style into a 1000-byte buffer and send the
 * result through outchars.
 */
void
outprint(char *fmt, ...)
{
	va_list args;
	char text[1000];

	va_start(args, fmt);
	vsnprint(text, sizeof text, fmt, args);
	va_end(args);

	outchars(text);
}
350
/*
 * Output the bytes in [b, e) via outchar, turning newlines
 * into blanks and squeezing runs of blanks down to one.
 */
void
outpiece(char *b, char *e)
{
	int ch, prev;

	for(prev = 0; b < e; b++) {
		ch = *b;
		if(ch == '\n')
			ch = ' ';
		/* drop a blank that directly follows another blank */
		if(ch != ' ' || prev != ' ')
			outchar(ch);
		prev = ch;
	}
}
366
367 /*
368 * Go to new line if not already there; indent if ind != 0.
369 * If ind > 1, leave a blank line too.
370 * Slight hack: assume if current line is only one or two
371 * characters long, then they were spaces.
372 */
373 void
outnl(int ind)374 outnl(int ind)
375 {
376 if(outinhibit)
377 return;
378 if(ind) {
379 if(ind > 1) {
380 if(linelen > 2)
381 Bputc(bout, '\n');
382 Bprint(bout, "\n ");
383 } else if(linelen == 0)
384 Bprint(bout, " ");
385 else if(linelen == 1)
386 Bputc(bout, ' ');
387 else if(linelen != 2)
388 Bprint(bout, "\n ");
389 linelen = 2;
390 } else {
391 if(linelen) {
392 Bputc(bout, '\n');
393 linelen = 0;
394 }
395 }
396 }
397
398 /*
399 * Fold the runes in null-terminated rp.
400 * Use the sort(1) definition of folding (uppercase to lowercase,
401 * accented characters to corresponding unaccented chars)
402 */
403 void
fold(Rune * rp)404 fold(Rune *rp)
405 {
406 Rune r;
407
408 while((r = *rp) != 0) {
409 r = tobaserune(r);
410 if(isupperrune(r))
411 r = tolowerrune(r);
412 *rp++ = r;
413 }
414 }
415
416 /*
417 * Like fold, but put folded result into new
418 * (assumed to have enough space).
419 * old is a regular expression, but we know that
420 * metacharacters aren't affected
421 */
422 void
foldre(char * new,char * old)423 foldre(char *new, char *old)
424 {
425 Rune r;
426
427 while(*old) {
428 old += chartorune(&r, old);
429 r = tobaserune(r);
430 if(isupperrune(r))
431 r = tolowerrune(r);
432 new += runetochar(new, &r);
433 }
434 *new = 0;
435 }
436
437 /*
438 * acomp(s, t) returns:
439 * -2 if s strictly precedes t
440 * -1 if s is a prefix of t
441 * 0 if s is the same as t
442 * 1 if t is a prefix of s
443 * 2 if t strictly precedes s
444 */
445
446 int
acomp(Rune * s,Rune * t)447 acomp(Rune *s, Rune *t)
448 {
449 int cs, ct;
450
451 for(;;) {
452 cs = *s;
453 ct = *t;
454 if(cs != ct)
455 break;
456 if(cs == 0)
457 return 0;
458 s++;
459 t++;
460 }
461 if(cs == 0)
462 return -1;
463 if(ct == 0)
464 return 1;
465 if(cs < ct)
466 return -2;
467 return 2;
468 }
469
470 /*
471 * Copy null terminated Runes from 'from' to 'to'.
472 */
473 void
runescpy(Rune * to,Rune * from)474 runescpy(Rune *to, Rune *from)
475 {
476 while((*to++ = *from++) != 0)
477 continue;
478 }
479
480 /*
481 * Conversion of unsigned number to long, no overflow detection
482 */
483 long
runetol(Rune * r)484 runetol(Rune *r)
485 {
486 int c;
487 long n;
488
489 n = 0;
490 for(;; r++){
491 c = *r;
492 if(L'0'<=c && c<=L'9')
493 c -= '0';
494 else
495 break;
496 n = n*10 + c;
497 }
498 return n;
499 }
500
501 /*
502 * See if there is a rune corresponding to the accented
503 * version of r with accent acc (acc in [LIGS..LIGE-1]),
504 * and return it if so, else return NONE.
505 */
506 Rune
liglookup(Rune acc,Rune r)507 liglookup(Rune acc, Rune r)
508 {
509 Rune *p;
510
511 if(acc < LIGS || acc >= LIGE)
512 return NONE;
513 for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
514 if(*p == r)
515 return *(p+1);
516 return NONE;
517 }
518
519 /*
520 * Maintain a translation table stack (a translation table
521 * is an array of Runes indexed by bytes or 7-bit bytes).
522 * If starting is true, push the curtab onto the stack
523 * and return newtab; else pop the top of the stack and
524 * return it.
525 * If curtab is 0, initialize the stack and return.
526 */
527 Rune *
changett(Rune * curtab,Rune * newtab,int starting)528 changett(Rune *curtab, Rune *newtab, int starting)
529 {
530 if(curtab == 0) {
531 ntt = 0;
532 return 0;
533 }
534 if(starting) {
535 if(ntt >= asize(ttabstack)) {
536 if(debug)
537 err("translation stack overflow");
538 return curtab;
539 }
540 ttabstack[ntt++] = curtab;
541 return newtab;
542 } else {
543 if(ntt == 0) {
544 if(debug)
545 err("translation stack underflow");
546 return curtab;
547 }
548 return ttabstack[--ntt];
549 }
550 }
551