1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5
/*
 * Routines for handling dictionaries in the "Paperback Collins"
 * format (with tags surrounded by >....<)
 */
/* Buflen is the size in bytes of the tag[] buffer declared below */
enum {
	Buflen = 1000
};
13
14 /* More special runes */
15 enum {
16 B = MULTIE+1, /* bold */
17 H, /* headword start */
18 I, /* italics */
19 Ps, /* pronunciation start */
20 Pe, /* pronunciation end */
21 R, /* roman */
22 X, /* headword end */
23 };
24
25 /* Assoc tables must be sorted on first field */
26
27 static Assoc tagtab[] = {
28 {"AA", L'Å'},
29 {"AC", LACU},
30 {"B", B},
31 {"CE", LCED},
32 {"CI", LFRN},
33 {"Di", L'ı'},
34 {"EL", L'-'},
35 {"GR", LGRV},
36 {"H", H},
37 {"I", I},
38 {"OE", L'Œ'},
39 {"R", R},
40 {"TI", LTIL},
41 {"UM", LUML},
42 {"X", X},
43 {"[", Ps},
44 {"]", Pe},
45 {"ac", LACU},
46 {"ce", LCED},
47 {"ci", LFRN},
48 {"gr", LGRV},
49 {"oe", L'œ'},
50 {"supe", L'e'}, /* should be raised */
51 {"supo", L'o'}, /* should be raised */
52 {"ti", LTIL},
53 {"um", LUML},
54 {"{", Ps},
55 {"~", L'~'},
56 {"~~", MTT},
57 };
58
59 static Rune normtab[128] = {
60 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
61 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
62 NONE, NONE, L' ', NONE, NONE, NONE, NONE, NONE,
63 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
64 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
65 /*20*/ L' ', L'!', L'"', L'#', L'$', L'%', L'&', L'\'',
66 L'(', L')', L'*', L'+', L',', L'-', L'.', L'/',
67 /*30*/ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
68 L'8', L'9', L':', L';', TAGE, L'=', TAGS, L'?',
69 /*40*/ L'@', L'A', L'B', L'C', L'D', L'E', L'F', L'G',
70 L'H', L'I', L'J', L'K', L'L', L'M', L'N', L'O',
71 /*50*/ L'P', L'Q', L'R', L'S', L'T', L'U', L'V', L'W',
72 L'X', L'Y', L'Z', L'[', L'\\', L']', L'^', L'_',
73 /*60*/ L'`', L'a', L'b', L'c', L'd', L'e', L'f', L'g',
74 L'h', L'i', L'j', L'k', L'l', L'm', L'n', L'o',
75 /*70*/ L'p', L'q', L'r', L's', L't', L'u', L'v', L'w',
76 L'x', L'y', L'z', L'{', L'|', L'}', L'~', NONE,
77 };
78
79 static char *gettag(char *, char *);
80
81 static Entry curentry;
82 static char tag[Buflen];
83 #define cursize (curentry.end-curentry.start)
84
85 void
pcollprintentry(Entry e,int cmd)86 pcollprintentry(Entry e, int cmd)
87 {
88 char *p, *pe;
89 long r, rprev, t, rlig;
90 int saveoi;
91 Rune *transtab;
92
93 p = e.start;
94 pe = e.end;
95 transtab = normtab;
96 rprev = NONE;
97 changett(0, 0, 0);
98 curentry = e;
99 saveoi = 0;
100 if(cmd == 'h')
101 outinhibit = 1;
102 while(p < pe) {
103 if(cmd == 'r') {
104 outchar(*p++);
105 continue;
106 }
107 r = transtab[(*p++)&0x7F];
108 if(r < NONE) {
109 /* Emit the rune, but buffer in case of ligature */
110 if(rprev != NONE)
111 outrune(rprev);
112 rprev = r;
113 } else if(r == TAGS) {
114 p = gettag(p, pe);
115 t = lookassoc(tagtab, asize(tagtab), tag);
116 if(t == -1) {
117 if(debug && !outinhibit)
118 err("tag %ld %d %s",
119 e.doff, cursize, tag);
120 continue;
121 }
122 if(t < NONE) {
123 if(rprev != NONE)
124 outrune(rprev);
125 rprev = t;
126 } else if(t >= LIGS && t < LIGE) {
127 /* handle possible ligature */
128 rlig = liglookup(t, rprev);
129 if(rlig != NONE)
130 rprev = rlig; /* overwrite rprev */
131 else {
132 /* could print accent, but let's not */
133 if(rprev != NONE) outrune(rprev);
134 rprev = NONE;
135 }
136 } else if(t >= MULTI && t < MULTIE) {
137 if(rprev != NONE) {
138 outrune(rprev);
139 rprev = NONE;
140 }
141 outrunes(multitab[t-MULTI]);
142 } else {
143 if(rprev != NONE) {
144 outrune(rprev);
145 rprev = NONE;
146 }
147 switch(t){
148 case H:
149 if(cmd == 'h')
150 outinhibit = 0;
151 else
152 outnl(0);
153 break;
154 case X:
155 if(cmd == 'h')
156 outinhibit = 1;
157 else
158 outchars(". ");
159 break;
160 case Ps:
161 /* don't know enough of pron. key yet */
162 saveoi = outinhibit;
163 outinhibit = 1;
164 break;
165 case Pe:
166 outinhibit = saveoi;
167 break;
168 }
169 }
170 }
171 }
172 if(cmd == 'h')
173 outinhibit = 0;
174 outnl(0);
175 }
176
177 long
pcollnextoff(long fromoff)178 pcollnextoff(long fromoff)
179 {
180 long a;
181 char *p;
182
183 a = Bseek(bdict, fromoff, 0);
184 if(a < 0)
185 return -1;
186 for(;;) {
187 p = Brdline(bdict, '\n');
188 if(!p)
189 break;
190 if(p[0] == '>' && p[1] == 'H' && p[2] == '<')
191 return (Boffset(bdict)-Blinelen(bdict));
192 }
193 return -1;
194 }
195
196 void
pcollprintkey(void)197 pcollprintkey(void)
198 {
199 Bprint(bout, "No pronunciation key yet\n");
200 }
201
202 /*
203 * f points just after '>'; fe points at end of entry.
204 * Expect next characters from bin to match:
205 * [^ <]+<
206 * tag
207 * Accumulate the tag in tag[].
208 * Return pointer to after final '<'.
209 */
210 static char *
gettag(char * f,char * fe)211 gettag(char *f, char *fe)
212 {
213 char *t;
214 int c, i;
215
216 t = tag;
217 i = Buflen;
218 while(--i > 0) {
219 c = *f++;
220 if(c == '<' || f == fe)
221 break;
222 *t++ = c;
223 }
224 *t = 0;
225 return f;
226 }
227