1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 /* Macros for Rune support of ctype.h-like functions */
5
6 #define isupper(r) (L'A' <= (r) && (r) <= L'Z')
7 #define islower(r) (L'a' <= (r) && (r) <= L'z')
8 #define isalpha(r) (isupper(r) || islower(r))
9 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
10
11 #define isdigit(r) (L'0' <= (r) && (r) <= L'9')
12
13 #define isalnum(r) (isalpha(r) || isdigit(r))
14
15 #define isspace(r) ((r) == L' ' || (r) == L'\t' \
16 || (0x0A <= (r) && (r) <= 0x0D))
17
18 #define tolower(r) ((r)-'A'+'a')
19
20 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
21
22 #define WORDSIZ 4000
23 char *filename = "/lib/words";
24 Biobuf *dfile;
25 Biobuf bout;
26 Biobuf bin;
27
28 int fold;
29 int direc;
30 int exact;
31 int iflag;
32 int rev = 1; /*-1 for reverse-ordered file, not implemented*/
33 int (*compare)(Rune*, Rune*);
34 Rune tab = '\t';
35 Rune entry[WORDSIZ];
36 Rune word[WORDSIZ];
37 Rune key[50], orig[50];
38 Rune latin_fold_tab[] =
39 {
40 /* Table to fold latin 1 characters to ASCII equivalents
41 based at Rune value 0xc0
42
43 À Á Â Ã Ä Å Æ Ç
44 È É Ê Ë Ì Í Î Ï
45 Ð Ñ Ò Ó Ô Õ Ö ×
46 Ø Ù Ú Û Ü Ý Þ ß
47 à á â ã ä å æ ç
48 è é ê ë ì í î ï
49 ð ñ ò ó ô õ ö ÷
50 ø ù ú û ü ý þ ÿ
51 */
52 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
53 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
54 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
55 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
56 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
57 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
58 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
59 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
60 };
61
62 int locate(void);
63 int acomp(Rune*, Rune*);
64 int getword(Biobuf*, Rune *rp, int n);
65 void torune(char*, Rune*);
66 void rcanon(Rune*, Rune*);
67 int ncomp(Rune*, Rune*);
68
69 void
usage(void)70 usage(void)
71 {
72 fprint(2, "usage: %s [-dfinx] [-t c] [string] [file]\n", argv0);
73 exits("usage");
74 }
75
76 void
main(int argc,char * argv[])77 main(int argc, char *argv[])
78 {
79 int n;
80
81 Binit(&bin, 0, OREAD);
82 Binit(&bout, 1, OWRITE);
83 compare = acomp;
84 ARGBEGIN{
85 case 'd':
86 direc++;
87 break;
88 case 'f':
89 fold++;
90 break;
91 case 'i':
92 iflag++;
93 break;
94 case 'n':
95 compare = ncomp;
96 break;
97 case 't':
98 chartorune(&tab, EARGF(usage()));
99 break;
100 case 'x':
101 exact++;
102 break;
103 default:
104 fprint(2, "%s: bad option %c\n", argv0, ARGC());
105 usage();
106 } ARGEND
107 if(!iflag){
108 if(argc >= 1) {
109 torune(argv[0], orig);
110 argv++;
111 argc--;
112 } else
113 iflag++;
114 }
115 if(argc < 1) {
116 direc++;
117 fold++;
118 } else
119 filename = argv[0];
120 if (!iflag)
121 rcanon(orig, key);
122 dfile = Bopen(filename, OREAD);
123 if(dfile == 0) {
124 fprint(2, "look: can't open %s\n", filename);
125 exits("no dictionary");
126 }
127 if(!iflag)
128 if(!locate())
129 exits("not found");
130 do {
131 if(iflag) {
132 Bflush(&bout);
133 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
134 exits(0);
135 rcanon(orig, key);
136 if(!locate())
137 continue;
138 }
139 if (!exact || !acomp(word, key))
140 Bprint(&bout, "%S\n", entry);
141 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
142 rcanon(entry, word);
143 n = compare(key, word);
144 switch(n) {
145 case -1:
146 if(exact)
147 break;
148 case 0:
149 if (!exact || !acomp(word, orig))
150 Bprint(&bout, "%S\n", entry);
151 continue;
152 }
153 break;
154 }
155 } while(iflag);
156 exits(0);
157 }
158
159 int
locate(void)160 locate(void)
161 {
162 vlong top, bot, mid;
163 long c;
164 int n;
165
166 bot = 0;
167 top = Bseek(dfile, 0, 2);
168 for(;;) {
169 mid = (top+bot) / 2;
170 Bseek(dfile, mid, 0);
171 do
172 c = Bgetrune(dfile);
173 while(c>=0 && c!='\n');
174 mid = Boffset(dfile);
175 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
176 break;
177 rcanon(entry, word);
178 n = compare(key, word);
179 switch(n) {
180 case -2:
181 case -1:
182 case 0:
183 if(top <= mid)
184 break;
185 top = mid;
186 continue;
187 case 1:
188 case 2:
189 bot = mid;
190 continue;
191 }
192 break;
193 }
194 Bseek(dfile, bot, 0);
195 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
196 rcanon(entry, word);
197 n = compare(key, word);
198 switch(n) {
199 case -2:
200 return 0;
201 case -1:
202 if(exact)
203 return 0;
204 case 0:
205 return 1;
206 case 1:
207 case 2:
208 continue;
209 }
210 }
211 return 0;
212 }
213
214 /*
215 * acomp(s, t) returns:
216 * -2 if s strictly precedes t
217 * -1 if s is a prefix of t
218 * 0 if s is the same as t
219 * 1 if t is a prefix of s
220 * 2 if t strictly precedes s
221 */
222
223 int
acomp(Rune * s,Rune * t)224 acomp(Rune *s, Rune *t)
225 {
226 int cs, ct;
227
228 for(;;) {
229 cs = *s;
230 ct = *t;
231 if(cs != ct)
232 break;
233 if(cs == 0)
234 return 0;
235 s++;
236 t++;
237 }
238 if(cs == 0)
239 return -1;
240 if(ct == 0)
241 return 1;
242 if(cs < ct)
243 return -2;
244 return 2;
245 }
246
247 void
torune(char * old,Rune * new)248 torune(char *old, Rune *new)
249 {
250 do old += chartorune(new, old);
251 while(*new++);
252 }
253
254 void
rcanon(Rune * old,Rune * new)255 rcanon(Rune *old, Rune *new)
256 {
257 Rune r;
258
259 while((r = *old++) && r != tab) {
260 if (islatin1(r) && latin_fold_tab[r-0xc0])
261 r = latin_fold_tab[r-0xc0];
262 if(direc)
263 if(!(isalnum(r) || r == L' ' || r == L'\t'))
264 continue;
265 if(fold)
266 if(isupper(r))
267 r = tolower(r);
268 *new++ = r;
269 }
270 *new = 0;
271 }
272
273 int
ncomp(Rune * s,Rune * t)274 ncomp(Rune *s, Rune *t)
275 {
276 Rune *is, *it, *js, *jt;
277 int a, b;
278 int ssgn, tsgn;
279
280 while(isspace(*s))
281 s++;
282 while(isspace(*t))
283 t++;
284 ssgn = tsgn = -2*rev;
285 if(*s == '-') {
286 s++;
287 ssgn = -ssgn;
288 }
289 if(*t == '-') {
290 t++;
291 tsgn = -tsgn;
292 }
293 for(is = s; isdigit(*is); is++)
294 ;
295 for(it = t; isdigit(*it); it++)
296 ;
297 js = is;
298 jt = it;
299 a = 0;
300 if(ssgn == tsgn)
301 while(it>t && is>s)
302 if(b = *--it - *--is)
303 a = b;
304 while(is > s)
305 if(*--is != '0')
306 return -ssgn;
307 while(it > t)
308 if(*--it != '0')
309 return tsgn;
310 if(a)
311 return sgn(a)*ssgn;
312 if(*(s=js) == '.')
313 s++;
314 if(*(t=jt) == '.')
315 t++;
316 if(ssgn == tsgn)
317 while(isdigit(*s) && isdigit(*t))
318 if(a = *t++ - *s++)
319 return sgn(a)*ssgn;
320 while(isdigit(*s))
321 if(*s++ != '0')
322 return -ssgn;
323 while(isdigit(*t))
324 if(*t++ != '0')
325 return tsgn;
326 return 0;
327 }
328
329 int
getword(Biobuf * f,Rune * rp,int n)330 getword(Biobuf *f, Rune *rp, int n)
331 {
332 long c;
333
334 while(n-- > 0) {
335 c = Bgetrune(f);
336 if(c < 0)
337 return 0;
338 if(c == '\n') {
339 *rp = L'\0';
340 return 1;
341 }
342 *rp++ = c;
343 }
344 fprint(2, "Look: word too long. Bailing out.\n");
345 return 0;
346 }
347