1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 /* Macros for Rune support of ctype.h-like functions */ 5 6 #define isupper(r) (L'A' <= (r) && (r) <= L'Z') 7 #define islower(r) (L'a' <= (r) && (r) <= L'z') 8 #define isalpha(r) (isupper(r) || islower(r)) 9 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) 10 11 #define isdigit(r) (L'0' <= (r) && (r) <= L'9') 12 13 #define isalnum(r) (isalpha(r) || isdigit(r)) 14 15 #define isspace(r) ((r) == L' ' || (r) == L'\t' \ 16 || (0x0A <= (r) && (r) <= 0x0D)) 17 18 #define tolower(r) ((r)-'A'+'a') 19 20 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) 21 22 #define WORDSIZ 4000 23 char *filename = "/lib/words"; 24 Biobuf *dfile; 25 Biobuf bout; 26 Biobuf bin; 27 28 int fold; 29 int direc; 30 int exact; 31 int iflag; 32 int rev = 1; /*-1 for reverse-ordered file, not implemented*/ 33 int (*compare)(Rune*, Rune*); 34 Rune tab = '\t'; 35 Rune entry[WORDSIZ]; 36 Rune word[WORDSIZ]; 37 Rune key[50], orig[50]; 38 Rune latin_fold_tab[] = 39 { 40 /* Table to fold latin 1 characters to ASCII equivalents 41 based at Rune value 0xc0 42 43 À Á Â Ã Ä Å Æ Ç 44 È É Ê Ë Ì Í Î Ï 45 Ð Ñ Ò Ó Ô Õ Ö × 46 Ø Ù Ú Û Ü Ý Þ ß 47 à á â ã ä å æ ç 48 è é ê ë ì í î ï 49 ð ñ ò ó ô õ ö ÷ 50 ø ù ú û ü ý þ ÿ 51 */ 52 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 53 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 54 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 55 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , 56 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 57 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 58 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 59 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', 60 }; 61 62 int locate(void); 63 int acomp(Rune*, Rune*); 64 int getword(Biobuf*, Rune *rp, int n); 65 void torune(char*, Rune*); 66 void rcanon(Rune*, Rune*); 67 int ncomp(Rune*, Rune*); 68 69 void 70 usage(void) 71 { 72 fprint(2, "usage: %s [-dfinx] [-t c] [string] [file]\n", argv0); 73 exits("usage"); 74 } 75 76 void 77 main(int argc, char *argv[]) 78 { 79 int n; 80 81 Binit(&bin, 0, OREAD); 82 Binit(&bout, 1, OWRITE); 83 compare = acomp; 84 ARGBEGIN{ 85 case 'd': 86 direc++; 87 break; 88 case 'f': 89 fold++; 90 break; 91 case 'i': 92 iflag++; 93 break; 94 case 'n': 95 compare = ncomp; 96 break; 97 case 't': 98 chartorune(&tab, EARGF(usage())); 99 break; 100 case 'x': 101 exact++; 102 break; 103 default: 104 fprint(2, "%s: bad option %c\n", argv0, ARGC()); 105 usage(); 106 } ARGEND 107 if(!iflag){ 108 if(argc >= 1) { 109 torune(argv[0], orig); 110 argv++; 111 argc--; 112 } else 113 iflag++; 114 } 115 if(argc < 1) { 116 direc++; 117 fold++; 118 } else 119 filename = argv[0]; 120 if (!iflag) 121 rcanon(orig, key); 122 dfile = Bopen(filename, OREAD); 123 if(dfile == 0) { 124 fprint(2, "look: can't open %s\n", filename); 125 exits("no dictionary"); 126 } 127 if(!iflag) 128 if(!locate()) 129 exits("not found"); 130 do { 131 if(iflag) { 132 Bflush(&bout); 133 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) 134 exits(0); 135 rcanon(orig, key); 136 if(!locate()) 137 continue; 138 } 139 if (!exact || !acomp(word, key)) 140 Bprint(&bout, "%S\n", entry); 141 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { 142 rcanon(entry, word); 143 n = compare(key, word); 144 switch(n) { 145 case -1: 146 if(exact) 147 break; 148 case 0: 149 if (!exact || !acomp(word, orig)) 150 Bprint(&bout, "%S\n", entry); 151 continue; 152 } 153 break; 154 } 155 } while(iflag); 156 exits(0); 157 } 158 159 int 160 locate(void) 161 { 162 vlong top, bot, mid; 163 long c; 164 int n; 165 166 bot = 0; 167 top = Bseek(dfile, 0, 2); 168 for(;;) { 169 mid = (top+bot) / 2; 170 Bseek(dfile, mid, 0); 171 do 172 c = Bgetrune(dfile); 173 while(c>=0 && c!='\n'); 174 mid = Boffset(dfile); 175 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) 176 break; 177 rcanon(entry, word); 178 n = compare(key, word); 179 switch(n) { 180 case -2: 181 case -1: 182 case 0: 183 if(top <= mid) 184 break; 185 top = mid; 186 continue; 187 case 1: 188 case 2: 189 bot = mid; 190 continue; 191 } 192 break; 193 } 194 Bseek(dfile, bot, 0); 195 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { 196 rcanon(entry, word); 197 n = compare(key, word); 198 switch(n) { 199 case -2: 200 return 0; 201 case -1: 202 if(exact) 203 return 0; 204 case 0: 205 return 1; 206 case 1: 207 case 2: 208 continue; 209 } 210 } 211 return 0; 212 } 213 214 /* 215 * acomp(s, t) returns: 216 * -2 if s strictly precedes t 217 * -1 if s is a prefix of t 218 * 0 if s is the same as t 219 * 1 if t is a prefix of s 220 * 2 if t strictly precedes s 221 */ 222 223 int 224 acomp(Rune *s, Rune *t) 225 { 226 int cs, ct; 227 228 for(;;) { 229 cs = *s; 230 ct = *t; 231 if(cs != ct) 232 break; 233 if(cs == 0) 234 return 0; 235 s++; 236 t++; 237 } 238 if(cs == 0) 239 return -1; 240 if(ct == 0) 241 return 1; 242 if(cs < ct) 243 return -2; 244 return 2; 245 } 246 247 void 248 torune(char *old, Rune *new) 249 { 250 do old += chartorune(new, old); 251 while(*new++); 252 } 253 254 void 255 rcanon(Rune *old, Rune *new) 256 { 257 Rune r; 258 259 while((r = *old++) && r != tab) { 260 if (islatin1(r) && latin_fold_tab[r-0xc0]) 261 r = latin_fold_tab[r-0xc0]; 262 if(direc) 263 if(!(isalnum(r) || r == L' ' || r == L'\t')) 264 continue; 265 if(fold) 266 if(isupper(r)) 267 r = tolower(r); 268 *new++ = r; 269 } 270 *new = 0; 271 } 272 273 int 274 ncomp(Rune *s, Rune *t) 275 { 276 Rune *is, *it, *js, *jt; 277 int a, b; 278 int ssgn, tsgn; 279 280 while(isspace(*s)) 281 s++; 282 while(isspace(*t)) 283 t++; 284 ssgn = tsgn = -2*rev; 285 if(*s == '-') { 286 s++; 287 ssgn = -ssgn; 288 } 289 if(*t == '-') { 290 t++; 291 tsgn = -tsgn; 292 } 293 for(is = s; isdigit(*is); is++) 294 ; 295 for(it = t; isdigit(*it); it++) 296 ; 297 js = is; 298 jt = it; 299 a = 0; 300 if(ssgn == tsgn) 301 while(it>t && is>s) 302 if(b = *--it - *--is) 303 a = b; 304 while(is > s) 305 if(*--is != '0') 306 return -ssgn; 307 while(it > t) 308 if(*--it != '0') 309 return tsgn; 310 if(a) 311 return sgn(a)*ssgn; 312 if(*(s=js) == '.') 313 s++; 314 if(*(t=jt) == '.') 315 t++; 316 if(ssgn == tsgn) 317 while(isdigit(*s) && isdigit(*t)) 318 if(a = *t++ - *s++) 319 return sgn(a)*ssgn; 320 while(isdigit(*s)) 321 if(*s++ != '0') 322 return -ssgn; 323 while(isdigit(*t)) 324 if(*t++ != '0') 325 return tsgn; 326 return 0; 327 } 328 329 int 330 getword(Biobuf *f, Rune *rp, int n) 331 { 332 long c; 333 334 while(n-- > 0) { 335 c = Bgetrune(f); 336 if(c < 0) 337 return 0; 338 if(c == '\n') { 339 *rp = L'\0'; 340 return 1; 341 } 342 *rp++ = c; 343 } 344 fprint(2, "Look: word too long. Bailing out.\n"); 345 return 0; 346 } 347