1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 /* Macros for Rune support of ctype.h-like functions */ 5 6 #define isupper(r) (L'A' <= (r) && (r) <= L'Z') 7 #define islower(r) (L'a' <= (r) && (r) <= L'z') 8 #define isalpha(r) (isupper(r) || islower(r)) 9 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) 10 11 #define isdigit(r) (L'0' <= (r) && (r) <= L'9') 12 13 #define isalnum(r) (isalpha(r) || isdigit(r)) 14 15 #define isspace(r) ((r) == L' ' || (r) == L'\t' \ 16 || (0x0A <= (r) && (r) <= 0x0D)) 17 18 #define tolower(r) ((r)-'A'+'a') 19 20 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) 21 22 #define WORDSIZ 4000 23 char *filename = "/lib/words"; 24 Biobuf *dfile; 25 Biobuf bout; 26 Biobuf bin; 27 28 int fold; 29 int direc; 30 int exact; 31 int iflag; 32 int rev = 1; /*-1 for reverse-ordered file, not implemented*/ 33 int (*compare)(Rune*, Rune*); 34 Rune tab = '\t'; 35 Rune entry[WORDSIZ]; 36 Rune word[WORDSIZ]; 37 Rune key[50], orig[50]; 38 Rune latin_fold_tab[] = 39 { 40 /* Table to fold latin 1 characters to ASCII equivalents 41 based at Rune value 0xc0 42 43 À Á Â Ã Ä Å Æ Ç 44 È É Ê Ë Ì Í Î Ï 45 Ð Ñ Ò Ó Ô Õ Ö × 46 Ø Ù Ú Û Ü Ý Þ ß 47 à á â ã ä å æ ç 48 è é ê ë ì í î ï 49 ð ñ ò ó ô õ ö ÷ 50 ø ù ú û ü ý þ ÿ 51 */ 52 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 53 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 54 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 55 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , 56 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 57 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 58 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 59 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', 60 }; 61 62 int locate(void); 63 int acomp(Rune*, Rune*); 64 int getword(Biobuf*, Rune *rp, int n); 65 void torune(char*, Rune*); 66 void rcanon(Rune*, Rune*); 67 int ncomp(Rune*, Rune*); 68 69 void 70 main(int argc, char *argv[]) 71 { 72 Binit(&bin, 0, OREAD); 73 Binit(&bout, 1, OWRITE); 74 compare = acomp; 75 ARGBEGIN{ 76 case 'd': 77 direc++; 78 break; 79 case 'f': 80 fold++; 81 break; 82 case 'i': 83 iflag++; 84 break; 85 case 'n': 86 compare = ncomp; 87 break; 88 case 't': 89 chartorune(&tab,ARGF()); 90 break; 91 case 'x': 92 exact++; 93 break; 94 default: 95 fprint(2, "%s: bad option %c\n", argv0, ARGC()); 96 usage: 97 fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0); 98 exits("usage"); 99 }ARGEND 100 if(!iflag){ 101 if(argc >= 1) { 102 torune(argv[0], orig); 103 argv++; 104 argc--; 105 } else 106 iflag++; 107 } 108 if(argc < 1) { 109 direc++; 110 fold++; 111 } else 112 filename = argv[0]; 113 if (!iflag) 114 rcanon(orig, key); 115 dfile = Bopen(filename, OREAD); 116 if(dfile == 0) { 117 fprint(2, "look: can't open %s\n", filename); 118 exits("no dictionary"); 119 } 120 if(!iflag) 121 if(!locate()) 122 exits("not found"); 123 do { 124 if(iflag) { 125 Bflush(&bout); 126 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) 127 exits(0); 128 rcanon(orig, key); 129 if(!locate()) 130 continue; 131 } 132 if (!exact || !acomp(word, orig)) 133 Bprint(&bout, "%S\n", entry); 134 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { 135 rcanon(entry, word); 136 switch((*compare)(key, word)) { 137 case -1: 138 if(exact) 139 break; 140 case 0: 141 if (!exact || !acomp(word, orig)) 142 Bprint(&bout, "%S\n", entry); 143 continue; 144 } 145 break; 146 } 147 } while(iflag); 148 exits(0); 149 } 150 151 int 152 locate(void) 153 { 154 long top, bot, mid; 155 long c; 156 157 bot = 0; 158 top = Bseek(dfile, 0L, 2); 159 for(;;) { 160 mid = (top+bot) / 2; 161 Bseek(dfile, mid, 0); 162 do { 163 c = Bgetrune(dfile); 164 mid++; 165 } while(c>=0 && c!='\n'); 166 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) 167 break; 168 rcanon(entry, word); 169 switch((*compare)(key, word)) { 170 case -2: 171 case -1: 172 case 0: 173 if(top <= mid) 174 break; 175 top = mid; 176 continue; 177 case 1: 178 case 2: 179 bot = mid; 180 continue; 181 } 182 break; 183 } 184 Bseek(dfile, bot, 0); 185 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { 186 rcanon(entry, word); 187 switch((*compare)(key, word)) { 188 case -2: 189 return 0; 190 case -1: 191 if(exact) 192 return 0; 193 case 0: 194 return 1; 195 case 1: 196 case 2: 197 continue; 198 } 199 } 200 return 0; 201 } 202 203 /* 204 * acomp(s, t) returns: 205 * -2 if s strictly precedes t 206 * -1 if s is a prefix of t 207 * 0 if s is the same as t 208 * 1 if t is a prefix of s 209 * 2 if t strictly precedes s 210 */ 211 212 int 213 acomp(Rune *s, Rune *t) 214 { 215 int cs, ct; 216 217 for(;;) { 218 cs = *s; 219 ct = *t; 220 if(cs != ct) 221 break; 222 if(cs == 0) 223 return 0; 224 s++; 225 t++; 226 } 227 if(cs == 0) 228 return -1; 229 if(ct == 0) 230 return 1; 231 if(cs < ct) 232 return -2; 233 return 2; 234 } 235 236 void 237 torune(char *old, Rune *new) 238 { 239 do old += chartorune(new, old); 240 while(*new++); 241 } 242 243 void 244 rcanon(Rune *old, Rune *new) 245 { 246 Rune r; 247 248 while((r = *old++) && r != tab) { 249 if (islatin1(r) && latin_fold_tab[r-0xc0]) 250 r = latin_fold_tab[r-0xc0]; 251 if(direc) 252 if(!(isalnum(r) || r == L' ' || r == L'\t')) 253 continue; 254 if(fold) 255 if(isupper(r)) 256 r = tolower(r); 257 *new++ = r; 258 } 259 *new = 0; 260 } 261 262 int 263 ncomp(Rune *s, Rune *t) 264 { 265 Rune *is, *it, *js, *jt; 266 int a, b; 267 int ssgn, tsgn; 268 269 while(isspace(*s)) 270 s++; 271 while(isspace(*t)) 272 t++; 273 ssgn = tsgn = -2*rev; 274 if(*s == '-') { 275 s++; 276 ssgn = -ssgn; 277 } 278 if(*t == '-') { 279 t++; 280 tsgn = -tsgn; 281 } 282 for(is = s; isdigit(*is); is++) 283 ; 284 for(it = t; isdigit(*it); it++) 285 ; 286 js = is; 287 jt = it; 288 a = 0; 289 if(ssgn == tsgn) 290 while(it>t && is>s) 291 if(b = *--it - *--is) 292 a = b; 293 while(is > s) 294 if(*--is != '0') 295 return -ssgn; 296 while(it > t) 297 if(*--it != '0') 298 return tsgn; 299 if(a) 300 return sgn(a)*ssgn; 301 if(*(s=js) == '.') 302 s++; 303 if(*(t=jt) == '.') 304 t++; 305 if(ssgn == tsgn) 306 while(isdigit(*s) && isdigit(*t)) 307 if(a = *t++ - *s++) 308 return sgn(a)*ssgn; 309 while(isdigit(*s)) 310 if(*s++ != '0') 311 return -ssgn; 312 while(isdigit(*t)) 313 if(*t++ != '0') 314 return tsgn; 315 return 0; 316 } 317 318 int 319 getword(Biobuf *f, Rune *rp, int n) 320 { 321 long c; 322 323 while (n-- > 0) { 324 if ((c = Bgetrune(f)) < 0) 325 return 0; 326 if(c == '\n') { 327 *rp = L'\0'; 328 return 1; 329 } 330 *rp++ = c; 331 } 332 fprint(2, "Look: word too long. Bailing out.\n"); 333 return 0; 334 } 335