xref: /csrg-svn/contrib/bib/src/makekey.c (revision 12915)
1*12915Sgarrison #
2*12915Sgarrison 
3*12915Sgarrison # include "stdio.h"
4*12915Sgarrison # include "ctype.h"
5*12915Sgarrison # include "bib.h"
6*12915Sgarrison 
7*12915Sgarrison char    commlist[MAXCOMM]=   /*  list of strings of common words         */
8*12915Sgarrison      "";
9*12915Sgarrison int firsttime = 1;
10*12915Sgarrison 
11*12915Sgarrison /*  makekey(p,max_klen,common):  compresses *p into a key
12*12915Sgarrison         folds upper to lower case.  ignores non-alphanumeric
13*12915Sgarrison         drops keys of length <= 1.
14*12915Sgarrison         drops words in common (name of file of words, one per line)
15*12915Sgarrison             (first call determines common for all later calls)
16*12915Sgarrison */
17*12915Sgarrison makekey(p,max_klen,common)
18*12915Sgarrison char *p;
19*12915Sgarrison int  max_klen;          /* max key length */
20*12915Sgarrison char *common;
21*12915Sgarrison {   register char *from, *to, *stop;
22*12915Sgarrison 
23*12915Sgarrison     if (firsttime) {firsttime= 0; load_comm(common); }
24*12915Sgarrison 
25*12915Sgarrison     from= p; to= p; stop= max_klen+p;
26*12915Sgarrison     while (*from != NULL  &&  to < stop)
27*12915Sgarrison     {   if      (islower(*from))      *to++ = *from++;
28*12915Sgarrison         else if (isdigit(*from))      *to++ = *from++;
29*12915Sgarrison         else if (isupper(*from))    { *to++ = tolower(*from);  from++; }
30*12915Sgarrison         else                          from++;
31*12915Sgarrison     }
32*12915Sgarrison     *to= NULL;
33*12915Sgarrison 
34*12915Sgarrison     if (to<=p+1 ||
35*12915Sgarrison         lookup(commlist, p) )  *p= NULL;
36*12915Sgarrison }
37*12915Sgarrison 
/*  lookup(list,p):  is p one of the strings in list?
        list is a run of null terminated strings stored back to back;
            the first empty string ends the list.
        p is a null terminated string.
        returns 1 when some entry of list equals p, 0 otherwise
            (so an empty p is never found).
*/
int lookup(list,p)
char *list, *p;
{   register char *entry;

    /* step from entry to entry until the empty string that ends the list */
    for (entry= list;  *entry != '\0';  entry += strlen(entry)+1)
    {   if (strcmp(entry, p) == 0)
            return(1);
    }
    return(0);
}
52*12915Sgarrison 
53*12915Sgarrison /*  read file common into commlist
54*12915Sgarrison */
55*12915Sgarrison load_comm(common)
56*12915Sgarrison char *common;
57*12915Sgarrison {   FILE    *commfile;          /*  stream of common words                  */
58*12915Sgarrison     char *p, *stop;
59*12915Sgarrison     commfile= fopen(common,"r");
60*12915Sgarrison     if (commfile==NULL) fprintf(stderr, "cannot open '%s'\n", common);
61*12915Sgarrison     else
62*12915Sgarrison     {   /* read commfile into commlist  */
63*12915Sgarrison             p= commlist;    stop= commlist+MAXCOMM-1;
64*12915Sgarrison             while (p<stop && ((*p= getc(commfile))!=EOF))
65*12915Sgarrison             {   if (*p=='\n')   *p= NULL;
66*12915Sgarrison                 p++;
67*12915Sgarrison             }
68*12915Sgarrison             if  (*p==EOF)  *p= NULL;
69*12915Sgarrison             else
70*12915Sgarrison             {   fprintf(stderr, "invert: too many common words\n");
71*12915Sgarrison                 commlist[0]= NULL;
72*12915Sgarrison             }
73*12915Sgarrison         fclose(commfile);
74*12915Sgarrison     }
75*12915Sgarrison }
76*12915Sgarrison 
77