xref: /plan9/sys/src/cmd/dict/mkindex.c (revision 39734e7ed1eb944f5e7b41936007d0d38b560d7f)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5 
6 /*
7  * Use this to start making an index for a new dictionary.
8  * Get the dictionary-specific nextoff and printentry(_,'h')
9  * commands working, add a record to the dicts[] array below,
10  * and run this program to get a list of offset,headword
11  * pairs
12  */
Biobuf	boutbuf;	/* output buffer; main attaches it to fd 1 with Binit */
Biobuf	*bdict;	/* the dictionary file, opened from dict->path in main */
Biobuf	*bout = &boutbuf;	/* output stream; main writes the offsets to it with Bprint */
int	linelen;	/* approximate length of the current output line; main resets it for every entry */
int	breaklen = 2000;	/* not referenced in this file; presumably the line-break width used by the shared printing code -- confirm */
int	outinhibit;	/* not referenced in this file; presumably read by the shared printing code -- confirm */
int	debug;	/* incremented once for each -D flag */

Dict	*dict;	/* current dictionary */

Entry	getentry(long);	/* read the entry starting at the given offset of bdict */
24 
25 void
main(int argc,char ** argv)26 main(int argc, char **argv)
27 {
28 	int i;
29 	long a, ae;
30 	char *p;
31 	Entry e;
32 
33 	Binit(&boutbuf, 1, OWRITE);
34 	dict = &dicts[0];
35 	ARGBEGIN {
36 		case 'd':
37 			p = ARGF();
38 			dict = 0;
39 			if(p) {
40 				for(i=0; dicts[i].name; i++)
41 					if(strcmp(p, dicts[i].name)==0) {
42 						dict = &dicts[i];
43 						break;
44 					}
45 			}
46 			if(!dict) {
47 				err("unknown dictionary: %s", p);
48 				exits("nodict");
49 			}
50 			break;
51 		case 'D':
52 			debug++;
53 			break;
54 	ARGEND }
55 	USED(argc,argv);
56 	bdict = Bopen(dict->path, OREAD);
57 	ae = Bseek(bdict, 0, 2);
58 	if(!bdict) {
59 		err("can't open dictionary %s", dict->path);
60 		exits("nodict");
61 	}
62 	for(a = 0; a < ae; a = (*dict->nextoff)(a+1)) {
63 		linelen = 0;
64 		e = getentry(a);
65 		Bprint(bout, "%ld\t", a);
66 		linelen = 4;	/* only has to be approx right */
67 		(*dict->printentry)(e, 'h');
68 	}
69 	exits(0);
70 }
71 
72 Entry
getentry(long b)73 getentry(long b)
74 {
75 	long e, n, dtop;
76 	static Entry ans;
77 	static int anslen = 0;
78 
79 	e = (*dict->nextoff)(b+1);
80 	ans.doff = b;
81 	if(e < 0) {
82 		dtop = Bseek(bdict, 0L, 2);
83 		if(b < dtop) {
84 			e = dtop;
85 		} else {
86 			err("couldn't seek to entry");
87 			ans.start = 0;
88 			ans.end = 0;
89 		}
90 	}
91 	n = e-b;
92 	if(n) {
93 		if(n > anslen) {
94 			ans.start = realloc(ans.start, n);
95 			if(!ans.start) {
96 				err("out of memory");
97 				exits("nomem");
98 			}
99 			anslen = n;
100 		}
101 		Bseek(bdict, b, 0);
102 		n = Bread(bdict, ans.start, n);
103 		ans.end = ans.start + n;
104 	}
105 	return ans;
106 }
107