xref: /plan9/sys/src/cmd/venti/srv/buildindex.c (revision 1206f3fc1b0aab3e32fa15899d31a9b5bfa82d9f)
1368c31abSDavid du Colombier /*
2368c31abSDavid du Colombier  * Rebuild the index from scratch, in place.
3368c31abSDavid du Colombier  */
4368c31abSDavid du Colombier #include "stdinc.h"
5368c31abSDavid du Colombier #include "dat.h"
6368c31abSDavid du Colombier #include "fns.h"
7368c31abSDavid du Colombier 
8368c31abSDavid du Colombier enum
9368c31abSDavid du Colombier {
10368c31abSDavid du Colombier 	MinBufSize = 64*1024,
11368c31abSDavid du Colombier 	MaxBufSize = 4*1024*1024,
12368c31abSDavid du Colombier };
13368c31abSDavid du Colombier 
14368c31abSDavid du Colombier int		dumb;
15368c31abSDavid du Colombier int		errors;
16368c31abSDavid du Colombier char		**isect;
17368c31abSDavid du Colombier int		nisect;
18368c31abSDavid du Colombier int		bloom;
19368c31abSDavid du Colombier int		zero;
20368c31abSDavid du Colombier 
21368c31abSDavid du Colombier u32int	isectmem;
22368c31abSDavid du Colombier u64int	totalbuckets;
23368c31abSDavid du Colombier u64int	totalclumps;
24368c31abSDavid du Colombier Channel	*arenadonechan;
25368c31abSDavid du Colombier Channel	*isectdonechan;
26368c31abSDavid du Colombier Index	*ix;
27368c31abSDavid du Colombier 
28368c31abSDavid du Colombier u64int	arenaentries;
29368c31abSDavid du Colombier u64int	skipentries;
30368c31abSDavid du Colombier u64int	indexentries;
31368c31abSDavid du Colombier 
32368c31abSDavid du Colombier static int shouldprocess(ISect*);
33368c31abSDavid du Colombier static void	isectproc(void*);
34368c31abSDavid du Colombier static void	arenapartproc(void*);
35368c31abSDavid du Colombier 
36368c31abSDavid du Colombier void
usage(void)37368c31abSDavid du Colombier usage(void)
38368c31abSDavid du Colombier {
39f9e1cf08SDavid du Colombier 	fprint(2, "usage: buildindex [-b] [-i isect]... [-M imem] venti.conf\n");
40368c31abSDavid du Colombier 	threadexitsall("usage");
41368c31abSDavid du Colombier }
42368c31abSDavid du Colombier 
43368c31abSDavid du Colombier void
threadmain(int argc,char * argv[])44368c31abSDavid du Colombier threadmain(int argc, char *argv[])
45368c31abSDavid du Colombier {
46f9e1cf08SDavid du Colombier 	int fd, i, napart, nfinish, maxdisks;
47368c31abSDavid du Colombier 	u32int bcmem, imem;
48368c31abSDavid du Colombier 	Config conf;
49368c31abSDavid du Colombier 	Part *p;
50368c31abSDavid du Colombier 
51f9e1cf08SDavid du Colombier 	maxdisks = 100000;
52368c31abSDavid du Colombier 	ventifmtinstall();
53368c31abSDavid du Colombier 	imem = 256*1024*1024;
54368c31abSDavid du Colombier 	ARGBEGIN{
55368c31abSDavid du Colombier 	case 'b':
56368c31abSDavid du Colombier 		bloom = 1;
57368c31abSDavid du Colombier 		break;
58368c31abSDavid du Colombier 	case 'd':	/* debugging - make sure to run all 3 passes */
59368c31abSDavid du Colombier 		dumb = 1;
60368c31abSDavid du Colombier 		break;
61368c31abSDavid du Colombier 	case 'i':
62368c31abSDavid du Colombier 		isect = vtrealloc(isect, (nisect+1)*sizeof(isect[0]));
63368c31abSDavid du Colombier 		isect[nisect++] = EARGF(usage());
64368c31abSDavid du Colombier 		break;
65368c31abSDavid du Colombier 	case 'M':
66368c31abSDavid du Colombier 		imem = unittoull(EARGF(usage()));
67368c31abSDavid du Colombier 		break;
68f9e1cf08SDavid du Colombier 	case 'm':	/* temporary - might go away */
69f9e1cf08SDavid du Colombier 		maxdisks = atoi(EARGF(usage()));
70f9e1cf08SDavid du Colombier 		break;
71368c31abSDavid du Colombier 	default:
72368c31abSDavid du Colombier 		usage();
73368c31abSDavid du Colombier 		break;
74368c31abSDavid du Colombier 	}ARGEND
75368c31abSDavid du Colombier 
76368c31abSDavid du Colombier 	if(argc != 1)
77368c31abSDavid du Colombier 		usage();
78368c31abSDavid du Colombier 
79368c31abSDavid du Colombier 	if(initventi(argv[0], &conf) < 0)
80368c31abSDavid du Colombier 		sysfatal("can't init venti: %r");
81368c31abSDavid du Colombier 	ix = mainindex;
82368c31abSDavid du Colombier 	if(nisect == 0 && ix->bloom)
83368c31abSDavid du Colombier 		bloom = 1;
84368c31abSDavid du Colombier 	if(bloom && ix->bloom && resetbloom(ix->bloom) < 0)
85368c31abSDavid du Colombier 		sysfatal("loadbloom: %r");
86368c31abSDavid du Colombier 	if(bloom && !ix->bloom)
87368c31abSDavid du Colombier 		sysfatal("-b specified but no bloom filter");
88368c31abSDavid du Colombier 	if(!bloom)
89368c31abSDavid du Colombier 		ix->bloom = nil;
90368c31abSDavid du Colombier 	isectmem = imem/ix->nsects;
91368c31abSDavid du Colombier 
92368c31abSDavid du Colombier 	/*
93368c31abSDavid du Colombier 	 * safety first - only need read access to arenas
94368c31abSDavid du Colombier 	 */
95368c31abSDavid du Colombier 	p = nil;
96368c31abSDavid du Colombier 	for(i=0; i<ix->narenas; i++){
97368c31abSDavid du Colombier 		if(ix->arenas[i]->part != p){
98368c31abSDavid du Colombier 			p = ix->arenas[i]->part;
99368c31abSDavid du Colombier 			if((fd = open(p->filename, OREAD)) < 0)
100368c31abSDavid du Colombier 				sysfatal("cannot reopen %s: %r", p->filename);
101368c31abSDavid du Colombier 			dup(fd, p->fd);
102368c31abSDavid du Colombier 			close(fd);
103368c31abSDavid du Colombier 		}
104368c31abSDavid du Colombier 	}
105368c31abSDavid du Colombier 
106368c31abSDavid du Colombier 	/*
107368c31abSDavid du Colombier 	 * need a block for every arena
108368c31abSDavid du Colombier 	 */
109368c31abSDavid du Colombier 	bcmem = maxblocksize * (mainindex->narenas + 16);
110368c31abSDavid du Colombier 	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
111368c31abSDavid du Colombier 	initdcache(bcmem);
112368c31abSDavid du Colombier 
113368c31abSDavid du Colombier 	totalclumps = 0;
114368c31abSDavid du Colombier 	for(i=0; i<ix->narenas; i++)
115368c31abSDavid du Colombier 		totalclumps += ix->arenas[i]->diskstats.clumps;
116368c31abSDavid du Colombier 
117368c31abSDavid du Colombier 	totalbuckets = 0;
118368c31abSDavid du Colombier 	for(i=0; i<ix->nsects; i++)
119368c31abSDavid du Colombier 		totalbuckets += ix->sects[i]->blocks;
120368c31abSDavid du Colombier 	fprint(2, "%,lld clumps, %,lld buckets\n", totalclumps, totalbuckets);
121368c31abSDavid du Colombier 
122368c31abSDavid du Colombier 	/* start index procs */
123368c31abSDavid du Colombier 	fprint(2, "%T read index\n");
124368c31abSDavid du Colombier 	isectdonechan = chancreate(sizeof(void*), 0);
125368c31abSDavid du Colombier 	for(i=0; i<ix->nsects; i++){
126368c31abSDavid du Colombier 		if(shouldprocess(ix->sects[i])){
127368c31abSDavid du Colombier 			ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0);
128368c31abSDavid du Colombier 			vtproc(isectproc, ix->sects[i]);
129368c31abSDavid du Colombier 		}
130368c31abSDavid du Colombier 	}
131368c31abSDavid du Colombier 
132368c31abSDavid du Colombier 	for(i=0; i<nisect; i++)
133368c31abSDavid du Colombier 		if(isect[i])
134368c31abSDavid du Colombier 			fprint(2, "warning: did not find index section %s\n", isect[i]);
135368c31abSDavid du Colombier 
136368c31abSDavid du Colombier 	/* start arena procs */
137368c31abSDavid du Colombier 	p = nil;
138368c31abSDavid du Colombier 	napart = 0;
139f9e1cf08SDavid du Colombier 	nfinish = 0;
140368c31abSDavid du Colombier 	arenadonechan = chancreate(sizeof(void*), 0);
141368c31abSDavid du Colombier 	for(i=0; i<ix->narenas; i++){
142368c31abSDavid du Colombier 		if(ix->arenas[i]->part != p){
143368c31abSDavid du Colombier 			p = ix->arenas[i]->part;
144368c31abSDavid du Colombier 			vtproc(arenapartproc, p);
145f9e1cf08SDavid du Colombier 			if(++napart >= maxdisks){
146f9e1cf08SDavid du Colombier 				recvp(arenadonechan);
147f9e1cf08SDavid du Colombier 				nfinish++;
148f9e1cf08SDavid du Colombier 			}
149368c31abSDavid du Colombier 		}
150368c31abSDavid du Colombier 	}
151368c31abSDavid du Colombier 
152368c31abSDavid du Colombier 	/* wait for arena procs to finish */
153f9e1cf08SDavid du Colombier 	for(nfinish=0; nfinish<napart; nfinish++)
154368c31abSDavid du Colombier 		recvp(arenadonechan);
155368c31abSDavid du Colombier 
156368c31abSDavid du Colombier 	/* tell index procs to finish */
157368c31abSDavid du Colombier 	for(i=0; i<ix->nsects; i++)
158368c31abSDavid du Colombier 		if(ix->sects[i]->writechan)
159368c31abSDavid du Colombier 			send(ix->sects[i]->writechan, nil);
160368c31abSDavid du Colombier 
161368c31abSDavid du Colombier 	/* wait for index procs to finish */
162368c31abSDavid du Colombier 	for(i=0; i<ix->nsects; i++)
163368c31abSDavid du Colombier 		if(ix->sects[i]->writechan)
164368c31abSDavid du Colombier 			recvp(isectdonechan);
165368c31abSDavid du Colombier 
166368c31abSDavid du Colombier 	if(ix->bloom && writebloom(ix->bloom) < 0)
167368c31abSDavid du Colombier 		fprint(2, "writing bloom filter: %r\n");
168368c31abSDavid du Colombier 
169368c31abSDavid du Colombier 	fprint(2, "%T done arenaentries=%,lld indexed=%,lld (nskip=%,lld)\n",
170368c31abSDavid du Colombier 		arenaentries, indexentries, skipentries);
171368c31abSDavid du Colombier 	threadexitsall(nil);
172368c31abSDavid du Colombier }
173368c31abSDavid du Colombier 
174368c31abSDavid du Colombier static int
shouldprocess(ISect * is)175368c31abSDavid du Colombier shouldprocess(ISect *is)
176368c31abSDavid du Colombier {
177368c31abSDavid du Colombier 	int i;
178368c31abSDavid du Colombier 
179368c31abSDavid du Colombier 	if(nisect == 0)
180368c31abSDavid du Colombier 		return 1;
181368c31abSDavid du Colombier 
182368c31abSDavid du Colombier 	for(i=0; i<nisect; i++)
183368c31abSDavid du Colombier 		if(isect[i] && strcmp(isect[i], is->name) == 0){
184368c31abSDavid du Colombier 			isect[i] = nil;
185368c31abSDavid du Colombier 			return 1;
186368c31abSDavid du Colombier 		}
187368c31abSDavid du Colombier 	return 0;
188368c31abSDavid du Colombier }
189368c31abSDavid du Colombier 
190368c31abSDavid du Colombier static void
add(u64int * a,u64int n)191368c31abSDavid du Colombier add(u64int *a, u64int n)
192368c31abSDavid du Colombier {
193368c31abSDavid du Colombier 	static Lock l;
194368c31abSDavid du Colombier 
195368c31abSDavid du Colombier 	lock(&l);
196368c31abSDavid du Colombier 	*a += n;
197368c31abSDavid du Colombier 	unlock(&l);
198368c31abSDavid du Colombier }
199368c31abSDavid du Colombier 
200368c31abSDavid du Colombier /*
201368c31abSDavid du Colombier  * Read through an arena partition and send each of its IEntries
202368c31abSDavid du Colombier  * to the appropriate index section.  When finished, send on
203368c31abSDavid du Colombier  * arenadonechan.
204368c31abSDavid du Colombier  */
205368c31abSDavid du Colombier enum
206368c31abSDavid du Colombier {
207368c31abSDavid du Colombier 	ClumpChunks = 32*1024,
208368c31abSDavid du Colombier };
209368c31abSDavid du Colombier static void
arenapartproc(void * v)210368c31abSDavid du Colombier arenapartproc(void *v)
211368c31abSDavid du Colombier {
212368c31abSDavid du Colombier 	int i, j, n, nskip, x;
213368c31abSDavid du Colombier 	u32int clump;
214368c31abSDavid du Colombier 	u64int addr, tot;
215368c31abSDavid du Colombier 	Arena *a;
216368c31abSDavid du Colombier 	ClumpInfo *ci, *cis;
217368c31abSDavid du Colombier 	IEntry ie;
218368c31abSDavid du Colombier 	Part *p;
219368c31abSDavid du Colombier 
220368c31abSDavid du Colombier 	p = v;
221368c31abSDavid du Colombier 	threadsetname("arenaproc %s", p->name);
222368c31abSDavid du Colombier 
223368c31abSDavid du Colombier 	nskip = 0;
224368c31abSDavid du Colombier 	tot = 0;
225368c31abSDavid du Colombier 	cis = MKN(ClumpInfo, ClumpChunks);
226368c31abSDavid du Colombier 	for(i=0; i<ix->narenas; i++){
227368c31abSDavid du Colombier 		a = ix->arenas[i];
228368c31abSDavid du Colombier 		if(a->part != p)
229368c31abSDavid du Colombier 			continue;
230368c31abSDavid du Colombier 		if(a->memstats.clumps)
231368c31abSDavid du Colombier 			fprint(2, "%T arena %s: %d entries\n",
232368c31abSDavid du Colombier 				a->name, a->memstats.clumps);
233f9e1cf08SDavid du Colombier 		/*
234f9e1cf08SDavid du Colombier 		 * Running the loop backwards accesses the
235f9e1cf08SDavid du Colombier 		 * clump info blocks forwards, since they are
236f9e1cf08SDavid du Colombier 		 * stored in reverse order at the end of the arena.
237f9e1cf08SDavid du Colombier 		 * This speeds things slightly.
238f9e1cf08SDavid du Colombier 		 */
239f9e1cf08SDavid du Colombier 		addr = ix->amap[i].start + a->memstats.used;
240f9e1cf08SDavid du Colombier 		for(clump=a->memstats.clumps; clump > 0; clump-=n){
241368c31abSDavid du Colombier 			n = ClumpChunks;
242f9e1cf08SDavid du Colombier 			if(n > clump)
243f9e1cf08SDavid du Colombier 				n = clump;
244f9e1cf08SDavid du Colombier 			if(readclumpinfos(a, clump-n, cis, n) != n){
245368c31abSDavid du Colombier 				fprint(2, "%T arena %s: directory read: %r\n", a->name);
246368c31abSDavid du Colombier 				errors = 1;
247368c31abSDavid du Colombier 				break;
248368c31abSDavid du Colombier 			}
249f9e1cf08SDavid du Colombier 			for(j=n-1; j>=0; j--){
250368c31abSDavid du Colombier 				ci = &cis[j];
251368c31abSDavid du Colombier 				ie.ia.type = ci->type;
252368c31abSDavid du Colombier 				ie.ia.size = ci->uncsize;
253f9e1cf08SDavid du Colombier 				addr -= ci->size + ClumpSize;
254368c31abSDavid du Colombier 				ie.ia.addr = addr;
255368c31abSDavid du Colombier 				ie.ia.blocks = (ci->size + ClumpSize + (1<<ABlockLog)-1) >> ABlockLog;
256368c31abSDavid du Colombier 				scorecp(ie.score, ci->score);
257368c31abSDavid du Colombier 				if(ci->type == VtCorruptType)
258368c31abSDavid du Colombier 					nskip++;
259368c31abSDavid du Colombier 				else{
260368c31abSDavid du Colombier 					tot++;
261368c31abSDavid du Colombier 					x = indexsect(ix, ie.score);
262368c31abSDavid du Colombier 					assert(0 <= x && x < ix->nsects);
263368c31abSDavid du Colombier 					if(ix->sects[x]->writechan)
264368c31abSDavid du Colombier 						send(ix->sects[x]->writechan, &ie);
265368c31abSDavid du Colombier 					if(ix->bloom)
266368c31abSDavid du Colombier 						markbloomfilter(ix->bloom, ie.score);
267368c31abSDavid du Colombier 				}
268368c31abSDavid du Colombier 			}
269368c31abSDavid du Colombier 		}
270f9e1cf08SDavid du Colombier 		if(addr != ix->amap[i].start)
271f9e1cf08SDavid du Colombier 			fprint(2, "%T arena %s: clump miscalculation %lld != %lld\n", a->name, addr, ix->amap[i].start);
272368c31abSDavid du Colombier 	}
273368c31abSDavid du Colombier 	add(&arenaentries, tot);
274368c31abSDavid du Colombier 	add(&skipentries, nskip);
275368c31abSDavid du Colombier 	sendp(arenadonechan, p);
276368c31abSDavid du Colombier }
277368c31abSDavid du Colombier 
278368c31abSDavid du Colombier /*
279368c31abSDavid du Colombier  * Convert score into relative bucket number in isect.
280368c31abSDavid du Colombier  * Can pass a packed ientry instead of score - score is first.
281368c31abSDavid du Colombier  */
282368c31abSDavid du Colombier static u32int
score2bucket(ISect * is,uchar * score)283368c31abSDavid du Colombier score2bucket(ISect *is, uchar *score)
284368c31abSDavid du Colombier {
285368c31abSDavid du Colombier 	u32int b;
286368c31abSDavid du Colombier 
287368c31abSDavid du Colombier 	b = hashbits(score, 32)/ix->div;
288368c31abSDavid du Colombier 	if(b < is->start || b >= is->stop){
289368c31abSDavid du Colombier 		fprint(2, "score2bucket: score=%V div=%d b=%ud start=%ud stop=%ud\n",
290368c31abSDavid du Colombier 			score, ix->div, b, is->start, is->stop);
291368c31abSDavid du Colombier 	}
292368c31abSDavid du Colombier 	assert(is->start <= b && b < is->stop);
293368c31abSDavid du Colombier 	return b - is->start;
294368c31abSDavid du Colombier }
295368c31abSDavid du Colombier 
296368c31abSDavid du Colombier /*
297368c31abSDavid du Colombier  * Convert offset in index section to bucket number.
298368c31abSDavid du Colombier  */
299368c31abSDavid du Colombier static u32int
offset2bucket(ISect * is,u64int offset)300368c31abSDavid du Colombier offset2bucket(ISect *is, u64int offset)
301368c31abSDavid du Colombier {
302368c31abSDavid du Colombier 	u32int b;
303368c31abSDavid du Colombier 
304368c31abSDavid du Colombier 	assert(is->blockbase <= offset);
305368c31abSDavid du Colombier 	offset -= is->blockbase;
306368c31abSDavid du Colombier 	b = offset/is->blocksize;
307368c31abSDavid du Colombier 	assert(b < is->stop-is->start);
308368c31abSDavid du Colombier 	return b;
309368c31abSDavid du Colombier }
310368c31abSDavid du Colombier 
311368c31abSDavid du Colombier /*
312368c31abSDavid du Colombier  * Convert bucket number to offset.
313368c31abSDavid du Colombier  */
314368c31abSDavid du Colombier static u64int
bucket2offset(ISect * is,u32int b)315368c31abSDavid du Colombier bucket2offset(ISect *is, u32int b)
316368c31abSDavid du Colombier {
317368c31abSDavid du Colombier 	assert(b <= is->stop-is->start);
318368c31abSDavid du Colombier 	return is->blockbase + (u64int)b*is->blocksize;
319368c31abSDavid du Colombier }
320368c31abSDavid du Colombier 
321368c31abSDavid du Colombier /*
322368c31abSDavid du Colombier  * IEntry buffers to hold initial round of spraying.
323368c31abSDavid du Colombier  */
324368c31abSDavid du Colombier typedef struct Buf Buf;
325368c31abSDavid du Colombier struct Buf
326368c31abSDavid du Colombier {
327368c31abSDavid du Colombier 	Part *part;			/* partition being written */
328368c31abSDavid du Colombier 	uchar *bp;		/* current block */
329368c31abSDavid du Colombier 	uchar *ep;		/* end of block */
330368c31abSDavid du Colombier 	uchar *wp;		/* write position in block */
331368c31abSDavid du Colombier 	u64int boffset;		/* start offset */
332368c31abSDavid du Colombier 	u64int woffset;		/* next write offset */
333368c31abSDavid du Colombier 	u64int eoffset;		/* end offset */
334368c31abSDavid du Colombier 	u32int nentry;		/* number of entries written */
335368c31abSDavid du Colombier };
336368c31abSDavid du Colombier 
337368c31abSDavid du Colombier static void
bflush(Buf * buf)338368c31abSDavid du Colombier bflush(Buf *buf)
339368c31abSDavid du Colombier {
340368c31abSDavid du Colombier 	u32int bufsize;
341368c31abSDavid du Colombier 
342368c31abSDavid du Colombier 	if(buf->woffset >= buf->eoffset)
343368c31abSDavid du Colombier 		sysfatal("buf index chunk overflow - need bigger index");
344368c31abSDavid du Colombier 	bufsize = buf->ep - buf->bp;
345368c31abSDavid du Colombier 	if(writepart(buf->part, buf->woffset, buf->bp, bufsize) < 0){
346368c31abSDavid du Colombier 		fprint(2, "write %s: %r\n", buf->part->name);
347368c31abSDavid du Colombier 		errors = 1;
348368c31abSDavid du Colombier 	}
349368c31abSDavid du Colombier 	buf->woffset += bufsize;
350368c31abSDavid du Colombier 	memset(buf->bp, 0, bufsize);
351368c31abSDavid du Colombier 	buf->wp = buf->bp;
352368c31abSDavid du Colombier }
353368c31abSDavid du Colombier 
354368c31abSDavid du Colombier static void
bwrite(Buf * buf,IEntry * ie)355368c31abSDavid du Colombier bwrite(Buf *buf, IEntry *ie)
356368c31abSDavid du Colombier {
357368c31abSDavid du Colombier 	if(buf->wp+IEntrySize > buf->ep)
358368c31abSDavid du Colombier 		bflush(buf);
359368c31abSDavid du Colombier 	assert(buf->bp <= buf->wp && buf->wp < buf->ep);
360368c31abSDavid du Colombier 	packientry(ie, buf->wp);
361368c31abSDavid du Colombier 	buf->wp += IEntrySize;
362368c31abSDavid du Colombier 	assert(buf->bp <= buf->wp && buf->wp <= buf->ep);
363368c31abSDavid du Colombier 	buf->nentry++;
364368c31abSDavid du Colombier }
365368c31abSDavid du Colombier 
366368c31abSDavid du Colombier /*
367368c31abSDavid du Colombier  * Minibuffer.  In-memory data structure holds our place
368368c31abSDavid du Colombier  * in the buffer but has no block data.  We are writing and
369368c31abSDavid du Colombier  * reading the minibuffers at the same time.  (Careful!)
370368c31abSDavid du Colombier  */
371368c31abSDavid du Colombier typedef struct Minibuf Minibuf;
372368c31abSDavid du Colombier struct Minibuf
373368c31abSDavid du Colombier {
374368c31abSDavid du Colombier 	u64int boffset;		/* start offset */
375368c31abSDavid du Colombier 	u64int roffset;		/* read offset */
376368c31abSDavid du Colombier 	u64int woffset;		/* write offset */
377368c31abSDavid du Colombier 	u64int eoffset;		/* end offset */
378368c31abSDavid du Colombier 	u32int nentry;		/* # entries left to read */
379368c31abSDavid du Colombier 	u32int nwentry;	/* # entries written */
380368c31abSDavid du Colombier };
381368c31abSDavid du Colombier 
382368c31abSDavid du Colombier /*
383368c31abSDavid du Colombier  * Index entry pool.  Used when trying to shuffle around
384368c31abSDavid du Colombier  * the entries in a big buffer into the corresponding M minibuffers.
385368c31abSDavid du Colombier  * Sized to hold M*EntriesPerBlock entries, so that there will always
386368c31abSDavid du Colombier  * either be room in the pool for another block worth of entries
387368c31abSDavid du Colombier  * or there will be an entire block worth of sorted entries to
388368c31abSDavid du Colombier  * write out.
389368c31abSDavid du Colombier  */
390368c31abSDavid du Colombier typedef struct IEntryLink IEntryLink;
391368c31abSDavid du Colombier typedef struct IPool IPool;
392368c31abSDavid du Colombier 
393368c31abSDavid du Colombier struct IEntryLink
394368c31abSDavid du Colombier {
395368c31abSDavid du Colombier 	uchar ie[IEntrySize];		/* raw IEntry */
396368c31abSDavid du Colombier 	IEntryLink *next;		/* next in chain */
397368c31abSDavid du Colombier };
398368c31abSDavid du Colombier 
399368c31abSDavid du Colombier struct IPool
400368c31abSDavid du Colombier {
401368c31abSDavid du Colombier 	ISect *isect;
402368c31abSDavid du Colombier 	u32int buck0;			/* first bucket in pool */
403368c31abSDavid du Colombier 	u32int mbufbuckets;	/* buckets per minibuf */
404368c31abSDavid du Colombier 	IEntryLink *entry;		/* all IEntryLinks */
405368c31abSDavid du Colombier 	u32int nentry;			/* # of IEntryLinks */
406368c31abSDavid du Colombier 	IEntryLink *free;		/* free list */
407368c31abSDavid du Colombier 	u32int nfree;			/* # on free list */
408368c31abSDavid du Colombier 	Minibuf *mbuf;			/* all minibufs */
409368c31abSDavid du Colombier 	u32int nmbuf;			/* # of minibufs */
410368c31abSDavid du Colombier 	IEntryLink **mlist;		/* lists for each minibuf */
411368c31abSDavid du Colombier 	u32int *mcount;		/* # on each mlist[i] */
412368c31abSDavid du Colombier 	u32int bufsize;			/* block buffer size */
413368c31abSDavid du Colombier 	uchar *rbuf;			/* read buffer */
414368c31abSDavid du Colombier 	uchar *wbuf;			/* write buffer */
415368c31abSDavid du Colombier 	u32int epbuf;			/* entries per block buffer */
416368c31abSDavid du Colombier };
417368c31abSDavid du Colombier 
418368c31abSDavid du Colombier /*
419368c31abSDavid du Colombier static int
420368c31abSDavid du Colombier countsokay(IPool *p)
421368c31abSDavid du Colombier {
422368c31abSDavid du Colombier 	int i;
423368c31abSDavid du Colombier 	u64int n;
424368c31abSDavid du Colombier 
425368c31abSDavid du Colombier 	n = 0;
426368c31abSDavid du Colombier 	for(i=0; i<p->nmbuf; i++)
427368c31abSDavid du Colombier 		n += p->mcount[i];
428368c31abSDavid du Colombier 	n += p->nfree;
429368c31abSDavid du Colombier 	if(n != p->nentry){
430368c31abSDavid du Colombier 		print("free %ud:", p->nfree);
431368c31abSDavid du Colombier 		for(i=0; i<p->nmbuf; i++)
432368c31abSDavid du Colombier 			print(" %ud", p->mcount[i]);
433368c31abSDavid du Colombier 		print(" = %lld nentry: %ud\n", n, p->nentry);
434368c31abSDavid du Colombier 	}
435368c31abSDavid du Colombier 	return n == p->nentry;
436368c31abSDavid du Colombier }
437368c31abSDavid du Colombier */
438368c31abSDavid du Colombier 
439368c31abSDavid du Colombier static IPool*
mkipool(ISect * isect,Minibuf * mbuf,u32int nmbuf,u32int mbufbuckets,u32int bufsize)440368c31abSDavid du Colombier mkipool(ISect *isect, Minibuf *mbuf, u32int nmbuf,
441368c31abSDavid du Colombier 	u32int mbufbuckets, u32int bufsize)
442368c31abSDavid du Colombier {
443368c31abSDavid du Colombier 	u32int i, nentry;
444368c31abSDavid du Colombier 	uchar *data;
445368c31abSDavid du Colombier 	IPool *p;
446368c31abSDavid du Colombier 	IEntryLink *l;
447368c31abSDavid du Colombier 
448368c31abSDavid du Colombier 	nentry = (nmbuf+1)*bufsize / IEntrySize;
449368c31abSDavid du Colombier 	p = ezmalloc(sizeof(IPool)
450368c31abSDavid du Colombier 		+nentry*sizeof(IEntry)
451368c31abSDavid du Colombier 		+nmbuf*sizeof(IEntryLink*)
452368c31abSDavid du Colombier 		+nmbuf*sizeof(u32int)
453368c31abSDavid du Colombier 		+3*bufsize);
454368c31abSDavid du Colombier 
455368c31abSDavid du Colombier 	p->isect = isect;
456368c31abSDavid du Colombier 	p->mbufbuckets = mbufbuckets;
457368c31abSDavid du Colombier 	p->bufsize = bufsize;
458368c31abSDavid du Colombier 	p->entry = (IEntryLink*)(p+1);
459368c31abSDavid du Colombier 	p->nentry = nentry;
460368c31abSDavid du Colombier 	p->mlist = (IEntryLink**)(p->entry+nentry);
461368c31abSDavid du Colombier 	p->mcount = (u32int*)(p->mlist+nmbuf);
462368c31abSDavid du Colombier 	p->nmbuf = nmbuf;
463368c31abSDavid du Colombier 	p->mbuf = mbuf;
464368c31abSDavid du Colombier 	data = (uchar*)(p->mcount+nmbuf);
465368c31abSDavid du Colombier 	data += bufsize - (uintptr)data%bufsize;
466368c31abSDavid du Colombier 	p->rbuf = data;
467368c31abSDavid du Colombier 	p->wbuf = data+bufsize;
468368c31abSDavid du Colombier 	p->epbuf = bufsize/IEntrySize;
469368c31abSDavid du Colombier 
470368c31abSDavid du Colombier 	for(i=0; i<p->nentry; i++){
471368c31abSDavid du Colombier 		l = &p->entry[i];
472368c31abSDavid du Colombier 		l->next = p->free;
473368c31abSDavid du Colombier 		p->free = l;
474368c31abSDavid du Colombier 		p->nfree++;
475368c31abSDavid du Colombier 	}
476368c31abSDavid du Colombier 	return p;
477368c31abSDavid du Colombier }
478368c31abSDavid du Colombier 
479368c31abSDavid du Colombier /*
480368c31abSDavid du Colombier  * Add the index entry ie to the pool p.
481368c31abSDavid du Colombier  * Caller must know there is room.
482368c31abSDavid du Colombier  */
483368c31abSDavid du Colombier static void
ipoolinsert(IPool * p,uchar * ie)484368c31abSDavid du Colombier ipoolinsert(IPool *p, uchar *ie)
485368c31abSDavid du Colombier {
486368c31abSDavid du Colombier 	u32int buck, x;
487368c31abSDavid du Colombier 	IEntryLink *l;
488368c31abSDavid du Colombier 
489368c31abSDavid du Colombier 	assert(p->free != nil);
490368c31abSDavid du Colombier 
491368c31abSDavid du Colombier 	buck = score2bucket(p->isect, ie);
492368c31abSDavid du Colombier 	x = (buck-p->buck0) / p->mbufbuckets;
493368c31abSDavid du Colombier 	if(x >= p->nmbuf){
494368c31abSDavid du Colombier 		fprint(2, "buck=%ud mbufbucket=%ud x=%ud\n",
495368c31abSDavid du Colombier 			buck, p->mbufbuckets, x);
496368c31abSDavid du Colombier 	}
497368c31abSDavid du Colombier 	assert(x < p->nmbuf);
498368c31abSDavid du Colombier 
499368c31abSDavid du Colombier 	l = p->free;
500368c31abSDavid du Colombier 	p->free = l->next;
501368c31abSDavid du Colombier 	p->nfree--;
502368c31abSDavid du Colombier 	memmove(l->ie, ie, IEntrySize);
503368c31abSDavid du Colombier 	l->next = p->mlist[x];
504368c31abSDavid du Colombier 	p->mlist[x] = l;
505368c31abSDavid du Colombier 	p->mcount[x]++;
506368c31abSDavid du Colombier }
507368c31abSDavid du Colombier 
508368c31abSDavid du Colombier /*
509368c31abSDavid du Colombier  * Pull out a block containing as many
510368c31abSDavid du Colombier  * entries as possible for minibuffer x.
511368c31abSDavid du Colombier  */
512368c31abSDavid du Colombier static u32int
ipoolgetbuf(IPool * p,u32int x)513368c31abSDavid du Colombier ipoolgetbuf(IPool *p, u32int x)
514368c31abSDavid du Colombier {
515368c31abSDavid du Colombier 	uchar *bp, *ep, *wp;
516368c31abSDavid du Colombier 	IEntryLink *l;
517368c31abSDavid du Colombier 	u32int n;
518368c31abSDavid du Colombier 
519368c31abSDavid du Colombier 	bp = p->wbuf;
520368c31abSDavid du Colombier 	ep = p->wbuf + p->bufsize;
521368c31abSDavid du Colombier 	n = 0;
522368c31abSDavid du Colombier 	assert(x < p->nmbuf);
523368c31abSDavid du Colombier 	for(wp=bp; wp+IEntrySize<=ep && p->mlist[x]; wp+=IEntrySize){
524368c31abSDavid du Colombier 		l = p->mlist[x];
525368c31abSDavid du Colombier 		p->mlist[x] = l->next;
526368c31abSDavid du Colombier 		p->mcount[x]--;
527368c31abSDavid du Colombier 		memmove(wp, l->ie, IEntrySize);
528368c31abSDavid du Colombier 		l->next = p->free;
529368c31abSDavid du Colombier 		p->free = l;
530368c31abSDavid du Colombier 		p->nfree++;
531368c31abSDavid du Colombier 		n++;
532368c31abSDavid du Colombier 	}
533368c31abSDavid du Colombier 	memset(wp, 0, ep-wp);
534368c31abSDavid du Colombier 	return n;
535368c31abSDavid du Colombier }
536368c31abSDavid du Colombier 
537368c31abSDavid du Colombier /*
538368c31abSDavid du Colombier  * Read a block worth of entries from the minibuf
539368c31abSDavid du Colombier  * into the pool.  Caller must know there is room.
540368c31abSDavid du Colombier  */
541368c31abSDavid du Colombier static void
ipoolloadblock(IPool * p,Minibuf * mb)542368c31abSDavid du Colombier ipoolloadblock(IPool *p, Minibuf *mb)
543368c31abSDavid du Colombier {
544368c31abSDavid du Colombier 	u32int i, n;
545368c31abSDavid du Colombier 
546368c31abSDavid du Colombier 	assert(mb->nentry > 0);
547368c31abSDavid du Colombier 	assert(mb->roffset >= mb->woffset);
548368c31abSDavid du Colombier 	assert(mb->roffset < mb->eoffset);
549368c31abSDavid du Colombier 
550368c31abSDavid du Colombier 	n = p->bufsize/IEntrySize;
551368c31abSDavid du Colombier 	if(n > mb->nentry)
552368c31abSDavid du Colombier 		n = mb->nentry;
553368c31abSDavid du Colombier 	if(readpart(p->isect->part, mb->roffset, p->rbuf, p->bufsize) < 0)
554368c31abSDavid du Colombier 		fprint(2, "readpart %s: %r\n", p->isect->part->name);
555368c31abSDavid du Colombier 	else{
556368c31abSDavid du Colombier 		for(i=0; i<n; i++)
557368c31abSDavid du Colombier 			ipoolinsert(p, p->rbuf+i*IEntrySize);
558368c31abSDavid du Colombier 	}
559368c31abSDavid du Colombier 	mb->nentry -= n;
560368c31abSDavid du Colombier 	mb->roffset += p->bufsize;
561368c31abSDavid du Colombier }
562368c31abSDavid du Colombier 
563368c31abSDavid du Colombier /*
564368c31abSDavid du Colombier  * Write out a block worth of entries to minibuffer x.
565368c31abSDavid du Colombier  * If necessary, pick up the data there before overwriting it.
566368c31abSDavid du Colombier  */
567368c31abSDavid du Colombier static void
ipoolflush0(IPool * pool,u32int x)568368c31abSDavid du Colombier ipoolflush0(IPool *pool, u32int x)
569368c31abSDavid du Colombier {
570368c31abSDavid du Colombier 	u32int bufsize;
571368c31abSDavid du Colombier 	Minibuf *mb;
572368c31abSDavid du Colombier 
573368c31abSDavid du Colombier 	mb = pool->mbuf+x;
574368c31abSDavid du Colombier 	bufsize = pool->bufsize;
575368c31abSDavid du Colombier 	mb->nwentry += ipoolgetbuf(pool, x);
576368c31abSDavid du Colombier 	if(mb->nentry > 0 && mb->roffset == mb->woffset){
577368c31abSDavid du Colombier 		assert(pool->nfree >= pool->bufsize/IEntrySize);
578368c31abSDavid du Colombier 		/*
579368c31abSDavid du Colombier 		 * There will be room in the pool -- we just
580368c31abSDavid du Colombier 		 * removed a block worth.
581368c31abSDavid du Colombier 		 */
582368c31abSDavid du Colombier 		ipoolloadblock(pool, mb);
583368c31abSDavid du Colombier 	}
584368c31abSDavid du Colombier 	if(writepart(pool->isect->part, mb->woffset, pool->wbuf, bufsize) < 0)
585368c31abSDavid du Colombier 		fprint(2, "writepart %s: %r\n", pool->isect->part->name);
586368c31abSDavid du Colombier 	mb->woffset += bufsize;
587368c31abSDavid du Colombier }
588368c31abSDavid du Colombier 
589368c31abSDavid du Colombier /*
590368c31abSDavid du Colombier  * Write out some full block of entries.
591368c31abSDavid du Colombier  * (There must be one -- the pool is almost full!)
592368c31abSDavid du Colombier  */
593368c31abSDavid du Colombier static void
ipoolflush1(IPool * pool)594368c31abSDavid du Colombier ipoolflush1(IPool *pool)
595368c31abSDavid du Colombier {
596368c31abSDavid du Colombier 	u32int i;
597368c31abSDavid du Colombier 
598368c31abSDavid du Colombier 	assert(pool->nfree <= pool->epbuf);
599368c31abSDavid du Colombier 
600368c31abSDavid du Colombier 	for(i=0; i<pool->nmbuf; i++){
601368c31abSDavid du Colombier 		if(pool->mcount[i] >= pool->epbuf){
602368c31abSDavid du Colombier 			ipoolflush0(pool, i);
603368c31abSDavid du Colombier 			return;
604368c31abSDavid du Colombier 		}
605368c31abSDavid du Colombier 	}
606368c31abSDavid du Colombier 	/* can't be reached - someone must be full */
607368c31abSDavid du Colombier 	sysfatal("ipoolflush1");
608368c31abSDavid du Colombier }
609368c31abSDavid du Colombier 
610368c31abSDavid du Colombier /*
611368c31abSDavid du Colombier  * Flush all the entries in the pool out to disk.
612368c31abSDavid du Colombier  * Nothing more to read from disk.
613368c31abSDavid du Colombier  */
614368c31abSDavid du Colombier static void
ipoolflush(IPool * pool)615368c31abSDavid du Colombier ipoolflush(IPool *pool)
616368c31abSDavid du Colombier {
617368c31abSDavid du Colombier 	u32int i;
618368c31abSDavid du Colombier 
619368c31abSDavid du Colombier 	for(i=0; i<pool->nmbuf; i++)
620368c31abSDavid du Colombier 		while(pool->mlist[i])
621368c31abSDavid du Colombier 			ipoolflush0(pool, i);
622368c31abSDavid du Colombier 	assert(pool->nfree == pool->nentry);
623368c31abSDavid du Colombier }
624368c31abSDavid du Colombier 
625368c31abSDavid du Colombier /*
626368c31abSDavid du Colombier  * Third pass.  Pick up each minibuffer from disk into
627368c31abSDavid du Colombier  * memory and then write out the buckets.
628368c31abSDavid du Colombier  */
629368c31abSDavid du Colombier 
630368c31abSDavid du Colombier /*
631368c31abSDavid du Colombier  * Compare two packed index entries.
632368c31abSDavid du Colombier  * Usual ordering except break ties by putting higher
633368c31abSDavid du Colombier  * index addresses first (assumes have duplicates
634368c31abSDavid du Colombier  * due to corruption in the lower addresses).
635368c31abSDavid du Colombier  */
636368c31abSDavid du Colombier static int
ientrycmpaddr(const void * va,const void * vb)637368c31abSDavid du Colombier ientrycmpaddr(const void *va, const void *vb)
638368c31abSDavid du Colombier {
639368c31abSDavid du Colombier 	int i;
640368c31abSDavid du Colombier 	uchar *a, *b;
641368c31abSDavid du Colombier 
642368c31abSDavid du Colombier 	a = (uchar*)va;
643368c31abSDavid du Colombier 	b = (uchar*)vb;
644368c31abSDavid du Colombier 	i = ientrycmp(a, b);
645368c31abSDavid du Colombier 	if(i)
646368c31abSDavid du Colombier 		return i;
647368c31abSDavid du Colombier 	return -memcmp(a+IEntryAddrOff, b+IEntryAddrOff, 8);
648368c31abSDavid du Colombier }
649368c31abSDavid du Colombier 
650368c31abSDavid du Colombier static void
zerorange(Part * p,u64int o,u64int e)651368c31abSDavid du Colombier zerorange(Part *p, u64int o, u64int e)
652368c31abSDavid du Colombier {
653368c31abSDavid du Colombier 	static uchar zero[MaxIoSize];
654368c31abSDavid du Colombier 	u32int n;
655368c31abSDavid du Colombier 
656368c31abSDavid du Colombier 	for(; o<e; o+=n){
657368c31abSDavid du Colombier 		n = sizeof zero;
658368c31abSDavid du Colombier 		if(o+n > e)
659368c31abSDavid du Colombier 			n = e-o;
660368c31abSDavid du Colombier 		if(writepart(p, o, zero, n) < 0)
661368c31abSDavid du Colombier 			fprint(2, "writepart %s: %r\n", p->name);
662368c31abSDavid du Colombier 	}
663368c31abSDavid du Colombier }
664368c31abSDavid du Colombier 
665368c31abSDavid du Colombier /*
666368c31abSDavid du Colombier  * Load a minibuffer into memory and write out the
667368c31abSDavid du Colombier  * corresponding buckets.
668368c31abSDavid du Colombier  */
669368c31abSDavid du Colombier static void
sortminibuffer(ISect * is,Minibuf * mb,uchar * buf,u32int nbuf,u32int bufsize)670368c31abSDavid du Colombier sortminibuffer(ISect *is, Minibuf *mb, uchar *buf, u32int nbuf, u32int bufsize)
671368c31abSDavid du Colombier {
672368c31abSDavid du Colombier 	uchar *buckdata, *p, *q, *ep;
673368c31abSDavid du Colombier 	u32int b, lastb, memsize, n;
674368c31abSDavid du Colombier 	u64int o;
675368c31abSDavid du Colombier 	IBucket ib;
676368c31abSDavid du Colombier 	Part *part;
677368c31abSDavid du Colombier 
678368c31abSDavid du Colombier 	part = is->part;
679368c31abSDavid du Colombier 	buckdata = emalloc(is->blocksize);
680368c31abSDavid du Colombier 
681368c31abSDavid du Colombier 	if(mb->nwentry == 0)
682368c31abSDavid du Colombier 		return;
683368c31abSDavid du Colombier 
684368c31abSDavid du Colombier 	/*
685368c31abSDavid du Colombier 	 * read entire buffer.
686368c31abSDavid du Colombier 	 */
687368c31abSDavid du Colombier 	assert(mb->nwentry*IEntrySize <= mb->woffset-mb->boffset);
688368c31abSDavid du Colombier 	assert(mb->woffset-mb->boffset <= nbuf);
689368c31abSDavid du Colombier 	if(readpart(part, mb->boffset, buf, mb->woffset-mb->boffset) < 0){
690368c31abSDavid du Colombier 		fprint(2, "readpart %s: %r\n", part->name);
691368c31abSDavid du Colombier 		errors = 1;
692368c31abSDavid du Colombier 		return;
693368c31abSDavid du Colombier 	}
694368c31abSDavid du Colombier 	assert(*(uint*)buf != 0xa5a5a5a5);
695368c31abSDavid du Colombier 
696368c31abSDavid du Colombier 	/*
697368c31abSDavid du Colombier 	 * remove fragmentation due to IEntrySize
698368c31abSDavid du Colombier 	 * not evenly dividing Bufsize
699368c31abSDavid du Colombier 	 */
700368c31abSDavid du Colombier 	memsize = (bufsize/IEntrySize)*IEntrySize;
701368c31abSDavid du Colombier 	for(o=mb->boffset, p=q=buf; o<mb->woffset; o+=bufsize){
702368c31abSDavid du Colombier 		memmove(p, q, memsize);
703368c31abSDavid du Colombier 		p += memsize;
704368c31abSDavid du Colombier 		q += bufsize;
705368c31abSDavid du Colombier 	}
706368c31abSDavid du Colombier 	ep = buf + mb->nwentry*IEntrySize;
707368c31abSDavid du Colombier 	assert(ep <= buf+nbuf);
708368c31abSDavid du Colombier 
709368c31abSDavid du Colombier 	/*
710368c31abSDavid du Colombier 	 * sort entries
711368c31abSDavid du Colombier 	 */
712368c31abSDavid du Colombier 	qsort(buf, mb->nwentry, IEntrySize, ientrycmpaddr);
713368c31abSDavid du Colombier 
714368c31abSDavid du Colombier 	/*
715368c31abSDavid du Colombier 	 * write buckets out
716368c31abSDavid du Colombier 	 */
717368c31abSDavid du Colombier 	n = 0;
718368c31abSDavid du Colombier 	lastb = offset2bucket(is, mb->boffset);
719368c31abSDavid du Colombier 	for(p=buf; p<ep; p=q){
720368c31abSDavid du Colombier 		b = score2bucket(is, p);
721368c31abSDavid du Colombier 		for(q=p; q<ep && score2bucket(is, q)==b; q+=IEntrySize)
722368c31abSDavid du Colombier 			;
723368c31abSDavid du Colombier 		if(lastb+1 < b && zero)
724368c31abSDavid du Colombier 			zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, b));
725368c31abSDavid du Colombier 		if(IBucketSize+(q-p) > is->blocksize)
726368c31abSDavid du Colombier 			sysfatal("bucket overflow - make index bigger");
727368c31abSDavid du Colombier 		memmove(buckdata+IBucketSize, p, q-p);
728368c31abSDavid du Colombier 		ib.n = (q-p)/IEntrySize;
729368c31abSDavid du Colombier 		n += ib.n;
730368c31abSDavid du Colombier 		packibucket(&ib, buckdata, is->bucketmagic);
731368c31abSDavid du Colombier 		if(writepart(part, bucket2offset(is, b), buckdata, is->blocksize) < 0)
732368c31abSDavid du Colombier 			fprint(2, "write %s: %r\n", part->name);
733368c31abSDavid du Colombier 		lastb = b;
734368c31abSDavid du Colombier 	}
735368c31abSDavid du Colombier 	if(lastb+1 < is->stop-is->start && zero)
736368c31abSDavid du Colombier 		zerorange(part, bucket2offset(is, lastb+1), bucket2offset(is, is->stop - is->start));
737368c31abSDavid du Colombier 
738368c31abSDavid du Colombier 	if(n != mb->nwentry)
739368c31abSDavid du Colombier 		fprint(2, "sortminibuffer bug: n=%ud nwentry=%ud have=%ld\n", n, mb->nwentry, (ep-buf)/IEntrySize);
740368c31abSDavid du Colombier 
741368c31abSDavid du Colombier 	free(buckdata);
742368c31abSDavid du Colombier }
743368c31abSDavid du Colombier 
744368c31abSDavid du Colombier static void
isectproc(void * v)745368c31abSDavid du Colombier isectproc(void *v)
746368c31abSDavid du Colombier {
747368c31abSDavid du Colombier 	u32int buck, bufbuckets, bufsize, epbuf, i, j;
748368c31abSDavid du Colombier 	u32int mbufbuckets, n, nbucket, nn, space;
749368c31abSDavid du Colombier 	u32int nbuf, nminibuf, xminiclump, prod;
750368c31abSDavid du Colombier 	u64int blocksize, offset, xclump;
751368c31abSDavid du Colombier 	uchar *data, *p;
752368c31abSDavid du Colombier 	Buf *buf;
753368c31abSDavid du Colombier 	IEntry ie;
754368c31abSDavid du Colombier 	IPool *ipool;
755368c31abSDavid du Colombier 	ISect *is;
756368c31abSDavid du Colombier 	Minibuf *mbuf, *mb;
757368c31abSDavid du Colombier 
758368c31abSDavid du Colombier 	is = v;
759368c31abSDavid du Colombier 	blocksize = is->blocksize;
760368c31abSDavid du Colombier 	nbucket = is->stop - is->start;
761368c31abSDavid du Colombier 
762368c31abSDavid du Colombier 	/*
763368c31abSDavid du Colombier 	 * Three passes:
764368c31abSDavid du Colombier 	 *	pass 1 - write index entries from arenas into
765368c31abSDavid du Colombier 	 *		large sequential sections on index disk.
766368c31abSDavid du Colombier 	 *		requires nbuf * bufsize memory.
767368c31abSDavid du Colombier 	 *
768368c31abSDavid du Colombier 	 *	pass 2 - split each section into minibufs.
769368c31abSDavid du Colombier 	 *		requires nminibuf * bufsize memory.
770368c31abSDavid du Colombier 	 *
771368c31abSDavid du Colombier 	 *	pass 3 - read each minibuf into memory and
772368c31abSDavid du Colombier 	 *		write buckets out.
773368c31abSDavid du Colombier 	 *		requires entries/minibuf * IEntrySize memory.
774368c31abSDavid du Colombier 	 *
775368c31abSDavid du Colombier 	 * The larger we set bufsize the less seeking hurts us.
776368c31abSDavid du Colombier 	 *
777368c31abSDavid du Colombier 	 * The fewer sections and minibufs we have, the less
778368c31abSDavid du Colombier 	 * seeking hurts us.
779368c31abSDavid du Colombier 	 *
780368c31abSDavid du Colombier 	 * The fewer sections and minibufs we have, the
781368c31abSDavid du Colombier 	 * more entries we end up with in each minibuf
782368c31abSDavid du Colombier 	 * at the end.
783368c31abSDavid du Colombier 	 *
784368c31abSDavid du Colombier 	 * Shoot for using half our memory to hold each
785368c31abSDavid du Colombier 	 * minibuf.  The chance of a random distribution
786368c31abSDavid du Colombier 	 * getting off by 2x is quite low.
787368c31abSDavid du Colombier 	 *
788368c31abSDavid du Colombier 	 * Once that is decided, figure out the smallest
789368c31abSDavid du Colombier 	 * nminibuf and nsection/biggest bufsize we can use
790368c31abSDavid du Colombier 	 * and still fit in the memory constraints.
791368c31abSDavid du Colombier 	 */
792368c31abSDavid du Colombier 
793368c31abSDavid du Colombier 	/* expected number of clump index entries we'll see */
794368c31abSDavid du Colombier 	xclump = nbucket * (double)totalclumps/totalbuckets;
795368c31abSDavid du Colombier 
796368c31abSDavid du Colombier 	/* number of clumps we want to see in a minibuf */
797368c31abSDavid du Colombier 	xminiclump = isectmem/2/IEntrySize;
798368c31abSDavid du Colombier 
799368c31abSDavid du Colombier 	/* total number of minibufs we need */
800368c31abSDavid du Colombier 	prod = (xclump+xminiclump-1) / xminiclump;
801368c31abSDavid du Colombier 
802368c31abSDavid du Colombier 	/* if possible, skip second pass */
803368c31abSDavid du Colombier 	if(!dumb && prod*MinBufSize < isectmem){
804368c31abSDavid du Colombier 		nbuf = prod;
805368c31abSDavid du Colombier 		nminibuf = 1;
806368c31abSDavid du Colombier 	}else{
807368c31abSDavid du Colombier 		/* otherwise use nsection = sqrt(nmini) */
808368c31abSDavid du Colombier 		for(nbuf=1; nbuf*nbuf<prod; nbuf++)
809368c31abSDavid du Colombier 			;
810368c31abSDavid du Colombier 		if(nbuf*MinBufSize > isectmem)
811368c31abSDavid du Colombier 			sysfatal("not enough memory");
812368c31abSDavid du Colombier 		nminibuf = nbuf;
813368c31abSDavid du Colombier 	}
814cacb6655SDavid du Colombier 	if (nbuf == 0) {
815cacb6655SDavid du Colombier 		fprint(2, "%s: brand-new index, no work to do\n", argv0);
816*1206f3fcSDavid du Colombier 		threadexitsall(0);
817cacb6655SDavid du Colombier 	}
818cacb6655SDavid du Colombier 
819368c31abSDavid du Colombier 	/* size buffer to use extra memory */
820368c31abSDavid du Colombier 	bufsize = MinBufSize;
821368c31abSDavid du Colombier 	while(bufsize*2*nbuf <= isectmem && bufsize < MaxBufSize)
822368c31abSDavid du Colombier 		bufsize *= 2;
823368c31abSDavid du Colombier 	data = emalloc(nbuf*bufsize);
824368c31abSDavid du Colombier 	epbuf = bufsize/IEntrySize;
825368c31abSDavid du Colombier 	fprint(2, "%T %s: %,ud buckets, %,ud groups, %,ud minigroups, %,ud buffer\n",
826368c31abSDavid du Colombier 		is->part->name, nbucket, nbuf, nminibuf, bufsize);
827368c31abSDavid du Colombier 	/*
828368c31abSDavid du Colombier 	 * Accept index entries from arena procs.
829368c31abSDavid du Colombier 	 */
830368c31abSDavid du Colombier 	buf = MKNZ(Buf, nbuf);
831368c31abSDavid du Colombier 	p = data;
832368c31abSDavid du Colombier 	offset = is->blockbase;
833368c31abSDavid du Colombier 	bufbuckets = (nbucket+nbuf-1)/nbuf;
834368c31abSDavid du Colombier 	for(i=0; i<nbuf; i++){
835368c31abSDavid du Colombier 		buf[i].part = is->part;
836368c31abSDavid du Colombier 		buf[i].bp = p;
837368c31abSDavid du Colombier 		buf[i].wp = p;
838368c31abSDavid du Colombier 		p += bufsize;
839368c31abSDavid du Colombier 		buf[i].ep = p;
840368c31abSDavid du Colombier 		buf[i].boffset = offset;
841368c31abSDavid du Colombier 		buf[i].woffset = offset;
842368c31abSDavid du Colombier 		if(i < nbuf-1){
843368c31abSDavid du Colombier 			offset += bufbuckets*blocksize;
844368c31abSDavid du Colombier 			buf[i].eoffset = offset;
845368c31abSDavid du Colombier 		}else{
846368c31abSDavid du Colombier 			offset = is->blockbase + nbucket*blocksize;
847368c31abSDavid du Colombier 			buf[i].eoffset = offset;
848368c31abSDavid du Colombier 		}
849368c31abSDavid du Colombier 	}
850368c31abSDavid du Colombier 	assert(p == data+nbuf*bufsize);
851368c31abSDavid du Colombier 
852368c31abSDavid du Colombier 	n = 0;
853368c31abSDavid du Colombier 	while(recv(is->writechan, &ie) == 1){
854368c31abSDavid du Colombier 		if(ie.ia.addr == 0)
855368c31abSDavid du Colombier 			break;
856368c31abSDavid du Colombier 		buck = score2bucket(is, ie.score);
857368c31abSDavid du Colombier 		i = buck/bufbuckets;
858368c31abSDavid du Colombier 		assert(i < nbuf);
859368c31abSDavid du Colombier 		bwrite(&buf[i], &ie);
860368c31abSDavid du Colombier 		n++;
861368c31abSDavid du Colombier 	}
862368c31abSDavid du Colombier 	add(&indexentries, n);
863368c31abSDavid du Colombier 
864368c31abSDavid du Colombier 	nn = 0;
865368c31abSDavid du Colombier 	for(i=0; i<nbuf; i++){
866368c31abSDavid du Colombier 		bflush(&buf[i]);
867368c31abSDavid du Colombier 		buf[i].bp = nil;
868368c31abSDavid du Colombier 		buf[i].ep = nil;
869368c31abSDavid du Colombier 		buf[i].wp = nil;
870368c31abSDavid du Colombier 		nn += buf[i].nentry;
871368c31abSDavid du Colombier 	}
872368c31abSDavid du Colombier 	if(n != nn)
873368c31abSDavid du Colombier 		fprint(2, "isectproc bug: n=%ud nn=%ud\n", n, nn);
874368c31abSDavid du Colombier 
875368c31abSDavid du Colombier 	free(data);
876368c31abSDavid du Colombier 
877368c31abSDavid du Colombier 	fprint(2, "%T %s: reordering\n", is->part->name);
878368c31abSDavid du Colombier 
879368c31abSDavid du Colombier 	/*
880368c31abSDavid du Colombier 	 * Rearrange entries into minibuffers and then
881368c31abSDavid du Colombier 	 * split each minibuffer into buckets.
882368c31abSDavid du Colombier 	 * The minibuffer must be sized so that it is
883368c31abSDavid du Colombier 	 * a multiple of blocksize -- ipoolloadblock assumes
884368c31abSDavid du Colombier 	 * that each minibuf starts aligned on a blocksize
885368c31abSDavid du Colombier 	 * boundary.
886368c31abSDavid du Colombier 	 */
887368c31abSDavid du Colombier 	mbuf = MKN(Minibuf, nminibuf);
888368c31abSDavid du Colombier 	mbufbuckets = (bufbuckets+nminibuf-1)/nminibuf;
889368c31abSDavid du Colombier 	while(mbufbuckets*blocksize % bufsize)
890368c31abSDavid du Colombier 		mbufbuckets++;
891368c31abSDavid du Colombier 	for(i=0; i<nbuf; i++){
892368c31abSDavid du Colombier 		/*
893368c31abSDavid du Colombier 		 * Set up descriptors.
894368c31abSDavid du Colombier 		 */
895368c31abSDavid du Colombier 		n = buf[i].nentry;
896368c31abSDavid du Colombier 		nn = 0;
897368c31abSDavid du Colombier 		offset = buf[i].boffset;
898368c31abSDavid du Colombier 		memset(mbuf, 0, nminibuf*sizeof(mbuf[0]));
899368c31abSDavid du Colombier 		for(j=0; j<nminibuf; j++){
900368c31abSDavid du Colombier 			mb = &mbuf[j];
901368c31abSDavid du Colombier 			mb->boffset = offset;
902368c31abSDavid du Colombier 			offset += mbufbuckets*blocksize;
903368c31abSDavid du Colombier 			if(offset > buf[i].eoffset)
904368c31abSDavid du Colombier 				offset = buf[i].eoffset;
905368c31abSDavid du Colombier 			mb->eoffset = offset;
906368c31abSDavid du Colombier 			mb->roffset = mb->boffset;
907368c31abSDavid du Colombier 			mb->woffset = mb->boffset;
908368c31abSDavid du Colombier 			mb->nentry = epbuf * (mb->eoffset - mb->boffset)/bufsize;
909368c31abSDavid du Colombier 			if(mb->nentry > buf[i].nentry)
910368c31abSDavid du Colombier 				mb->nentry = buf[i].nentry;
911368c31abSDavid du Colombier 			buf[i].nentry -= mb->nentry;
912368c31abSDavid du Colombier 			nn += mb->nentry;
913368c31abSDavid du Colombier 		}
914368c31abSDavid du Colombier 		if(n != nn)
915368c31abSDavid du Colombier 			fprint(2, "isectproc bug2: n=%ud nn=%ud (i=%d)\n", n, nn, i);;
916368c31abSDavid du Colombier 		/*
917368c31abSDavid du Colombier 		 * Rearrange.
918368c31abSDavid du Colombier 		 */
919368c31abSDavid du Colombier 		if(!dumb && nminibuf == 1){
920368c31abSDavid du Colombier 			mbuf[0].nwentry = mbuf[0].nentry;
921368c31abSDavid du Colombier 			mbuf[0].woffset = buf[i].woffset;
922368c31abSDavid du Colombier 		}else{
923368c31abSDavid du Colombier 			ipool = mkipool(is, mbuf, nminibuf, mbufbuckets, bufsize);
924368c31abSDavid du Colombier 			ipool->buck0 = bufbuckets*i;
925368c31abSDavid du Colombier 			for(j=0; j<nminibuf; j++){
926368c31abSDavid du Colombier 				mb = &mbuf[j];
927368c31abSDavid du Colombier 				while(mb->nentry > 0){
928368c31abSDavid du Colombier 					if(ipool->nfree < epbuf){
929368c31abSDavid du Colombier 						ipoolflush1(ipool);
930368c31abSDavid du Colombier 						/* ipoolflush1 might change mb->nentry */
931368c31abSDavid du Colombier 						continue;
932368c31abSDavid du Colombier 					}
933368c31abSDavid du Colombier 					assert(ipool->nfree >= epbuf);
934368c31abSDavid du Colombier 					ipoolloadblock(ipool, mb);
935368c31abSDavid du Colombier 				}
936368c31abSDavid du Colombier 			}
937368c31abSDavid du Colombier 			ipoolflush(ipool);
938368c31abSDavid du Colombier 			nn = 0;
939368c31abSDavid du Colombier 			for(j=0; j<nminibuf; j++)
940368c31abSDavid du Colombier 				nn += mbuf[j].nwentry;
941368c31abSDavid du Colombier 			if(n != nn)
942368c31abSDavid du Colombier 				fprint(2, "isectproc bug3: n=%ud nn=%ud (i=%d)\n", n, nn, i);
943368c31abSDavid du Colombier 			free(ipool);
944368c31abSDavid du Colombier 		}
945368c31abSDavid du Colombier 
946368c31abSDavid du Colombier 		/*
947368c31abSDavid du Colombier 		 * Make buckets.
948368c31abSDavid du Colombier 		 */
949368c31abSDavid du Colombier 		space = 0;
950368c31abSDavid du Colombier 		for(j=0; j<nminibuf; j++)
951368c31abSDavid du Colombier 			if(space < mbuf[j].woffset - mbuf[j].boffset)
952368c31abSDavid du Colombier 				space = mbuf[j].woffset - mbuf[j].boffset;
953368c31abSDavid du Colombier 
954368c31abSDavid du Colombier 		data = emalloc(space);
955368c31abSDavid du Colombier 		for(j=0; j<nminibuf; j++){
956368c31abSDavid du Colombier 			mb = &mbuf[j];
957368c31abSDavid du Colombier 			sortminibuffer(is, mb, data, space, bufsize);
958368c31abSDavid du Colombier 		}
959368c31abSDavid du Colombier 		free(data);
960368c31abSDavid du Colombier 	}
961368c31abSDavid du Colombier 
962368c31abSDavid du Colombier 	sendp(isectdonechan, is);
963368c31abSDavid du Colombier }
964368c31abSDavid du Colombier 
965368c31abSDavid du Colombier 
966368c31abSDavid du Colombier 
967