xref: /plan9/sys/src/cmd/venti/srv/index.c (revision 92b836f472dc5c766dd22dd5565eac9f3e49077c)
1 /*
2  * Index, mapping scores to log positions.
3  *
4  * The index is made up of some number of index sections, each of
5  * which is typically stored on a different disk.  The blocks in all the
6  * index sections are logically numbered, with each index section
7  * responsible for a range of blocks.  Blocks are typically 8kB.
8  *
9  * The N index blocks are treated as a giant hash table.  The top 32 bits
10  * of score are used as the key for a lookup.  Each index block holds
11  * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
12  *
13  * The index is sized so that a particular bucket is extraordinarily
14  * unlikely to overflow: assuming compressed data blocks are 4kB
15  * on disk, and assuming each block has a 40 byte index entry,
16  * the index data will be 1% of the total data.  Since scores are essentially
17  * random, all buckets should be about the same fullness.
18  * A factor of 5 gives us a wide comfort boundary to account for
19  * random variation.  So the index disk space should be 5% of the arena disk space.
20  */
21 
22 #include "stdinc.h"
23 #include "dat.h"
24 #include "fns.h"
25 
26 static int	initindex1(Index*);
27 static ISect	*initisect1(ISect *is);
28 
29 #define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)
30 
31 static char IndexMagic[] = "venti index configuration";
32 
33 Index*
initindex(char * name,ISect ** sects,int n)34 initindex(char *name, ISect **sects, int n)
35 {
36 	IFile f;
37 	Index *ix;
38 	ISect *is;
39 	u32int last, blocksize, tabsize;
40 	int i;
41 
42 	if(n <= 0){
43 fprint(2, "bad n\n");
44 		seterr(EOk, "no index sections to initialize index");
45 		return nil;
46 	}
47 	ix = MKZ(Index);
48 	if(ix == nil){
49 fprint(2, "no mem\n");
50 		seterr(EOk, "can't initialize index: out of memory");
51 		freeindex(ix);
52 		return nil;
53 	}
54 
55 	tabsize = sects[0]->tabsize;
56 	if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
57 		return nil;
58 	if(parseindex(&f, ix) < 0){
59 		freeifile(&f);
60 		freeindex(ix);
61 		return nil;
62 	}
63 	freeifile(&f);
64 	if(namecmp(ix->name, name) != 0){
65 		seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
66 		return nil;
67 	}
68 	if(ix->nsects != n){
69 		seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
70 		freeindex(ix);
71 		return nil;
72 	}
73 	ix->sects = sects;
74 	last = 0;
75 	blocksize = ix->blocksize;
76 	for(i = 0; i < ix->nsects; i++){
77 		is = sects[i];
78 		if(namecmp(ix->name, is->index) != 0
79 		|| is->blocksize != blocksize
80 		|| is->tabsize != tabsize
81 		|| namecmp(is->name, ix->smap[i].name) != 0
82 		|| is->start != ix->smap[i].start
83 		|| is->stop != ix->smap[i].stop
84 		|| last != is->start
85 		|| is->start > is->stop){
86 			seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
87 			freeindex(ix);
88 			return nil;
89 		}
90 		last = is->stop;
91 	}
92 	ix->tabsize = tabsize;
93 	ix->buckets = last;
94 
95 	if(initindex1(ix) < 0){
96 		freeindex(ix);
97 		return nil;
98 	}
99 
100 	ix->arenas = MKNZ(Arena*, ix->narenas);
101 	if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
102 		freeindex(ix);
103 		return nil;
104 	}
105 
106 	return ix;
107 }
108 
109 static int
initindex1(Index * ix)110 initindex1(Index *ix)
111 {
112 	u32int buckets;
113 
114 	ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
115 	buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
116 	if(buckets != ix->buckets){
117 		seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
118 		return -1;
119 	}
120 
121 	return 0;
122 }
123 
124 int
wbindex(Index * ix)125 wbindex(Index *ix)
126 {
127 	Fmt f;
128 	ZBlock *b;
129 	int i;
130 
131 	if(ix->nsects == 0){
132 		seterr(EOk, "no sections in index %s", ix->name);
133 		return -1;
134 	}
135 	b = alloczblock(ix->tabsize, 1, ix->blocksize);
136 	if(b == nil){
137 		seterr(EOk, "can't write index configuration: out of memory");
138 		return -1;
139 	}
140 	fmtzbinit(&f, b);
141 	if(outputindex(&f, ix) < 0){
142 		seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
143 		freezblock(b);
144 		return -1;
145 	}
146 	for(i = 0; i < ix->nsects; i++){
147 		if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0
148 		|| flushpart(ix->sects[i]->part) < 0){
149 			seterr(EOk, "can't write index: %r");
150 			freezblock(b);
151 			return -1;
152 		}
153 	}
154 	freezblock(b);
155 
156 	for(i = 0; i < ix->nsects; i++)
157 		if(wbisect(ix->sects[i]) < 0)
158 			return -1;
159 
160 	return 0;
161 }
162 
163 /*
164  * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
165  * version, blocksize: u32int
166  * name: max. ANameSize string
167  * sections, arenas: AMap
168  */
169 int
outputindex(Fmt * f,Index * ix)170 outputindex(Fmt *f, Index *ix)
171 {
172 	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
173 	|| outputamap(f, ix->smap, ix->nsects) < 0
174 	|| outputamap(f, ix->amap, ix->narenas) < 0)
175 		return -1;
176 	return 0;
177 }
178 
179 int
parseindex(IFile * f,Index * ix)180 parseindex(IFile *f, Index *ix)
181 {
182 	AMapN amn;
183 	u32int v;
184 	char *s;
185 
186 	/*
187 	 * magic
188 	 */
189 	s = ifileline(f);
190 	if(s == nil || strcmp(s, IndexMagic) != 0){
191 		seterr(ECorrupt, "bad index magic for %s", f->name);
192 		return -1;
193 	}
194 
195 	/*
196 	 * version
197 	 */
198 	if(ifileu32int(f, &v) < 0){
199 		seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
200 		return -1;
201 	}
202 	ix->version = v;
203 	if(ix->version != IndexVersion){
204 		seterr(ECorrupt, "bad version number in %s", f->name);
205 		return -1;
206 	}
207 
208 	/*
209 	 * name
210 	 */
211 	if(ifilename(f, ix->name) < 0){
212 		seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
213 		return -1;
214 	}
215 
216 	/*
217 	 * block size
218 	 */
219 	if(ifileu32int(f, &v) < 0){
220 		seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
221 		return -1;
222 	}
223 	ix->blocksize = v;
224 
225 	if(parseamap(f, &amn) < 0)
226 		return -1;
227 	ix->nsects = amn.n;
228 	ix->smap = amn.map;
229 
230 	if(parseamap(f, &amn) < 0)
231 		return -1;
232 	ix->narenas = amn.n;
233 	ix->amap = amn.map;
234 
235 	return 0;
236 }
237 
238 /*
239  * initialize an entirely new index
240  */
241 Index *
newindex(char * name,ISect ** sects,int n)242 newindex(char *name, ISect **sects, int n)
243 {
244 	Index *ix;
245 	AMap *smap;
246 	u64int nb;
247 	u32int div, ub, xb, start, stop, blocksize, tabsize;
248 	int i, j;
249 
250 	if(n < 1){
251 		seterr(EOk, "creating index with no index sections");
252 		return nil;
253 	}
254 
255 	/*
256 	 * compute the total buckets available in the index,
257 	 * and the total buckets which are used.
258 	 */
259 	nb = 0;
260 	blocksize = sects[0]->blocksize;
261 	tabsize = sects[0]->tabsize;
262 	for(i = 0; i < n; i++){
263 		/*
264 		 * allow index, start, and stop to be set if index is correct
265 		 * and start and stop are what we would have picked.
266 		 * this allows calling fmtindex to reformat the index after
267 		 * replacing a bad index section with a freshly formatted one.
268 		 * start and stop are checked below.
269 		 */
270 		if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
271 			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
272 			return nil;
273 		}
274 		if(blocksize != sects[i]->blocksize){
275 			seterr(EOk, "mismatched block sizes in index sections");
276 			return nil;
277 		}
278 		if(tabsize != sects[i]->tabsize){
279 			seterr(EOk, "mismatched config table sizes in index sections");
280 			return nil;
281 		}
282 		nb += sects[i]->blocks;
283 	}
284 
285 	/*
286 	 * check for duplicate names
287 	 */
288 	for(i = 0; i < n; i++){
289 		for(j = i + 1; j < n; j++){
290 			if(namecmp(sects[i]->name, sects[j]->name) == 0){
291 				seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
292 				return nil;
293 			}
294 		}
295 	}
296 
297 	if(nb >= ((u64int)1 << 32)){
298 		fprint(2, "%s: index is 2^32 blocks or more; ignoring some of it\n",
299 			argv0);
300 		nb = ((u64int)1 << 32) - 1;
301 	}
302 
303 	div = (((u64int)1 << 32) + nb - 1) / nb;
304 	if(div < 100){
305 		fprint(2, "%s: index divisor %d too coarse; "
306 			"index larger than needed, ignoring some of it\n",
307 			argv0, div);
308 		div = 100;
309 		nb = (((u64int)1 << 32) - 1) / (100 - 1);
310 	}
311 	ub = (((u64int)1 << 32) - 1) / div + 1;
312 	if(ub > nb){
313 		seterr(EBug, "index initialization math wrong");
314 		return nil;
315 	}
316 	xb = nb - ub;
317 
318 	/*
319 	 * initialize each of the index sections
320 	 * and the section map table
321 	 */
322 	smap = MKNZ(AMap, n);
323 	if(smap == nil){
324 		seterr(EOk, "can't create new index: out of memory");
325 		return nil;
326 	}
327 	start = 0;
328 	for(i = 0; i < n; i++){
329 		stop = start + sects[i]->blocks - xb / n;
330 		if(i == n - 1)
331 			stop = ub;
332 
333 		if(sects[i]->start != 0 || sects[i]->stop != 0)
334 		if(sects[i]->start != start || sects[i]->stop != stop){
335 			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
336 			return nil;
337 		}
338 
339 		sects[i]->start = start;
340 		sects[i]->stop = stop;
341 		namecp(sects[i]->index, name);
342 
343 		smap[i].start = start;
344 		smap[i].stop = stop;
345 		namecp(smap[i].name, sects[i]->name);
346 		start = stop;
347 	}
348 
349 	/*
350 	 * initialize the index itself
351 	 */
352 	ix = MKZ(Index);
353 	if(ix == nil){
354 		seterr(EOk, "can't create new index: out of memory");
355 		free(smap);
356 		return nil;
357 	}
358 	ix->version = IndexVersion;
359 	namecp(ix->name, name);
360 	ix->sects = sects;
361 	ix->smap = smap;
362 	ix->nsects = n;
363 	ix->blocksize = blocksize;
364 	ix->buckets = ub;
365 	ix->tabsize = tabsize;
366 	ix->div = div;
367 
368 	if(initindex1(ix) < 0){
369 		free(smap);
370 		return nil;
371 	}
372 
373 	return ix;
374 }
375 
376 ISect*
initisect(Part * part)377 initisect(Part *part)
378 {
379 	ISect *is;
380 	ZBlock *b;
381 	int ok;
382 
383 	b = alloczblock(HeadSize, 0, 0);
384 	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
385 		seterr(EAdmin, "can't read index section header: %r");
386 		return nil;
387 	}
388 
389 	is = MKZ(ISect);
390 	if(is == nil){
391 		freezblock(b);
392 		return nil;
393 	}
394 	is->part = part;
395 	ok = unpackisect(is, b->data);
396 	freezblock(b);
397 	if(ok < 0){
398 		seterr(ECorrupt, "corrupted index section header: %r");
399 		freeisect(is);
400 		return nil;
401 	}
402 
403 	if(is->version != ISectVersion1 && is->version != ISectVersion2){
404 		seterr(EAdmin, "unknown index section version %d", is->version);
405 		freeisect(is);
406 		return nil;
407 	}
408 
409 	return initisect1(is);
410 }
411 
412 ISect*
newisect(Part * part,u32int vers,char * name,u32int blocksize,u32int tabsize)413 newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
414 {
415 	ISect *is;
416 	u32int tabbase;
417 
418 	is = MKZ(ISect);
419 	if(is == nil)
420 		return nil;
421 
422 	namecp(is->name, name);
423 	is->version = vers;
424 	is->part = part;
425 	is->blocksize = blocksize;
426 	is->start = 0;
427 	is->stop = 0;
428 	tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
429 	is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
430 	is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
431 	is->bucketmagic = 0;
432 	if(is->version == ISectVersion2){
433 		do{
434 			is->bucketmagic = fastrand();
435 		}while(is->bucketmagic==0);
436 	}
437 	is = initisect1(is);
438 	if(is == nil)
439 		return nil;
440 
441 	return is;
442 }
443 
444 /*
445  * initialize the computed parameters for an index
446  */
447 static ISect*
initisect1(ISect * is)448 initisect1(ISect *is)
449 {
450 	u64int v;
451 
452 	is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
453 	is->blocklog = u64log2(is->blocksize);
454 	if(is->blocksize != (1 << is->blocklog)){
455 		seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
456 		freeisect(is);
457 		return nil;
458 	}
459 	partblocksize(is->part, is->blocksize);
460 	is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
461 	if(is->tabbase >= is->blockbase){
462 		seterr(ECorrupt, "index section config table overlaps bucket storage");
463 		freeisect(is);
464 		return nil;
465 	}
466 	is->tabsize = is->blockbase - is->tabbase;
467 	v = is->part->size & ~(u64int)(is->blocksize - 1);
468 	if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
469 		seterr(ECorrupt, "invalid blocks in index section %s", is->name);
470 		/* ZZZ what to do?
471 		freeisect(is);
472 		return nil;
473 		*/
474 	}
475 
476 	if(is->stop - is->start > is->blocks){
477 		seterr(ECorrupt, "index section overflows available space");
478 		freeisect(is);
479 		return nil;
480 	}
481 	if(is->start > is->stop){
482 		seterr(ECorrupt, "invalid index section range");
483 		freeisect(is);
484 		return nil;
485 	}
486 
487 	return is;
488 }
489 
490 int
wbisect(ISect * is)491 wbisect(ISect *is)
492 {
493 	ZBlock *b;
494 
495 	b = alloczblock(HeadSize, 1, 0);
496 	if(b == nil){
497 		/* ZZZ set error? */
498 		return -1;
499 	}
500 
501 	if(packisect(is, b->data) < 0){
502 		seterr(ECorrupt, "can't make index section header: %r");
503 		freezblock(b);
504 		return -1;
505 	}
506 	if(writepart(is->part, PartBlank, b->data, HeadSize) < 0 || flushpart(is->part) < 0){
507 		seterr(EAdmin, "can't write index section header: %r");
508 		freezblock(b);
509 		return -1;
510 	}
511 	freezblock(b);
512 
513 	return 0;
514 }
515 
516 void
freeisect(ISect * is)517 freeisect(ISect *is)
518 {
519 	if(is == nil)
520 		return;
521 	free(is);
522 }
523 
524 void
freeindex(Index * ix)525 freeindex(Index *ix)
526 {
527 	int i;
528 
529 	if(ix == nil)
530 		return;
531 	free(ix->amap);
532 	free(ix->arenas);
533 	if(ix->sects)
534 		for(i = 0; i < ix->nsects; i++)
535 			freeisect(ix->sects[i]);
536 	free(ix->sects);
537 	free(ix->smap);
538 	free(ix);
539 }
540 
541 /*
542  * write a clump to an available arena in the index
543  * and return the address of the clump within the index.
544 ZZZ question: should this distinguish between an arena
545 filling up and real errors writing the clump?
546  */
547 u64int
writeiclump(Index * ix,Clump * c,u8int * clbuf)548 writeiclump(Index *ix, Clump *c, u8int *clbuf)
549 {
550 	u64int a;
551 	int i;
552 	IAddr ia;
553 	AState as;
554 
555 	trace(TraceLump, "writeiclump enter");
556 	qlock(&ix->writing);
557 	for(i = ix->mapalloc; i < ix->narenas; i++){
558 		a = writeaclump(ix->arenas[i], c, clbuf);
559 		if(a != TWID64){
560 			ix->mapalloc = i;
561 			ia.addr = ix->amap[i].start + a;
562 			ia.type = c->info.type;
563 			ia.size = c->info.uncsize;
564 			ia.blocks = (c->info.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
565 			as.arena = ix->arenas[i];
566 			as.aa = ia.addr;
567 			as.stats = as.arena->memstats;
568 			insertscore(c->info.score, &ia, IEDirty, &as);
569 			qunlock(&ix->writing);
570 			trace(TraceLump, "writeiclump exit");
571 			return ia.addr;
572 		}
573 	}
574 	qunlock(&ix->writing);
575 
576 	seterr(EAdmin, "no space left in arenas");
577 	trace(TraceLump, "writeiclump failed");
578 	return TWID64;
579 }
580 
581 /*
582  * convert an arena index to an relative arena address
583  */
584 Arena*
amapitoa(Index * ix,u64int a,u64int * aa)585 amapitoa(Index *ix, u64int a, u64int *aa)
586 {
587 	int i, r, l, m;
588 
589 	l = 1;
590 	r = ix->narenas - 1;
591 	while(l <= r){
592 		m = (r + l) / 2;
593 		if(ix->amap[m].start <= a)
594 			l = m + 1;
595 		else
596 			r = m - 1;
597 	}
598 	l--;
599 
600 	if(a > ix->amap[l].stop){
601 for(i=0; i<ix->narenas; i++)
602 	print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
603 print("want arena %d for %llux\n", l, a);
604 		seterr(ECrash, "unmapped address passed to amapitoa");
605 		return nil;
606 	}
607 
608 	if(ix->arenas[l] == nil){
609 		seterr(ECrash, "unmapped arena selected in amapitoa");
610 		return nil;
611 	}
612 	*aa = a - ix->amap[l].start;
613 	return ix->arenas[l];
614 }
615 
616 /*
617  * convert an arena index to the bounds of the containing arena group.
618  */
619 Arena*
amapitoag(Index * ix,u64int a,u64int * gstart,u64int * glimit,int * g)620 amapitoag(Index *ix, u64int a, u64int *gstart, u64int *glimit, int *g)
621 {
622 	u64int aa;
623 	Arena *arena;
624 
625 	arena = amapitoa(ix, a, &aa);
626 	if(arena == nil)
627 		return nil;
628 	if(arenatog(arena, aa, gstart, glimit, g) < 0)
629 		return nil;
630 	*gstart += a - aa;
631 	*glimit += a - aa;
632 	return arena;
633 }
634 
635 int
iaddrcmp(IAddr * ia1,IAddr * ia2)636 iaddrcmp(IAddr *ia1, IAddr *ia2)
637 {
638 	return ia1->type != ia2->type
639 		|| ia1->size != ia2->size
640 		|| ia1->blocks != ia2->blocks
641 		|| ia1->addr != ia2->addr;
642 }
643 
644 /*
645  * lookup the score in the partition
646  *
647  * nothing needs to be explicitly locked:
648  * only static parts of ix are used, and
649  * the bucket is locked by the DBlock lock.
650  */
651 int
loadientry(Index * ix,u8int * score,int type,IEntry * ie)652 loadientry(Index *ix, u8int *score, int type, IEntry *ie)
653 {
654 	ISect *is;
655 	DBlock *b;
656 	IBucket ib;
657 	u32int buck;
658 	int h, ok;
659 
660 	ok = -1;
661 
662 	trace(TraceLump, "loadientry enter");
663 
664 	/*
665 	qlock(&stats.lock);
666 	stats.indexreads++;
667 	qunlock(&stats.lock);
668 	*/
669 
670 	if(!inbloomfilter(mainindex->bloom, score)){
671 		trace(TraceLump, "loadientry bloomhit");
672 		return -1;
673 	}
674 
675 	trace(TraceLump, "loadientry loadibucket");
676 	b = loadibucket(ix, score, &is, &buck, &ib);
677 	trace(TraceLump, "loadientry loadedibucket");
678 	if(b == nil)
679 		return -1;
680 
681 	if(okibucket(&ib, is) < 0){
682 		trace(TraceLump, "loadientry badbucket");
683 		goto out;
684 	}
685 
686 	h = bucklook(score, type, ib.data, ib.n);
687 	if(h & 1){
688 		h ^= 1;
689 		trace(TraceLump, "loadientry found");
690 		unpackientry(ie, &ib.data[h]);
691 		ok = 0;
692 		goto out;
693 	}
694 	trace(TraceLump, "loadientry notfound");
695 	addstat(StatBloomFalseMiss, 1);
696 out:
697 	putdblock(b);
698 	trace(TraceLump, "loadientry exit");
699 	return ok;
700 }
701 
702 int
okibucket(IBucket * ib,ISect * is)703 okibucket(IBucket *ib, ISect *is)
704 {
705 	if(ib->n <= is->buckmax)
706 		return 0;
707 
708 	seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
709 		ib->n, is->buckmax, is->start, is->stop);
710 	return -1;
711 }
712 
713 /*
714  * look for score within data;
715  * return 1 | byte index of matching index,
716  * or 0 | index of least element > score
717  */
718 int
bucklook(u8int * score,int otype,u8int * data,int n)719 bucklook(u8int *score, int otype, u8int *data, int n)
720 {
721 	int i, r, l, m, h, c, cc, type;
722 
723 	if(otype == -1)
724 		type = -1;
725 	else
726 		type = vttodisktype(otype);
727 	l = 0;
728 	r = n - 1;
729 	while(l <= r){
730 		m = (r + l) >> 1;
731 		h = m * IEntrySize;
732 		for(i = 0; i < VtScoreSize; i++){
733 			c = score[i];
734 			cc = data[h + i];
735 			if(c != cc){
736 				if(c > cc)
737 					l = m + 1;
738 				else
739 					r = m - 1;
740 				goto cont;
741 			}
742 		}
743 		cc = data[h + IEntryTypeOff];
744 		if(type != cc && type != -1){
745 			if(type > cc)
746 				l = m + 1;
747 			else
748 				r = m - 1;
749 			goto cont;
750 		}
751 		return h | 1;
752 	cont:;
753 	}
754 
755 	return l * IEntrySize;
756 }
757 
758 /*
759  * compare two IEntries; consistent with bucklook
760  */
761 int
ientrycmp(const void * vie1,const void * vie2)762 ientrycmp(const void *vie1, const void *vie2)
763 {
764 	u8int *ie1, *ie2;
765 	int i, v1, v2;
766 
767 	ie1 = (u8int*)vie1;
768 	ie2 = (u8int*)vie2;
769 	for(i = 0; i < VtScoreSize; i++){
770 		v1 = ie1[i];
771 		v2 = ie2[i];
772 		if(v1 != v2){
773 			if(v1 < v2)
774 				return -1;
775 			return 1;
776 		}
777 	}
778 	v1 = ie1[IEntryTypeOff];
779 	v2 = ie2[IEntryTypeOff];
780 	if(v1 != v2){
781 		if(v1 < v2)
782 			return -1;
783 		return 1;
784 	}
785 	return 0;
786 }
787 
788 /*
789  * find the number of the index section holding bucket #buck
790  */
791 int
indexsect0(Index * ix,u32int buck)792 indexsect0(Index *ix, u32int buck)
793 {
794 	int r, l, m;
795 
796 	l = 1;
797 	r = ix->nsects - 1;
798 	while(l <= r){
799 		m = (r + l) >> 1;
800 		if(ix->sects[m]->start <= buck)
801 			l = m + 1;
802 		else
803 			r = m - 1;
804 	}
805 	return l - 1;
806 }
807 
808 /*
809  * load the index block at bucket #buck
810  */
811 static DBlock*
loadibucket0(Index * ix,u32int buck,ISect ** pis,u32int * pbuck,IBucket * ib,int mode)812 loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
813 {
814 	ISect *is;
815 	DBlock *b;
816 
817 	is = ix->sects[indexsect0(ix, buck)];
818 	if(buck < is->start || is->stop <= buck){
819 		seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
820 		return nil;
821 	}
822 
823 	buck -= is->start;
824 	if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), mode)) == nil)
825 		return nil;
826 
827 	if(pis)
828 		*pis = is;
829 	if(pbuck)
830 		*pbuck = buck;
831 	if(ib)
832 		unpackibucket(ib, b->data, is->bucketmagic);
833 	return b;
834 }
835 
836 /*
837  * find the number of the index section holding score
838  */
839 int
indexsect1(Index * ix,u8int * score)840 indexsect1(Index *ix, u8int *score)
841 {
842 	return indexsect0(ix, hashbits(score, 32) / ix->div);
843 }
844 
845 /*
846  * load the index block responsible for score.
847  */
848 static DBlock*
loadibucket1(Index * ix,u8int * score,ISect ** pis,u32int * pbuck,IBucket * ib)849 loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
850 {
851 	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
852 }
853 
854 int
indexsect(Index * ix,u8int * score)855 indexsect(Index *ix, u8int *score)
856 {
857 	return indexsect1(ix, score);
858 }
859 
860 DBlock*
loadibucket(Index * ix,u8int * score,ISect ** pis,u32int * pbuck,IBucket * ib)861 loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
862 {
863 	return loadibucket1(ix, score, pis, pbuck, ib);
864 }
865 
866 
867