xref: /plan9-contrib/sys/src/cmd/fossil/source.c (revision b249590635b298a2b629c4458908d752eced8f6f)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 #include "9.h"
6 
7 static int	sizeToDepth(uvlong s, int psize, int dsize);
8 static u32int 	tagGen(void);
9 static Block 	*sourceLoad(Source *r, Entry *e);
10 static Block	*sourceLoadUnlocked(Source *r, Entry *e);
11 static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
12 static int	sourceShrinkSize(Source*, Entry*, uvlong);
13 static int	sourceGrowDepth(Source*, Block*, Entry*, int);
14 
15 #define sourceIsLocked(r)	((r)->b != nil)
16 
17 static Source *
18 sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
19 {
20 	int epb;
21 	u32int epoch;
22 	char *pname = nil;
23 	Source *r;
24 	Entry e;
25 
26 	assert(p==nil || sourceIsLocked(p));
27 
28 	if(p == nil){
29 		assert(offset == 0);
30 		epb = 1;
31 	}else
32 		epb = p->dsize / VtEntrySize;
33 
34 	if(b->l.type != BtDir)
35 		goto Bad;
36 
37 	/*
38 	 * a non-active entry is the only thing that
39 	 * can legitimately happen here. all the others
40 	 * get prints.
41 	 */
42 	if(!entryUnpack(&e, b->data, offset % epb)){
43 		pname = sourceName(p);
44 		consPrint("%s: %s %V: sourceAlloc: entryUnpack failed\n",
45 			fs->name, pname, b->score);
46 		goto Bad;
47 	}
48 	if(!(e.flags & VtEntryActive)){
49 		pname = sourceName(p);
50 		if(0) consPrint("%s: %s %V: sourceAlloc: not active\n",
51 			fs->name, pname, e.score);
52 		goto Bad;
53 	}
54 	if(e.psize < 256 || e.dsize < 256){
55 		pname = sourceName(p);
56 		consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud < 256\n",
57 			fs->name, pname, e.score, e.psize, e.dsize);
58 		goto Bad;
59 	}
60 
61 	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
62 		pname = sourceName(p);
63 		consPrint("%s: %s %V: sourceAlloc: depth %ud size %llud "
64 			"psize %ud dsize %ud\n", fs->name, pname,
65 			e.score, e.depth, e.size, e.psize, e.dsize);
66 		goto Bad;
67 	}
68 
69 	if((e.flags & VtEntryLocal) && e.tag == 0){
70 		pname = sourceName(p);
71 		consPrint("%s: %s %V: sourceAlloc: flags %#ux tag %#ux\n",
72 			fs->name, pname, e.score, e.flags, e.tag);
73 		goto Bad;
74 	}
75 
76 	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
77 		pname = sourceName(p);
78 		consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud "
79 			"> blocksize %ud\n", fs->name, pname, e.score,
80 			e.psize, e.dsize, fs->blockSize);
81 		goto Bad;
82 	}
83 
84 	epoch = b->l.epoch;
85 	if(mode == OReadWrite){
86 		if(e.snap != 0){
87 			vtSetError(ESnapRO);
88 			return nil;
89 		}
90 	}else if(e.snap != 0){
91 		if(e.snap < fs->elo){
92 			vtSetError(ESnapOld);
93 			return nil;
94 		}
95 		if(e.snap >= fs->ehi)
96 			goto Bad;
97 		epoch = e.snap;
98 	}
99 
100 	r = vtMemAllocZ(sizeof(Source));
101 	r->fs = fs;
102 	r->mode = mode;
103 	r->issnapshot = issnapshot;
104 	r->dsize = e.dsize;
105 	r->gen = e.gen;
106 	r->dir = (e.flags & VtEntryDir) != 0;
107 	r->lk = vtLockAlloc();
108 	r->ref = 1;
109 	r->parent = p;
110 	if(p){
111 		vtLock(p->lk);
112 		assert(mode == OReadOnly || p->mode == OReadWrite);
113 		p->ref++;
114 		vtUnlock(p->lk);
115 	}
116 	r->epoch = epoch;
117 //	consPrint("sourceAlloc: have %V be.%d fse.%d %s\n", b->score,
118 //		b->l.epoch, r->fs->ehi, mode == OReadWrite? "rw": "ro");
119 	memmove(r->score, b->score, VtScoreSize);
120 	r->scoreEpoch = b->l.epoch;
121 	r->offset = offset;
122 	r->epb = epb;
123 	r->tag = b->l.tag;
124 
125 //	consPrint("%s: sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
126 
127 	return r;
128 Bad:
129 	free(pname);
130 	vtSetError(EBadEntry);
131 	return nil;
132 }
133 
134 Source *
135 sourceRoot(Fs *fs, u32int addr, int mode)
136 {
137 	Source *r;
138 	Block *b;
139 
140 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
141 	if(b == nil)
142 		return nil;
143 
144 	if(mode == OReadWrite && b->l.epoch != fs->ehi){
145 		consPrint("sourceRoot: fs->ehi = %ud, b->l = %L\n",
146 			fs->ehi, &b->l);
147 		blockPut(b);
148 		vtSetError(EBadRoot);
149 		return nil;
150 	}
151 
152 	r = sourceAlloc(fs, b, nil, 0, mode, 0);
153 	blockPut(b);
154 	return r;
155 }
156 
157 Source *
158 sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
159 {
160 	ulong bn;
161 	Block *b;
162 
163 	assert(sourceIsLocked(r));
164 	if(r->mode == OReadWrite)
165 		assert(r->epoch == r->b->l.epoch);
166 	if(!r->dir){
167 		vtSetError(ENotDir);
168 		return nil;
169 	}
170 
171 	bn = offset/(r->dsize/VtEntrySize);
172 
173 	b = sourceBlock(r, bn, mode);
174 	if(b == nil)
175 		return nil;
176 	r = sourceAlloc(r->fs, b, r, offset, mode, issnapshot);
177 	blockPut(b);
178 	return r;
179 }
180 
181 Source *
182 sourceCreate(Source *r, int dsize, int dir, u32int offset)
183 {
184 	int i, epb, psize;
185 	u32int bn, size;
186 	Block *b;
187 	Entry e;
188 	Source *rr;
189 
190 	assert(sourceIsLocked(r));
191 
192 	if(!r->dir){
193 		vtSetError(ENotDir);
194 		return nil;
195 	}
196 
197 	epb = r->dsize/VtEntrySize;
198 	psize = (dsize/VtScoreSize)*VtScoreSize;
199 
200 	size = sourceGetDirSize(r);
201 	if(offset == 0){
202 		/*
203 		 * look at a random block to see if we can find an empty entry
204 		 */
205 		offset = lnrand(size+1);
206 		offset -= offset % epb;
207 	}
208 
209 	/* try the given block and then try the last block */
210 	for(;;){
211 		bn = offset/epb;
212 		b = sourceBlock(r, bn, OReadWrite);
213 		if(b == nil)
214 			return nil;
215 		for(i=offset%r->epb; i<epb; i++){
216 			entryUnpack(&e, b->data, i);
217 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
218 				goto Found;
219 		}
220 		blockPut(b);
221 		if(offset == size){
222 			fprint(2, "sourceCreate: cannot happen\n");
223 			vtSetError("sourceCreate: cannot happen");
224 			return nil;
225 		}
226 		offset = size;
227 	}
228 
229 Found:
230 	/* found an entry - gen already set */
231 	e.psize = psize;
232 	e.dsize = dsize;
233 	assert(psize && dsize);
234 	e.flags = VtEntryActive;
235 	if(dir)
236 		e.flags |= VtEntryDir;
237 	e.depth = 0;
238 	e.size = 0;
239 	memmove(e.score, vtZeroScore, VtScoreSize);
240 	e.tag = 0;
241 	e.snap = 0;
242 	e.archive = 0;
243 	entryPack(&e, b->data, i);
244 	blockDirty(b);
245 
246 	offset = bn*epb + i;
247 	if(offset+1 > size){
248 		if(!sourceSetDirSize(r, offset+1)){
249 			blockPut(b);
250 			return nil;
251 		}
252 	}
253 
254 	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
255 	blockPut(b);
256 	return rr;
257 }
258 
259 static int
260 sourceKill(Source *r, int doremove)
261 {
262 	Entry e;
263 	Block *b;
264 	u32int addr;
265 	u32int tag;
266 	int type;
267 
268 	assert(sourceIsLocked(r));
269 	b = sourceLoad(r, &e);
270 	if(b == nil)
271 		return 0;
272 
273 	assert(b->l.epoch == r->fs->ehi);
274 
275 	if(doremove==0 && e.size == 0){
276 		/* already truncated */
277 		blockPut(b);
278 		return 1;
279 	}
280 
281 	/* remember info on link we are removing */
282 	addr = globalToLocal(e.score);
283 	type = entryType(&e);
284 	tag = e.tag;
285 
286 	if(doremove){
287 		if(e.gen != ~0)
288 			e.gen++;
289 		e.dsize = 0;
290 		e.psize = 0;
291 		e.flags = 0;
292 	}else{
293 		e.flags &= ~VtEntryLocal;
294 	}
295 	e.depth = 0;
296 	e.size = 0;
297 	e.tag = 0;
298 	memmove(e.score, vtZeroScore, VtScoreSize);
299 	entryPack(&e, b->data, r->offset % r->epb);
300 	blockDirty(b);
301 	if(addr != NilBlock)
302 		blockRemoveLink(b, addr, type, tag, 1);
303 	blockPut(b);
304 
305 	if(doremove){
306 		sourceUnlock(r);
307 		sourceClose(r);
308 	}
309 
310 	return 1;
311 }
312 
313 int
314 sourceRemove(Source *r)
315 {
316 	return sourceKill(r, 1);
317 }
318 
319 int
320 sourceTruncate(Source *r)
321 {
322 	return sourceKill(r, 0);
323 }
324 
325 uvlong
326 sourceGetSize(Source *r)
327 {
328 	Entry e;
329 	Block *b;
330 
331 	assert(sourceIsLocked(r));
332 	b = sourceLoad(r, &e);
333 	if(b == nil)
334 		return 0;
335 	blockPut(b);
336 
337 	return e.size;
338 }
339 
340 static int
341 sourceShrinkSize(Source *r, Entry *e, uvlong size)
342 {
343 	int i, type, ppb;
344 	uvlong ptrsz;
345 	u32int addr;
346 	uchar score[VtScoreSize];
347 	Block *b;
348 
349 	type = entryType(e);
350 	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
351 	if(b == nil)
352 		return 0;
353 
354 	ptrsz = e->dsize;
355 	ppb = e->psize/VtScoreSize;
356 	for(i=0; i+1<e->depth; i++)
357 		ptrsz *= ppb;
358 
359 	while(type&BtLevelMask){
360 		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
361 			/* not worth copying the block just so we can zero some of it */
362 			blockPut(b);
363 			return 0;
364 		}
365 
366 		/*
367 		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
368 		 */
369 
370 		/* zero the pointers to unnecessary blocks */
371 		i = (size+ptrsz-1)/ptrsz;
372 		for(; i<ppb; i++){
373 			addr = globalToLocal(b->data+i*VtScoreSize);
374 			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
375 			blockDirty(b);
376 			if(addr != NilBlock)
377 				blockRemoveLink(b, addr, type-1, e->tag, 1);
378 		}
379 
380 		/* recurse (go around again) on the partially necessary block */
381 		i = size/ptrsz;
382 		size = size%ptrsz;
383 		if(size == 0){
384 			blockPut(b);
385 			return 1;
386 		}
387 		ptrsz /= ppb;
388 		type--;
389 		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
390 		blockPut(b);
391 		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
392 		if(b == nil)
393 			return 0;
394 	}
395 
396 	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
397 		blockPut(b);
398 		return 0;
399 	}
400 
401 	/*
402 	 * No one ever truncates BtDir blocks.
403 	 */
404 	if(type == BtData && e->dsize > size){
405 		memset(b->data+size, 0, e->dsize-size);
406 		blockDirty(b);
407 	}
408 	blockPut(b);
409 	return 1;
410 }
411 
412 int
413 sourceSetSize(Source *r, uvlong size)
414 {
415 	int depth;
416 	Entry e;
417 	Block *b;
418 
419 	assert(sourceIsLocked(r));
420 	if(size == 0)
421 		return sourceTruncate(r);
422 
423 	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
424 		vtSetError(ETooBig);
425 		return 0;
426 	}
427 
428 	b = sourceLoad(r, &e);
429 	if(b == nil)
430 		return 0;
431 
432 	/* quick out */
433 	if(e.size == size){
434 		blockPut(b);
435 		return 1;
436 	}
437 
438 	depth = sizeToDepth(size, e.psize, e.dsize);
439 
440 	if(depth < e.depth){
441 		if(!sourceShrinkDepth(r, b, &e, depth)){
442 			blockPut(b);
443 			return 0;
444 		}
445 	}else if(depth > e.depth){
446 		if(!sourceGrowDepth(r, b, &e, depth)){
447 			blockPut(b);
448 			return 0;
449 		}
450 	}
451 
452 	if(size < e.size)
453 		sourceShrinkSize(r, &e, size);
454 
455 	e.size = size;
456 	entryPack(&e, b->data, r->offset % r->epb);
457 	blockDirty(b);
458 	blockPut(b);
459 
460 	return 1;
461 }
462 
463 int
464 sourceSetDirSize(Source *r, ulong ds)
465 {
466 	uvlong size;
467 	int epb;
468 
469 	assert(sourceIsLocked(r));
470 	epb = r->dsize/VtEntrySize;
471 
472 	size = (uvlong)r->dsize*(ds/epb);
473 	size += VtEntrySize*(ds%epb);
474 	return sourceSetSize(r, size);
475 }
476 
477 ulong
478 sourceGetDirSize(Source *r)
479 {
480 	ulong ds;
481 	uvlong size;
482 	int epb;
483 
484 	assert(sourceIsLocked(r));
485 	epb = r->dsize/VtEntrySize;
486 
487 	size = sourceGetSize(r);
488 	ds = epb*(size/r->dsize);
489 	ds += (size%r->dsize)/VtEntrySize;
490 	return ds;
491 }
492 
493 int
494 sourceGetEntry(Source *r, Entry *e)
495 {
496 	Block *b;
497 
498 	assert(sourceIsLocked(r));
499 	b = sourceLoad(r, e);
500 	if(b == nil)
501 		return 0;
502 	blockPut(b);
503 
504 	return 1;
505 }
506 
507 /*
508  * Must be careful with this.  Doesn't record
509  * dependencies, so don't introduce any!
510  */
511 int
512 sourceSetEntry(Source *r, Entry *e)
513 {
514 	Block *b;
515 	Entry oe;
516 
517 	assert(sourceIsLocked(r));
518 	b = sourceLoad(r, &oe);
519 	if(b == nil)
520 		return 0;
521 	entryPack(e, b->data, r->offset%r->epb);
522 	blockDirty(b);
523 	blockPut(b);
524 
525 	return 1;
526 }
527 
528 static Block *
529 blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
530 {
531 	Block *b;
532 	Cache *c;
533 	u32int addr;
534 	int type;
535 	uchar oscore[VtScoreSize], score[VtScoreSize];
536 	Entry oe;
537 
538 	c = fs->cache;
539 
540 	if((p->l.type & BtLevelMask) == 0){
541 		assert(p->l.type == BtDir);
542 		type = entryType(e);
543 		b = cacheGlobal(c, e->score, type, e->tag, mode);
544 	}else{
545 		type = p->l.type - 1;
546 		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
547 	}
548 
549 	if(b)
550 		b->pc = getcallerpc(&p);
551 
552 	if(b == nil || mode == OReadOnly)
553 		return b;
554 
555 	if(p->l.epoch != fs->ehi){
556 		fprint(2, "blockWalk: parent not writable\n");
557 		abort();
558 	}
559 	if(b->l.epoch == fs->ehi)
560 		return b;
561 
562 	oe = *e;
563 
564 	/*
565 	 * Copy on write.
566 	 */
567 	if(e->tag == 0){
568 		assert(p->l.type == BtDir);
569 		e->tag = tagGen();
570 		e->flags |= VtEntryLocal;
571 	}
572 
573 	addr = b->addr;
574 	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
575 	if(b == nil)
576 		return nil;
577 
578 	b->pc = getcallerpc(&p);
579 	assert(b->l.epoch == fs->ehi);
580 
581 	blockDirty(b);
582 	memmove(score, b->score, VtScoreSize);
583 	if(p->l.type == BtDir){
584 		memmove(e->score, b->score, VtScoreSize);
585 		entryPack(e, p->data, index);
586 		blockDependency(p, b, index, nil, &oe);
587 	}else{
588 		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
589 		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
590 		blockDependency(p, b, index, oscore, nil);
591 	}
592 	blockDirty(p);
593 
594 	if(addr != NilBlock)
595 		blockRemoveLink(p, addr, type, e->tag, 0);
596 
597 	return b;
598 }
599 
600 /*
601  * Change the depth of the source r.
602  * The entry e for r is contained in block p.
603  */
604 static int
605 sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
606 {
607 	Block *b, *bb;
608 	u32int tag;
609 	int type;
610 	Entry oe;
611 
612 	assert(sourceIsLocked(r));
613 	assert(depth <= VtPointerDepth);
614 
615 	type = entryType(e);
616 	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
617 	if(b == nil)
618 		return 0;
619 
620 	tag = e->tag;
621 	if(tag == 0)
622 		tag = tagGen();
623 
624 	oe = *e;
625 
626 	/*
627 	 * Keep adding layers until we get to the right depth
628 	 * or an error occurs.
629 	 */
630 	while(e->depth < depth){
631 		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
632 		if(bb == nil)
633 			break;
634 //fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
635 		memmove(bb->data, b->score, VtScoreSize);
636 		memmove(e->score, bb->score, VtScoreSize);
637 		e->depth++;
638 		type++;
639 		e->tag = tag;
640 		e->flags |= VtEntryLocal;
641 		blockDependency(bb, b, 0, vtZeroScore, nil);
642 		blockPut(b);
643 		b = bb;
644 		blockDirty(b);
645 	}
646 
647 	entryPack(e, p->data, r->offset % r->epb);
648 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
649 	blockPut(b);
650 	blockDirty(p);
651 
652 	return e->depth == depth;
653 }
654 
655 static int
656 sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
657 {
658 	Block *b, *nb, *ob, *rb;
659 	u32int tag;
660 	int type, d;
661 	Entry oe;
662 
663 	assert(sourceIsLocked(r));
664 	assert(depth <= VtPointerDepth);
665 
666 	type = entryType(e);
667 	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
668 	if(rb == nil)
669 		return 0;
670 
671 	tag = e->tag;
672 	if(tag == 0)
673 		tag = tagGen();
674 
675 	/*
676 	 * Walk down to the new root block.
677 	 * We may stop early, but something is better than nothing.
678 	 */
679 	oe = *e;
680 
681 	ob = nil;
682 	b = rb;
683 /* BUG: explain type++.  i think it is a real bug */
684 	for(d=e->depth; d > depth; d--, type++){
685 		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
686 		if(nb == nil)
687 			break;
688 		if(ob!=nil && ob!=rb)
689 			blockPut(ob);
690 		ob = b;
691 		b = nb;
692 	}
693 
694 	if(b == rb){
695 		blockPut(rb);
696 		return 0;
697 	}
698 
699 	/*
700 	 * Right now, e points at the root block rb, b is the new root block,
701 	 * and ob points at b.  To update:
702 	 *
703 	 *	(i) change e to point at b
704 	 *	(ii) zero the pointer ob -> b
705 	 *	(iii) free the root block
706 	 *
707 	 * p (the block containing e) must be written before
708 	 * anything else.
709  	 */
710 
711 	/* (i) */
712 	e->depth = d;
713 	/* might have been local and now global; reverse cannot happen */
714 	if(globalToLocal(b->score) == NilBlock)
715 		e->flags &= ~VtEntryLocal;
716 	memmove(e->score, b->score, VtScoreSize);
717 	entryPack(e, p->data, r->offset % r->epb);
718 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
719 	blockDirty(p);
720 
721 	/* (ii) */
722 	memmove(ob->data, vtZeroScore, VtScoreSize);
723 	blockDependency(ob, p, 0, b->score, nil);
724 	blockDirty(ob);
725 
726 	/* (iii) */
727 	if(rb->addr != NilBlock)
728 		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
729 
730 	blockPut(rb);
731 	if(ob!=nil && ob!=rb)
732 		blockPut(ob);
733 	blockPut(b);
734 
735 	return d == depth;
736 }
737 
738 /*
739  * Normally we return the block at the given number.
740  * If early is set, we stop earlier in the tree.  Setting early
741  * to 1 gives us the block that contains the pointer to bn.
742  */
743 Block *
744 _sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
745 {
746 	Block *b, *bb;
747 	int index[VtPointerDepth+1];
748 	Entry e;
749 	int i, np;
750 	int m;
751 
752 	assert(sourceIsLocked(r));
753 	assert(bn != NilBlock);
754 
755 	/* mode for intermediate block */
756 	m = mode;
757 	if(m == OOverWrite)
758 		m = OReadWrite;
759 
760 	b = sourceLoad(r, &e);
761 	if(b == nil)
762 		return nil;
763 	if(r->issnapshot && (e.flags & VtEntryNoArchive)){
764 		blockPut(b);
765 		vtSetError(ENotArchived);
766 		return nil;
767 	}
768 
769 	if(tag){
770 		if(e.tag == 0)
771 			e.tag = tag;
772 		else if(e.tag != tag){
773 			fprint(2, "tag mismatch\n");
774 			vtSetError("tag mismatch");
775 			goto Err;
776 		}
777 	}
778 
779 	np = e.psize/VtScoreSize;
780 	memset(index, 0, sizeof(index));
781 	for(i=0; bn > 0; i++){
782 		if(i >= VtPointerDepth){
783 			vtSetError(EBadAddr);
784 			goto Err;
785 		}
786 		index[i] = bn % np;
787 		bn /= np;
788 	}
789 
790 	if(i > e.depth){
791 		if(mode == OReadOnly){
792 			vtSetError(EBadAddr);
793 			goto Err;
794 		}
795 		if(!sourceGrowDepth(r, b, &e, i))
796 			goto Err;
797 	}
798 
799 	index[e.depth] = r->offset % r->epb;
800 
801 	for(i=e.depth; i>=early; i--){
802 		bb = blockWalk(b, index[i], m, r->fs, &e);
803 		if(bb == nil)
804 			goto Err;
805 		blockPut(b);
806 		b = bb;
807 	}
808 	b->pc = getcallerpc(&r);
809 	return b;
810 Err:
811 	blockPut(b);
812 	return nil;
813 }
814 
815 Block*
816 sourceBlock(Source *r, ulong bn, int mode)
817 {
818 	Block *b;
819 
820 	b = _sourceBlock(r, bn, mode, 0, 0);
821 	if(b)
822 		b->pc = getcallerpc(&r);
823 	return b;
824 }
825 
826 void
827 sourceClose(Source *r)
828 {
829 	if(r == nil)
830 		return;
831 	vtLock(r->lk);
832 	r->ref--;
833 	if(r->ref){
834 		vtUnlock(r->lk);
835 		return;
836 	}
837 	assert(r->ref == 0);
838 	vtUnlock(r->lk);
839 	if(r->parent)
840 		sourceClose(r->parent);
841 	vtLockFree(r->lk);
842 	memset(r, ~0, sizeof(*r));
843 	vtMemFree(r);
844 }
845 
846 /*
847  * Retrieve the block containing the entry for r.
848  * If a snapshot has happened, we might need
849  * to get a new copy of the block.  We avoid this
850  * in the common case by caching the score for
851  * the block and the last epoch in which it was valid.
852  *
853  * We use r->mode to tell the difference between active
854  * file system sources (OReadWrite) and sources for the
855  * snapshot file system (OReadOnly).
856  */
857 static Block*
858 sourceLoadBlock(Source *r, int mode)
859 {
860 	u32int addr;
861 	Block *b;
862 
863 	switch(r->mode){
864 	default:
865 		assert(0);
866 	case OReadWrite:
867 		assert(r->mode == OReadWrite);
868 		/*
869 		 * This needn't be true -- we might bump the low epoch
870 		 * to reclaim some old blocks, but since this score is
871 		 * OReadWrite, the blocks must all still be open, so none
872 		 * are reclaimed.  Thus it's okay that the epoch is so low.
873 		 * Proceed.
874 		assert(r->epoch >= r->fs->elo);
875 		 */
876 		if(r->epoch == r->fs->ehi){
877 			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
878 			if(b == nil)
879 				return nil;
880 			assert(r->epoch == b->l.epoch);
881 			return b;
882 		}
883 		assert(r->parent != nil);
884 		if(!sourceLock(r->parent, OReadWrite))
885 			return nil;
886 		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
887 		sourceUnlock(r->parent);
888 		if(b == nil)
889 			return nil;
890 		assert(b->l.epoch == r->fs->ehi);
891 	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
892 		memmove(r->score, b->score, VtScoreSize);
893 		r->scoreEpoch = b->l.epoch;
894 		r->tag = b->l.tag;
895 		r->epoch = r->fs->ehi;
896 		return b;
897 
898 	case OReadOnly:
899 		addr = globalToLocal(r->score);
900 		if(addr == NilBlock)
901 			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
902 
903 		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
904 		if(b)
905 			return b;
906 
907 		/*
908 		 * If it failed because the epochs don't match, the block has been
909 		 * archived and reclaimed.  Rewalk from the parent and get the
910 		 * new pointer.  This can't happen in the OReadWrite case
911 		 * above because blocks in the current epoch don't get
912 		 * reclaimed.  The fact that we're OReadOnly means we're
913 		 * a snapshot.  (Or else the file system is read-only, but then
914 		 * the archiver isn't going around deleting blocks.)
915 		 */
916 		if(strcmp(vtGetError(), ELabelMismatch) == 0){
917 			if(!sourceLock(r->parent, OReadOnly))
918 				return nil;
919 			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
920 			sourceUnlock(r->parent);
921 			if(b){
922 				fprint(2, "sourceAlloc: lost %V found %V\n",
923 					r->score, b->score);
924 				memmove(r->score, b->score, VtScoreSize);
925 				r->scoreEpoch = b->l.epoch;
926 				return b;
927 			}
928 		}
929 		return nil;
930 	}
931 }
932 
933 int
934 sourceLock(Source *r, int mode)
935 {
936 	Block *b;
937 
938 	if(mode == -1)
939 		mode = r->mode;
940 
941 	b = sourceLoadBlock(r, mode);
942 	if(b == nil)
943 		return 0;
944 	/*
945 	 * The fact that we are holding b serves as the
946 	 * lock entitling us to write to r->b.
947 	 */
948 	assert(r->b == nil);
949 	r->b = b;
950 	if(r->mode == OReadWrite)
951 		assert(r->epoch == r->b->l.epoch);
952 	return 1;
953 }
954 
955 /*
956  * Lock two (usually sibling) sources.  This needs special care
957  * because the Entries for both sources might be in the same block.
958  * We also try to lock blocks in left-to-right order within the tree.
959  */
960 int
961 sourceLock2(Source *r, Source *rr, int mode)
962 {
963 	Block *b, *bb;
964 
965 	if(rr == nil)
966 		return sourceLock(r, mode);
967 
968 	if(mode == -1)
969 		mode = r->mode;
970 
971 	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
972 		b = sourceLoadBlock(r, mode);
973 		if(b == nil)
974 			return 0;
975 		if(memcmp(r->score, rr->score, VtScoreSize) != 0){
976 			memmove(rr->score, b->score, VtScoreSize);
977 			rr->scoreEpoch = b->l.epoch;
978 			rr->tag = b->l.tag;
979 			rr->epoch = rr->fs->ehi;
980 		}
981 		blockDupLock(b);
982 		bb = b;
983 	}else if(r->parent==rr->parent || r->offset > rr->offset){
984 		bb = sourceLoadBlock(rr, mode);
985 		b = sourceLoadBlock(r, mode);
986 	}else{
987 		b = sourceLoadBlock(r, mode);
988 		bb = sourceLoadBlock(rr, mode);
989 	}
990 	if(b == nil || bb == nil){
991 		if(b)
992 			blockPut(b);
993 		if(bb)
994 			blockPut(bb);
995 		return 0;
996 	}
997 
998 	/*
999 	 * The fact that we are holding b and bb serves
1000 	 * as the lock entitling us to write to r->b and rr->b.
1001 	 */
1002 	r->b = b;
1003 	rr->b = bb;
1004 	return 1;
1005 }
1006 
1007 void
1008 sourceUnlock(Source *r)
1009 {
1010 	Block *b;
1011 
1012 	if(r->b == nil){
1013 		fprint(2, "sourceUnlock: already unlocked\n");
1014 		abort();
1015 	}
1016 	b = r->b;
1017 	r->b = nil;
1018 	blockPut(b);
1019 }
1020 
1021 static Block*
1022 sourceLoad(Source *r, Entry *e)
1023 {
1024 	Block *b;
1025 
1026 	assert(sourceIsLocked(r));
1027 	b = r->b;
1028 	if(!entryUnpack(e, b->data, r->offset % r->epb))
1029 		return nil;
1030 	if(e->gen != r->gen){
1031 		vtSetError(ERemoved);
1032 		return nil;
1033 	}
1034 	blockDupLock(b);
1035 	return b;
1036 }
1037 
1038 static int
1039 sizeToDepth(uvlong s, int psize, int dsize)
1040 {
1041 	int np;
1042 	int d;
1043 
1044 	/* determine pointer depth */
1045 	np = psize/VtScoreSize;
1046 	s = (s + dsize - 1)/dsize;
1047 	for(d = 0; s > 1; d++)
1048 		s = (s + np - 1)/np;
1049 	return d;
1050 }
1051 
1052 static u32int
1053 tagGen(void)
1054 {
1055 	u32int tag;
1056 
1057 	for(;;){
1058 		tag = lrand();
1059 		if(tag >= UserTag)
1060 			break;
1061 	}
1062 	return tag;
1063 }
1064 
1065 char *
1066 sourceName(Source *s)
1067 {
1068 	return fileName(s->file);
1069 }
1070