xref: /plan9/sys/src/cmd/fossil/source.c (revision f19e7b749ec99577072cd8e44030fe810f42c7ad)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static int	sizeToDepth(uvlong s, int psize, int dsize);
7 static u32int 	tagGen(void);
8 static Block 	*sourceLoad(Source *r, Entry *e);
9 static Block	*sourceLoadUnlocked(Source *r, Entry *e);
10 static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
11 static int	sourceShrinkSize(Source*, Entry*, uvlong);
12 static int	sourceGrowDepth(Source*, Block*, Entry*, int);
13 
14 #define sourceIsLocked(r)	((r)->b != nil)
15 
/*
 * Construct a Source for the directory entry at index offset%epb
 * in block b.  p is the parent source, or nil for the file system
 * root (then offset must be 0).  The entry is validated; any
 * structural inconsistency yields nil with EBadEntry set
 * (snapshot failures set ESnapRO/ESnapOld instead).  On success
 * the new Source holds a reference to p.
 */
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
	Source *r;
	int epb;		/* entries per block in the parent */
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	/* entries are only stored in directory blocks */
	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	/* implausibly small pointer or data block sizes */
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	/* the recorded depth must suffice to index e.size bytes */
	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}

	/* a local (on-disk) tree must carry a non-zero tag */
	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}

	/* block sizes cannot exceed the file system block size */
	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		/* cannot open a snapshotted entry for writing */
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		/* snapshot epoch must still be within [elo, ehi) */
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		vtLock(p->lk);
		/* a writable child requires a writable parent */
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;

//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);

	return r;
Bad:
	vtSetError(EBadEntry);
	return nil;

}
115 
116 Source *
117 sourceRoot(Fs *fs, u32int addr, int mode)
118 {
119 	Source *r;
120 	Block *b;
121 
122 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
123 	if(b == nil)
124 		return nil;
125 
126 	if(mode == OReadWrite && b->l.epoch != fs->ehi){
127 		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
128 		blockPut(b);
129 		vtSetError(EBadRoot);
130 		return nil;
131 	}
132 
133 	r = sourceAlloc(fs, b, nil, 0, mode);
134 	blockPut(b);
135 	return r;
136 }
137 
/*
 * Open the offset'th entry of directory source r as a new
 * source with the given mode.  r must be locked; the returned
 * source has r as its parent.
 */
Source *
sourceOpen(Source *r, ulong offset, int mode)
{
	ulong bn;
	Block *b;

	assert(sourceIsLocked(r));
	if(r->mode == OReadWrite)
		assert(r->epoch == r->b->l.epoch);
	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	/* which of r's data blocks holds entry number offset */
	bn = offset/(r->dsize/VtEntrySize);

	b = sourceBlock(r, bn, mode);
	if(b == nil)
		return nil;
	r = sourceAlloc(r->fs, b, r, offset, mode);
	blockPut(b);
	return r;
}
161 
/*
 * Allocate a fresh entry in directory source r and return a
 * writable source for it.  dsize is the new source's data block
 * size; dir says whether it is a directory.  If offset is 0 a
 * random block is probed first for a free slot, then the search
 * falls back to the end of the directory.
 */
Source *
sourceCreate(Source *r, int dsize, int dir, u32int offset)
{
	int i;
	Block *b;
	u32int bn, size;
	Entry e;
	int epb;
	int psize;
	Source *rr;

	assert(sourceIsLocked(r));

	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	epb = r->dsize/VtEntrySize;	/* entries per block within r */
	psize = (dsize/VtScoreSize)*VtScoreSize;	/* round down to whole scores */

	size = sourceGetDirSize(r);
	if(offset == 0){
		/*
		 * look at a random block to see if we can find an empty entry
		 */
		offset = lnrand(size+1);
		offset -= offset % epb;
	}

	/* try the given block and then try the last block */
	for(;;){
		bn = offset/epb;
		b = sourceBlock(r, bn, OReadWrite);
		if(b == nil)
			return nil;
		/*
		 * NOTE(review): r->epb is the PARENT's entries-per-block,
		 * while epb is r's own; these coincide only when all
		 * directory blocks share one dsize — confirm `offset%epb`
		 * was not intended here.
		 */
		for(i=offset%r->epb; i<epb; i++){
			entryUnpack(&e, b->data, i);
			/* free slot whose gen is not the never-reuse marker ~0 */
			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
				goto Found;
		}
		blockPut(b);
		if(offset == size){
			fprint(2, "sourceCreate: cannot happen\n");
			vtSetError("sourceCreate: cannot happen");
			return nil;
		}
		offset = size;
	}

Found:
	/* found an entry - gen already set */
	e.psize = psize;
	e.dsize = dsize;
	assert(psize && dsize);
	e.flags = VtEntryActive;
	if(dir)
		e.flags |= VtEntryDir;
	e.depth = 0;
	e.size = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	e.tag = 0;
	e.snap = 0;
	e.archive = 0;
	entryPack(&e, b->data, i);
	blockDirty(b);

	offset = bn*epb + i;
	/* grow the directory if we used a slot past its recorded end */
	if(offset+1 > size){
		if(!sourceSetDirSize(r, offset+1)){
			blockPut(b);
			return nil;
		}
	}

	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
	blockPut(b);
	return rr;
}
241 
/*
 * Truncate (doremove==0) or remove (doremove==1) source r.
 * Both zero the entry's tree and free its blocks; remove
 * additionally bumps the generation so stale Sources fail with
 * ERemoved, clears the whole entry, and closes r.
 */
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* entry blocks may only be modified in the current epoch */
	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		/* gen of ~0 marks a slot that may never be reused */
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag, 1);	/* free the old tree */
	blockPut(b);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}
295 
296 int
297 sourceRemove(Source *r)
298 {
299 	return sourceKill(r, 1);
300 }
301 
302 int
303 sourceTruncate(Source *r)
304 {
305 	return sourceKill(r, 0);
306 }
307 
308 uvlong
309 sourceGetSize(Source *r)
310 {
311 	Entry e;
312 	Block *b;
313 
314 	assert(sourceIsLocked(r));
315 	b = sourceLoad(r, &e);
316 	if(b == nil)
317 		return 0;
318 	blockPut(b);
319 
320 	return e.size;
321 }
322 
/*
 * Zero out the tail of the tree rooted at e so the file keeps
 * only its first size bytes.  At each pointer level, pointers
 * past the cut are zeroed (and their links removed) and the one
 * partially-needed subtree is descended into.  Fails (returns 0)
 * if a block cannot be obtained or is not in the current epoch.
 */
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	/* bytes accounted for by one pointer at the top level */
	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag, 1);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			/* cut falls exactly on a subtree boundary; done */
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		/* zero the tail of the final data block */
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}
394 
/*
 * Set r's size to exactly size bytes, adjusting the pointer
 * tree depth as needed and zeroing any truncated tail.  Size 0
 * is handled as a full truncation.  r must be locked.
 */
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	/* refuse sizes the pointer tree cannot address */
	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	/* tail zeroing is best effort; the entry size is updated regardless */
	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
445 
446 int
447 sourceSetDirSize(Source *r, ulong ds)
448 {
449 	uvlong size;
450 	int epb;
451 
452 	assert(sourceIsLocked(r));
453 	epb = r->dsize/VtEntrySize;
454 
455 	size = (uvlong)r->dsize*(ds/epb);
456 	size += VtEntrySize*(ds%epb);
457 	return sourceSetSize(r, size);
458 }
459 
460 ulong
461 sourceGetDirSize(Source *r)
462 {
463 	ulong ds;
464 	uvlong size;
465 	int epb;
466 
467 	assert(sourceIsLocked(r));
468 	epb = r->dsize/VtEntrySize;
469 
470 	size = sourceGetSize(r);
471 	ds = epb*(size/r->dsize);
472 	ds += (size%r->dsize)/VtEntrySize;
473 	return ds;
474 }
475 
476 int
477 sourceGetEntry(Source *r, Entry *e)
478 {
479 	Block *b;
480 
481 	assert(sourceIsLocked(r));
482 	b = sourceLoad(r, e);
483 	if(b == nil)
484 		return 0;
485 	blockPut(b);
486 
487 	return 1;
488 }
489 
490 /*
491  * Must be careful with this.  Doesn't record
492  * dependencies, so don't introduce any!
493  */
494 int
495 sourceSetEntry(Source *r, Entry *e)
496 {
497 	Block *b;
498 	Entry oe;
499 
500 	assert(sourceIsLocked(r));
501 	b = sourceLoad(r, &oe);
502 	if(b == nil)
503 		return 0;
504 	entryPack(e, b->data, r->offset%r->epb);
505 	blockDirty(b);
506 	blockPut(b);
507 
508 	return 1;
509 }
510 
/*
 * Fetch the child of p at the given index: a pointer slot of a
 * pointer block, or the tree rooted at entry e when p is a
 * directory block.  In writable modes a child from an older
 * epoch is copied into the current one (copy on write), the
 * pointer in p is updated, and the write ordering is recorded
 * via blockDependency.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;
	uchar oscore[VtScoreSize], score[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		/* directory block: child is the tree rooted at e */
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b)
		b->pc = getcallerpc(&p);

	if(b == nil || mode == OReadOnly)
		return b;

	if(p->l.epoch != fs->ehi){
		fprint(2, "blockWalk: parent not writable\n");
		abort();
	}
	if(b->l.epoch == fs->ehi)
		return b;	/* already in the current epoch; no copy needed */

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		/* first local copy under this entry: invent a tag */
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	b->pc = getcallerpc(&p);
	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	memmove(score, b->score, VtScoreSize);
	if(p->l.type == BtDir){
		/* point the entry at the copy; p must be written after b */
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	/* drop the link to the superseded copy */
	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag, 0);

	return b;
}
582 
583 /*
584  * Change the depth of the source r.
585  * The entry e for r is contained in block p.
586  */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	u32int tag;
	int type;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();	/* growing makes the tree local; it needs a tag */

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
		/* the new root's first pointer is the old root */
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		blockDependency(bb, b, 0, vtZeroScore, nil);
		blockPut(b);
		b = bb;
		blockDirty(b);
	}

	/* record the new root in the entry; p must be written after b */
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}
637 
638 static int
639 sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
640 {
641 	Block *b, *nb, *ob, *rb;
642 	u32int tag;
643 	int type, d;
644 	Entry oe;
645 
646 	assert(sourceIsLocked(r));
647 	assert(depth <= VtPointerDepth);
648 
649 	type = entryType(e);
650 	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
651 	if(rb == nil)
652 		return 0;
653 
654 	tag = e->tag;
655 	if(tag == 0)
656 		tag = tagGen();
657 
658 	/*
659 	 * Walk down to the new root block.
660 	 * We may stop early, but something is better than nothing.
661 	 */
662 	oe = *e;
663 
664 	ob = nil;
665 	b = rb;
666 /* BUG: explain type++.  i think it is a real bug */
667 	for(d=e->depth; d > depth; d--, type++){
668 		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
669 		if(nb == nil)
670 			break;
671 		if(ob!=nil && ob!=rb)
672 			blockPut(ob);
673 		ob = b;
674 		b = nb;
675 	}
676 
677 	if(b == rb){
678 		blockPut(rb);
679 		return 0;
680 	}
681 
682 	/*
683 	 * Right now, e points at the root block rb, b is the new root block,
684 	 * and ob points at b.  To update:
685 	 *
686 	 *	(i) change e to point at b
687 	 *	(ii) zero the pointer ob -> b
688 	 *	(iii) free the root block
689 	 *
690 	 * p (the block containing e) must be written before
691 	 * anything else.
692  	 */
693 
694 	/* (i) */
695 	e->depth = d;
696 	/* might have been local and now global; reverse cannot happen */
697 	if(globalToLocal(b->score) == NilBlock)
698 		e->flags &= ~VtEntryLocal;
699 	memmove(e->score, b->score, VtScoreSize);
700 	entryPack(e, p->data, r->offset % r->epb);
701 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
702 	blockDirty(p);
703 
704 	/* (ii) */
705 	memmove(ob->data, vtZeroScore, VtScoreSize);
706 	blockDependency(ob, p, 0, b->score, nil);
707 	blockDirty(ob);
708 
709 	/* (iii) */
710 	if(rb->addr != NilBlock)
711 		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
712 
713 	blockPut(rb);
714 	if(ob!=nil && ob!=rb)
715 		blockPut(ob);
716 	blockPut(b);
717 
718 	return d == depth;
719 }
720 
721 /*
722  * Normally we return the block at the given number.
723  * If early is set, we stop earlier in the tree.  Setting early
724  * to 1 gives us the block that contains the pointer to bn.
725  */
Block *
_sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
{
	Block *b, *bb;
	int index[VtPointerDepth+1];
	Entry e;
	int i, np;
	int m;

	assert(sourceIsLocked(r));
	assert(bn != NilBlock);

	/* mode for intermediate block */
	m = mode;
	if(m == OOverWrite)
		m = OReadWrite;

	b = sourceLoad(r, &e);
	if(b == nil)
		return nil;
	/* snapshots may not see content marked no-archive */
	if(r->mode == OReadOnly && (e.flags & VtEntryNoArchive)){
		blockPut(b);
		vtSetError(ENotArchived);
		return nil;
	}

	if(tag){
		if(e.tag == 0)
			e.tag = tag;	/* adopt the caller's tag */
		else if(e.tag != tag){
			fprint(2, "tag mismatch\n");
			vtSetError("tag mismatch");
			goto Err;
		}
	}

	/* decompose bn into per-level pointer indices */
	np = e.psize/VtScoreSize;
	memset(index, 0, sizeof(index));
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			vtSetError(EBadAddr);
			goto Err;
		}
		index[i] = bn % np;
		bn /= np;
	}

	if(i > e.depth){
		if(mode == OReadOnly){
			vtSetError(EBadAddr);
			goto Err;
		}
		/* writing past the current depth: grow the tree first */
		if(!sourceGrowDepth(r, b, &e, i))
			goto Err;
	}

	index[e.depth] = r->offset % r->epb;	/* top step: the entry itself */

	/* walk down from the entry block; early trims the descent */
	for(i=e.depth; i>=early; i--){
		bb = blockWalk(b, index[i], m, r->fs, &e);
		if(bb == nil)
			goto Err;
		blockPut(b);
		b = bb;
	}
	b->pc = getcallerpc(&r);
	return b;
Err:
	blockPut(b);
	return nil;
}
797 
798 Block*
799 sourceBlock(Source *r, ulong bn, int mode)
800 {
801 	Block *b;
802 
803 	b = _sourceBlock(r, bn, mode, 0, 0);
804 	if(b)
805 		b->pc = getcallerpc(&r);
806 	return b;
807 }
808 
809 void
810 sourceClose(Source *r)
811 {
812 	if(r == nil)
813 		return;
814 	vtLock(r->lk);
815 	r->ref--;
816 	if(r->ref){
817 		vtUnlock(r->lk);
818 		return;
819 	}
820 	assert(r->ref == 0);
821 	vtUnlock(r->lk);
822 	if(r->parent)
823 		sourceClose(r->parent);
824 	vtLockFree(r->lk);
825 	memset(r, ~0, sizeof(*r));
826 	vtMemFree(r);
827 }
828 
829 /*
830  * Retrieve the block containing the entry for r.
831  * If a snapshot has happened, we might need
832  * to get a new copy of the block.  We avoid this
833  * in the common case by caching the score for
834  * the block and the last epoch in which it was valid.
835  *
836  * We use r->mode to tell the difference between active
837  * file system sources (OReadWrite) and sources for the
838  * snapshot file system (OReadOnly).
839  */
static Block*
sourceLoadBlock(Source *r, int mode)
{
	u32int addr;
	Block *b;

	switch(r->mode){
	default:
		assert(0);
	case OReadWrite:
		assert(r->mode == OReadWrite);
		/*
		 * This needn't be true -- we might bump the low epoch
		 * to reclaim some old blocks, but since this score is
		 * OReadWrite, the blocks must all still be open, so none
		 * are reclaimed.  Thus it's okay that the epoch is so low.
		 * Proceed.
		assert(r->epoch >= r->fs->elo);
		 */
		if(r->epoch == r->fs->ehi){
			/* cached score still current: fetch directly */
			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
			if(b == nil)
				return nil;
			assert(r->epoch == b->l.epoch);
			return b;
		}
		/* epoch advanced: rewalk from the parent to pick up the COW copy */
		assert(r->parent != nil);
		if(!sourceLock(r->parent, OReadWrite))
			return nil;
		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
		sourceUnlock(r->parent);
		if(b == nil)
			return nil;
		assert(b->l.epoch == r->fs->ehi);
	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
		/* refresh the cached score/tag/epoch for next time */
		memmove(r->score, b->score, VtScoreSize);
		r->scoreEpoch = b->l.epoch;
		r->tag = b->l.tag;
		r->epoch = r->fs->ehi;
		return b;

	case OReadOnly:
		addr = globalToLocal(r->score);
		if(addr == NilBlock)
			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed.  Rewalk from the parent and get the
		 * new pointer.  This can't happen in the OReadWrite case
		 * above because blocks in the current epoch don't get
		 * reclaimed.  The fact that we're OReadOnly means we're
		 * a snapshot.  (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			if(!sourceLock(r->parent, OReadOnly))
				return nil;
			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
			sourceUnlock(r->parent);
			if(b){
				fprint(2, "sourceAlloc: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				r->scoreEpoch = b->l.epoch;
				return b;
			}
		}
		return nil;
	}
}
915 
916 int
917 sourceLock(Source *r, int mode)
918 {
919 	Block *b;
920 
921 	if(mode == -1)
922 		mode = r->mode;
923 
924 	b = sourceLoadBlock(r, mode);
925 	if(b == nil)
926 		return 0;
927 	/*
928 	 * The fact that we are holding b serves as the
929 	 * lock entitling us to write to r->b.
930 	 */
931 	assert(r->b == nil);
932 	r->b = b;
933 	if(r->mode == OReadWrite)
934 		assert(r->epoch == r->b->l.epoch);
935 	return 1;
936 }
937 
938 /*
939  * Lock two (usually sibling) sources.  This needs special care
940  * because the Entries for both sources might be in the same block.
941  * We also try to lock blocks in left-to-right order within the tree.
942  */
int
sourceLock2(Source *r, Source *rr, int mode)
{
	Block *b, *bb;

	if(rr == nil)
		return sourceLock(r, mode);

	if(mode == -1)
		mode = r->mode;

	/* both entries in the same block: load once, lock twice */
	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		b = sourceLoadBlock(r, mode);
		if(b == nil)
			return 0;
		/* propagate the possibly refreshed score to rr as well */
		if(memcmp(r->score, rr->score, VtScoreSize) != 0){
			memmove(rr->score, b->score, VtScoreSize);
			rr->scoreEpoch = b->l.epoch;
			rr->tag = b->l.tag;
			rr->epoch = rr->fs->ehi;
		}
		blockDupLock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		/* lock left-to-right within the tree to avoid deadlock */
		bb = sourceLoadBlock(rr, mode);
		b = sourceLoadBlock(r, mode);
	}else{
		b = sourceLoadBlock(r, mode);
		bb = sourceLoadBlock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			blockPut(b);
		if(bb)
			blockPut(bb);
		return 0;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 1;
}
989 
990 void
991 sourceUnlock(Source *r)
992 {
993 	Block *b;
994 
995 	if(r->b == nil){
996 		fprint(2, "sourceUnlock: already unlocked\n");
997 		abort();
998 	}
999 	b = r->b;
1000 	r->b = nil;
1001 	blockPut(b);
1002 }
1003 
/*
 * Return r's locked entry block with an extra lock reference,
 * unpacking r's entry into *e.  Fails with ERemoved if the
 * entry generation no longer matches (deleted and reused slot).
 */
static Block*
sourceLoad(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = r->b;
	if(!entryUnpack(e, b->data, r->offset % r->epb))
		return nil;
	if(e->gen != r->gen){
		vtSetError(ERemoved);
		return nil;
	}
	blockDupLock(b);
	return b;
}
1020 
1021 static int
1022 sizeToDepth(uvlong s, int psize, int dsize)
1023 {
1024 	int np;
1025 	int d;
1026 
1027 	/* determine pointer depth */
1028 	np = psize/VtScoreSize;
1029 	s = (s + dsize - 1)/dsize;
1030 	for(d = 0; s > 1; d++)
1031 		s = (s + np - 1)/np;
1032 	return d;
1033 }
1034 
1035 static u32int
1036 tagGen(void)
1037 {
1038 	u32int tag;
1039 
1040 	for(;;){
1041 		tag = lrand();
1042 		if(tag >= UserTag)
1043 			break;
1044 	}
1045 	return tag;
1046 }
1047