xref: /plan9/sys/src/cmd/fossil/source.c (revision fcdc259e6e3e2507c70cf7008b5ba7ae5c45aa9e)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static int	sizeToDepth(uvlong s, int psize, int dsize);
7 static u32int 	tagGen(void);
8 static Block 	*sourceLoad(Source *r, Entry *e);
9 static Block	*sourceLoadUnlocked(Source *r, Entry *e);
10 static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
11 static int	sourceShrinkSize(Source*, Entry*, uvlong);
12 static int	sourceGrowDepth(Source*, Block*, Entry*, int);
13 
/* a source is "locked" while it holds a reference to its entry block in r->b */
#define sourceIsLocked(r)	((r)->b != nil)
15 
/*
 * Build an in-memory Source for the directory entry stored at
 * index offset%epb in block b.  p is the parent source; it is
 * nil only for the file system root, in which case offset must
 * be 0.  The packed entry is validated (active, sane sizes,
 * depth consistent with size, local entries tagged) and the
 * requested mode is checked against the snapshot epochs.
 * Returns nil with vtSetError set on failure.
 */
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
	Source *r;
	int epb;	/* entries per block in the parent's data blocks */
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	/* entries only live in directory blocks */
	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	/* the recorded depth must be enough to address e.size bytes */
	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}

	/* local (unarchived) entries must carry a non-zero tag */
	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}

	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		/* can't open a snapshotted entry for writing */
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		/* snapshots below the low epoch have been reclaimed */
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		vtLock(p->lk);
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;

//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);

	return r;
Bad:
	vtSetError(EBadEntry);
	return nil;

}
115 
116 Source *
117 sourceRoot(Fs *fs, u32int addr, int mode)
118 {
119 	Source *r;
120 	Block *b;
121 
122 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
123 	if(b == nil)
124 		return nil;
125 
126 	if(mode == OReadWrite)
127 	if((b->l.state&BsClosed) || b->l.epoch != fs->ehi){
128 		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
129 		blockPut(b);
130 		vtSetError(EBadRoot);
131 		return nil;
132 	}
133 
134 	r = sourceAlloc(fs, b, nil, 0, mode);
135 	blockPut(b);
136 	return r;
137 }
138 
139 Source *
140 sourceOpen(Source *r, ulong offset, int mode)
141 {
142 	ulong bn;
143 	Block *b;
144 
145 	assert(sourceIsLocked(r));
146 	if(r->mode == OReadWrite)
147 		assert(r->epoch == r->b->l.epoch);
148 	if(!r->dir){
149 		vtSetError(ENotDir);
150 		return nil;
151 	}
152 
153 	bn = offset/(r->dsize/VtEntrySize);
154 
155 	b = sourceBlock(r, bn, mode);
156 	if(b == nil)
157 		return nil;
158 	r = sourceAlloc(r->fs, b, r, offset, mode);
159 	blockPut(b);
160 	return r;
161 }
162 
163 Source *
164 sourceCreate(Source *r, int dsize, int dir, u32int offset)
165 {
166 	int i;
167 	Block *b;
168 	u32int bn, size;
169 	Entry e;
170 	int epb;
171 	int psize;
172 	Source *rr;
173 
174 	assert(sourceIsLocked(r));
175 
176 	if(!r->dir){
177 		vtSetError(ENotDir);
178 		return nil;
179 	}
180 
181 	epb = r->dsize/VtEntrySize;
182 	psize = (dsize/VtScoreSize)*VtScoreSize;
183 
184 	size = sourceGetDirSize(r);
185 	if(offset == 0){
186 		/*
187 		 * look at a random block to see if we can find an empty entry
188 		 */
189 		offset = lnrand(size+1);
190 		offset -= offset % epb;
191 	}
192 
193 	/* try the given block and then try the last block */
194 	for(;;){
195 		bn = offset/epb;
196 		b = sourceBlock(r, bn, OReadWrite);
197 		if(b == nil)
198 			return nil;
199 		for(i=offset%r->epb; i<epb; i++){
200 			entryUnpack(&e, b->data, i);
201 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
202 				goto Found;
203 		}
204 		blockPut(b);
205 		if(offset == size){
206 			fprint(2, "sourceCreate: cannot happen\n");
207 			vtSetError("sourceCreate: cannot happen");
208 			return nil;
209 		}
210 		offset = size;
211 	}
212 
213 Found:
214 	/* found an entry - gen already set */
215 	e.psize = psize;
216 	e.dsize = dsize;
217 	assert(psize && dsize);
218 	e.flags = VtEntryActive;
219 	if(dir)
220 		e.flags |= VtEntryDir;
221 	e.depth = 0;
222 	e.size = 0;
223 	memmove(e.score, vtZeroScore, VtScoreSize);
224 	e.tag = 0;
225 	e.snap = 0;
226 	e.archive = 0;
227 	entryPack(&e, b->data, i);
228 	blockDirty(b);
229 
230 	offset = bn*epb + i;
231 	if(offset+1 > size){
232 		if(!sourceSetDirSize(r, offset+1)){
233 			blockPut(b);
234 			return nil;
235 		}
236 	}
237 
238 	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
239 	blockPut(b);
240 	return rr;
241 }
242 
/*
 * Common implementation of sourceRemove and sourceTruncate.
 * With doremove set, the entry is deactivated and its gen bumped
 * so stale Sources notice; otherwise the entry survives but its
 * tree is dropped and its size reset to 0.  r must be locked;
 * on remove, the caller's lock and reference are consumed.
 */
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* entries may only be rewritten in the current epoch */
	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		/* gen of ~0 permanently retires the slot; don't wrap past it */
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	/* drop the link from the entry block to the old tree root */
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag);
	blockPut(b);

	if(doremove){
		/* the source is gone: release the caller's lock and reference */
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}
296 
/*
 * Remove the source: deactivate its entry and free its tree.
 * Consumes the caller's lock and reference on r.
 */
int
sourceRemove(Source *r)
{
	return sourceKill(r, 1);
}
302 
/*
 * Truncate the source to zero size, keeping the entry itself.
 * r stays locked and usable afterward.
 */
int
sourceTruncate(Source *r)
{
	return sourceKill(r, 0);
}
308 
309 uvlong
310 sourceGetSize(Source *r)
311 {
312 	Entry e;
313 	Block *b;
314 
315 	assert(sourceIsLocked(r));
316 	b = sourceLoad(r, &e);
317 	if(b == nil)
318 		return 0;
319 	blockPut(b);
320 
321 	return e.size;
322 }
323 
/*
 * Free the part of the tree under entry e that lies beyond the
 * new size.  Walks the boundary path down the tree: at each
 * pointer level, pointers entirely past size are zeroed (and
 * their local blocks unlinked), then the partially needed
 * pointer is descended.  Returns 0 if a block on the path
 * cannot be modified in place.
 */
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;	/* bytes covered by one pointer at the current level */
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	/* compute bytes per pointer at the root's level */
	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock
		|| (b->l.state&BsClosed)
		|| b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			/* new size falls exactly on a ptrsz boundary: done */
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock
	|| (b->l.state&BsClosed)
	|| b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		/* zero the tail of the last kept data block */
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}
399 
/*
 * Set the logical size of r to size: adjust the tree depth to
 * match, free blocks beyond the new size when shrinking, and
 * record the size in the entry.  size==0 becomes a truncate.
 * r must be locked; returns 0 on failure with vtSetError set.
 */
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	/* best effort: failing to free excess blocks doesn't fail the resize */
	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
450 
451 int
452 sourceSetDirSize(Source *r, ulong ds)
453 {
454 	uvlong size;
455 	int epb;
456 
457 	assert(sourceIsLocked(r));
458 	epb = r->dsize/VtEntrySize;
459 
460 	size = (uvlong)r->dsize*(ds/epb);
461 	size += VtEntrySize*(ds%epb);
462 	return sourceSetSize(r, size);
463 }
464 
465 ulong
466 sourceGetDirSize(Source *r)
467 {
468 	ulong ds;
469 	uvlong size;
470 	int epb;
471 
472 	assert(sourceIsLocked(r));
473 	epb = r->dsize/VtEntrySize;
474 
475 	size = sourceGetSize(r);
476 	ds = epb*(size/r->dsize);
477 	ds += (size%r->dsize)/VtEntrySize;
478 	return ds;
479 }
480 
481 int
482 sourceGetEntry(Source *r, Entry *e)
483 {
484 	Block *b;
485 
486 	assert(sourceIsLocked(r));
487 	b = sourceLoad(r, e);
488 	if(b == nil)
489 		return 0;
490 	blockPut(b);
491 
492 	return 1;
493 }
494 
495 /*
496  * Must be careful with this.  Doesn't record
497  * dependencies, so don't introduce any!
498  */
499 int
500 sourceSetEntry(Source *r, Entry *e)
501 {
502 	Block *b;
503 	Entry oe;
504 
505 	assert(sourceIsLocked(r));
506 	b = sourceLoad(r, &oe);
507 	if(b == nil)
508 		return 0;
509 	entryPack(e, b->data, r->offset%r->epb);
510 	blockDirty(b);
511 	blockPut(b);
512 
513 	return 1;
514 }
515 
/*
 * Walk one level down the tree from block p.  If p is a directory
 * block (BtDir), the child is the tree root named by entry e;
 * otherwise the child is the index'th pointer in p.  For writable
 * walks, a child that is closed or belongs to an old epoch is
 * copied into the current epoch (copy on write), the pointer in p
 * is updated, and the write ordering is recorded with
 * blockDependency so p is not flushed before the copy.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;
	uchar oscore[VtScoreSize], score[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		/* directory block: the child is e's root block */
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		/* pointer block: the child is one level lower */
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b)
		b->pc = getcallerpc(&p);

	if(b == nil || mode == OReadOnly)
		return b;

	/* from here on we intend to write: p itself must be current */
	assert(!(p->l.state&BsClosed) && p->l.epoch == fs->ehi);
	if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		/* the tree becomes local storage; it needs a tag */
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	b->pc = getcallerpc(&p);
	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	/* NOTE(review): score is saved here but never read below */
	memmove(score, b->score, VtScoreSize);
	if(p->l.type == BtDir){
		/* repoint the entry at the copy and record the ordering */
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		/* repoint the index'th score at the copy */
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	/* p no longer references the old copy */
	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag);

	return b;
}
584 
585 /*
586  * Change the depth of the source r.
587  * The entry e for r is contained in block p.
588  */
589 static int
590 sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
591 {
592 	Block *b, *bb;
593 	u32int tag;
594 	int type;
595 	Entry oe;
596 
597 	assert(sourceIsLocked(r));
598 	assert(depth <= VtPointerDepth);
599 
600 	type = entryType(e);
601 	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
602 	if(b == nil)
603 		return 0;
604 
605 	tag = e->tag;
606 	if(tag == 0)
607 		tag = tagGen();
608 
609 	oe = *e;
610 
611 	/*
612 	 * Keep adding layers until we get to the right depth
613 	 * or an error occurs.
614 	 */
615 	while(e->depth < depth){
616 		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
617 		if(bb == nil)
618 			break;
619 //fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
620 		memmove(bb->data, b->score, VtScoreSize);
621 		memmove(e->score, bb->score, VtScoreSize);
622 		e->depth++;
623 		type++;
624 		e->tag = tag;
625 		e->flags |= VtEntryLocal;
626 		blockDependency(bb, b, 0, vtZeroScore, nil);
627 		blockPut(b);
628 		b = bb;
629 		blockDirty(b);
630 	}
631 
632 	entryPack(e, p->data, r->offset % r->epb);
633 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
634 	blockPut(b);
635 	blockDirty(p);
636 
637 	return e->depth == depth;
638 }
639 
640 static int
641 sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
642 {
643 	Block *b, *nb, *ob, *rb;
644 	u32int tag;
645 	int type, d;
646 	Entry oe;
647 
648 	assert(sourceIsLocked(r));
649 	assert(depth <= VtPointerDepth);
650 
651 	type = entryType(e);
652 	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
653 	if(rb == nil)
654 		return 0;
655 
656 	tag = e->tag;
657 	if(tag == 0)
658 		tag = tagGen();
659 
660 	/*
661 	 * Walk down to the new root block.
662 	 * We may stop early, but something is better than nothing.
663 	 */
664 	oe = *e;
665 
666 	ob = nil;
667 	b = rb;
668 /* BUG: explain type++.  i think it is a real bug */
669 	for(d=e->depth; d > depth; d--, type++){
670 		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
671 		if(nb == nil)
672 			break;
673 		if(ob!=nil && ob!=rb)
674 			blockPut(ob);
675 		ob = b;
676 		b = nb;
677 	}
678 
679 	if(b == rb){
680 		blockPut(rb);
681 		return 0;
682 	}
683 
684 	/*
685 	 * Right now, e points at the root block rb, b is the new root block,
686 	 * and ob points at b.  To update:
687 	 *
688 	 *	(i) change e to point at b
689 	 *	(ii) zero the pointer ob -> b
690 	 *	(iii) free the root block
691 	 *
692 	 * p (the block containing e) must be written before
693 	 * anything else.
694  	 */
695 
696 	/* (i) */
697 	e->depth = d;
698 	memmove(e->score, b->score, VtScoreSize);
699 	entryPack(e, p->data, r->offset % r->epb);
700 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
701 	blockDirty(p);
702 
703 	/* (ii) */
704 	memmove(ob->data, vtZeroScore, VtScoreSize);
705 	blockDependency(ob, p, 0, b->score, nil);
706 	blockDirty(ob);
707 
708 	/* (iii) */
709 	if(rb->addr != NilBlock)
710 		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag);
711 
712 	blockPut(rb);
713 	if(ob!=nil && ob!=rb)
714 		blockPut(ob);
715 	blockPut(b);
716 
717 	return d == depth;
718 }
719 
720 /*
721  * Normally we return the block at the given number.
722  * If early is set, we stop earlier in the tree.  Setting early
723  * to 1 gives us the block that contains the pointer to bn.
724  */
725 Block *
726 _sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
727 {
728 	Block *b, *bb;
729 	int index[VtPointerDepth+1];
730 	Entry e;
731 	int i, np;
732 	int m;
733 
734 	assert(sourceIsLocked(r));
735 	assert(bn != NilBlock);
736 
737 	/* mode for intermediate block */
738 	m = mode;
739 	if(m == OOverWrite)
740 		m = OReadWrite;
741 
742 	b = sourceLoad(r, &e);
743 	if(b == nil)
744 		return nil;
745 	if(r->mode == OReadOnly && (e.flags & VtEntryNoArchive)){
746 		blockPut(b);
747 		vtSetError(ENotArchived);
748 		return nil;
749 	}
750 
751 	if(tag){
752 		if(e.tag == 0)
753 			e.tag = tag;
754 		else if(e.tag != tag){
755 			fprint(2, "tag mismatch\n");
756 			vtSetError("tag mismatch");
757 			goto Err;
758 		}
759 	}
760 
761 	np = e.psize/VtScoreSize;
762 	memset(index, 0, sizeof(index));
763 	for(i=0; bn > 0; i++){
764 		if(i >= VtPointerDepth){
765 			vtSetError(EBadAddr);
766 			goto Err;
767 		}
768 		index[i] = bn % np;
769 		bn /= np;
770 	}
771 
772 	if(i > e.depth){
773 		if(mode == OReadOnly){
774 			vtSetError(EBadAddr);
775 			goto Err;
776 		}
777 		if(!sourceGrowDepth(r, b, &e, i))
778 			goto Err;
779 	}
780 
781 	index[e.depth] = r->offset % r->epb;
782 
783 	for(i=e.depth; i>=early; i--){
784 		bb = blockWalk(b, index[i], m, r->fs, &e);
785 		if(bb == nil)
786 			goto Err;
787 		blockPut(b);
788 		b = bb;
789 	}
790 	b->pc = getcallerpc(&r);
791 	return b;
792 Err:
793 	blockPut(b);
794 	return nil;
795 }
796 
797 Block*
798 sourceBlock(Source *r, ulong bn, int mode)
799 {
800 	Block *b;
801 
802 	b = _sourceBlock(r, bn, mode, 0, 0);
803 	if(b)
804 		b->pc = getcallerpc(&r);
805 	return b;
806 }
807 
/*
 * Drop a reference to r; on the last reference, release the
 * reference held on the parent and free the source.
 */
void
sourceClose(Source *r)
{
	if(r == nil)
		return;
	vtLock(r->lk);
	r->ref--;
	if(r->ref){
		vtUnlock(r->lk);
		return;
	}
	assert(r->ref == 0);
	vtUnlock(r->lk);
	if(r->parent)
		sourceClose(r->parent);
	vtLockFree(r->lk);
	/* poison freed memory to catch use after close */
	memset(r, ~0, sizeof(*r));
	vtMemFree(r);
}
827 
828 /*
829  * Retrieve the block containing the entry for r.
830  * If a snapshot has happened, we might need
831  * to get a new copy of the block.  We avoid this
832  * in the common case by caching the score for
833  * the block and the last epoch in which it was valid.
834  *
835  * We use r->mode to tell the difference between active
836  * file system sources (OReadWrite) and sources for the
837  * snapshot file system (OReadOnly).
838  */
839 static Block*
840 sourceLoadBlock(Source *r, int mode)
841 {
842 	u32int addr;
843 	Block *b;
844 
845 	switch(r->mode){
846 	default:
847 		assert(0);
848 	case OReadWrite:
849 		assert(r->mode == OReadWrite);
850 		/*
851 		 * This needn't be true -- we might bump the low epoch
852 		 * to reclaim some old blocks, but since this score is
853 		 * OReadWrite, the blocks must all still be open, so none
854 		 * are reclaimed.  Thus it's okay that the epoch is so low.
855 		 * Proceed.
856 		assert(r->epoch >= r->fs->elo);
857 		 */
858 		if(r->epoch == r->fs->ehi){
859 			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
860 			if(b == nil)
861 				return nil;
862 			assert(r->epoch == b->l.epoch);
863 			return b;
864 		}
865 		assert(r->parent != nil);
866 		if(!sourceLock(r->parent, OReadWrite))
867 			return nil;
868 		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
869 		sourceUnlock(r->parent);
870 		if(b == nil)
871 			return nil;
872 		assert(b->l.epoch == r->fs->ehi);
873 		memmove(r->score, b->score, VtScoreSize);
874 		r->scoreEpoch = b->l.epoch;
875 		r->tag = b->l.tag;
876 		r->epoch = r->fs->ehi;
877 		return b;
878 
879 	case OReadOnly:
880 		addr = globalToLocal(r->score);
881 		if(addr == NilBlock)
882 			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
883 
884 		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
885 		if(b)
886 			return b;
887 
888 		/*
889 		 * If it failed because the epochs don't match, the block has been
890 		 * archived and reclaimed.  Rewalk from the parent and get the
891 		 * new pointer.  This can't happen in the OReadWrite case
892 		 * above because blocks in the current epoch don't get
893 		 * reclaimed.  The fact that we're OReadOnly means we're
894 		 * a snapshot.  (Or else the file system is read-only, but then
895 		 * the archiver isn't going around deleting blocks.)
896 		 */
897 		if(strcmp(vtGetError(), ELabelMismatch) == 0){
898 			if(!sourceLock(r->parent, OReadOnly))
899 				return nil;
900 			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
901 			sourceUnlock(r->parent);
902 			if(b){
903 				fprint(2, "sourceAlloc: lost %V found %V\n",
904 					r->score, b->score);
905 				memmove(r->score, b->score, VtScoreSize);
906 				r->scoreEpoch = b->l.epoch;
907 				return b;
908 			}
909 		}
910 		return nil;
911 	}
912 }
913 
914 int
915 sourceLock(Source *r, int mode)
916 {
917 	Block *b;
918 
919 	if(mode == -1)
920 		mode = r->mode;
921 
922 	b = sourceLoadBlock(r, mode);
923 	if(b == nil)
924 		return 0;
925 	/*
926 	 * The fact that we are holding b serves as the
927 	 * lock entitling us to write to r->b.
928 	 */
929 	assert(r->b == nil);
930 	r->b = b;
931 	if(r->mode == OReadWrite)
932 		assert(r->epoch == r->b->l.epoch);
933 	return 1;
934 }
935 
936 /*
937  * Lock two (usually sibling) sources.  This needs special care
938  * because the Entries for both sources might be in the same block.
939  * We also try to lock blocks in left-to-right order within the tree.
940  */
941 int
942 sourceLock2(Source *r, Source *rr, int mode)
943 {
944 	Block *b, *bb;
945 
946 	if(rr == nil)
947 		return sourceLock(r, mode);
948 
949 	if(mode == -1)
950 		mode = r->mode;
951 
952 	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
953 		b = sourceLoadBlock(r, mode);
954 		if(b == nil)
955 			return 0;
956 		blockDupLock(b);
957 		bb = b;
958 	}else if(r->parent==rr->parent || r->offset > rr->offset){
959 		bb = sourceLoadBlock(rr, mode);
960 		b = sourceLoadBlock(r, mode);
961 	}else{
962 		b = sourceLoadBlock(r, mode);
963 		bb = sourceLoadBlock(rr, mode);
964 	}
965 	if(b == nil || bb == nil){
966 		if(b)
967 			blockPut(b);
968 		if(bb)
969 			blockPut(bb);
970 		return 0;
971 	}
972 
973 	/*
974 	 * The fact that we are holding b and bb serves
975 	 * as the lock entitling us to write to r->b and rr->b.
976 	 */
977 	r->b = b;
978 	rr->b = bb;
979 	return 1;
980 }
981 
982 void
983 sourceUnlock(Source *r)
984 {
985 	Block *b;
986 
987 	if(r->b == nil){
988 		fprint(2, "sourceUnlock: already unlocked\n");
989 		abort();
990 	}
991 	b = r->b;
992 	r->b = nil;
993 	blockPut(b);
994 }
995 
996 static Block*
997 sourceLoad(Source *r, Entry *e)
998 {
999 	Block *b;
1000 
1001 	assert(sourceIsLocked(r));
1002 	b = r->b;
1003 	if(!entryUnpack(e, b->data, r->offset % r->epb))
1004 		return nil;
1005 	if(e->gen != r->gen){
1006 		vtSetError(ERemoved);
1007 		return nil;
1008 	}
1009 	blockDupLock(b);
1010 	return b;
1011 }
1012 
1013 static int
1014 sizeToDepth(uvlong s, int psize, int dsize)
1015 {
1016 	int np;
1017 	int d;
1018 
1019 	/* determine pointer depth */
1020 	np = psize/VtScoreSize;
1021 	s = (s + dsize - 1)/dsize;
1022 	for(d = 0; s > 1; d++)
1023 		s = (s + np - 1)/np;
1024 	return d;
1025 }
1026 
1027 static u32int
1028 tagGen(void)
1029 {
1030 	u32int tag;
1031 
1032 	for(;;){
1033 		tag = lrand();
1034 		if(tag >= UserTag)
1035 			break;
1036 	}
1037 	return tag;
1038 }
1039