xref: /plan9-contrib/sys/src/cmd/fossil/source.c (revision a6a9e07217f318acf170f99684a55fba5200524f)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
/* forward declarations for static helpers defined below */
static int	sizeToDepth(uvlong s, int psize, int dsize);
static u32int 	tagGen(void);
static Block 	*sourceLoad(Source *r, Entry *e);
static Block	*sourceLoadUnlocked(Source *r, Entry *e);	/* NOTE(review): declared but no definition visible in this file view — confirm it exists */
static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
static int	sourceShrinkSize(Source*, Entry*, uvlong);
static int	sourceGrowDepth(Source*, Block*, Entry*, int);

/* a source is "locked" while it holds its entry block in r->b (see sourceLock/sourceUnlock) */
#define sourceIsLocked(r)	((r)->b != nil)
15 
/*
 * Build an in-memory Source for the entry at index offset within
 * directory block b.  p is the parent source (nil only for the file
 * system root).  The entry is validated thoroughly; on failure nil
 * is returned with EBadEntry (or a more specific snapshot error) set.
 */
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
	Source *r;
	int epb;
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		/* root source: its block holds exactly one entry */
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	/* entries only live in directory blocks */
	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	/* the recorded depth must be enough to address the recorded size */
	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}

	/* locally-written trees must carry a non-zero tag */
	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}

	/* block sizes in the entry cannot exceed the file system block size */
	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		/* snapshots can never be opened for writing */
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		/* read-only snapshot: must lie within the live epoch window */
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		/* hold a reference on the parent for our lifetime */
		vtLock(p->lk);
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	/* remember where the entry block was so sourceLoadBlock can refetch it */
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;

//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);

	return r;
Bad:
	vtSetError(EBadEntry);
	return nil;

}
115 
116 Source *
117 sourceRoot(Fs *fs, u32int addr, int mode)
118 {
119 	Source *r;
120 	Block *b;
121 
122 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
123 	if(b == nil)
124 		return nil;
125 
126 	if(mode == OReadWrite)
127 	if((b->l.state&BsClosed) || b->l.epoch != fs->ehi){
128 		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
129 		blockPut(b);
130 		vtSetError(EBadRoot);
131 		return nil;
132 	}
133 
134 	r = sourceAlloc(fs, b, nil, 0, mode);
135 	blockPut(b);
136 	return r;
137 }
138 
139 Source *
140 sourceOpen(Source *r, ulong offset, int mode)
141 {
142 	ulong bn;
143 	Block *b;
144 
145 	assert(sourceIsLocked(r));
146 	if(r->mode == OReadWrite)
147 		assert(r->epoch == r->b->l.epoch);
148 	if(!r->dir){
149 		vtSetError(ENotDir);
150 		return nil;
151 	}
152 
153 	bn = offset/(r->dsize/VtEntrySize);
154 
155 	b = sourceBlock(r, bn, mode);
156 	if(b == nil)
157 		return nil;
158 	r = sourceAlloc(r->fs, b, r, offset, mode);
159 	blockPut(b);
160 	return r;
161 }
162 
163 Source *
164 sourceCreate(Source *r, int dsize, int dir, u32int offset)
165 {
166 	int i;
167 	Block *b;
168 	u32int bn, size;
169 	Entry e;
170 	int epb;
171 	int psize;
172 	Source *rr;
173 
174 	assert(sourceIsLocked(r));
175 
176 	if(!r->dir){
177 		vtSetError(ENotDir);
178 		return nil;
179 	}
180 
181 	epb = r->dsize/VtEntrySize;
182 	psize = (dsize/VtScoreSize)*VtScoreSize;
183 
184 	size = sourceGetDirSize(r);
185 	if(offset == 0){
186 		/*
187 		 * look at a random block to see if we can find an empty entry
188 		 */
189 		offset = lnrand(size+1);
190 		offset -= offset % epb;
191 	}
192 
193 	/* try the given block and then try the last block */
194 	for(;;){
195 		bn = offset/epb;
196 		b = sourceBlock(r, bn, OReadWrite);
197 		if(b == nil)
198 			return nil;
199 		for(i=offset%r->epb; i<epb; i++){
200 			entryUnpack(&e, b->data, i);
201 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
202 				goto Found;
203 		}
204 		blockPut(b);
205 		if(offset == size){
206 			fprint(2, "sourceCreate: cannot happen\n");
207 			vtSetError("sourceCreate: cannot happen");
208 			return nil;
209 		}
210 		offset = size;
211 	}
212 
213 Found:
214 	/* found an entry - gen already set */
215 	e.psize = psize;
216 	e.dsize = dsize;
217 	e.flags = VtEntryActive;
218 	if(dir)
219 		e.flags |= VtEntryDir;
220 	e.depth = 0;
221 	e.size = 0;
222 	memmove(e.score, vtZeroScore, VtScoreSize);
223 	e.tag = 0;
224 	e.snap = 0;
225 	e.archive = 0;
226 	entryPack(&e, b->data, i);
227 	blockDirty(b);
228 
229 	offset = bn*epb + i;
230 	if(offset+1 > size){
231 		if(!sourceSetDirSize(r, offset+1)){
232 			blockPut(b);
233 			return nil;
234 		}
235 	}
236 
237 	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
238 	blockPut(b);
239 	return rr;
240 }
241 
/*
 * Shared implementation of sourceRemove (doremove==1) and
 * sourceTruncate (doremove==0).  Clears the entry for r and
 * unlinks the block tree it pointed at.  Removal additionally
 * bumps the generation (so stale Sources get ERemoved from
 * sourceLoad), deactivates the entry, and closes r.
 * Requires r locked; returns 1 on success, 0 on failure.
 */
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* the entry block must be writable in the current epoch */
	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		/* bump gen so other Sources on this slot notice removal */
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		/* truncate: keep the entry active but forget the local copy */
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	/* release the old tree, if it lived in local storage */
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag);
	blockPut(b);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}
295 
/* Permanently remove the source: clear its entry and drop its blocks. */
int
sourceRemove(Source *r)
{
	return sourceKill(r, 1);
}
301 
/* Truncate the source to zero length, keeping the entry itself alive. */
int
sourceTruncate(Source *r)
{
	return sourceKill(r, 0);
}
307 
308 uvlong
309 sourceGetSize(Source *r)
310 {
311 	Entry e;
312 	Block *b;
313 
314 	assert(sourceIsLocked(r));
315 	b = sourceLoad(r, &e);
316 	if(b == nil)
317 		return 0;
318 	blockPut(b);
319 
320 	return e.size;
321 }
322 
/*
 * Free the parts of the tree rooted at entry e that lie beyond
 * the new size.  At each pointer level, pointers to wholly
 * unneeded subtrees are zeroed (and their local blocks unlinked),
 * then the walk descends into the one partially needed subtree.
 * At the bottom, the tail of the final data block is zeroed.
 * Returns 1 on success, 0 if a block could not be touched.
 */
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	/* ptrsz: bytes addressed by one pointer at the top level */
	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock
		|| (b->l.state&BsClosed)
		|| b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			/* new size falls exactly on a subtree boundary: done */
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock
	|| (b->l.state&BsClosed)
	|| b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		/* zero the tail of the final, partially kept data block */
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}
398 
/*
 * Set the size of source r to size bytes, adjusting the depth of
 * its pointer tree as needed and freeing blocks beyond the new
 * size.  size==0 is handled as a full truncation.  Returns 1 on
 * success, 0 on error.
 */
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	/* refuse sizes beyond the format's addressing limits */
	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	/* fix the tree depth first, then drop now-unreachable data */
	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	/* best effort: failure here leaves extra blocks but a correct size */
	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
449 
450 int
451 sourceSetDirSize(Source *r, ulong ds)
452 {
453 	uvlong size;
454 	int epb;
455 
456 	assert(sourceIsLocked(r));
457 	epb = r->dsize/VtEntrySize;
458 
459 	size = (uvlong)r->dsize*(ds/epb);
460 	size += VtEntrySize*(ds%epb);
461 	return sourceSetSize(r, size);
462 }
463 
464 ulong
465 sourceGetDirSize(Source *r)
466 {
467 	ulong ds;
468 	uvlong size;
469 	int epb;
470 
471 	assert(sourceIsLocked(r));
472 	epb = r->dsize/VtEntrySize;
473 
474 	size = sourceGetSize(r);
475 	ds = epb*(size/r->dsize);
476 	ds += (size%r->dsize)/VtEntrySize;
477 	return ds;
478 }
479 
480 int
481 sourceGetEntry(Source *r, Entry *e)
482 {
483 	Block *b;
484 
485 	assert(sourceIsLocked(r));
486 	b = sourceLoad(r, e);
487 	if(b == nil)
488 		return 0;
489 	blockPut(b);
490 
491 	return 1;
492 }
493 
/*
 * Step from block p to its child at index, fetched in the given
 * mode.  If p is a directory block the child is the root of the
 * tree described by entry e; otherwise it is the score stored at
 * slot index of the pointer block.  For writable modes, if the
 * child does not belong to the current epoch it is copied
 * (copy on write) and p is updated to point at the copy.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;
	uchar oscore[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		/* directory block: child is the tree rooted at entry e */
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		/* pointer block: child score is at slot index */
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b == nil || mode == OReadOnly)
		return b;

	/* writing: the parent must already be writable in this epoch */
	assert(!(p->l.state&BsClosed) && p->l.epoch == fs->ehi);
	if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		/* first locally-written block in this tree needs a fresh tag */
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	if(p->l.type == BtDir){
		/* repoint the entry at the copy */
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		/* repoint the pointer slot at the copy */
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	/* drop the link to the block we copied from, if it was local */
	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag);

	return b;
}
557 
/*
 * Change the depth of the source r.
 * The entry e for r is contained in block p.
 * Grows by stacking new pointer blocks on top of the current
 * root until the requested depth is reached; returns 1 only if
 * the full depth was achieved.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	u32int tag;
	int type;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		/* new pointer block whose first slot points at the old root */
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		/* record write ordering: bb's slot 0 went from zero to b's score */
		blockDependency(bb, b, 0, vtZeroScore, nil);
		blockPut(b);
		blockDirty(bb);
		b = bb;
	}

	/* write the updated entry; p depends on the (new) root b */
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}
611 
/*
 * Reduce the depth of the tree for source r to depth.
 * The entry e for r is contained in block p.  Walks down from the
 * current root to the block that will become the new root, then
 * repoints the entry and frees the old top of the tree.  Returns
 * 1 only if the requested depth was fully reached.
 */
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *nb, *ob, *rb;
	u32int tag;
	int type, d;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(rb == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;

	/* b: current front of the walk; ob: its parent; rb: old root */
	ob = nil;
	b = rb;
	for(d=e->depth; d > depth; d--, type++){
		/* each level's first pointer leads to the next level down */
		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			blockPut(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		/* could not descend even one level */
		blockPut(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
 	 */

	/* (i) */
	e->depth = d;
	memmove(e->score, b->score, VtScoreSize);
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockDirty(p);

	/* (ii) */
	memmove(ob->data, vtZeroScore, VtScoreSize);
	blockDependency(ob, p, 0, b->score, nil);
	blockDirty(ob);

	/* (iii) */
	if(rb->addr != NilBlock)
		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag);

	blockPut(rb);
	if(ob!=nil && ob!=rb)
		blockPut(ob);
	blockPut(b);

	return d == depth;
}
690 
/*
 * Return block number bn of source r, walking the pointer tree
 * from the entry down (copying blocks on write as needed via
 * blockWalk).  The returned block is held; caller must blockPut.
 * Returns nil with an error set on failure.
 */
Block *
sourceBlock(Source *r, ulong bn, int mode)
{
	Block *b, *bb;
	int index[VtPointerDepth+1];
	Entry e;
	int i, np;
	int m;

	assert(sourceIsLocked(r));
	assert(bn != NilBlock);

	/* mode for intermediate block */
	m = mode;
	if(m == OOverWrite)
		m = OReadWrite;

	b = sourceLoad(r, &e);
	if(b == nil)
		return nil;

	/* decompose bn into one index per pointer level (radix np) */
	np = e.psize/VtScoreSize;
	memset(index, 0, sizeof(index));
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			vtSetError(EBadAddr);
			goto Err;
		}
		index[i] = bn % np;
		bn /= np;
	}

	/* tree not deep enough to address bn: grow it (write modes only) */
	if(i > e.depth){
		if(mode == OReadOnly){
			vtSetError(EBadAddr);
			goto Err;
		}
		if(!sourceGrowDepth(r, b, &e, i))
			goto Err;
	}

	/* topmost step: the entry's slot within its directory block */
	index[e.depth] = r->offset % r->epb;

	for(i=e.depth; i>=0; i--){
		bb = blockWalk(b, index[i], m, r->fs, &e);
		if(bb == nil)
			goto Err;
		blockPut(b);
		b = bb;
	}
	return b;
Err:
	blockPut(b);
	return nil;
}
746 
747 void
748 sourceClose(Source *r)
749 {
750 	if(r == nil)
751 		return;
752 	vtLock(r->lk);
753 	r->ref--;
754 	if(r->ref){
755 		vtUnlock(r->lk);
756 		return;
757 	}
758 	assert(r->ref == 0);
759 	vtUnlock(r->lk);
760 	if(r->parent)
761 		sourceClose(r->parent);
762 	vtLockFree(r->lk);
763 	memset(r, ~0, sizeof(*r));
764 	vtMemFree(r);
765 }
766 
767 /*
768  * Retrieve the block containing the entry for r.
769  * If a snapshot has happened, we might need
770  * to get a new copy of the block.  We avoid this
771  * in the common case by caching the score for
772  * the block and the last epoch in which it was valid.
773  *
774  * We use r->mode to tell the difference between active
775  * file system sources (OReadWrite) and sources for the
776  * snapshot file system (OReadOnly).
777  */
778 static Block*
779 sourceLoadBlock(Source *r, int mode)
780 {
781 	u32int addr;
782 	Block *b;
783 
784 	switch(r->mode){
785 	default:
786 		assert(0);
787 	case OReadWrite:
788 		assert(r->mode == OReadWrite);
789 		/*
790 		 * This needn't be true -- we might bump the low epoch
791 		 * to reclaim some old blocks, but since this score is
792 		 * OReadWrite, the blocks must all still be open, so none
793 		 * are reclaimed.  Thus it's okay that the epoch is so low.
794 		 * Proceed.
795 		assert(r->epoch >= r->fs->elo);
796 		 */
797 		if(r->epoch == r->fs->ehi){
798 			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
799 			assert(r->epoch == b->l.epoch);
800 			if(b == nil)
801 				return nil;
802 			return b;
803 		}
804 		assert(r->parent != nil);
805 		if(!sourceLock(r->parent, OReadWrite))
806 			return nil;
807 		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
808 		sourceUnlock(r->parent);
809 		if(b == nil)
810 			return nil;
811 		assert(b->l.epoch == r->fs->ehi);
812 		memmove(r->score, b->score, VtScoreSize);
813 		r->scoreEpoch = b->l.epoch;
814 		r->tag = b->l.tag;
815 		r->epoch = r->fs->ehi;
816 		return b;
817 
818 	case OReadOnly:
819 		addr = globalToLocal(r->score);
820 		if(addr == NilBlock)
821 			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
822 
823 		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
824 		if(b)
825 			return b;
826 
827 		/*
828 		 * If it failed because the epochs don't match, the block has been
829 		 * archived and reclaimed.  Rewalk from the parent and get the
830 		 * new pointer.  This can't happen in the OReadWrite case
831 		 * above because blocks in the current epoch don't get
832 		 * reclaimed.  The fact that we're OReadOnly means we're
833 		 * a snapshot.  (Or else the file system is read-only, but then
834 		 * the archiver isn't going around deleting blocks.)
835 		 */
836 		if(strcmp(vtGetError(), ELabelMismatch) == 0){
837 			if(!sourceLock(r->parent, OReadOnly))
838 				return nil;
839 			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
840 			sourceUnlock(r->parent);
841 			if(b){
842 				fprint(2, "sourceAlloc: lost %V found %V\n",
843 					r->score, b->score);
844 				memmove(r->score, b->score, VtScoreSize);
845 				r->scoreEpoch = b->l.epoch;
846 				return b;
847 			}
848 		}
849 		return nil;
850 	}
851 }
852 
853 int
854 sourceLock(Source *r, int mode)
855 {
856 	Block *b;
857 
858 	if(mode == -1)
859 		mode = r->mode;
860 
861 	b = sourceLoadBlock(r, mode);
862 	if(b == nil)
863 		return 0;
864 	/*
865 	 * The fact that we are holding b serves as the
866 	 * lock entitling us to write to r->b.
867 	 */
868 	assert(r->b == nil);
869 	r->b = b;
870 	if(r->mode == OReadWrite)
871 		assert(r->epoch == r->b->l.epoch);
872 	return 1;
873 }
874 
/*
 * Lock two (usually sibling) sources.  This needs special care
 * because the Entries for both sources might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 * Returns 1 on success (both locked), 0 on failure (neither locked).
 */
int
sourceLock2(Source *r, Source *rr, int mode)
{
	Block *b, *bb;

	if(rr == nil)
		return sourceLock(r, mode);

	/* mode -1 means "use r's own mode" */
	if(mode == -1)
		mode = r->mode;

	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		/* both entries live in the same block: load once, hold twice */
		b = sourceLoadBlock(r, mode);
		if(b == nil)
			return 0;
		blockDupLock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		/* acquire rr's entry block before r's */
		bb = sourceLoadBlock(rr, mode);
		b = sourceLoadBlock(r, mode);
	}else{
		/* acquire r's entry block before rr's */
		b = sourceLoadBlock(r, mode);
		bb = sourceLoadBlock(rr, mode);
	}
	if(b == nil || bb == nil){
		/* one side failed: release whichever succeeded */
		if(b)
			blockPut(b);
		if(bb)
			blockPut(bb);
		return 0;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 1;
}
920 
921 void
922 sourceUnlock(Source *r)
923 {
924 	Block *b;
925 
926 	if(r->b == nil){
927 		fprint(2, "sourceUnlock: already unlocked\n");
928 		abort();
929 	}
930 	b = r->b;
931 	r->b = nil;
932 	blockPut(b);
933 }
934 
/*
 * Unpack r's entry from its (already locked) entry block r->b
 * into *e.  On success returns r->b with an extra hold that the
 * caller must blockPut; returns nil if the entry cannot be
 * unpacked or the generation shows the entry was reused.
 */
static Block*
sourceLoad(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = r->b;
	if(!entryUnpack(e, b->data, r->offset % r->epb))
		return nil;
	/* generation mismatch: the slot was recycled, file is gone */
	if(e->gen != r->gen){
		vtSetError(ERemoved);
		return nil;
	}
	blockDupLock(b);
	return b;
}
951 
952 static int
953 sizeToDepth(uvlong s, int psize, int dsize)
954 {
955 	int np;
956 	int d;
957 
958 	/* determine pointer depth */
959 	np = psize/VtScoreSize;
960 	s = (s + dsize - 1)/dsize;
961 	for(d = 0; s > 1; d++)
962 		s = (s + np - 1)/np;
963 	return d;
964 }
965 
966 static u32int
967 tagGen(void)
968 {
969 	u32int tag;
970 
971 	for(;;){
972 		tag = lrand();
973 		if(tag >= UserTag)
974 			break;
975 	}
976 	return tag;
977 }
978