xref: /plan9/sys/src/cmd/fossil/source.c (revision dc5a79c1208f0704eeb474acc990728f8b4854f5)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static int	sizeToDepth(uvlong s, int psize, int dsize);
7 static u32int 	tagGen(void);
8 static Block 	*sourceLoad(Source *r, Entry *e);
9 static Block	*sourceLoadUnlocked(Source *r, Entry *e);
10 static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
11 static int	sourceShrinkSize(Source*, Entry*, uvlong);
12 static int	sourceGrowDepth(Source*, Block*, Entry*, int);
13 
/* a source is "locked" while it holds a reference to the block containing its entry */
#define sourceIsLocked(r)	((r)->b != nil)
15 
/*
 * Build a Source for the directory entry at index (offset % epb)
 * in block b.  p is the already-locked parent source; it is nil only
 * for the file system root, in which case offset must be 0.
 * On success the new source holds a reference to p.
 * Returns nil (with vtSetError set) on a bad entry or stale snapshot.
 */
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
	Source *r;
	int epb;
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;	/* entries per block in the parent's data */

	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	/* the pointer tree must be at least deep enough to address e.size bytes */
	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}

	/* locally-written trees must carry a non-zero tag */
	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}

	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		/* cannot open a snapshotted entry for writing */
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		/* reject snapshots that fall outside the live epoch window */
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		vtLock(p->lk);
		/* a writable child requires a writable parent */
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;

//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);

	return r;
Bad:
	vtSetError(EBadEntry);
	return nil;

}
115 
116 Source *
117 sourceRoot(Fs *fs, u32int addr, int mode)
118 {
119 	Source *r;
120 	Block *b;
121 
122 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
123 	if(b == nil)
124 		return nil;
125 
126 	if(mode == OReadWrite)
127 	if((b->l.state&BsClosed) || b->l.epoch != fs->ehi){
128 		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
129 		blockPut(b);
130 		vtSetError(EBadRoot);
131 		return nil;
132 	}
133 
134 	r = sourceAlloc(fs, b, nil, 0, mode);
135 	blockPut(b);
136 	return r;
137 }
138 
139 Source *
140 sourceOpen(Source *r, ulong offset, int mode)
141 {
142 	ulong bn;
143 	Block *b;
144 
145 	assert(sourceIsLocked(r));
146 	if(r->mode == OReadWrite)
147 		assert(r->epoch == r->b->l.epoch);
148 	if(!r->dir){
149 		vtSetError(ENotDir);
150 		return nil;
151 	}
152 
153 	bn = offset/(r->dsize/VtEntrySize);
154 
155 	b = sourceBlock(r, bn, mode);
156 	if(b == nil)
157 		return nil;
158 	r = sourceAlloc(r->fs, b, r, offset, mode);
159 	blockPut(b);
160 	return r;
161 }
162 
163 Source *
164 sourceCreate(Source *r, int dsize, int dir, u32int offset)
165 {
166 	int i;
167 	Block *b;
168 	u32int bn, size;
169 	Entry e;
170 	int epb;
171 	int psize;
172 	Source *rr;
173 
174 	assert(sourceIsLocked(r));
175 
176 	if(!r->dir){
177 		vtSetError(ENotDir);
178 		return nil;
179 	}
180 
181 	epb = r->dsize/VtEntrySize;
182 	psize = (dsize/VtScoreSize)*VtScoreSize;
183 
184 	size = sourceGetDirSize(r);
185 	if(offset == 0){
186 		/*
187 		 * look at a random block to see if we can find an empty entry
188 		 */
189 		offset = lnrand(size+1);
190 		offset -= offset % epb;
191 	}
192 
193 	/* try the given block and then try the last block */
194 	for(;;){
195 		bn = offset/epb;
196 		b = sourceBlock(r, bn, OReadWrite);
197 		if(b == nil)
198 			return nil;
199 		for(i=offset%r->epb; i<epb; i++){
200 			entryUnpack(&e, b->data, i);
201 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
202 				goto Found;
203 		}
204 		blockPut(b);
205 		if(offset == size){
206 			fprint(2, "sourceCreate: cannot happen\n");
207 			vtSetError("sourceCreate: cannot happen");
208 			return nil;
209 		}
210 		offset = size;
211 	}
212 
213 Found:
214 	/* found an entry - gen already set */
215 	e.psize = psize;
216 	e.dsize = dsize;
217 	e.flags = VtEntryActive;
218 	if(dir)
219 		e.flags |= VtEntryDir;
220 	e.depth = 0;
221 	e.size = 0;
222 	memmove(e.score, vtZeroScore, VtScoreSize);
223 	e.tag = 0;
224 	e.snap = 0;
225 	e.archive = 0;
226 	entryPack(&e, b->data, i);
227 	blockDirty(b);
228 
229 	offset = bn*epb + i;
230 	if(offset+1 > size){
231 		if(!sourceSetDirSize(r, offset+1)){
232 			blockPut(b);
233 			return nil;
234 		}
235 	}
236 
237 	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
238 	blockPut(b);
239 	return rr;
240 }
241 
/*
 * Zero out the entry for r.  If doremove is set, the entry is freed
 * for reuse (gen is bumped so stale sources are detected) and r
 * itself is unlocked and closed; otherwise the file is truncated
 * to size 0 but the entry stays allocated.
 * Caller must hold the source lock on r.
 */
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* the entry block must be writable in the current epoch */
	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		/* gen of ~0 means the slot is permanently retired */
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	/* drop the reference to the old tree, if it was a local block */
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag);
	blockPut(b);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}
295 
/* free r's entry for reuse and close r; see sourceKill */
int
sourceRemove(Source *r)
{
	return sourceKill(r, 1);
}
301 
/* truncate r to zero length, keeping the entry allocated; see sourceKill */
int
sourceTruncate(Source *r)
{
	return sourceKill(r, 0);
}
307 
308 uvlong
309 sourceGetSize(Source *r)
310 {
311 	Entry e;
312 	Block *b;
313 
314 	assert(sourceIsLocked(r));
315 	b = sourceLoad(r, &e);
316 	if(b == nil)
317 		return 0;
318 	blockPut(b);
319 
320 	return e.size;
321 }
322 
/*
 * Zero the tail of the tree for entry e so only the first size bytes
 * remain reachable.  Walks down the pointer blocks, zeroing scores
 * past the cut point at each level, then clears the tail of the
 * partially-kept data block.  Returns 0 if any block on the path is
 * not writable in the current epoch (we refuse to copy blocks just
 * to zero parts of them).
 */
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	/* bytes addressed by one pointer at the root level */
	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	/* descend while b is still a pointer block */
	while(type&BtLevelMask){
		if(b->addr == NilBlock
		|| (b->l.state&BsClosed)
		|| b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock
	|| (b->l.state&BsClosed)
	|| b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}
398 
/*
 * Set the length of r to size, adjusting the pointer-tree depth as
 * needed and zeroing now-unreachable data on shrink.  size == 0 is
 * handled as a plain truncate.  Caller must hold the source lock.
 */
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	/* reject sizes the pointer tree cannot address */
	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	/* best effort: a failed shrink leaves extra blocks but a correct size */
	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}
449 
450 int
451 sourceSetDirSize(Source *r, ulong ds)
452 {
453 	uvlong size;
454 	int epb;
455 
456 	assert(sourceIsLocked(r));
457 	epb = r->dsize/VtEntrySize;
458 
459 	size = (uvlong)r->dsize*(ds/epb);
460 	size += VtEntrySize*(ds%epb);
461 	return sourceSetSize(r, size);
462 }
463 
464 ulong
465 sourceGetDirSize(Source *r)
466 {
467 	ulong ds;
468 	uvlong size;
469 	int epb;
470 
471 	assert(sourceIsLocked(r));
472 	epb = r->dsize/VtEntrySize;
473 
474 	size = sourceGetSize(r);
475 	ds = epb*(size/r->dsize);
476 	ds += (size%r->dsize)/VtEntrySize;
477 	return ds;
478 }
479 
480 int
481 sourceGetEntry(Source *r, Entry *e)
482 {
483 	Block *b;
484 
485 	assert(sourceIsLocked(r));
486 	b = sourceLoad(r, e);
487 	if(b == nil)
488 		return 0;
489 	blockPut(b);
490 
491 	return 1;
492 }
493 
/*
 * Fetch the child of block p at the given index: through the entry e
 * when p is a directory block, through the score slot otherwise.
 * For writable modes the child is copied (copy-on-write) into the
 * current epoch when necessary, and p is updated to point at the copy.
 */
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;
	uchar oscore[VtScoreSize];
	Entry oe;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		/* directory block: the child is the tree rooted at entry e */
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		/* pointer block: the child is one level down */
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b == nil || mode == OReadOnly)
		return b;

	/* p must be writable; if b already is too, no copy is needed */
	assert(!(p->l.state&BsClosed) && p->l.epoch == fs->ehi);
	if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		/* first local block in this tree: give it a fresh tag */
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	assert(b->l.epoch == fs->ehi);

	blockDirty(b);
	if(p->l.type == BtDir){
		/* repoint the entry at the copy; record the old entry for recovery */
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
		blockDependency(p, b, index, nil, &oe);
	}else{
		/* repoint the score slot; record the old score for recovery */
		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
		blockDependency(p, b, index, oscore, nil);
	}
	blockDirty(p);

	/* the old copy is no longer referenced from p */
	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag);

	return b;
}
557 
/*
 * Change the depth of the source r.
 * The entry e for r is contained in block p.
 * Adds pointer-block layers above the current root until the tree
 * is depth levels deep (or allocation fails).
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	u32int tag;
	int type;
	Entry oe;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		/* allocate a new root one level up, pointing at the old root */
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		/* bb must not reach disk before b */
		blockDependency(bb, b, 0, vtZeroScore, nil);
		blockPut(b);
		blockDirty(bb);
		b = bb;
	}

	/* write the updated entry back into p */
	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, r->offset % r->epb, nil, &oe);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}
612 
613 static int
614 sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
615 {
616 	Block *b, *nb, *ob, *rb;
617 	u32int tag;
618 	int type, d;
619 	Entry oe;
620 
621 	assert(sourceIsLocked(r));
622 	assert(depth <= VtPointerDepth);
623 
624 	type = entryType(e);
625 	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
626 	if(rb == nil)
627 		return 0;
628 
629 	tag = e->tag;
630 	if(tag == 0)
631 		tag = tagGen();
632 
633 	/*
634 	 * Walk down to the new root block.
635 	 * We may stop early, but something is better than nothing.
636 	 */
637 	oe = *e;
638 
639 	ob = nil;
640 	b = rb;
641 /* BUG: explain type++.  i think it is a real bug */
642 	for(d=e->depth; d > depth; d--, type++){
643 		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
644 		if(nb == nil)
645 			break;
646 		if(ob!=nil && ob!=rb)
647 			blockPut(ob);
648 		ob = b;
649 		b = nb;
650 	}
651 
652 	if(b == rb){
653 		blockPut(rb);
654 		return 0;
655 	}
656 
657 	/*
658 	 * Right now, e points at the root block rb, b is the new root block,
659 	 * and ob points at b.  To update:
660 	 *
661 	 *	(i) change e to point at b
662 	 *	(ii) zero the pointer ob -> b
663 	 *	(iii) free the root block
664 	 *
665 	 * p (the block containing e) must be written before
666 	 * anything else.
667  	 */
668 
669 	/* (i) */
670 	e->depth = d;
671 	memmove(e->score, b->score, VtScoreSize);
672 	entryPack(e, p->data, r->offset % r->epb);
673 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
674 	blockDirty(p);
675 
676 	/* (ii) */
677 	memmove(ob->data, vtZeroScore, VtScoreSize);
678 	blockDependency(ob, p, 0, b->score, nil);
679 	blockDirty(ob);
680 
681 	/* (iii) */
682 	if(rb->addr != NilBlock)
683 		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag);
684 
685 	blockPut(rb);
686 	if(ob!=nil && ob!=rb)
687 		blockPut(ob);
688 	blockPut(b);
689 
690 	return d == depth;
691 }
692 
/*
 * Return data block bn of source r, walking (and, in writable modes,
 * copying) the pointer tree from r's entry down.  Grows the tree
 * depth when bn does not fit in the current depth and mode allows it.
 * Caller must hold the source lock.
 */
Block *
sourceBlock(Source *r, ulong bn, int mode)
{
	Block *b, *bb;
	int index[VtPointerDepth+1];
	Entry e;
	int i, np;
	int m;

	assert(sourceIsLocked(r));
	assert(bn != NilBlock);

	/* mode for intermediate block */
	m = mode;
	if(m == OOverWrite)
		m = OReadWrite;

	b = sourceLoad(r, &e);
	if(b == nil)
		return nil;

	/* decompose bn into per-level indices, least significant level first */
	np = e.psize/VtScoreSize;
	memset(index, 0, sizeof(index));
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			vtSetError(EBadAddr);
			goto Err;
		}
		index[i] = bn % np;
		bn /= np;
	}

	if(i > e.depth){
		if(mode == OReadOnly){
			vtSetError(EBadAddr);
			goto Err;
		}
		if(!sourceGrowDepth(r, b, &e, i))
			goto Err;
	}

	/* the topmost "index" selects r's entry within its entry block */
	index[e.depth] = r->offset % r->epb;

	/* walk from the entry block down to the data block */
	for(i=e.depth; i>=0; i--){
		bb = blockWalk(b, index[i], m, r->fs, &e);
		if(bb == nil)
			goto Err;
		blockPut(b);
		b = bb;
	}
	return b;
Err:
	blockPut(b);
	return nil;
}
748 
749 void
750 sourceClose(Source *r)
751 {
752 	if(r == nil)
753 		return;
754 	vtLock(r->lk);
755 	r->ref--;
756 	if(r->ref){
757 		vtUnlock(r->lk);
758 		return;
759 	}
760 	assert(r->ref == 0);
761 	vtUnlock(r->lk);
762 	if(r->parent)
763 		sourceClose(r->parent);
764 	vtLockFree(r->lk);
765 	memset(r, ~0, sizeof(*r));
766 	vtMemFree(r);
767 }
768 
769 /*
770  * Retrieve the block containing the entry for r.
771  * If a snapshot has happened, we might need
772  * to get a new copy of the block.  We avoid this
773  * in the common case by caching the score for
774  * the block and the last epoch in which it was valid.
775  *
776  * We use r->mode to tell the difference between active
777  * file system sources (OReadWrite) and sources for the
778  * snapshot file system (OReadOnly).
779  */
780 static Block*
781 sourceLoadBlock(Source *r, int mode)
782 {
783 	u32int addr;
784 	Block *b;
785 
786 	switch(r->mode){
787 	default:
788 		assert(0);
789 	case OReadWrite:
790 		assert(r->mode == OReadWrite);
791 		/*
792 		 * This needn't be true -- we might bump the low epoch
793 		 * to reclaim some old blocks, but since this score is
794 		 * OReadWrite, the blocks must all still be open, so none
795 		 * are reclaimed.  Thus it's okay that the epoch is so low.
796 		 * Proceed.
797 		assert(r->epoch >= r->fs->elo);
798 		 */
799 		if(r->epoch == r->fs->ehi){
800 			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
801 			assert(r->epoch == b->l.epoch);
802 			if(b == nil)
803 				return nil;
804 			return b;
805 		}
806 		assert(r->parent != nil);
807 		if(!sourceLock(r->parent, OReadWrite))
808 			return nil;
809 		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
810 		sourceUnlock(r->parent);
811 		if(b == nil)
812 			return nil;
813 		assert(b->l.epoch == r->fs->ehi);
814 		memmove(r->score, b->score, VtScoreSize);
815 		r->scoreEpoch = b->l.epoch;
816 		r->tag = b->l.tag;
817 		r->epoch = r->fs->ehi;
818 		return b;
819 
820 	case OReadOnly:
821 		addr = globalToLocal(r->score);
822 		if(addr == NilBlock)
823 			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
824 
825 		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
826 		if(b)
827 			return b;
828 
829 		/*
830 		 * If it failed because the epochs don't match, the block has been
831 		 * archived and reclaimed.  Rewalk from the parent and get the
832 		 * new pointer.  This can't happen in the OReadWrite case
833 		 * above because blocks in the current epoch don't get
834 		 * reclaimed.  The fact that we're OReadOnly means we're
835 		 * a snapshot.  (Or else the file system is read-only, but then
836 		 * the archiver isn't going around deleting blocks.)
837 		 */
838 		if(strcmp(vtGetError(), ELabelMismatch) == 0){
839 			if(!sourceLock(r->parent, OReadOnly))
840 				return nil;
841 			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
842 			sourceUnlock(r->parent);
843 			if(b){
844 				fprint(2, "sourceAlloc: lost %V found %V\n",
845 					r->score, b->score);
846 				memmove(r->score, b->score, VtScoreSize);
847 				r->scoreEpoch = b->l.epoch;
848 				return b;
849 			}
850 		}
851 		return nil;
852 	}
853 }
854 
855 int
856 sourceLock(Source *r, int mode)
857 {
858 	Block *b;
859 
860 	if(mode == -1)
861 		mode = r->mode;
862 
863 	b = sourceLoadBlock(r, mode);
864 	if(b == nil)
865 		return 0;
866 	/*
867 	 * The fact that we are holding b serves as the
868 	 * lock entitling us to write to r->b.
869 	 */
870 	assert(r->b == nil);
871 	r->b = b;
872 	if(r->mode == OReadWrite)
873 		assert(r->epoch == r->b->l.epoch);
874 	return 1;
875 }
876 
/*
 * Lock two (usually sibling) sources.  This needs special care
 * because the Entries for both sources might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
sourceLock2(Source *r, Source *rr, int mode)
{
	Block *b, *bb;

	if(rr == nil)
		return sourceLock(r, mode);

	if(mode == -1)
		mode = r->mode;

	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		/* both entries live in the same block: load once, take a second lock reference */
		b = sourceLoadBlock(r, mode);
		if(b == nil)
			return 0;
		blockDupLock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		/* load rr's block first to keep a consistent lock order */
		bb = sourceLoadBlock(rr, mode);
		b = sourceLoadBlock(r, mode);
	}else{
		b = sourceLoadBlock(r, mode);
		bb = sourceLoadBlock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			blockPut(b);
		if(bb)
			blockPut(bb);
		return 0;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 1;
}
922 
923 void
924 sourceUnlock(Source *r)
925 {
926 	Block *b;
927 
928 	if(r->b == nil){
929 		fprint(2, "sourceUnlock: already unlocked\n");
930 		abort();
931 	}
932 	b = r->b;
933 	r->b = nil;
934 	blockPut(b);
935 }
936 
/*
 * Unpack the entry for r from its (already locked) entry block r->b.
 * On success, returns r->b with an extra lock reference, which the
 * caller must blockPut.  Fails if the entry's generation no longer
 * matches r->gen -- the entry was removed and its slot reused.
 */
static Block*
sourceLoad(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = r->b;
	if(!entryUnpack(e, b->data, r->offset % r->epb))
		return nil;
	if(e->gen != r->gen){
		vtSetError(ERemoved);
		return nil;
	}
	blockDupLock(b);
	return b;
}
953 
954 static int
955 sizeToDepth(uvlong s, int psize, int dsize)
956 {
957 	int np;
958 	int d;
959 
960 	/* determine pointer depth */
961 	np = psize/VtScoreSize;
962 	s = (s + dsize - 1)/dsize;
963 	for(d = 0; s > 1; d++)
964 		s = (s + np - 1)/np;
965 	return d;
966 }
967 
968 static u32int
969 tagGen(void)
970 {
971 	u32int tag;
972 
973 	for(;;){
974 		tag = lrand();
975 		if(tag >= UserTag)
976 			break;
977 	}
978 	return tag;
979 }
980