xref: /plan9-contrib/sys/src/cmd/fossil/source.c (revision 5e96a66c77eb9140492ca53f857cbbf108e128ed)
#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "error.h"

static int	sizeToDepth(uvlong s, int psize, int dsize);
static u32int	tagGen(void);
static Block	*sourceLoad(Source *r, Entry *e);
static Block	*sourceLoadUnlocked(Source *r, Entry *e);
static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
static int	sourceShrinkSize(Source*, Entry*, uvlong);
static int	sourceGrowDepth(Source*, Block*, Entry*, int);

#define sourceIsLocked(r)	((r)->b != nil)

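/*
 * Allocate a Source for the entry at index offset in the
 * directory block b, whose parent source is p (nil for the root).
 * The packed Entry is sanity-checked before use; on failure
 * the error is set (usually EBadEntry) and nil is returned.
 */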
static Source *
sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode)
{
	Source *r;
	int epb;
	u32int epoch;
	Entry e;

	assert(p==nil || sourceIsLocked(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	if(b->l.type != BtDir)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(!entryUnpack(&e, b->data, offset % epb)){
		fprint(2, "entryUnpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n", e.depth, e.size, e.psize, e.dsize);
		goto Bad;
	}

	if((e.flags & VtEntryLocal) && e.tag == 0){
		fprint(2, "flags %#ux tag %#ux\n", e.flags, e.tag);
		goto Bad;
	}

	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, fs->blockSize);
		goto Bad;
	}

	epoch = b->l.epoch;
	if(mode == OReadWrite){
		if(e.snap != 0){
			vtSetError(ESnapRO);
			return nil;
		}
	}else if(e.snap != 0){
		if(e.snap < fs->elo){
			vtSetError(ESnapOld);
			return nil;
		}
		if(e.snap >= fs->ehi)
			goto Bad;
		epoch = e.snap;
	}

	r = vtMemAllocZ(sizeof(Source));
	r->fs = fs;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.flags & VtEntryDir) != 0;
	r->lk = vtLockAlloc();
	r->ref = 1;
	r->parent = p;
	if(p){
		vtLock(p->lk);
		assert(mode == OReadOnly || p->mode == OReadWrite);
		p->ref++;
		vtUnlock(p->lk);
	}
	r->epoch = epoch;
//fprint(2, "sourceAlloc have %V be.%d fse.%d %s\n", b->score, b->l.epoch, r->fs->ehi, mode==OReadWrite ? "rw" : "ro");
	memmove(r->score, b->score, VtScoreSize);
	r->scoreEpoch = b->l.epoch;
	r->offset = offset;
	r->epb = epb;
	r->tag = b->l.tag;

//fprint(2, "sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);

	return r;
Bad:
	vtSetError(EBadEntry);
	return nil;
}

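/*
 * Open the root source: entry 0 of the local directory block at
 * addr, which must carry RootTag.  For OReadWrite the block must
 * be unclosed and belong to the current epoch.
 */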
Source *
sourceRoot(Fs *fs, u32int addr, int mode)
{
	Source *r;
	Block *b;

	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
	if(b == nil)
		return nil;

	if(mode == OReadWrite)
	if((b->l.state&BsClosed) || b->l.epoch != fs->ehi){
		fprint(2, "sourceRoot: fs->ehi = %ud, b->l = %L\n", fs->ehi, &b->l);
		blockPut(b);
		vtSetError(EBadRoot);
		return nil;
	}

	r = sourceAlloc(fs, b, nil, 0, mode);
	blockPut(b);
	return r;
}

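/*
 * Open the offset'th entry of the directory source r
 * as a new source.  r must be locked.
 */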
Source *
sourceOpen(Source *r, ulong offset, int mode)
{
	ulong bn;
	Block *b;

	assert(sourceIsLocked(r));
	if(r->mode == OReadWrite)
		assert(r->epoch == r->b->l.epoch);
	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	bn = offset/(r->dsize/VtEntrySize);

	b = sourceBlock(r, bn, mode);
	if(b == nil)
		return nil;
	r = sourceAlloc(r->fs, b, r, offset, mode);
	blockPut(b);
	return r;
}

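/*
 * Create a new entry (a directory if dir is set) in the directory
 * source r, using data block size dsize.  If offset is 0 a random
 * block is probed for a free slot; failing that, the search moves
 * to the end of the directory.  Returns the new source, opened
 * OReadWrite.
 */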
Source *
sourceCreate(Source *r, int dsize, int dir, u32int offset)
{
	int i;
	Block *b;
	u32int bn, size;
	Entry e;
	int epb;
	int psize;
	Source *rr;

	assert(sourceIsLocked(r));

	if(!r->dir){
		vtSetError(ENotDir);
		return nil;
	}

	epb = r->dsize/VtEntrySize;
	psize = (dsize/VtScoreSize)*VtScoreSize;

	size = sourceGetDirSize(r);
	if(offset == 0){
		/*
		 * look at a random block to see if we can find an empty entry
		 */
		offset = lnrand(size+1);
		offset -= offset % epb;
	}

	/* try the given block and then try the last block */
	for(;;){
		bn = offset/epb;
		b = sourceBlock(r, bn, OReadWrite);
		if(b == nil)
			return nil;
		for(i=offset%r->epb; i<epb; i++){
			entryUnpack(&e, b->data, i);
			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
				goto Found;
		}
		blockPut(b);
		if(offset == size){
			fprint(2, "sourceCreate: cannot happen\n");
			vtSetError("sourceCreate: cannot happen");
			return nil;
		}
		offset = size;
	}

Found:
	/* found an entry - gen already set */
	e.psize = psize;
	e.dsize = dsize;
	e.flags = VtEntryActive;
	if(dir)
		e.flags |= VtEntryDir;
	e.depth = 0;
	e.size = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	e.tag = 0;
	e.snap = 0;
	e.archive = 0;
	entryPack(&e, b->data, i);
	blockDirty(b);

	offset = bn*epb + i;
	if(offset+1 > size){
		if(!sourceSetDirSize(r, offset+1)){
			blockPut(b);
			return nil;
		}
	}

	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite);
	blockPut(b);
	return rr;
}

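/*
 * Common code for sourceRemove and sourceTruncate: clear the entry
 * for r (bumping gen to retire the slot when doremove is set) and
 * remove the link to its old block tree so those blocks can be
 * reclaimed.  When removing, the source is also unlocked and closed.
 */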
static int
sourceKill(Source *r, int doremove)
{
	Entry e;
	Block *b;
	u32int addr;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	assert(b->l.epoch == r->fs->ehi);

	if(doremove==0 && e.size == 0){
		/* already truncated */
		blockPut(b);
		return 1;
	}

	/* remember info on link we are removing */
	addr = globalToLocal(e.score);
	type = entryType(&e);
	tag = e.tag;

	if(doremove){
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else{
		e.flags &= ~VtEntryLocal;
	}
	e.depth = 0;
	e.size = 0;
	e.tag = 0;
	memmove(e.score, vtZeroScore, VtScoreSize);
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	if(addr != NilBlock)
		blockRemoveLink(b, addr, type, tag);
	blockPut(b);

	if(doremove){
		sourceUnlock(r);
		sourceClose(r);
	}

	return 1;
}

int
sourceRemove(Source *r)
{
	return sourceKill(r, 1);
}

int
sourceTruncate(Source *r)
{
	return sourceKill(r, 0);
}

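/*
 * Return the size in bytes recorded in the entry for r.
 */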
uvlong
sourceGetSize(Source *r)
{
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;
	blockPut(b);

	return e.size;
}

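/*
 * Truncate the block tree for entry e down to size bytes:
 * walk the pointer blocks along the new end of file, zeroing
 * the pointers (and removing the links) to blocks that lie
 * entirely beyond the new size, then zero the tail of the
 * last data block.
 */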
static int
sourceShrinkSize(Source *r, Entry *e, uvlong size)
{
	int i, type, ppb;
	uvlong ptrsz;
	u32int addr;
	uchar score[VtScoreSize];
	Block *b;

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	for(i=0; i+1<e->depth; i++)
		ptrsz *= ppb;

	while(type&BtLevelMask){
		if(b->addr == NilBlock
		|| (b->l.state&BsClosed)
		|| b->l.epoch != r->fs->ehi){
			/* not worth copying the block just so we can zero some of it */
			blockPut(b);
			return 0;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++){
			addr = globalToLocal(b->data+i*VtScoreSize);
			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
			blockDirty(b);
			if(addr != NilBlock)
				blockRemoveLink(b, addr, type-1, e->tag);
		}

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			blockPut(b);
			return 1;
		}
		ptrsz /= ppb;
		type--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		blockPut(b);
		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
		if(b == nil)
			return 0;
	}

	if(b->addr == NilBlock
	|| (b->l.state&BsClosed)
	|| b->l.epoch != r->fs->ehi){
		blockPut(b);
		return 0;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(type == BtData && e->dsize > size){
		memset(b->data+size, 0, e->dsize-size);
		blockDirty(b);
	}
	blockPut(b);
	return 1;
}

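/*
 * Set the size of r to size bytes, growing or shrinking the
 * depth of its block tree to match and zeroing any truncated
 * tail, then record the new size in the entry.
 */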
int
sourceSetSize(Source *r, uvlong size)
{
	int depth;
	Entry e;
	Block *b;

	assert(sourceIsLocked(r));
	if(size == 0)
		return sourceTruncate(r);

	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		vtSetError(ETooBig);
		return 0;
	}

	b = sourceLoad(r, &e);
	if(b == nil)
		return 0;

	/* quick out */
	if(e.size == size){
		blockPut(b);
		return 1;
	}

	depth = sizeToDepth(size, e.psize, e.dsize);

	if(depth < e.depth){
		if(!sourceShrinkDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}else if(depth > e.depth){
		if(!sourceGrowDepth(r, b, &e, depth)){
			blockPut(b);
			return 0;
		}
	}

	if(size < e.size)
		sourceShrinkSize(r, &e, size);

	e.size = size;
	entryPack(&e, b->data, r->offset % r->epb);
	blockDirty(b);
	blockPut(b);

	return 1;
}

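/*
 * Set the length of the directory r to hold ds entries.
 * Directory sizes are kept in bytes: full blocks count r->dsize
 * bytes each, a trailing partial block counts VtEntrySize bytes
 * per entry.  (Illustrative numbers, not fossil defaults: with
 * dsize 8192, epb is 8192/40 = 204, so ds = 500 gives
 * 2*8192 + 92*40 = 20064 bytes.)
 */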
int
sourceSetDirSize(Source *r, ulong ds)
{
	uvlong size;
	int epb;

	assert(sourceIsLocked(r));
	epb = r->dsize/VtEntrySize;

	size = (uvlong)r->dsize*(ds/epb);
	size += VtEntrySize*(ds%epb);
	return sourceSetSize(r, size);
}

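/*
 * Inverse of sourceSetDirSize: convert the byte size of the
 * directory r back into a count of entries.
 */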
ulong
sourceGetDirSize(Source *r)
{
	ulong ds;
	uvlong size;
	int epb;

	assert(sourceIsLocked(r));
	epb = r->dsize/VtEntrySize;

	size = sourceGetSize(r);
	ds = epb*(size/r->dsize);
	ds += (size%r->dsize)/VtEntrySize;
	return ds;
}

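/*
 * Copy the entry for r into *e.
 */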
int
sourceGetEntry(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = sourceLoad(r, e);
	if(b == nil)
		return 0;
	blockPut(b);

	return 1;
}

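/*
 * Fetch the child of p named by index: the pointer at that slot of
 * a pointer block, or the entry's root score if p is a BtDir block.
 * If the caller wants to write and the child is not already in the
 * current epoch, it is copied (copy on write) and the pointer in p
 * is updated to the new score.
 */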
static Block *
blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
{
	Block *b;
	Cache *c;
	u32int addr;
	int type;

	c = fs->cache;

	if((p->l.type & BtLevelMask) == 0){
		assert(p->l.type == BtDir);
		type = entryType(e);
		b = cacheGlobal(c, e->score, type, e->tag, mode);
	}else{
		type = p->l.type - 1;
		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
	}

	if(b == nil || mode == OReadOnly)
		return b;

	assert(!(p->l.state&BsClosed) && p->l.epoch == fs->ehi);
	if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi)
		return b;

	/*
	 * Copy on write.
	 */
	if(e->tag == 0){
		assert(p->l.type == BtDir);
		e->tag = tagGen();
		e->flags |= VtEntryLocal;
	}

	addr = b->addr;
	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
	if(b == nil)
		return nil;

	assert(b->l.epoch == fs->ehi);

	if(p->l.type == BtDir){
		memmove(e->score, b->score, VtScoreSize);
		entryPack(e, p->data, index);
	}else{
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
	}
	blockDirty(b);
	blockDependency(p, b, index, b->score);
	blockDirty(p);

	if(addr != NilBlock)
		blockRemoveLink(p, addr, type, e->tag);

	return b;
}

/*
 * Change the depth of the source r.
 * The entry e for r is contained in block p.
 */
static int
sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *bb;
	u32int tag;
	int type;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(b == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(e->depth < depth){
		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
		if(bb == nil)
			break;
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->depth++;
		type++;
		e->tag = tag;
		e->flags |= VtEntryLocal;
		blockDependency(bb, b, -1, nil);
		blockPut(b);
		blockDirty(bb);
		b = bb;
	}

	entryPack(e, p->data, r->offset % r->epb);
	blockDependency(p, b, -1, nil);
	blockPut(b);
	blockDirty(p);

	return e->depth == depth;
}

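/*
 * Reduce the depth of the tree for e to depth: walk down to the
 * block that becomes the new root, point the entry at it, zero the
 * old pointer to it, and drop the link to the old root so the
 * abandoned upper levels can be reclaimed.
 */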
static int
sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
{
	Block *b, *nb, *ob, *rb;
	u32int tag;
	int type, d;

	assert(sourceIsLocked(r));
	assert(depth <= VtPointerDepth);

	type = entryType(e);
	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
	if(rb == nil)
		return 0;

	tag = e->tag;
	if(tag == 0)
		tag = tagGen();

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	ob = nil;
	b = rb;
	for(d=e->depth; d > depth; d--, type--){
		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			blockPut(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		blockPut(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
	 */

	/* (i) */
	e->depth = d;
	memmove(e->score, b->score, VtScoreSize);
	entryPack(e, p->data, r->offset % r->epb);
	blockDirty(p);

	/* (ii) */
	memmove(ob->data, vtZeroScore, VtScoreSize);
	blockDependency(ob, p, -1, nil);
	blockDirty(ob);

	/* (iii) */
	if(rb->addr != NilBlock)
		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag);

	blockPut(rb);
	if(ob!=nil && ob!=rb)
		blockPut(ob);
	if(b!=rb)
		blockPut(b);

	return d == depth;
}

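/*
 * Return block bn of source r, walking the pointer tree from the
 * entry down and growing the depth first if bn lies beyond it.
 * mode says whether intermediate and leaf blocks may be copied
 * for writing.
 */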
Block *
sourceBlock(Source *r, ulong bn, int mode)
{
	Block *b, *bb;
	int index[VtPointerDepth+1];
	Entry e;
	int i, np;
	int m;

	assert(sourceIsLocked(r));
	assert(bn != NilBlock);

	/* mode for intermediate block */
	m = mode;
	if(m == OOverWrite)
		m = OReadWrite;

	b = sourceLoad(r, &e);
	if(b == nil)
		return nil;

	np = e.psize/VtScoreSize;
	memset(index, 0, sizeof(index));
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			vtSetError(EBadAddr);
			goto Err;
		}
		index[i] = bn % np;
		bn /= np;
	}

	if(i > e.depth){
		if(mode == OReadOnly){
			vtSetError(EBadAddr);
			goto Err;
		}
		if(!sourceGrowDepth(r, b, &e, i))
			goto Err;
	}

	index[e.depth] = r->offset % r->epb;

	for(i=e.depth; i>=0; i--){
		bb = blockWalk(b, index[i], m, r->fs, &e);
		if(bb == nil)
			goto Err;
		blockPut(b);
		b = bb;
	}
	return b;
Err:
	blockPut(b);
	return nil;
}

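/*
 * Drop a reference to r, releasing it (and a reference to its
 * parent) when the last one goes away.
 */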
void
sourceClose(Source *r)
{
	if(r == nil)
		return;
	vtLock(r->lk);
	r->ref--;
	if(r->ref){
		vtUnlock(r->lk);
		return;
	}
	assert(r->ref == 0);
	vtUnlock(r->lk);
	if(r->parent)
		sourceClose(r->parent);
	vtLockFree(r->lk);
	memset(r, ~0, sizeof(*r));
	vtMemFree(r);
}

/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system sources (OReadWrite) and sources for the
 * snapshot file system (OReadOnly).
 */
static Block*
sourceLoadBlock(Source *r, int mode)
{
	u32int addr;
	Block *b;

	switch(r->mode){
	default:
		assert(0);
	case OReadWrite:
		assert(r->mode == OReadWrite);
		assert(r->epoch >= r->fs->elo);
		if(r->epoch == r->fs->ehi){
			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
			if(b == nil)
				return nil;
			assert(r->epoch == b->l.epoch);
			return b;
		}
		assert(r->parent != nil);
		if(!sourceLock(r->parent, OReadWrite))
			return nil;
		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
		sourceUnlock(r->parent);
		if(b == nil)
			return nil;
		assert(b->l.epoch == r->fs->ehi);
		memmove(r->score, b->score, VtScoreSize);
		r->scoreEpoch = b->l.epoch;
		r->tag = b->l.tag;
		r->epoch = r->fs->ehi;
		return b;

	case OReadOnly:
		addr = globalToLocal(r->score);
		if(addr == NilBlock)
			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);

		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed.  Rewalk from the parent and get the
		 * new pointer.  This can't happen in the OReadWrite case
		 * above because blocks in the current epoch don't get
		 * reclaimed.  The fact that we're OReadOnly means we're
		 * a snapshot.  (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		if(strcmp(vtGetError(), ELabelMismatch) == 0){
			if(!sourceLock(r->parent, OReadOnly))
				return nil;
			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
			sourceUnlock(r->parent);
			if(b){
				fprint(2, "sourceLoadBlock: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				r->scoreEpoch = b->l.epoch;
				return b;
			}
		}
		return nil;
	}
}

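/*
 * Lock r by loading and holding the block that contains its entry.
 * A mode of -1 means use the source's own mode.
 */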
int
sourceLock(Source *r, int mode)
{
	Block *b;

	if(mode == -1)
		mode = r->mode;

	b = sourceLoadBlock(r, mode);
	if(b == nil)
		return 0;
	/*
	 * The fact that we are holding b serves as the
	 * lock entitling us to write to r->b.
	 */
	assert(r->b == nil);
	r->b = b;
	if(r->mode == OReadWrite)
		assert(r->epoch == r->b->l.epoch);
	return 1;
}

/*
 * Lock two (usually sibling) sources.  This needs special care
 * because the Entries for both sources might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
sourceLock2(Source *r, Source *rr, int mode)
{
	Block *b, *bb;

	if(rr == nil)
		return sourceLock(r, mode);

	if(mode == -1)
		mode = r->mode;

	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		b = sourceLoadBlock(r, mode);
		if(b == nil)
			return 0;
		blockDupLock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		bb = sourceLoadBlock(rr, mode);
		b = sourceLoadBlock(r, mode);
	}else{
		b = sourceLoadBlock(r, mode);
		bb = sourceLoadBlock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			blockPut(b);
		if(bb)
			blockPut(bb);
		return 0;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 1;
}

void
sourceUnlock(Source *r)
{
	Block *b;

	if(r->b == nil){
		fprint(2, "sourceUnlock: already unlocked\n");
		abort();
	}
	b = r->b;
	r->b = nil;
	blockPut(b);
}

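/*
 * Unpack the entry for the locked source r from r->b into *e and
 * return r->b with an extra lock reference (blockDupLock); the
 * caller must blockPut it.
 */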
static Block*
sourceLoad(Source *r, Entry *e)
{
	Block *b;

	assert(sourceIsLocked(r));
	b = r->b;
	if(!entryUnpack(e, b->data, r->offset % r->epb))
		return nil;
	if(e->gen != r->gen){
		vtSetError(ERemoved);
		return nil;
	}
	blockDupLock(b);
	return b;
}

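/*
 * Number of pointer levels needed to reach a file of s bytes
 * with pointer blocks of psize bytes and data blocks of dsize
 * bytes.  Illustrative numbers: with dsize 8192 and psize 1020
 * (51 scores per pointer block), a 1 MB file needs 128 data
 * blocks and therefore depth 2.
 */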
static int
sizeToDepth(uvlong s, int psize, int dsize)
{
	int np;
	int d;

	/* determine pointer depth */
	np = psize/VtScoreSize;
	s = (s + dsize - 1)/dsize;
	for(d = 0; s > 1; d++)
		s = (s + np - 1)/np;
	return d;
}

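/*
 * Generate a random tag outside the reserved range (>= UserTag).
 */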
static u32int
tagGen(void)
{
	u32int tag;

	for(;;){
		tag = lrand();
		if(tag >= UserTag)
			break;
	}
	return tag;
}
959