xref: /plan9-contrib/sys/src/cmd/fossil/source.c (revision 8deabd962e84f51c67a12f970084955d97d8a8f2)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static int	sizeToDepth(uvlong s, int psize, int dsize);
7 static u32int 	tagGen(void);
8 static Block 	*sourceLoad(Source *r, Entry *e);
9 static Block	*sourceLoadUnlocked(Source *r, Entry *e);
10 static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
11 static int	sourceShrinkSize(Source*, Entry*, uvlong);
12 static int	sourceGrowDepth(Source*, Block*, Entry*, int);
13 
14 #define sourceIsLocked(r)	((r)->b != nil)
15 
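/*
 * Descriptive note (added): sourceAlloc builds an in-memory Source for the
 * entry stored at slot offset%epb of the directory block b.  The entry is
 * unpacked and checked: it must be active, have sane psize/dsize (at least
 * 256, at most the fs block size), a depth consistent with its size, and a
 * tag if it is local.  Bad entries fail with EBadEntry; snapshot conflicts
 * fail with ESnapRO or ESnapOld.  For read-only snapshot entries the
 * source's epoch is taken from e.snap rather than from b.
 */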
16 static Source *
17 sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
18 {
19 	Source *r;
20 	int epb;
21 	u32int epoch;
22 	Entry e;
23 
24 	assert(p==nil || sourceIsLocked(p));
25 
26 	if(p == nil){
27 		assert(offset == 0);
28 		epb = 1;
29 	}else
30 		epb = p->dsize / VtEntrySize;
31 
32 	if(b->l.type != BtDir)
33 		goto Bad;
34 
35 	/*
36 	 * a non-active entry is the only thing that
37 	 * can legitimately happen here; all the other
38 	 * failure cases get error prints.
39 	 */
40 	if(!entryUnpack(&e, b->data, offset % epb)){
41 		fprint(2, "%s: %V: sourceAlloc: entryUnpack failed\n", argv0,
42 			b->score);
43 		goto Bad;
44 	}
45 	if(!(e.flags & VtEntryActive)){
46 		if(0) fprint(2, "%s: %V: sourceAlloc: not active\n",
47 			argv0, e.score);
48 		goto Bad;
49 	}
50 	if(e.psize < 256 || e.dsize < 256){
51 		fprint(2, "%s: %V: sourceAlloc: psize %ud dsize %ud\n",
52 			argv0, e.score, e.psize, e.dsize);
53 		goto Bad;
54 	}
55 
56 	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
57 		fprint(2, "%s: %V: sourceAlloc: depth %ud size %llud psize %ud dsize %ud\n",
58 			argv0, e.score, e.depth, e.size, e.psize, e.dsize);
59 		goto Bad;
60 	}
61 
62 	if((e.flags & VtEntryLocal) && e.tag == 0){
63 		fprint(2, "%s: %V: sourceAlloc: flags %#ux tag %#ux\n",
64 			argv0, e.score, e.flags, e.tag);
65 		goto Bad;
66 	}
67 
68 	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
69 		fprint(2, "%s: %V: sourceAlloc: psize %ud dsize %ud blocksize %ud\n",
70 			argv0, e.score, e.psize, e.dsize, fs->blockSize);
71 		goto Bad;
72 	}
73 
74 	epoch = b->l.epoch;
75 	if(mode == OReadWrite){
76 		if(e.snap != 0){
77 			vtSetError(ESnapRO);
78 			return nil;
79 		}
80 	}else if(e.snap != 0){
81 		if(e.snap < fs->elo){
82 			vtSetError(ESnapOld);
83 			return nil;
84 		}
85 		if(e.snap >= fs->ehi)
86 			goto Bad;
87 		epoch = e.snap;
88 	}
89 
90 	r = vtMemAllocZ(sizeof(Source));
91 	r->fs = fs;
92 	r->mode = mode;
93 	r->issnapshot = issnapshot;
94 	r->dsize = e.dsize;
95 	r->gen = e.gen;
96 	r->dir = (e.flags & VtEntryDir) != 0;
97 	r->lk = vtLockAlloc();
98 	r->ref = 1;
99 	r->parent = p;
100 	if(p){
101 		vtLock(p->lk);
102 		assert(mode == OReadOnly || p->mode == OReadWrite);
103 		p->ref++;
104 		vtUnlock(p->lk);
105 	}
106 	r->epoch = epoch;
107 //	fprint(2, "%s: sourceAlloc: have %V be.%d fse.%d %s\n", argv0, b->score,
108 //		b->l.epoch, r->fs->ehi, mode == OReadWrite? "rw": "ro");
109 	memmove(r->score, b->score, VtScoreSize);
110 	r->scoreEpoch = b->l.epoch;
111 	r->offset = offset;
112 	r->epb = epb;
113 	r->tag = b->l.tag;
114 
115 //	fprint(2, "%s: sourceAlloc: %p -> %V %d\n", argv0, r, r->score, r->offset);
116 
117 	return r;
118 Bad:
119 	vtSetError(EBadEntry);
120 	return nil;
121 }
122 
123 Source *
124 sourceRoot(Fs *fs, u32int addr, int mode)
125 {
126 	Source *r;
127 	Block *b;
128 
129 	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
130 	if(b == nil)
131 		return nil;
132 
133 	if(mode == OReadWrite && b->l.epoch != fs->ehi){
134 		fprint(2, "%s: sourceRoot: fs->ehi = %ud, b->l = %L\n",
135 			argv0, fs->ehi, &b->l);
136 		blockPut(b);
137 		vtSetError(EBadRoot);
138 		return nil;
139 	}
140 
141 	r = sourceAlloc(fs, b, nil, 0, mode, 0);
142 	blockPut(b);
143 	return r;
144 }
145 
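/*
 * Descriptive note (added): sourceOpen opens the entry at index offset
 * within the directory source r.  A directory data block holds
 * epb = dsize/VtEntrySize entries, so the entry lives in block offset/epb;
 * sourceAlloc then unpacks slot offset%epb of that block.
 */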
146 Source *
147 sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
148 {
149 	ulong bn;
150 	Block *b;
151 
152 	assert(sourceIsLocked(r));
153 	if(r->mode == OReadWrite)
154 		assert(r->epoch == r->b->l.epoch);
155 	if(!r->dir){
156 		vtSetError(ENotDir);
157 		return nil;
158 	}
159 
160 	bn = offset/(r->dsize/VtEntrySize);
161 
162 	b = sourceBlock(r, bn, mode);
163 	if(b == nil)
164 		return nil;
165 	r = sourceAlloc(r->fs, b, r, offset, mode, issnapshot);
166 	blockPut(b);
167 	return r;
168 }
169 
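/*
 * Descriptive note (added): sourceCreate allocates a new entry (file or
 * directory) in the directory source r.  To spread entries out, the search
 * for a free slot starts at a random block when offset is 0 and falls back
 * to the first slot past the current end of the directory.  Slots whose gen
 * is ~0 are permanently retired and never reused.
 */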
170 Source *
171 sourceCreate(Source *r, int dsize, int dir, u32int offset)
172 {
173 	int i;
174 	Block *b;
175 	u32int bn, size;
176 	Entry e;
177 	int epb;
178 	int psize;
179 	Source *rr;
180 
181 	assert(sourceIsLocked(r));
182 
183 	if(!r->dir){
184 		vtSetError(ENotDir);
185 		return nil;
186 	}
187 
188 	epb = r->dsize/VtEntrySize;
189 	psize = (dsize/VtScoreSize)*VtScoreSize;
190 
191 	size = sourceGetDirSize(r);
192 	if(offset == 0){
193 		/*
194 		 * look at a random block to see if we can find an empty entry
195 		 */
196 		offset = lnrand(size+1);
197 		offset -= offset % epb;
198 	}
199 
200 	/* try the given block and then try the last block */
201 	for(;;){
202 		bn = offset/epb;
203 		b = sourceBlock(r, bn, OReadWrite);
204 		if(b == nil)
205 			return nil;
206 		for(i=offset%r->epb; i<epb; i++){
207 			entryUnpack(&e, b->data, i);
208 			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
209 				goto Found;
210 		}
211 		blockPut(b);
212 		if(offset == size){
213 			fprint(2, "sourceCreate: cannot happen\n");
214 			vtSetError("sourceCreate: cannot happen");
215 			return nil;
216 		}
217 		offset = size;
218 	}
219 
220 Found:
221 	/* found an entry - gen already set */
222 	e.psize = psize;
223 	e.dsize = dsize;
224 	assert(psize && dsize);
225 	e.flags = VtEntryActive;
226 	if(dir)
227 		e.flags |= VtEntryDir;
228 	e.depth = 0;
229 	e.size = 0;
230 	memmove(e.score, vtZeroScore, VtScoreSize);
231 	e.tag = 0;
232 	e.snap = 0;
233 	e.archive = 0;
234 	entryPack(&e, b->data, i);
235 	blockDirty(b);
236 
237 	offset = bn*epb + i;
238 	if(offset+1 > size){
239 		if(!sourceSetDirSize(r, offset+1)){
240 			blockPut(b);
241 			return nil;
242 		}
243 	}
244 
245 	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
246 	blockPut(b);
247 	return rr;
248 }
249 
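/*
 * Descriptive note (added): sourceKill zeroes the entry for r in its parent
 * directory block.  With doremove set the entry is freed outright: its gen
 * is bumped so stale Sources referring to it later fail with ERemoved, and
 * r itself is unlocked and closed.  Without doremove the entry survives but
 * becomes an empty, zero-depth file pointing at vtZeroScore, with
 * VtEntryLocal cleared.
 */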
250 static int
251 sourceKill(Source *r, int doremove)
252 {
253 	Entry e;
254 	Block *b;
255 	u32int addr;
256 	u32int tag;
257 	int type;
258 
259 	assert(sourceIsLocked(r));
260 	b = sourceLoad(r, &e);
261 	if(b == nil)
262 		return 0;
263 
264 	assert(b->l.epoch == r->fs->ehi);
265 
266 	if(doremove==0 && e.size == 0){
267 		/* already truncated */
268 		blockPut(b);
269 		return 1;
270 	}
271 
272 	/* remember info on link we are removing */
273 	addr = globalToLocal(e.score);
274 	type = entryType(&e);
275 	tag = e.tag;
276 
277 	if(doremove){
278 		if(e.gen != ~0)
279 			e.gen++;
280 		e.dsize = 0;
281 		e.psize = 0;
282 		e.flags = 0;
283 	}else{
284 		e.flags &= ~VtEntryLocal;
285 	}
286 	e.depth = 0;
287 	e.size = 0;
288 	e.tag = 0;
289 	memmove(e.score, vtZeroScore, VtScoreSize);
290 	entryPack(&e, b->data, r->offset % r->epb);
291 	blockDirty(b);
292 	if(addr != NilBlock)
293 		blockRemoveLink(b, addr, type, tag, 1);
294 	blockPut(b);
295 
296 	if(doremove){
297 		sourceUnlock(r);
298 		sourceClose(r);
299 	}
300 
301 	return 1;
302 }
303 
304 int
305 sourceRemove(Source *r)
306 {
307 	return sourceKill(r, 1);
308 }
309 
310 int
311 sourceTruncate(Source *r)
312 {
313 	return sourceKill(r, 0);
314 }
315 
316 uvlong
317 sourceGetSize(Source *r)
318 {
319 	Entry e;
320 	Block *b;
321 
322 	assert(sourceIsLocked(r));
323 	b = sourceLoad(r, &e);
324 	if(b == nil)
325 		return 0;
326 	blockPut(b);
327 
328 	return e.size;
329 }
330 
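/*
 * Descriptive note (added): sourceShrinkSize truncates the block tree for e
 * down to size bytes.  It walks from the root toward the data; at each
 * pointer level it zeroes the pointers that lie entirely beyond the new
 * size (removing their links) and then descends into the one partially
 * needed block.  At the data level it zeroes the tail of the final block.
 * Blocks that are not writable in the current epoch are left alone rather
 * than copied.
 */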
331 static int
332 sourceShrinkSize(Source *r, Entry *e, uvlong size)
333 {
334 	int i, type, ppb;
335 	uvlong ptrsz;
336 	u32int addr;
337 	uchar score[VtScoreSize];
338 	Block *b;
339 
340 	type = entryType(e);
341 	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
342 	if(b == nil)
343 		return 0;
344 
345 	ptrsz = e->dsize;
346 	ppb = e->psize/VtScoreSize;
347 	for(i=0; i+1<e->depth; i++)
348 		ptrsz *= ppb;
349 
350 	while(type&BtLevelMask){
351 		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
352 			/* not worth copying the block just so we can zero some of it */
353 			blockPut(b);
354 			return 0;
355 		}
356 
357 		/*
358 		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
359 		 */
360 
361 		/* zero the pointers to unnecessary blocks */
362 		i = (size+ptrsz-1)/ptrsz;
363 		for(; i<ppb; i++){
364 			addr = globalToLocal(b->data+i*VtScoreSize);
365 			memmove(b->data+i*VtScoreSize, vtZeroScore, VtScoreSize);
366 			blockDirty(b);
367 			if(addr != NilBlock)
368 				blockRemoveLink(b, addr, type-1, e->tag, 1);
369 		}
370 
371 		/* recurse (go around again) on the partially necessary block */
372 		i = size/ptrsz;
373 		size = size%ptrsz;
374 		if(size == 0){
375 			blockPut(b);
376 			return 1;
377 		}
378 		ptrsz /= ppb;
379 		type--;
380 		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
381 		blockPut(b);
382 		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
383 		if(b == nil)
384 			return 0;
385 	}
386 
387 	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
388 		blockPut(b);
389 		return 0;
390 	}
391 
392 	/*
393 	 * No one ever truncates BtDir blocks.
394 	 */
395 	if(type == BtData && e->dsize > size){
396 		memset(b->data+size, 0, e->dsize-size);
397 		blockDirty(b);
398 	}
399 	blockPut(b);
400 	return 1;
401 }
402 
403 int
404 sourceSetSize(Source *r, uvlong size)
405 {
406 	int depth;
407 	Entry e;
408 	Block *b;
409 
410 	assert(sourceIsLocked(r));
411 	if(size == 0)
412 		return sourceTruncate(r);
413 
414 	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
415 		vtSetError(ETooBig);
416 		return 0;
417 	}
418 
419 	b = sourceLoad(r, &e);
420 	if(b == nil)
421 		return 0;
422 
423 	/* quick out */
424 	if(e.size == size){
425 		blockPut(b);
426 		return 1;
427 	}
428 
429 	depth = sizeToDepth(size, e.psize, e.dsize);
430 
431 	if(depth < e.depth){
432 		if(!sourceShrinkDepth(r, b, &e, depth)){
433 			blockPut(b);
434 			return 0;
435 		}
436 	}else if(depth > e.depth){
437 		if(!sourceGrowDepth(r, b, &e, depth)){
438 			blockPut(b);
439 			return 0;
440 		}
441 	}
442 
443 	if(size < e.size)
444 		sourceShrinkSize(r, &e, size);
445 
446 	e.size = size;
447 	entryPack(&e, b->data, r->offset % r->epb);
448 	blockDirty(b);
449 	blockPut(b);
450 
451 	return 1;
452 }
453 
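/*
 * Descriptive note (added): sourceSetDirSize converts a directory size
 * measured in entries (ds) to a byte size and applies it.  Entries never
 * straddle blocks, so every full block accounts for dsize bytes even though
 * it holds only epb = dsize/VtEntrySize entries.  Example (illustrative
 * dsize): with dsize 8192 and VtEntrySize 40, epb is 204, so ds = 250 maps
 * to 8192*1 + 40*46 = 10032 bytes.  sourceGetDirSize below inverts this.
 */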
454 int
455 sourceSetDirSize(Source *r, ulong ds)
456 {
457 	uvlong size;
458 	int epb;
459 
460 	assert(sourceIsLocked(r));
461 	epb = r->dsize/VtEntrySize;
462 
463 	size = (uvlong)r->dsize*(ds/epb);
464 	size += VtEntrySize*(ds%epb);
465 	return sourceSetSize(r, size);
466 }
467 
468 ulong
469 sourceGetDirSize(Source *r)
470 {
471 	ulong ds;
472 	uvlong size;
473 	int epb;
474 
475 	assert(sourceIsLocked(r));
476 	epb = r->dsize/VtEntrySize;
477 
478 	size = sourceGetSize(r);
479 	ds = epb*(size/r->dsize);
480 	ds += (size%r->dsize)/VtEntrySize;
481 	return ds;
482 }
483 
484 int
485 sourceGetEntry(Source *r, Entry *e)
486 {
487 	Block *b;
488 
489 	assert(sourceIsLocked(r));
490 	b = sourceLoad(r, e);
491 	if(b == nil)
492 		return 0;
493 	blockPut(b);
494 
495 	return 1;
496 }
497 
498 /*
499  * Must be careful with this.  Doesn't record
500  * dependencies, so don't introduce any!
501  */
502 int
503 sourceSetEntry(Source *r, Entry *e)
504 {
505 	Block *b;
506 	Entry oe;
507 
508 	assert(sourceIsLocked(r));
509 	b = sourceLoad(r, &oe);
510 	if(b == nil)
511 		return 0;
512 	entryPack(e, b->data, r->offset%r->epb);
513 	blockDirty(b);
514 	blockPut(b);
515 
516 	return 1;
517 }
518 
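/*
 * Descriptive note (added): blockWalk descends one level of the tree from
 * block p.  For a BtDir parent the child is the root block named by the
 * entry e; for a pointer block it is the block named by the index'th score.
 * For writes, if the child belongs to an older epoch it is copied into the
 * current one, the parent is updated to point at the copy, and the old
 * block's link is removed -- the copy-on-write step that keeps snapshots
 * intact.
 */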
519 static Block *
520 blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
521 {
522 	Block *b;
523 	Cache *c;
524 	u32int addr;
525 	int type;
526 	uchar oscore[VtScoreSize], score[VtScoreSize];
527 	Entry oe;
528 
529 	c = fs->cache;
530 
531 	if((p->l.type & BtLevelMask) == 0){
532 		assert(p->l.type == BtDir);
533 		type = entryType(e);
534 		b = cacheGlobal(c, e->score, type, e->tag, mode);
535 	}else{
536 		type = p->l.type - 1;
537 		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
538 	}
539 
540 	if(b)
541 		b->pc = getcallerpc(&p);
542 
543 	if(b == nil || mode == OReadOnly)
544 		return b;
545 
546 	if(p->l.epoch != fs->ehi){
547 		fprint(2, "blockWalk: parent not writable\n");
548 		abort();
549 	}
550 	if(b->l.epoch == fs->ehi)
551 		return b;
552 
553 	oe = *e;
554 
555 	/*
556 	 * Copy on write.
557 	 */
558 	if(e->tag == 0){
559 		assert(p->l.type == BtDir);
560 		e->tag = tagGen();
561 		e->flags |= VtEntryLocal;
562 	}
563 
564 	addr = b->addr;
565 	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
566 	if(b == nil)
567 		return nil;
568 
569 	b->pc = getcallerpc(&p);
570 	assert(b->l.epoch == fs->ehi);
571 
572 	blockDirty(b);
573 	memmove(score, b->score, VtScoreSize);
574 	if(p->l.type == BtDir){
575 		memmove(e->score, b->score, VtScoreSize);
576 		entryPack(e, p->data, index);
577 		blockDependency(p, b, index, nil, &oe);
578 	}else{
579 		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
580 		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
581 		blockDependency(p, b, index, oscore, nil);
582 	}
583 	blockDirty(p);
584 
585 	if(addr != NilBlock)
586 		blockRemoveLink(p, addr, type, e->tag, 0);
587 
588 	return b;
589 }
590 
591 /*
592  * Change the depth of the source r.
593  * The entry e for r is contained in block p.
594  */
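/*
 * Descriptive note (added): growth inserts new root blocks above the old
 * one.  Each new pointer block's first score points at the previous root,
 * so existing data keeps its position at the leftmost edge of the deeper
 * tree.
 */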
595 static int
596 sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
597 {
598 	Block *b, *bb;
599 	u32int tag;
600 	int type;
601 	Entry oe;
602 
603 	assert(sourceIsLocked(r));
604 	assert(depth <= VtPointerDepth);
605 
606 	type = entryType(e);
607 	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
608 	if(b == nil)
609 		return 0;
610 
611 	tag = e->tag;
612 	if(tag == 0)
613 		tag = tagGen();
614 
615 	oe = *e;
616 
617 	/*
618 	 * Keep adding layers until we get to the right depth
619 	 * or an error occurs.
620 	 */
621 	while(e->depth < depth){
622 		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
623 		if(bb == nil)
624 			break;
625 //fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
626 		memmove(bb->data, b->score, VtScoreSize);
627 		memmove(e->score, bb->score, VtScoreSize);
628 		e->depth++;
629 		type++;
630 		e->tag = tag;
631 		e->flags |= VtEntryLocal;
632 		blockDependency(bb, b, 0, vtZeroScore, nil);
633 		blockPut(b);
634 		b = bb;
635 		blockDirty(b);
636 	}
637 
638 	entryPack(e, p->data, r->offset % r->epb);
639 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
640 	blockPut(b);
641 	blockDirty(p);
642 
643 	return e->depth == depth;
644 }
645 
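/*
 * Descriptive note (added): sourceShrinkDepth reduces the tree rooted at e
 * to the given depth by walking down the leftmost spine and making the
 * block found there the new root.  The entry (in block p) is written first,
 * then the pointer from the old parent to the new root is zeroed, and
 * finally the old root is unlinked.  The walk may stop early, in which case
 * the function reports failure but still keeps whatever progress it made.
 */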
646 static int
647 sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
648 {
649 	Block *b, *nb, *ob, *rb;
650 	u32int tag;
651 	int type, d;
652 	Entry oe;
653 
654 	assert(sourceIsLocked(r));
655 	assert(depth <= VtPointerDepth);
656 
657 	type = entryType(e);
658 	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
659 	if(rb == nil)
660 		return 0;
661 
662 	tag = e->tag;
663 	if(tag == 0)
664 		tag = tagGen();
665 
666 	/*
667 	 * Walk down to the new root block.
668 	 * We may stop early, but something is better than nothing.
669 	 */
670 	oe = *e;
671 
672 	ob = nil;
673 	b = rb;
674 /* BUG: explain type++.  I think it is a real bug. */
675 	for(d=e->depth; d > depth; d--, type++){
676 		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
677 		if(nb == nil)
678 			break;
679 		if(ob!=nil && ob!=rb)
680 			blockPut(ob);
681 		ob = b;
682 		b = nb;
683 	}
684 
685 	if(b == rb){
686 		blockPut(rb);
687 		return 0;
688 	}
689 
690 	/*
691 	 * Right now, e points at the root block rb, b is the new root block,
692 	 * and ob points at b.  To update:
693 	 *
694 	 *	(i) change e to point at b
695 	 *	(ii) zero the pointer ob -> b
696 	 *	(iii) free the root block
697 	 *
698 	 * p (the block containing e) must be written before
699 	 * anything else.
700  	 */
701 
702 	/* (i) */
703 	e->depth = d;
704 	/* might have been local and now global; reverse cannot happen */
705 	if(globalToLocal(b->score) == NilBlock)
706 		e->flags &= ~VtEntryLocal;
707 	memmove(e->score, b->score, VtScoreSize);
708 	entryPack(e, p->data, r->offset % r->epb);
709 	blockDependency(p, b, r->offset % r->epb, nil, &oe);
710 	blockDirty(p);
711 
712 	/* (ii) */
713 	memmove(ob->data, vtZeroScore, VtScoreSize);
714 	blockDependency(ob, p, 0, b->score, nil);
715 	blockDirty(ob);
716 
717 	/* (iii) */
718 	if(rb->addr != NilBlock)
719 		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
720 
721 	blockPut(rb);
722 	if(ob!=nil && ob!=rb)
723 		blockPut(ob);
724 	blockPut(b);
725 
726 	return d == depth;
727 }
728 
729 /*
730  * Normally we return the block at the given number.
731  * If early is set, we stop earlier in the tree.  Setting early
732  * to 1 gives us the block that contains the pointer to bn.
733  */
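/*
 * Descriptive note (added): the target block is found by writing bn in base
 * np, where np is the number of scores per pointer block
 * (e.psize/VtScoreSize); index[i] is the digit used at level i.  For
 * example (illustrative sizes), with psize 8192, np is 409 (VtScoreSize is
 * 20), so bn = 1000 becomes index[1] = 2 and index[0] = 182: slot 2 of the
 * root pointer block, then slot 182 of that child.
 */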
734 Block *
735 _sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
736 {
737 	Block *b, *bb;
738 	int index[VtPointerDepth+1];
739 	Entry e;
740 	int i, np;
741 	int m;
742 
743 	assert(sourceIsLocked(r));
744 	assert(bn != NilBlock);
745 
746 	/* mode for intermediate block */
747 	m = mode;
748 	if(m == OOverWrite)
749 		m = OReadWrite;
750 
751 	b = sourceLoad(r, &e);
752 	if(b == nil)
753 		return nil;
754 	if(r->issnapshot && (e.flags & VtEntryNoArchive)){
755 		blockPut(b);
756 		vtSetError(ENotArchived);
757 		return nil;
758 	}
759 
760 	if(tag){
761 		if(e.tag == 0)
762 			e.tag = tag;
763 		else if(e.tag != tag){
764 			fprint(2, "tag mismatch\n");
765 			vtSetError("tag mismatch");
766 			goto Err;
767 		}
768 	}
769 
770 	np = e.psize/VtScoreSize;
771 	memset(index, 0, sizeof(index));
772 	for(i=0; bn > 0; i++){
773 		if(i >= VtPointerDepth){
774 			vtSetError(EBadAddr);
775 			goto Err;
776 		}
777 		index[i] = bn % np;
778 		bn /= np;
779 	}
780 
781 	if(i > e.depth){
782 		if(mode == OReadOnly){
783 			vtSetError(EBadAddr);
784 			goto Err;
785 		}
786 		if(!sourceGrowDepth(r, b, &e, i))
787 			goto Err;
788 	}
789 
790 	index[e.depth] = r->offset % r->epb;
791 
792 	for(i=e.depth; i>=early; i--){
793 		bb = blockWalk(b, index[i], m, r->fs, &e);
794 		if(bb == nil)
795 			goto Err;
796 		blockPut(b);
797 		b = bb;
798 	}
799 	b->pc = getcallerpc(&r);
800 	return b;
801 Err:
802 	blockPut(b);
803 	return nil;
804 }
805 
806 Block*
807 sourceBlock(Source *r, ulong bn, int mode)
808 {
809 	Block *b;
810 
811 	b = _sourceBlock(r, bn, mode, 0, 0);
812 	if(b)
813 		b->pc = getcallerpc(&r);
814 	return b;
815 }
816 
817 void
818 sourceClose(Source *r)
819 {
820 	if(r == nil)
821 		return;
822 	vtLock(r->lk);
823 	r->ref--;
824 	if(r->ref){
825 		vtUnlock(r->lk);
826 		return;
827 	}
828 	assert(r->ref == 0);
829 	vtUnlock(r->lk);
830 	if(r->parent)
831 		sourceClose(r->parent);
832 	vtLockFree(r->lk);
833 	memset(r, ~0, sizeof(*r));
834 	vtMemFree(r);
835 }
836 
837 /*
838  * Retrieve the block containing the entry for r.
839  * If a snapshot has happened, we might need
840  * to get a new copy of the block.  We avoid this
841  * in the common case by caching the score for
842  * the block and the last epoch in which it was valid.
843  *
844  * We use r->mode to tell the difference between active
845  * file system sources (OReadWrite) and sources for the
846  * snapshot file system (OReadOnly).
847  */
848 static Block*
849 sourceLoadBlock(Source *r, int mode)
850 {
851 	u32int addr;
852 	Block *b;
853 
854 	switch(r->mode){
855 	default:
856 		assert(0);
857 	case OReadWrite:
858 		assert(r->mode == OReadWrite);
859 		/*
860 		 * This needn't be true -- we might bump the low epoch
861 		 * to reclaim some old blocks, but since this source is
862 		 * OReadWrite, the blocks must all still be open, so none
863 		 * are reclaimed.  Thus it's okay that the epoch is so low.
864 		 * Proceed.
865 		assert(r->epoch >= r->fs->elo);
866 		 */
867 		if(r->epoch == r->fs->ehi){
868 			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
869 			if(b == nil)
870 				return nil;
871 			assert(r->epoch == b->l.epoch);
872 			return b;
873 		}
874 		assert(r->parent != nil);
875 		if(!sourceLock(r->parent, OReadWrite))
876 			return nil;
877 		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
878 		sourceUnlock(r->parent);
879 		if(b == nil)
880 			return nil;
881 		assert(b->l.epoch == r->fs->ehi);
882 	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
883 		memmove(r->score, b->score, VtScoreSize);
884 		r->scoreEpoch = b->l.epoch;
885 		r->tag = b->l.tag;
886 		r->epoch = r->fs->ehi;
887 		return b;
888 
889 	case OReadOnly:
890 		addr = globalToLocal(r->score);
891 		if(addr == NilBlock)
892 			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
893 
894 		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
895 		if(b)
896 			return b;
897 
898 		/*
899 		 * If it failed because the epochs don't match, the block has been
900 		 * archived and reclaimed.  Rewalk from the parent and get the
901 		 * new pointer.  This can't happen in the OReadWrite case
902 		 * above because blocks in the current epoch don't get
903 		 * reclaimed.  The fact that we're OReadOnly means we're
904 		 * a snapshot.  (Or else the file system is read-only, but then
905 		 * the archiver isn't going around deleting blocks.)
906 		 */
907 		if(strcmp(vtGetError(), ELabelMismatch) == 0){
908 			if(!sourceLock(r->parent, OReadOnly))
909 				return nil;
910 			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
911 			sourceUnlock(r->parent);
912 			if(b){
913 				fprint(2, "sourceLoadBlock: lost %V found %V\n",
914 					r->score, b->score);
915 				memmove(r->score, b->score, VtScoreSize);
916 				r->scoreEpoch = b->l.epoch;
917 				return b;
918 			}
919 		}
920 		return nil;
921 	}
922 }
923 
924 int
925 sourceLock(Source *r, int mode)
926 {
927 	Block *b;
928 
929 	if(mode == -1)
930 		mode = r->mode;
931 
932 	b = sourceLoadBlock(r, mode);
933 	if(b == nil)
934 		return 0;
935 	/*
936 	 * The fact that we are holding b serves as the
937 	 * lock entitling us to write to r->b.
938 	 */
939 	assert(r->b == nil);
940 	r->b = b;
941 	if(r->mode == OReadWrite)
942 		assert(r->epoch == r->b->l.epoch);
943 	return 1;
944 }
945 
946 /*
947  * Lock two (usually sibling) sources.  This needs special care
948  * because the Entries for both sources might be in the same block.
949  * We also try to lock blocks in left-to-right order within the tree.
950  */
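/*
 * Descriptive note (added): when both entries live in the same directory
 * block, that block is loaded once and its lock duplicated with
 * blockDupLock, so releasing one source later does not unlock the other;
 * rr's cached score and tag are refreshed from the block if they are stale.
 */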
951 int
952 sourceLock2(Source *r, Source *rr, int mode)
953 {
954 	Block *b, *bb;
955 
956 	if(rr == nil)
957 		return sourceLock(r, mode);
958 
959 	if(mode == -1)
960 		mode = r->mode;
961 
962 	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
963 		b = sourceLoadBlock(r, mode);
964 		if(b == nil)
965 			return 0;
966 		if(memcmp(r->score, rr->score, VtScoreSize) != 0){
967 			memmove(rr->score, b->score, VtScoreSize);
968 			rr->scoreEpoch = b->l.epoch;
969 			rr->tag = b->l.tag;
970 			rr->epoch = rr->fs->ehi;
971 		}
972 		blockDupLock(b);
973 		bb = b;
974 	}else if(r->parent==rr->parent || r->offset > rr->offset){
975 		bb = sourceLoadBlock(rr, mode);
976 		b = sourceLoadBlock(r, mode);
977 	}else{
978 		b = sourceLoadBlock(r, mode);
979 		bb = sourceLoadBlock(rr, mode);
980 	}
981 	if(b == nil || bb == nil){
982 		if(b)
983 			blockPut(b);
984 		if(bb)
985 			blockPut(bb);
986 		return 0;
987 	}
988 
989 	/*
990 	 * The fact that we are holding b and bb serves
991 	 * as the lock entitling us to write to r->b and rr->b.
992 	 */
993 	r->b = b;
994 	rr->b = bb;
995 	return 1;
996 }
997 
998 void
999 sourceUnlock(Source *r)
1000 {
1001 	Block *b;
1002 
1003 	if(r->b == nil){
1004 		fprint(2, "sourceUnlock: already unlocked\n");
1005 		abort();
1006 	}
1007 	b = r->b;
1008 	r->b = nil;
1009 	blockPut(b);
1010 }
1011 
1012 static Block*
1013 sourceLoad(Source *r, Entry *e)
1014 {
1015 	Block *b;
1016 
1017 	assert(sourceIsLocked(r));
1018 	b = r->b;
1019 	if(!entryUnpack(e, b->data, r->offset % r->epb))
1020 		return nil;
1021 	if(e->gen != r->gen){
1022 		vtSetError(ERemoved);
1023 		return nil;
1024 	}
1025 	blockDupLock(b);
1026 	return b;
1027 }
1028 
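/*
 * Descriptive note (added): sizeToDepth computes how many pointer levels
 * are needed to address s bytes -- ceil(s/dsize) data blocks, then one
 * level per factor of np = psize/VtScoreSize until a single root remains.
 * Example (illustrative sizes): with psize and dsize both 8192, np is 409;
 * a 5,000,000-byte file covers 611 data blocks, those need
 * ceil(611/409) = 2 first-level pointer blocks, and those fit under a
 * single root, so the returned depth is 2.
 */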
1029 static int
1030 sizeToDepth(uvlong s, int psize, int dsize)
1031 {
1032 	int np;
1033 	int d;
1034 
1035 	/* determine pointer depth */
1036 	np = psize/VtScoreSize;
1037 	s = (s + dsize - 1)/dsize;
1038 	for(d = 0; s > 1; d++)
1039 		s = (s + np - 1)/np;
1040 	return d;
1041 }
1042 
1043 static u32int
1044 tagGen(void)
1045 {
1046 	u32int tag;
1047 
1048 	for(;;){
1049 		tag = lrand();
1050 		if(tag >= UserTag)
1051 			break;
1052 	}
1053 	return tag;
1054 }
1055