xref: /plan9/sys/src/cmd/fossil/fs.c (revision ff8c3af2f44d95267f67219afa20ba82ff6cf7e4)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static void fsMetaFlush(void *a);
7 static Snap *snapInit(Fs*);
8 static void snapClose(Snap*);
9 
10 Fs *
11 fsOpen(char *file, VtSession *z, long ncache, int mode)
12 {
13 	Fs *fs;
14 	Disk *disk;
15 	int fd;
16 	Block *b, *bs;
17 	Super super;
18 	int m;
19 	uchar oscore[VtScoreSize];
20 
21 	switch(mode){
22 	default:
23 		vtSetError(EBadMode);
24 		return nil;
25 	case OReadOnly:
26 		m = OREAD;
27 		break;
28 	case OReadWrite:
29 		m = ORDWR;
30 		break;
31 	}
32 	fd = open(file, m);
33 	if(fd < 0){
34 		vtSetError("open %s: %r", file);
35 		return nil;
36 	}
37 
38 	bwatchInit();
39 	disk = diskAlloc(fd);
40 	if(disk == nil){
41 		vtSetError("diskAlloc: %R");
42 		close(fd);
43 		return nil;
44 	}
45 
46 	fs = vtMemAllocZ(sizeof(Fs));
47 	fs->mode = mode;
48 	fs->blockSize = diskBlockSize(disk);
49 	fs->elk = vtLockAlloc();
50 	fs->cache = cacheAlloc(disk, z, ncache, mode);
51 	if(mode == OReadWrite && z)
52 		fs->arch = archInit(fs->cache, disk, fs, z);
53 	fs->z = z;
54 
55 	b = cacheLocal(fs->cache, PartSuper, 0, mode);
56 	if(b == nil)
57 		goto Err;
58 	if(!superUnpack(&super, b->data)){
59 		blockPut(b);
60 		vtSetError("bad super block");
61 		goto Err;
62 	}
63 	blockPut(b);
64 
65 	fs->ehi = super.epochHigh;
66 	fs->elo = super.epochLow;
67 
68 //fprint(2, "fs->ehi %d fs->elo %d active=%d\n", fs->ehi, fs->elo, super.active);
69 
70 	fs->source = sourceRoot(fs, super.active, mode);
71 	if(fs->source == nil){
72 		/*
73 		 * Perhaps it failed because the block is copy-on-write.
74 		 * Do the copy and try again.
75 		 */
76 		if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0)
77 			goto Err;
78 		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag, OReadWrite, 0);
79 		if(b == nil){
80 			vtSetError("cacheLocalData: %R");
81 			goto Err;
82 		}
83 		if(!(b->l.state&BsClosed) && b->l.epoch == fs->ehi){
84 			blockPut(b);
85 			vtSetError("bad root source block");
86 			goto Err;
87 		}
88 		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
89 		if(b == nil)
90 			goto Err;
91 		localToGlobal(super.active, oscore);
92 		super.active = b->addr;
93 		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
94 		if(bs == nil){
95 			blockPut(b);
96 			vtSetError("cacheLocal: %R");
97 			goto Err;
98 		}
99 		superPack(&super, bs->data);
100 		blockDependency(bs, b, 0, oscore, nil);
101 		blockDirty(bs);
102 		blockPut(bs);
103 		blockPut(b);
104 		fs->source = sourceRoot(fs, super.active, mode);
105 		if(fs->source == nil){
106 			vtSetError("sourceRoot: %R");
107 			goto Err;
108 		}
109 	}
110 
111 //fprint(2, "got fs source\n");
112 
113 	vtRLock(fs->elk);
114 	fs->file = fileRoot(fs->source);
115 	vtRUnlock(fs->elk);
116 	if(fs->file == nil){
117 		vtSetError("fileRoot: %R");
118 		goto Err;
119 	}
120 
121 //fprint(2, "got file root\n");
122 
123 	if(mode == OReadWrite){
124 		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
125 		fs->snap = snapInit(fs);
126 	}
127 	return fs;
128 
129 Err:
130 fprint(2, "fsOpen error\n");
131 	fsClose(fs);
132 	return nil;
133 }
134 
135 void
136 fsClose(Fs *fs)
137 {
138 	vtRLock(fs->elk);
139 	periodicKill(fs->metaFlush);
140 	snapClose(fs->snap);
141 	if(fs->file){
142 		fileMetaFlush(fs->file, 0);
143 		if(!fileDecRef(fs->file))
144 			vtFatal("fsClose: files still in use: %r\n");
145 	}
146 	fs->file = nil;
147 	sourceClose(fs->source);
148 	cacheFree(fs->cache);
149 	if(fs->arch)
150 		archFree(fs->arch);
151 	vtRUnlock(fs->elk);
152 	vtLockFree(fs->elk);
153 	memset(fs, ~0, sizeof(Fs));
154 	vtMemFree(fs);
155 }
156 
157 int
158 fsRedial(Fs *fs, char *host)
159 {
160 	if(!vtRedial(fs->z, host))
161 		return 0;
162 	if(!vtConnect(fs->z, 0))
163 		return 0;
164 	return 1;
165 }
166 
167 File *
168 fsGetRoot(Fs *fs)
169 {
170 	return fileIncRef(fs->file);
171 }
172 
173 int
174 fsGetBlockSize(Fs *fs)
175 {
176 	return fs->blockSize;
177 }
178 
179 Block*
180 superGet(Cache *c, Super* super)
181 {
182 	Block *b;
183 
184 	if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
185 		fprint(2, "superGet: cacheLocal failed: %R");
186 		return nil;
187 	}
188 	if(!superUnpack(super, b->data)){
189 		fprint(2, "superGet: superUnpack failed: %R");
190 		blockPut(b);
191 		return nil;
192 	}
193 
194 	return b;
195 }
196 
197 void
198 superPut(Block* b, Super* super, int forceWrite)
199 {
200 	superPack(super, b->data);
201 	blockDirty(b);
202 	if(forceWrite){
203 		while(!blockWrite(b)){
204 			/* BUG: what should really happen here? */
205 			fprint(2, "could not write super block; waiting 10 seconds\n");
206 			sleep(10*000);
207 		}
208 		while(b->iostate != BioClean && b->iostate != BioDirty){
209 			assert(b->iostate == BioWriting);
210 			vtSleep(b->ioready);
211 		}
212 		/*
213 		 * it's okay that b might still be dirty.
214 		 * that means it got written out but with an old root pointer,
215 		 * but the other fields went out, and those are the ones
216 		 * we really care about.  (specifically, epochHigh; see fsSnapshot).
217 		 */
218 	}
219 	blockPut(b);
220 }
221 
222 /*
223  * Prepare the directory to store a snapshot.
224  * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
225  * Archival snapshots go into /archive/yyyy/mmdd[.#].
226  *
227  * TODO This should be rewritten to eliminate most of the duplication.
228  */
229 static File*
230 fileOpenSnapshot(Fs *fs, int doarchive)
231 {
232 	int n;
233 	char buf[30], *s;
234 	File *dir, *f;
235 	Tm now;
236 
237 	if(doarchive){
238 		/*
239 		 * a snapshot intended to be archived to venti.
240 		 */
241 		dir = fileOpen(fs, "/archive");
242 		if(dir == nil)
243 			return nil;
244 		now = *localtime(time(0));
245 
246 		/* yyyy */
247 		snprint(buf, sizeof(buf), "%d", now.year+1900);
248 		f = fileWalk(dir, buf);
249 		if(f == nil)
250 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
251 		fileDecRef(dir);
252 		if(f == nil)
253 			return nil;
254 		dir = f;
255 
256 		/* mmdd[#] */
257 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
258 		s = buf+strlen(buf);
259 		for(n=0;; n++){
260 			if(n)
261 				seprint(s, buf+sizeof(buf), ".%d", n);
262 			f = fileWalk(dir, buf);
263 			if(f != nil){
264 				fileDecRef(f);
265 				continue;
266 			}
267 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
268 			break;
269 		}
270 		fileDecRef(dir);
271 		return f;
272 	}else{
273 		/*
274 		 * Just a temporary snapshot
275 		 * We'll use /snapshot/yyyy/mmdd/hhmm.
276 		 * There may well be a better naming scheme.
277 		 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
278 		 */
279 		dir = fileOpen(fs, "/snapshot");
280 		if(dir == nil)
281 			return nil;
282 
283 		now = *localtime(time(0));
284 
285 		/* yyyy */
286 		snprint(buf, sizeof(buf), "%d", now.year+1900);
287 		f = fileWalk(dir, buf);
288 		if(f == nil)
289 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
290 		fileDecRef(dir);
291 		if(f == nil)
292 			return nil;
293 		dir = f;
294 
295 		/* mmdd */
296 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
297 		f = fileWalk(dir, buf);
298 		if(f == nil)
299 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
300 		fileDecRef(dir);
301 		if(f == nil)
302 			return nil;
303 		dir = f;
304 
305 		/* hhmm[.#] */
306 		snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
307 		s = buf+strlen(buf);
308 		for(n=0;; n++){
309 			if(n)
310 				seprint(s, buf+sizeof(buf), ".%d", n);
311 			f = fileWalk(dir, buf);
312 			if(f != nil){
313 				fileDecRef(f);
314 				continue;
315 			}
316 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
317 			break;
318 		}
319 		fileDecRef(dir);
320 		return f;
321 	}
322 }
323 
324 static int
325 fsNeedArch(Fs *fs, uint archMinute)
326 {
327 	int need;
328 	File *f;
329 	char buf[100];
330 	Tm now;
331 	ulong then;
332 
333 	then = time(0);
334 	now = *localtime(then);
335 
336 	/* back up to yesterday if necessary */
337 	if(now.hour < archMinute/60
338 	|| now.hour == archMinute/60 && now.min < archMinute%60)
339 		now = *localtime(then-86400);
340 
341 	snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
342 		now.year+1900, now.mon+1, now.mday);
343 	need = 1;
344 	vtRLock(fs->elk);
345 	f = fileOpen(fs, buf);
346 	if(f){
347 		need = 0;
348 		fileDecRef(f);
349 	}
350 	vtRUnlock(fs->elk);
351 	return need;
352 }
353 
354 int
355 fsEpochLow(Fs *fs, u32int low)
356 {
357 	Block *bs;
358 	Super super;
359 
360 	vtLock(fs->elk);
361 	if(low > fs->ehi){
362 		vtSetError("bad low epoch (must be <= %ud)", fs->ehi);
363 		vtUnlock(fs->elk);
364 		return 0;
365 	}
366 
367 	if((bs = superGet(fs->cache, &super)) == nil){
368 		vtUnlock(fs->elk);
369 		return 0;
370 	}
371 
372 	super.epochLow = low;
373 	fs->elo = low;
374 	superPut(bs, &super, 1);
375 	vtUnlock(fs->elk);
376 
377 	return 1;
378 }
379 
380 static int
381 bumpEpoch(Fs *fs, int doarchive)
382 {
383 	uchar oscore[VtScoreSize];
384 	u32int oldaddr;
385 	Block *b, *bs;
386 	Entry e;
387 	Source *r;
388 	Super super;
389 
390 	/*
391 	 * Duplicate the root block.
392 	 *
393 	 * As a hint to flchk, the garbage collector,
394 	 * and any (human) debuggers, store a pointer
395 	 * to the old root block in entry 1 of the new root block.
396 	 */
397 	r = fs->source;
398 	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
399 	if(b == nil)
400 		return 0;
401 
402 	memset(&e, 0, sizeof e);
403 	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
404 	memmove(e.score, b->score, VtScoreSize);
405 	e.tag = RootTag;
406 	e.snap = b->l.epoch;
407 
408 	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
409 	if(b == nil){
410 		fprint(2, "bumpEpoch: blockCopy: %R\n");
411 		return 0;
412 	}
413 
414 	if(0) fprint(2, "snapshot root from %d to %d\n", oldaddr, b->addr);
415 	entryPack(&e, b->data, 1);
416 	blockDirty(b);
417 
418 	/*
419 	 * Update the superblock with the new root and epoch.
420 	 */
421 	if((bs = superGet(fs->cache, &super)) == nil)
422 		return 0;
423 
424 	fs->ehi++;
425 	memmove(r->score, b->score, VtScoreSize);
426 	r->epoch = fs->ehi;
427 
428 	super.epochHigh = fs->ehi;
429 	oldaddr = super.active;
430 	super.active = b->addr;
431 	if(doarchive)
432 		super.next = oldaddr;
433 
434 	/*
435 	 * Record that the new super.active can't get written out until
436 	 * the new b gets written out.  Until then, use the old value.
437 	 */
438 	localToGlobal(oldaddr, oscore);
439 	blockDependency(bs, b, 0, oscore, nil);
440 	blockPut(b);
441 
442 	/*
443 	 * We force the super block to disk so that super.epochHigh gets updated.
444 	 * Otherwise, if we crash and come back, we might incorrectly treat as active
445 	 * some of the blocks that making up the snapshot we just created.
446 	 * Basically every block in the active file system and all the blocks in
447 	 * the recently-created snapshot depend on the super block now.
448 	 * Rather than record all those dependencies, we just force the block to disk.
449 	 *
450 	 * Note that blockWrite might actually (will probably) send a slightly outdated
451 	 * super.active to disk.  It will be the address of the most recent root that has
452 	 * gone to disk.
453 	 */
454 	superPut(bs, &super, 1);
455 
456 	return 1;
457 }
458 
459 int
460 saveQid(Fs *fs)
461 {
462 	Block *b;
463 	Super super;
464 	u64int qidMax;
465 
466 	if((b = superGet(fs->cache, &super)) == nil)
467 		return 0;
468 	qidMax = super.qid;
469 	blockPut(b);
470 
471 	if(!fileSetQidSpace(fs->file, 0, qidMax))
472 		return 0;
473 
474 	return 1;
475 }
476 
477 int
478 fsSnapshot(Fs *fs, int doarchive)
479 {
480 	File *src, *dst;
481 
482 	assert(fs->mode == OReadWrite);
483 
484 	dst = nil;
485 
486 	if(fs->halted){
487 		vtSetError("file system is halted");
488 		return 0;
489 	}
490 
491 	/*
492 	 * Freeze file system activity.
493 	 */
494 	vtLock(fs->elk);
495 
496 	/*
497 	 * Get the root of the directory we're going to save.
498 	 */
499 	src = fileOpen(fs, "/active");
500 	if(src == nil)
501 		goto Err;
502 
503 	/*
504 	 * It is important that we maintain the invariant that:
505 	 *	if both b and bb are marked as Active with epoch e
506 	 *	and b points at bb, then no other pointers to bb exist.
507 	 *
508 	 * The archiver uses this property to aggressively reclaim
509 	 * such blocks once they have been stored on Venti, and
510 	 * blockCleanup knows about this property as well.
511 	 *
512 	 * Let's say src->source is block sb, and src->msource is block
513 	 * mb.  Let's also say that block b holds the Entry structures for
514 	 * both src->source and src->msource (their Entry structures might
515 	 * be in different blocks, but the argument is the same).
516 	 * That is, right now we have:
517 	 *
518 	 *	b	Active w/ epoch e, holds ptrs to sb and mb.
519 	 *	sb	Active w/ epoch e.
520 	 *	mb	Active w/ epoch e.
521 	 *
522 	 * With things as they are now, the invariant requires that
523 	 * b holds the only pointers to sb and mb.  We want to record
524 	 * pointers to sb and mb in new Entries corresponding to dst,
525 	 * which breaks the invariant.  Thus we need to do something
526 	 * about b.  Specifically, we bump the file system's epoch and
527 	 * then rewalk the path from the root down to and including b.
528 	 * This will copy-on-write as we walk, so now the state will be:
529 	 *
530 	 *	b	Snap w/ epoch e, holds ptrs to sb and mb.
531 	 *	new-b	Active w/ epoch e+1, holds ptrs to sb and mb.
532 	 *	sb	Active w/ epoch e.
533 	 *	mb	Active w/ epoch e.
534 	 *
535 	 * In this state, it's perfectly okay to add pointers to dst, which
536 	 * will live in a block marked Active with epoch e+1.
537 	 *
538 	 * Of course, we need to make sure that the copied path makes
539 	 * it out to disk before the new dst block; if the dst block goes out
540 	 * first and then we crash, the invariant is violated.  Rather than
541 	 * deal with the dependencies, we just sync the file system to disk
542 	 * right now.
543 	 */
544 	if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
545 		goto Err;
546 
547 	/*
548 	 * Sync to disk.
549 	 */
550 	cacheFlush(fs->cache, 1);
551 
552 	/*
553 	 * Create the directory where we will store the copy of src.
554 	 */
555 	dst = fileOpenSnapshot(fs, doarchive);
556 	if(dst == nil)
557 		goto Err;
558 
559 	/*
560 	 * Actually make the copy by setting dst's source and msource
561 	 * to be src's.
562 	 */
563 	if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
564 		goto Err;
565 
566 	fileDecRef(src);
567 	fileDecRef(dst);
568 	src = nil;
569 	dst = nil;
570 
571 	/*
572 	 * Make another copy of the file system.  This one is for the
573 	 * archiver, so that the file system we archive has the recently
574 	 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
575 	 */
576 	if(doarchive){
577 		if(!saveQid(fs))
578 			goto Err;
579 		if(!bumpEpoch(fs, 1))
580 			goto Err;
581 	}
582 
583 	vtUnlock(fs->elk);
584 
585 	/* BUG? can fs->arch fall out from under us here? */
586 	if(doarchive && fs->arch)
587 		archKick(fs->arch);
588 
589 	return 1;
590 
591 Err:
592 	fprint(2, "fsSnapshot: %R\n");
593 	if(src)
594 		fileDecRef(src);
595 	if(dst)
596 		fileDecRef(dst);
597 	vtUnlock(fs->elk);
598 	return 0;
599 }
600 
601 int
602 fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
603 {
604 	int r;
605 	DirEntry de;
606 	Entry e, ee;
607 	File *f;
608 
609 	vtRLock(fs->elk);
610 	f = fileOpen(fs, name);
611 	if(f == nil){
612 		vtRUnlock(fs->elk);
613 		return 0;
614 	}
615 
616 	if(!fileGetSources(f, &e, &ee, 0) || !fileGetDir(f, &de)){
617 		fileDecRef(f);
618 		vtRUnlock(fs->elk);
619 		return 0;
620 	}
621 	fileDecRef(f);
622 
623 	r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
624 	vtRUnlock(fs->elk);
625 	return r;
626 }
627 
628 static int
629 vtWriteBlock(VtSession *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
630 {
631 	if(!vtWrite(z, score, type, buf, n))
632 		return 0;
633 	if(!vtSha1Check(score, buf, n))
634 		return 0;
635 	return 1;
636 }
637 
638 int
639 mkVac(VtSession *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
640 {
641 	uchar buf[8192];
642 	int i;
643 	uchar *p;
644 	uint n;
645 	DirEntry de;
646 	Entry e, ee, eee;
647 	MetaBlock mb;
648 	MetaEntry me;
649 	VtRoot root;
650 
651 	e = *pe;
652 	ee = *pee;
653 	de = *pde;
654 
655 	if(globalToLocal(e.score) != NilBlock
656 	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
657 		vtSetError("can only vac paths already stored on venti");
658 		return 0;
659 	}
660 
661 	/*
662 	 * Build metadata source for root.
663 	 */
664 	n = deSize(&de);
665 	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
666 		vtSetError("DirEntry too big");
667 		return 0;
668 	}
669 	memset(buf, 0, sizeof buf);
670 	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
671 	p = mbAlloc(&mb, n);
672 	if(p == nil)
673 		abort();
674 	mbSearch(&mb, de.elem, &i, &me);
675 	assert(me.p == nil);
676 	me.p = p;
677 	me.size = n;
678 	dePack(&de, &me);
679 	mbInsert(&mb, i, &me);
680 	mbPack(&mb);
681 
682 	eee.size = n+MetaHeaderSize+MetaIndexSize;
683 	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
684 		return 0;
685 	eee.psize = 8192;
686 	eee.dsize = 8192;
687 	eee.depth = 0;
688 	eee.flags = VtEntryActive;
689 
690 	/*
691 	 * Build root source with three entries in it.
692 	 */
693 	entryPack(&e, buf, 0);
694 	entryPack(&ee, buf, 1);
695 	entryPack(&eee, buf, 2);
696 
697 	n = VtEntrySize*3;
698 	memset(&root, 0, sizeof root);
699 	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
700 		return 0;
701 
702 	/*
703 	 * Save root.
704 	 */
705 	root.version = VtRootVersion;
706 	strecpy(root.type, root.type+sizeof root.type, "vac");
707 	strecpy(root.name, root.name+sizeof root.name, de.elem);
708 	root.blockSize = blockSize;
709 	vtRootPack(&root, buf);
710 	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
711 		return 0;
712 
713 	return 1;
714 }
715 
716 int
717 fsSync(Fs *fs)
718 {
719 	vtLock(fs->elk);
720 	fileMetaFlush(fs->file, 1);
721 	cacheFlush(fs->cache, 1);
722 	vtUnlock(fs->elk);
723 	return 1;
724 }
725 
726 int
727 fsHalt(Fs *fs)
728 {
729 	vtLock(fs->elk);
730 	fs->halted = 1;
731 	fileMetaFlush(fs->file, 1);
732 	cacheFlush(fs->cache, 1);
733 	return 1;
734 }
735 
736 int
737 fsUnhalt(Fs *fs)
738 {
739 	if(!fs->halted)
740 		return 0;
741 	fs->halted = 0;
742 	vtUnlock(fs->elk);
743 	return 1;
744 }
745 
746 int
747 fsNextQid(Fs *fs, u64int *qid)
748 {
749 	Block *b;
750 	Super super;
751 
752 	if((b = superGet(fs->cache, &super)) == nil)
753 		return 0;
754 
755 	*qid = super.qid++;
756 
757 	/*
758 	 * It's okay if the super block doesn't go to disk immediately,
759 	 * since fileMetaAlloc will record a dependency between the
760 	 * block holding this qid and the super block.  See file.c:/^fileMetaAlloc.
761 	 */
762 	superPut(b, &super, 0);
763 	return 1;
764 }
765 
766 static void
767 fsMetaFlush(void *a)
768 {
769 	Fs *fs = a;
770 
771 	vtRLock(fs->elk);
772 	fileMetaFlush(fs->file, 1);
773 	vtRUnlock(fs->elk);
774 	cacheFlush(fs->cache, 0);
775 }
776 
777 static int
778 fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
779 {
780 	int n, r;
781 	DirEntry de;
782 	DirEntryEnum *dee;
783 	File *ff;
784 	Entry e, ee;
785 	char *t;
786 
787 	dee = deeOpen(f);
788 	if(dee == nil)
789 		return 0;
790 
791 	n = 0;
792 	for(;;){
793 		r = deeRead(dee, &de);
794 		if(r <= 0)
795 			break;
796 		if(de.mode & ModeSnapshot){
797 			if((ff = fileWalk(f, de.elem)) != nil){
798 				if(fileGetSources(ff, &e, &ee, 0))
799 					if(de.mtime >= savetime && e.snap != 0)
800 						if(e.snap < *plo)
801 							*plo = e.snap;
802 				fileDecRef(ff);
803 			}
804 		}
805 		else if(de.mode & ModeDir){
806 			if((ff = fileWalk(f, de.elem)) != nil){
807 				t = smprint("%s/%s", path, de.elem);
808 				n += fsEsearch1(ff, t, savetime, plo);
809 				vtMemFree(t);
810 				fileDecRef(ff);
811 			}
812 		}
813 		deCleanup(&de);
814 		if(r < 0)
815 			break;
816 	}
817 	deeClose(dee);
818 
819 	return n;
820 }
821 
822 static int
823 fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
824 {
825 	int n;
826 	File *f;
827 	DirEntry de;
828 
829 	f = fileOpen(fs, path);
830 	if(f == nil)
831 		return 0;
832 	if(!fileGetDir(f, &de)){
833 		fileDecRef(f);
834 		return 0;
835 	}
836 	if((de.mode & ModeDir) == 0){
837 		fileDecRef(f);
838 		deCleanup(&de);
839 		return 0;
840 	}
841 	deCleanup(&de);
842 	n = fsEsearch1(f, path, savetime, plo);
843 	fileDecRef(f);
844 	return n;
845 }
846 
847 void
848 fsSnapshotCleanup(Fs *fs, u32int age)
849 {
850 	u32int lo;
851 
852 	/*
853 	 * Find the best low epoch we can use,
854 	 * given that we need to save all the unventied archives
855 	 * and all the snapshots younger than age.
856 	 */
857 	vtRLock(fs->elk);
858 	lo = fs->ehi;
859 	fsEsearch(fs, "/archive", 0, &lo);
860 	fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
861 	vtRUnlock(fs->elk);
862 
863 	fsEpochLow(fs, lo);
864 	fsSnapshotRemove(fs);
865 }
866 
867 /* remove all snapshots that have expired */
868 /* return number of directory entries remaining */
869 static int
870 fsRsearch1(File *f, char *s)
871 {
872 	int n, r;
873 	DirEntry de;
874 	DirEntryEnum *dee;
875 	File *ff;
876 	char *t;
877 
878 	dee = deeOpen(f);
879 	if(dee == nil)
880 		return 0;
881 
882 	n = 0;
883 	for(;;){
884 		r = deeRead(dee, &de);
885 		if(r <= 0)
886 			break;
887 		n++;
888 		if(de.mode & ModeSnapshot){
889 			if((ff = fileWalk(f, de.elem)) != nil)
890 				fileDecRef(ff);
891 			else if(strcmp(vtGetError(), ESnapOld) == 0){
892 				if(fileClri(f, de.elem, "adm"))
893 					n--;
894 			}
895 		}
896 		else if(de.mode & ModeDir){
897 			if((ff = fileWalk(f, de.elem)) != nil){
898 				t = smprint("%s/%s", s, de.elem);
899 				if(fsRsearch1(ff, t) == 0)
900 					if(fileRemove(ff, "adm"))
901 						n--;
902 				vtMemFree(t);
903 				fileDecRef(ff);
904 			}
905 		}
906 		deCleanup(&de);
907 		if(r < 0)
908 			break;
909 	}
910 	deeClose(dee);
911 
912 	return n;
913 }
914 
915 static int
916 fsRsearch(Fs *fs, char *path)
917 {
918 	File *f;
919 	DirEntry de;
920 
921 	f = fileOpen(fs, path);
922 	if(f == nil)
923 		return 0;
924 	if(!fileGetDir(f, &de)){
925 		fileDecRef(f);
926 		return 0;
927 	}
928 	if((de.mode & ModeDir) == 0){
929 		fileDecRef(f);
930 		deCleanup(&de);
931 		return 0;
932 	}
933 	deCleanup(&de);
934 	fsRsearch1(f, path);
935 	fileDecRef(f);
936 	return 1;
937 }
938 
939 void
940 fsSnapshotRemove(Fs *fs)
941 {
942 	vtRLock(fs->elk);
943 	fsRsearch(fs, "/snapshot");
944 	vtRUnlock(fs->elk);
945 }
946 
947 struct Snap
948 {
949 	Fs *fs;
950 	Periodic *tick;
951 	VtLock *lk;
952 	uint snapMinutes;
953 	uint archMinute;
954 	uint snapLife;
955 	u32int lastSnap;
956 	u32int lastArch;
957 	u32int lastCleanup;
958 	uint ignore;
959 };
960 
961 static void
962 snapEvent(void *v)
963 {
964 	Snap *s;
965 	u32int now, min;
966 	Tm tm;
967 	int need;
968 
969 	s = v;
970 
971 	now = time(0)/60;
972 	vtLock(s->lk);
973 
974 	/*
975 	 * Snapshots happen every snapMinutes minutes.
976 	 * If we miss a snapshot (for example, because we
977 	 * were down), we wait for the next one.
978 	 */
979 	if(s->snapMinutes != ~0 && s->snapMinutes != 0
980 	&& now%s->snapMinutes==0 && now != s->lastSnap){
981 		if(!fsSnapshot(s->fs, 0))
982 			fprint(2, "fsSnapshot snap: %R\n");
983 		s->lastSnap = now;
984 	}
985 
986 	/*
987 	 * Archival snapshots happen at archMinute.
988 	 * If we miss an archive (for example, because we
989 	 * were down), we do it as soon as possible.
990 	 */
991 	tm = *localtime(now*60);
992 	min = tm.hour*60+tm.min;
993 	if(s->archMinute != ~0){
994 		need = 0;
995 		if(min == s->archMinute && now != s->lastArch)
996 			need = 1;
997 		if(s->lastArch == 0){
998 			s->lastArch = 1;
999 			if(fsNeedArch(s->fs, s->archMinute))
1000 				need = 1;
1001 		}
1002 		if(need){
1003 			fsSnapshot(s->fs, 1);
1004 			s->lastArch = now;
1005 		}
1006 	}
1007 
1008 	/*
1009 	 * Snapshot cleanup happens every snaplife or every day.
1010 	 */
1011 	if(s->snapLife != ~0
1012 	&& (s->lastCleanup+s->snapLife < now || s->lastCleanup+24*60 < now)){
1013 		fsSnapshotCleanup(s->fs, s->snapLife);
1014 		s->lastCleanup = now;
1015 	}
1016 	vtUnlock(s->lk);
1017 }
1018 
1019 static Snap*
1020 snapInit(Fs *fs)
1021 {
1022 	Snap *s;
1023 
1024 	s = vtMemAllocZ(sizeof(Snap));
1025 	s->fs = fs;
1026 	s->tick = periodicAlloc(snapEvent, s, 10*1000);
1027 	s->lk = vtLockAlloc();
1028 	s->snapMinutes = -1;
1029 	s->archMinute = -1;
1030 	s->snapLife = -1;
1031 	s->ignore = 5*2;	/* wait five minutes for clock to stabilize */
1032 	return s;
1033 }
1034 
1035 void
1036 snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
1037 {
1038 	vtLock(s->lk);
1039 	*snap = s->snapMinutes;
1040 	*arch = s->archMinute;
1041 	*snaplen = s->snapLife;
1042 	vtUnlock(s->lk);
1043 }
1044 
1045 void
1046 snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
1047 {
1048 	vtLock(s->lk);
1049 	s->snapMinutes = snap;
1050 	s->archMinute = arch;
1051 	s->snapLife = snaplen;
1052 	vtUnlock(s->lk);
1053 }
1054 
1055 static void
1056 snapClose(Snap *s)
1057 {
1058 	if(s == nil)
1059 		return;
1060 
1061 	periodicKill(s->tick);
1062 	vtMemFree(s);
1063 }
1064 
1065