xref: /plan9/sys/src/cmd/fossil/fs.c (revision 282e677fa45fb578cdb8bc2c412ac084c367776e)
1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4 #include "error.h"
5 
6 static void fsMetaFlush(void *a);
7 static Snap *snapInit(Fs*);
8 static void snapClose(Snap*);
9 
10 Fs *
11 fsOpen(char *file, VtSession *z, long ncache, int mode)
12 {
13 	Fs *fs;
14 	Disk *disk;
15 	int fd;
16 	Block *b, *bs;
17 	Super super;
18 	int m;
19 	uchar oscore[VtScoreSize];
20 
21 	switch(mode){
22 	default:
23 		vtSetError(EBadMode);
24 		return nil;
25 	case OReadOnly:
26 		m = OREAD;
27 		break;
28 	case OReadWrite:
29 		m = ORDWR;
30 		break;
31 	}
32 	fd = open(file, m);
33 	if(fd < 0){
34 		vtSetError("open %s: %r", file);
35 		return nil;
36 	}
37 
38 	bwatchInit();
39 	disk = diskAlloc(fd);
40 	if(disk == nil){
41 		vtSetError("diskAlloc: %R");
42 		close(fd);
43 		return nil;
44 	}
45 
46 	fs = vtMemAllocZ(sizeof(Fs));
47 	fs->mode = mode;
48 	fs->blockSize = diskBlockSize(disk);
49 	fs->elk = vtLockAlloc();
50 	fs->cache = cacheAlloc(disk, z, ncache, mode);
51 	if(mode == OReadWrite && z)
52 		fs->arch = archInit(fs->cache, disk, fs, z);
53 	fs->z = z;
54 
55 	b = cacheLocal(fs->cache, PartSuper, 0, mode);
56 	if(b == nil)
57 		goto Err;
58 	if(!superUnpack(&super, b->data)){
59 		blockPut(b);
60 		vtSetError("bad super block");
61 		goto Err;
62 	}
63 	blockPut(b);
64 
65 	fs->ehi = super.epochHigh;
66 	fs->elo = super.epochLow;
67 
68 //fprint(2, "fs->ehi %d fs->elo %d active=%d\n", fs->ehi, fs->elo, super.active);
69 
70 	fs->source = sourceRoot(fs, super.active, mode);
71 	if(fs->source == nil){
72 		/*
73 		 * Perhaps it failed because the block is copy-on-write.
74 		 * Do the copy and try again.
75 		 */
76 		if(mode == OReadOnly || strcmp(vtGetError(), EBadRoot) != 0)
77 			goto Err;
78 		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag, OReadWrite, 0);
79 		if(b == nil){
80 			vtSetError("cacheLocalData: %R");
81 			goto Err;
82 		}
83 		if(b->l.epoch == fs->ehi){
84 			blockPut(b);
85 			vtSetError("bad root source block");
86 			goto Err;
87 		}
88 		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
89 		if(b == nil)
90 			goto Err;
91 		localToGlobal(super.active, oscore);
92 		super.active = b->addr;
93 		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
94 		if(bs == nil){
95 			blockPut(b);
96 			vtSetError("cacheLocal: %R");
97 			goto Err;
98 		}
99 		superPack(&super, bs->data);
100 		blockDependency(bs, b, 0, oscore, nil);
101 		blockPut(b);
102 		blockDirty(bs);
103 		blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
104 		blockPut(bs);
105 		fs->source = sourceRoot(fs, super.active, mode);
106 		if(fs->source == nil){
107 			vtSetError("sourceRoot: %R");
108 			goto Err;
109 		}
110 	}
111 
112 //fprint(2, "got fs source\n");
113 
114 	vtRLock(fs->elk);
115 	fs->file = fileRoot(fs->source);
116 	vtRUnlock(fs->elk);
117 	if(fs->file == nil){
118 		vtSetError("fileRoot: %R");
119 		goto Err;
120 	}
121 
122 //fprint(2, "got file root\n");
123 
124 	if(mode == OReadWrite){
125 		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
126 		fs->snap = snapInit(fs);
127 	}
128 	return fs;
129 
130 Err:
131 fprint(2, "fsOpen error\n");
132 	fsClose(fs);
133 	return nil;
134 }
135 
136 void
137 fsClose(Fs *fs)
138 {
139 	vtRLock(fs->elk);
140 	periodicKill(fs->metaFlush);
141 	snapClose(fs->snap);
142 	if(fs->file){
143 		fileMetaFlush(fs->file, 0);
144 		if(!fileDecRef(fs->file))
145 			vtFatal("fsClose: files still in use: %r\n");
146 	}
147 	fs->file = nil;
148 	sourceClose(fs->source);
149 	cacheFree(fs->cache);
150 	if(fs->arch)
151 		archFree(fs->arch);
152 	vtRUnlock(fs->elk);
153 	vtLockFree(fs->elk);
154 	memset(fs, ~0, sizeof(Fs));
155 	vtMemFree(fs);
156 }
157 
158 int
159 fsRedial(Fs *fs, char *host)
160 {
161 	if(!vtRedial(fs->z, host))
162 		return 0;
163 	if(!vtConnect(fs->z, 0))
164 		return 0;
165 	return 1;
166 }
167 
168 File *
169 fsGetRoot(Fs *fs)
170 {
171 	return fileIncRef(fs->file);
172 }
173 
174 int
175 fsGetBlockSize(Fs *fs)
176 {
177 	return fs->blockSize;
178 }
179 
180 Block*
181 superGet(Cache *c, Super* super)
182 {
183 	Block *b;
184 
185 	if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
186 		fprint(2, "superGet: cacheLocal failed: %R");
187 		return nil;
188 	}
189 	if(!superUnpack(super, b->data)){
190 		fprint(2, "superGet: superUnpack failed: %R");
191 		blockPut(b);
192 		return nil;
193 	}
194 
195 	return b;
196 }
197 
198 void
199 superWrite(Block* b, Super* super, int forceWrite)
200 {
201 	superPack(super, b->data);
202 	blockDirty(b);
203 	if(forceWrite){
204 		while(!blockWrite(b)){
205 			/* BUG: what should really happen here? */
206 			fprint(2, "could not write super block; waiting 10 seconds\n");
207 			sleep(10*000);
208 		}
209 		while(b->iostate != BioClean && b->iostate != BioDirty){
210 			assert(b->iostate == BioWriting);
211 			vtSleep(b->ioready);
212 		}
213 		/*
214 		 * it's okay that b might still be dirty.
215 		 * that means it got written out but with an old root pointer,
216 		 * but the other fields went out, and those are the ones
217 		 * we really care about.  (specifically, epochHigh; see fsSnapshot).
218 		 */
219 	}
220 }
221 
222 /*
223  * Prepare the directory to store a snapshot.
224  * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
225  * Archival snapshots go into /archive/yyyy/mmdd[.#].
226  *
227  * TODO This should be rewritten to eliminate most of the duplication.
228  */
229 static File*
230 fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
231 {
232 	int n;
233 	char buf[30], *s, *p, *elem;
234 	File *dir, *f;
235 	Tm now;
236 
237 	if(dstpath){
238 		if((p = strrchr(dstpath, '/')) != nil){
239 			*p++ = '\0';
240 			elem = p;
241 			p = dstpath;
242 			if(*p == '\0')
243 				p = "/";
244 		}else{
245 			p = "/";
246 			elem = dstpath;
247 		}
248 		if((dir = fileOpen(fs, p)) == nil)
249 			return nil;
250 		f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
251 		fileDecRef(dir);
252 		return f;
253 	}else if(doarchive){
254 		/*
255 		 * a snapshot intended to be archived to venti.
256 		 */
257 		dir = fileOpen(fs, "/archive");
258 		if(dir == nil)
259 			return nil;
260 		now = *localtime(time(0));
261 
262 		/* yyyy */
263 		snprint(buf, sizeof(buf), "%d", now.year+1900);
264 		f = fileWalk(dir, buf);
265 		if(f == nil)
266 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
267 		fileDecRef(dir);
268 		if(f == nil)
269 			return nil;
270 		dir = f;
271 
272 		/* mmdd[#] */
273 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
274 		s = buf+strlen(buf);
275 		for(n=0;; n++){
276 			if(n)
277 				seprint(s, buf+sizeof(buf), ".%d", n);
278 			f = fileWalk(dir, buf);
279 			if(f != nil){
280 				fileDecRef(f);
281 				continue;
282 			}
283 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
284 			break;
285 		}
286 		fileDecRef(dir);
287 		return f;
288 	}else{
289 		/*
290 		 * Just a temporary snapshot
291 		 * We'll use /snapshot/yyyy/mmdd/hhmm.
292 		 * There may well be a better naming scheme.
293 		 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
294 		 */
295 		dir = fileOpen(fs, "/snapshot");
296 		if(dir == nil)
297 			return nil;
298 
299 		now = *localtime(time(0));
300 
301 		/* yyyy */
302 		snprint(buf, sizeof(buf), "%d", now.year+1900);
303 		f = fileWalk(dir, buf);
304 		if(f == nil)
305 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
306 		fileDecRef(dir);
307 		if(f == nil)
308 			return nil;
309 		dir = f;
310 
311 		/* mmdd */
312 		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
313 		f = fileWalk(dir, buf);
314 		if(f == nil)
315 			f = fileCreate(dir, buf, ModeDir|0555, "adm");
316 		fileDecRef(dir);
317 		if(f == nil)
318 			return nil;
319 		dir = f;
320 
321 		/* hhmm[.#] */
322 		snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
323 		s = buf+strlen(buf);
324 		for(n=0;; n++){
325 			if(n)
326 				seprint(s, buf+sizeof(buf), ".%d", n);
327 			f = fileWalk(dir, buf);
328 			if(f != nil){
329 				fileDecRef(f);
330 				continue;
331 			}
332 			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
333 			break;
334 		}
335 		fileDecRef(dir);
336 		return f;
337 	}
338 }
339 
340 static int
341 fsNeedArch(Fs *fs, uint archMinute)
342 {
343 	int need;
344 	File *f;
345 	char buf[100];
346 	Tm now;
347 	ulong then;
348 
349 	then = time(0);
350 	now = *localtime(then);
351 
352 	/* back up to yesterday if necessary */
353 	if(now.hour < archMinute/60
354 	|| now.hour == archMinute/60 && now.min < archMinute%60)
355 		now = *localtime(then-86400);
356 
357 	snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
358 		now.year+1900, now.mon+1, now.mday);
359 	need = 1;
360 	vtRLock(fs->elk);
361 	f = fileOpen(fs, buf);
362 	if(f){
363 		need = 0;
364 		fileDecRef(f);
365 	}
366 	vtRUnlock(fs->elk);
367 	return need;
368 }
369 
370 int
371 fsEpochLow(Fs *fs, u32int low)
372 {
373 	Block *bs;
374 	Super super;
375 
376 	vtLock(fs->elk);
377 	if(low > fs->ehi){
378 		vtSetError("bad low epoch (must be <= %ud)", fs->ehi);
379 		vtUnlock(fs->elk);
380 		return 0;
381 	}
382 
383 	if((bs = superGet(fs->cache, &super)) == nil){
384 		vtUnlock(fs->elk);
385 		return 0;
386 	}
387 
388 	super.epochLow = low;
389 	fs->elo = low;
390 	superWrite(bs, &super, 1);
391 	blockPut(bs);
392 	vtUnlock(fs->elk);
393 
394 	return 1;
395 }
396 
397 static int
398 bumpEpoch(Fs *fs, int doarchive)
399 {
400 	uchar oscore[VtScoreSize];
401 	u32int oldaddr;
402 	Block *b, *bs;
403 	Entry e;
404 	Source *r;
405 	Super super;
406 
407 	/*
408 	 * Duplicate the root block.
409 	 *
410 	 * As a hint to flchk, the garbage collector,
411 	 * and any (human) debuggers, store a pointer
412 	 * to the old root block in entry 1 of the new root block.
413 	 */
414 	r = fs->source;
415 	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
416 	if(b == nil)
417 		return 0;
418 
419 	memset(&e, 0, sizeof e);
420 	e.flags = VtEntryActive | VtEntryLocal | VtEntryDir;
421 	memmove(e.score, b->score, VtScoreSize);
422 	e.tag = RootTag;
423 	e.snap = b->l.epoch;
424 
425 	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
426 	if(b == nil){
427 		fprint(2, "bumpEpoch: blockCopy: %R\n");
428 		return 0;
429 	}
430 
431 	if(0) fprint(2, "snapshot root from %d to %d\n", oldaddr, b->addr);
432 	entryPack(&e, b->data, 1);
433 	blockDirty(b);
434 
435 	/*
436 	 * Update the superblock with the new root and epoch.
437 	 */
438 	if((bs = superGet(fs->cache, &super)) == nil)
439 		return 0;
440 
441 	fs->ehi++;
442 	memmove(r->score, b->score, VtScoreSize);
443 	r->epoch = fs->ehi;
444 
445 	super.epochHigh = fs->ehi;
446 	oldaddr = super.active;
447 	super.active = b->addr;
448 	if(doarchive)
449 		super.next = oldaddr;
450 
451 	/*
452 	 * Record that the new super.active can't get written out until
453 	 * the new b gets written out.  Until then, use the old value.
454 	 */
455 	localToGlobal(oldaddr, oscore);
456 	blockDependency(bs, b, 0, oscore, nil);
457 	blockPut(b);
458 
459 	/*
460 	 * We force the super block to disk so that super.epochHigh gets updated.
461 	 * Otherwise, if we crash and come back, we might incorrectly treat as active
462 	 * some of the blocks that making up the snapshot we just created.
463 	 * Basically every block in the active file system and all the blocks in
464 	 * the recently-created snapshot depend on the super block now.
465 	 * Rather than record all those dependencies, we just force the block to disk.
466 	 *
467 	 * Note that blockWrite might actually (will probably) send a slightly outdated
468 	 * super.active to disk.  It will be the address of the most recent root that has
469 	 * gone to disk.
470 	 */
471 	superWrite(bs, &super, 1);
472 	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
473 	blockPut(bs);
474 
475 	return 1;
476 }
477 
478 int
479 saveQid(Fs *fs)
480 {
481 	Block *b;
482 	Super super;
483 	u64int qidMax;
484 
485 	if((b = superGet(fs->cache, &super)) == nil)
486 		return 0;
487 	qidMax = super.qid;
488 	blockPut(b);
489 
490 	if(!fileSetQidSpace(fs->file, 0, qidMax))
491 		return 0;
492 
493 	return 1;
494 }
495 
496 int
497 fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
498 {
499 	File *src, *dst;
500 
501 	assert(fs->mode == OReadWrite);
502 
503 	dst = nil;
504 
505 	if(fs->halted){
506 		vtSetError("file system is halted");
507 		return 0;
508 	}
509 
510 	/*
511 	 * Freeze file system activity.
512 	 */
513 	vtLock(fs->elk);
514 
515 	/*
516 	 * Get the root of the directory we're going to save.
517 	 */
518 	if(srcpath == nil)
519 		srcpath = "/active";
520 	src = fileOpen(fs, srcpath);
521 	if(src == nil)
522 		goto Err;
523 
524 	/*
525 	 * It is important that we maintain the invariant that:
526 	 *	if both b and bb are marked as Active with start epoch e
527 	 *	and b points at bb, then no other pointers to bb exist.
528 	 *
529 	 * When bb is unlinked from b, its close epoch is set to b's epoch.
530 	 * A block with epoch == close epoch is
531 	 * treated as free by cacheAllocBlock; this aggressively
532 	 * reclaims blocks after they have been stored to Venti.
533 	 *
534 	 * Let's say src->source is block sb, and src->msource is block
535 	 * mb.  Let's also say that block b holds the Entry structures for
536 	 * both src->source and src->msource (their Entry structures might
537 	 * be in different blocks, but the argument is the same).
538 	 * That is, right now we have:
539 	 *
540 	 *	b	Active w/ epoch e, holds ptrs to sb and mb.
541 	 *	sb	Active w/ epoch e.
542 	 *	mb	Active w/ epoch e.
543 	 *
544 	 * With things as they are now, the invariant requires that
545 	 * b holds the only pointers to sb and mb.  We want to record
546 	 * pointers to sb and mb in new Entries corresponding to dst,
547 	 * which breaks the invariant.  Thus we need to do something
548 	 * about b.  Specifically, we bump the file system's epoch and
549 	 * then rewalk the path from the root down to and including b.
550 	 * This will copy-on-write as we walk, so now the state will be:
551 	 *
552 	 *	b	Snap w/ epoch e, holds ptrs to sb and mb.
553 	 *	new-b	Active w/ epoch e+1, holds ptrs to sb and mb.
554 	 *	sb	Active w/ epoch e.
555 	 *	mb	Active w/ epoch e.
556 	 *
557 	 * In this state, it's perfectly okay to make more pointers to sb and mb.
558 	 */
559 	if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
560 		goto Err;
561 
562 	/*
563 	 * Sync to disk.  I'm not sure this is necessary, but better safe than sorry.
564 	 */
565 	cacheFlush(fs->cache, 1);
566 
567 	/*
568 	 * Create the directory where we will store the copy of src.
569 	 */
570 	dst = fileOpenSnapshot(fs, dstpath, doarchive);
571 	if(dst == nil)
572 		goto Err;
573 
574 	/*
575 	 * Actually make the copy by setting dst's source and msource
576 	 * to be src's.
577 	 */
578 	if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
579 		goto Err;
580 
581 	fileDecRef(src);
582 	fileDecRef(dst);
583 	src = nil;
584 	dst = nil;
585 
586 	/*
587 	 * Make another copy of the file system.  This one is for the
588 	 * archiver, so that the file system we archive has the recently
589 	 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
590 	 */
591 	if(doarchive){
592 		if(!saveQid(fs))
593 			goto Err;
594 		if(!bumpEpoch(fs, 1))
595 			goto Err;
596 	}
597 
598 	vtUnlock(fs->elk);
599 
600 	/* BUG? can fs->arch fall out from under us here? */
601 	if(doarchive && fs->arch)
602 		archKick(fs->arch);
603 
604 	return 1;
605 
606 Err:
607 	fprint(2, "fsSnapshot: %R\n");
608 	if(src)
609 		fileDecRef(src);
610 	if(dst)
611 		fileDecRef(dst);
612 	vtUnlock(fs->elk);
613 	return 0;
614 }
615 
616 int
617 fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
618 {
619 	int r;
620 	DirEntry de;
621 	Entry e, ee;
622 	File *f;
623 
624 	vtRLock(fs->elk);
625 	f = fileOpen(fs, name);
626 	if(f == nil){
627 		vtRUnlock(fs->elk);
628 		return 0;
629 	}
630 
631 	if(!fileGetSources(f, &e, &ee) || !fileGetDir(f, &de)){
632 		fileDecRef(f);
633 		vtRUnlock(fs->elk);
634 		return 0;
635 	}
636 	fileDecRef(f);
637 
638 	r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
639 	vtRUnlock(fs->elk);
640 	return r;
641 }
642 
643 static int
644 vtWriteBlock(VtSession *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
645 {
646 	if(!vtWrite(z, score, type, buf, n))
647 		return 0;
648 	if(!vtSha1Check(score, buf, n))
649 		return 0;
650 	return 1;
651 }
652 
653 int
654 mkVac(VtSession *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
655 {
656 	uchar buf[8192];
657 	int i;
658 	uchar *p;
659 	uint n;
660 	DirEntry de;
661 	Entry e, ee, eee;
662 	MetaBlock mb;
663 	MetaEntry me;
664 	VtRoot root;
665 
666 	e = *pe;
667 	ee = *pee;
668 	de = *pde;
669 
670 	if(globalToLocal(e.score) != NilBlock
671 	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
672 		vtSetError("can only vac paths already stored on venti");
673 		return 0;
674 	}
675 
676 	/*
677 	 * Build metadata source for root.
678 	 */
679 	n = deSize(&de);
680 	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
681 		vtSetError("DirEntry too big");
682 		return 0;
683 	}
684 	memset(buf, 0, sizeof buf);
685 	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
686 	p = mbAlloc(&mb, n);
687 	if(p == nil)
688 		abort();
689 	mbSearch(&mb, de.elem, &i, &me);
690 	assert(me.p == nil);
691 	me.p = p;
692 	me.size = n;
693 	dePack(&de, &me);
694 	mbInsert(&mb, i, &me);
695 	mbPack(&mb);
696 
697 	eee.size = n+MetaHeaderSize+MetaIndexSize;
698 	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
699 		return 0;
700 	eee.psize = 8192;
701 	eee.dsize = 8192;
702 	eee.depth = 0;
703 	eee.flags = VtEntryActive;
704 
705 	/*
706 	 * Build root source with three entries in it.
707 	 */
708 	entryPack(&e, buf, 0);
709 	entryPack(&ee, buf, 1);
710 	entryPack(&eee, buf, 2);
711 
712 	n = VtEntrySize*3;
713 	memset(&root, 0, sizeof root);
714 	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
715 		return 0;
716 
717 	/*
718 	 * Save root.
719 	 */
720 	root.version = VtRootVersion;
721 	strecpy(root.type, root.type+sizeof root.type, "vac");
722 	strecpy(root.name, root.name+sizeof root.name, de.elem);
723 	root.blockSize = blockSize;
724 	vtRootPack(&root, buf);
725 	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
726 		return 0;
727 
728 	return 1;
729 }
730 
731 int
732 fsSync(Fs *fs)
733 {
734 	vtLock(fs->elk);
735 	fileMetaFlush(fs->file, 1);
736 	cacheFlush(fs->cache, 1);
737 	vtUnlock(fs->elk);
738 	return 1;
739 }
740 
741 int
742 fsHalt(Fs *fs)
743 {
744 	vtLock(fs->elk);
745 	fs->halted = 1;
746 	fileMetaFlush(fs->file, 1);
747 	cacheFlush(fs->cache, 1);
748 	return 1;
749 }
750 
751 int
752 fsUnhalt(Fs *fs)
753 {
754 	if(!fs->halted)
755 		return 0;
756 	fs->halted = 0;
757 	vtUnlock(fs->elk);
758 	return 1;
759 }
760 
761 int
762 fsNextQid(Fs *fs, u64int *qid)
763 {
764 	Block *b;
765 	Super super;
766 
767 	if((b = superGet(fs->cache, &super)) == nil)
768 		return 0;
769 
770 	*qid = super.qid++;
771 
772 	/*
773 	 * It's okay if the super block doesn't go to disk immediately,
774 	 * since fileMetaAlloc will record a dependency between the
775 	 * block holding this qid and the super block.  See file.c:/^fileMetaAlloc.
776 	 */
777 	superWrite(b, &super, 0);
778 	blockPut(b);
779 	return 1;
780 }
781 
782 static void
783 fsMetaFlush(void *a)
784 {
785 	Fs *fs = a;
786 
787 	vtRLock(fs->elk);
788 	fileMetaFlush(fs->file, 1);
789 	vtRUnlock(fs->elk);
790 	cacheFlush(fs->cache, 0);
791 }
792 
793 static int
794 fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
795 {
796 	int n, r;
797 	DirEntry de;
798 	DirEntryEnum *dee;
799 	File *ff;
800 	Entry e, ee;
801 	char *t;
802 
803 	dee = deeOpen(f);
804 	if(dee == nil)
805 		return 0;
806 
807 	n = 0;
808 	for(;;){
809 		r = deeRead(dee, &de);
810 		if(r <= 0)
811 			break;
812 		if(de.mode & ModeSnapshot){
813 			if((ff = fileWalk(f, de.elem)) != nil){
814 				if(fileGetSources(ff, &e, &ee))
815 					if(de.mtime >= savetime && e.snap != 0)
816 						if(e.snap < *plo)
817 							*plo = e.snap;
818 				fileDecRef(ff);
819 			}
820 		}
821 		else if(de.mode & ModeDir){
822 			if((ff = fileWalk(f, de.elem)) != nil){
823 				t = smprint("%s/%s", path, de.elem);
824 				n += fsEsearch1(ff, t, savetime, plo);
825 				vtMemFree(t);
826 				fileDecRef(ff);
827 			}
828 		}
829 		deCleanup(&de);
830 		if(r < 0)
831 			break;
832 	}
833 	deeClose(dee);
834 
835 	return n;
836 }
837 
838 static int
839 fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
840 {
841 	int n;
842 	File *f;
843 	DirEntry de;
844 
845 	f = fileOpen(fs, path);
846 	if(f == nil)
847 		return 0;
848 	if(!fileGetDir(f, &de)){
849 		fileDecRef(f);
850 		return 0;
851 	}
852 	if((de.mode & ModeDir) == 0){
853 		fileDecRef(f);
854 		deCleanup(&de);
855 		return 0;
856 	}
857 	deCleanup(&de);
858 	n = fsEsearch1(f, path, savetime, plo);
859 	fileDecRef(f);
860 	return n;
861 }
862 
863 void
864 fsSnapshotCleanup(Fs *fs, u32int age)
865 {
866 	u32int lo;
867 
868 	/*
869 	 * Find the best low epoch we can use,
870 	 * given that we need to save all the unventied archives
871 	 * and all the snapshots younger than age.
872 	 */
873 	vtRLock(fs->elk);
874 	lo = fs->ehi;
875 	fsEsearch(fs, "/archive", 0, &lo);
876 	fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
877 	vtRUnlock(fs->elk);
878 
879 	fsEpochLow(fs, lo);
880 	fsSnapshotRemove(fs);
881 }
882 
883 /* remove all snapshots that have expired */
884 /* return number of directory entries remaining */
885 static int
886 fsRsearch1(File *f, char *s)
887 {
888 	int n, r;
889 	DirEntry de;
890 	DirEntryEnum *dee;
891 	File *ff;
892 	char *t;
893 
894 	dee = deeOpen(f);
895 	if(dee == nil)
896 		return 0;
897 
898 	n = 0;
899 	for(;;){
900 		r = deeRead(dee, &de);
901 		if(r <= 0)
902 			break;
903 		n++;
904 		if(de.mode & ModeSnapshot){
905 			if((ff = fileWalk(f, de.elem)) != nil)
906 				fileDecRef(ff);
907 			else if(strcmp(vtGetError(), ESnapOld) == 0){
908 				if(fileClri(f, de.elem, "adm"))
909 					n--;
910 			}
911 		}
912 		else if(de.mode & ModeDir){
913 			if((ff = fileWalk(f, de.elem)) != nil){
914 				t = smprint("%s/%s", s, de.elem);
915 				if(fsRsearch1(ff, t) == 0)
916 					if(fileRemove(ff, "adm"))
917 						n--;
918 				vtMemFree(t);
919 				fileDecRef(ff);
920 			}
921 		}
922 		deCleanup(&de);
923 		if(r < 0)
924 			break;
925 	}
926 	deeClose(dee);
927 
928 	return n;
929 }
930 
931 static int
932 fsRsearch(Fs *fs, char *path)
933 {
934 	File *f;
935 	DirEntry de;
936 
937 	f = fileOpen(fs, path);
938 	if(f == nil)
939 		return 0;
940 	if(!fileGetDir(f, &de)){
941 		fileDecRef(f);
942 		return 0;
943 	}
944 	if((de.mode & ModeDir) == 0){
945 		fileDecRef(f);
946 		deCleanup(&de);
947 		return 0;
948 	}
949 	deCleanup(&de);
950 	fsRsearch1(f, path);
951 	fileDecRef(f);
952 	return 1;
953 }
954 
955 void
956 fsSnapshotRemove(Fs *fs)
957 {
958 	vtRLock(fs->elk);
959 	fsRsearch(fs, "/snapshot");
960 	vtRUnlock(fs->elk);
961 }
962 
/*
 * State of the periodic snapshot/archive scheduler for one Fs.
 * Created by snapInit, driven by snapEvent, freed by snapClose.
 * The value ~0 in snapMinutes, archMinute or snapLife disables
 * the corresponding activity.
 */
struct Snap
{
	Fs *fs;			/* file system being snapshotted */
	Periodic *tick;		/* timer that calls snapEvent every 10 seconds */
	VtLock *lk;		/* held by snapEvent and by snapGetTimes/snapSetTimes */
	uint snapMinutes;	/* snapshot when minutes since epoch is a multiple of this; 0 also disables */
	uint archMinute;	/* minute of the day (hour*60+min) for the archival snapshot */
	uint snapLife;		/* snapshot age in minutes handed to fsSnapshotCleanup */
	u32int lastSnap;	/* minute (time/60) of the last snapshot attempt */
	u32int lastArch;	/* minute of the last archive attempt; 0 = missed-archive check not yet done */
	u32int lastCleanup;	/* minute of the last cleanup */
	uint ignore;		/* set by snapInit; not read anywhere in this file */
};
976 
977 static void
978 snapEvent(void *v)
979 {
980 	Snap *s;
981 	u32int now, min;
982 	Tm tm;
983 	int need;
984 
985 	s = v;
986 
987 	now = time(0)/60;
988 	vtLock(s->lk);
989 
990 	/*
991 	 * Snapshots happen every snapMinutes minutes.
992 	 * If we miss a snapshot (for example, because we
993 	 * were down), we wait for the next one.
994 	 */
995 	if(s->snapMinutes != ~0 && s->snapMinutes != 0
996 	&& now%s->snapMinutes==0 && now != s->lastSnap){
997 		if(!fsSnapshot(s->fs, nil, nil, 0))
998 			fprint(2, "fsSnapshot snap: %R\n");
999 		s->lastSnap = now;
1000 	}
1001 
1002 	/*
1003 	 * Archival snapshots happen at archMinute.
1004 	 * If we miss an archive (for example, because we
1005 	 * were down), we do it as soon as possible.
1006 	 */
1007 	tm = *localtime(now*60);
1008 	min = tm.hour*60+tm.min;
1009 	if(s->archMinute != ~0){
1010 		need = 0;
1011 		if(min == s->archMinute && now != s->lastArch)
1012 			need = 1;
1013 		if(s->lastArch == 0){
1014 			s->lastArch = 1;
1015 			if(fsNeedArch(s->fs, s->archMinute))
1016 				need = 1;
1017 		}
1018 		if(need){
1019 			fsSnapshot(s->fs, nil, nil, 1);
1020 			s->lastArch = now;
1021 		}
1022 	}
1023 
1024 	/*
1025 	 * Snapshot cleanup happens every snaplife or every day.
1026 	 */
1027 	if(s->snapLife != ~0
1028 	&& (s->lastCleanup+s->snapLife < now || s->lastCleanup+24*60 < now)){
1029 		fsSnapshotCleanup(s->fs, s->snapLife);
1030 		s->lastCleanup = now;
1031 	}
1032 	vtUnlock(s->lk);
1033 }
1034 
1035 static Snap*
1036 snapInit(Fs *fs)
1037 {
1038 	Snap *s;
1039 
1040 	s = vtMemAllocZ(sizeof(Snap));
1041 	s->fs = fs;
1042 	s->tick = periodicAlloc(snapEvent, s, 10*1000);
1043 	s->lk = vtLockAlloc();
1044 	s->snapMinutes = -1;
1045 	s->archMinute = -1;
1046 	s->snapLife = -1;
1047 	s->ignore = 5*2;	/* wait five minutes for clock to stabilize */
1048 	return s;
1049 }
1050 
1051 void
1052 snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
1053 {
1054 	if(s == nil){
1055 		*snap = -1;
1056 		*arch = -1;
1057 		*snaplen = -1;
1058 		return;
1059 	}
1060 
1061 	vtLock(s->lk);
1062 	*snap = s->snapMinutes;
1063 	*arch = s->archMinute;
1064 	*snaplen = s->snapLife;
1065 	vtUnlock(s->lk);
1066 }
1067 
1068 void
1069 snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
1070 {
1071 	if(s == nil)
1072 		return;
1073 
1074 	vtLock(s->lk);
1075 	s->snapMinutes = snap;
1076 	s->archMinute = arch;
1077 	s->snapLife = snaplen;
1078 	vtUnlock(s->lk);
1079 }
1080 
1081 static void
1082 snapClose(Snap *s)
1083 {
1084 	if(s == nil)
1085 		return;
1086 
1087 	periodicKill(s->tick);
1088 	vtMemFree(s);
1089 }
1090 
1091