xref: /plan9/sys/src/cmd/aux/flashfs/journal.c (revision ff8c3af2f44d95267f67219afa20ba82ff6cf7e4)
1 #include <u.h>
2 #include <libc.h>
3 #include <auth.h>
4 #include <fcall.h>
5 #include <thread.h>
6 #include <9p.h>
7 #include "flashfs.h"
8 
9 enum
10 {
11 	debug	= 0,
12 	diags	= 1,
13 };
14 
15 typedef struct	Gen	Gen;
16 typedef struct	Sect	Sect;
17 
18 struct Gen
19 {
20 	int	gnum;
21 	int	count;
22 	int	low;
23 	int	high;
24 	Sect*	head;
25 	Sect*	tail;
26 	Sect*	dup;
27 	Sect**	vect;
28 };
29 
30 struct Sect
31 {
32 	int	sect;
33 	ulong	seq;
34 	int	coff;
35 	int	toff;
36 	int	sum;
37 	ulong	time;
38 	Sect*	next;
39 };
40 
41 static	Gen	gens[2];
42 static	Sect*	freehead;
43 static	Sect*	freetail;
44 static	int	nfree;
45 static	int	nbad;
46 static	ulong	ltime;
47 static	int	cursect;
48 
49 /*
50  *	If we have a delta then file times are in the future.
51  *	But they drift back to reality.
52  */
53 ulong
54 now(void)
55 {
56 	ulong cur, drift;
57 	static ulong last;
58 
59 	cur = time(0);
60 	if(cur < last)
61 		delta += last - cur;
62 	else if(delta != 0 && last != 0) {
63 		drift = (cur - last + 1) / 2;
64 		if(drift > delta)
65 			delta = 0;
66 		else
67 			delta -= drift;
68 	}
69 	last = cur;
70 	return cur + delta;
71 }
72 
73 static void
74 damaged(char *mesg)
75 {
76 	sysfatal("damaged filesystem: %s\n", mesg);
77 }
78 
79 static void
80 lddisc(char *mesg)
81 {
82 	if(debug)
83 		fprint(2, "discard %s\n", mesg);
84 }
85 
86 static Sect *
87 getsect(void)
88 {
89 	Sect *t;
90 
91 	t = freehead;
92 	freehead = t->next;
93 	if(freehead == nil)
94 		freetail = nil;
95 	nfree--;
96 	return t;
97 }
98 
99 static void
100 newsect(Gen *g, Sect *s)
101 {
102 	int m, n, err;
103 	uchar hdr[2*3];
104 
105 	if(debug)
106 		fprint(2, "new %d %ld\n", g->gnum, s->seq);
107 	if(g->tail == nil)
108 		g->head = s;
109 	else
110 		g->tail->next = s;
111 	g->tail = s;
112 	s->next = nil;
113 	m = putc3(&hdr[0], g->gnum);
114 	n = putc3(&hdr[m], s->seq);
115 	s->toff = MAGSIZE + m + n;
116 	s->coff = s->toff + 4;
117 	s->time = NOTIME;
118 	s->sum = 0;
119 	err = 1;
120 	for(;;) {
121 		if(writedata(err, s->sect, hdr, m + n, MAGSIZE)
122 		&& writedata(err, s->sect, magic, MAGSIZE, 0))
123 			break;
124 		clearsect(s->sect);
125 		err = 0;
126 	}
127 }
128 
129 static Sect *
130 newsum(Gen *g, ulong seq)
131 {
132 	Sect *t;
133 
134 	if(nfree == 0)
135 		damaged("no free sector for summary");
136 	t = getsect();
137 	t->seq = seq;
138 	newsect(g, t);
139 	return t;
140 }
141 
142 static void
143 freesect(Sect *s)
144 {
145 	clearsect(s->sect);
146 	if(freetail == nil)
147 		freehead = s;
148 	else
149 		freetail->next = s;
150 	freetail = s;
151 	s->next = nil;
152 	nfree++;
153 }
154 
155 static void
156 dupsect(Sect *s, int renum)
157 {
158 	Sect *t;
159 	Renum r;
160 	uchar *b;
161 	int err, n;
162 	ulong doff, off;
163 
164 	if(nfree == 0)
165 		damaged("no free for copy");
166 	t = getsect();
167 	b = sectbuff;
168 	off = s->coff;
169 	readdata(s->sect, b, off, 0);
170 	doff = s->toff;
171 	if(s->time == NOTIME)
172 		doff += 4;
173 	// Window 5
174 	err = 1;
175 	for(;;) {
176 		if(writedata(err, t->sect, b + 1, s->toff - 1, 1)
177 		&& writedata(err, t->sect, b + doff, off - doff, doff)
178 		&& writedata(err, t->sect, b, 1, 0))
179 			break;
180 		clearsect(t->sect);
181 		err = 0;
182 	}
183 	if(renum) {
184 		r.old = s->sect;
185 		r.new = t->sect;
186 		erenum(&r);
187 	}
188 	n = s->sect;
189 	s->sect = t->sect;
190 	t->sect = n;
191 	freesect(t);
192 	if(cursect >= 0)
193 		readdata(cursect, b, sectsize, 0);
194 }
195 
196 static void
197 gswap(void)
198 {
199 	Gen t;
200 
201 	t = gens[0];
202 	gens[0] = gens[1];
203 	gens[1] = t;
204 }
205 
206 static void
207 checkdata(void)
208 {
209 	Gen *g;
210 
211 	g = &gens[0];
212 	if(g->dup != nil) {	// Window 5 damage
213 		if(diags)
214 			fprint(2, "%s: window 5 damage\n", argv0);
215 		freesect(g->dup);
216 		g->dup = nil;
217 	}
218 }
219 
220 static void
221 checksweep(void)
222 {
223 	Gen *g;
224 	Jrec j;
225 	uchar *b;
226 	int n, op;
227 	Sect *s, *t, *u;
228 	long off, seq, soff;
229 
230 	g = &gens[1];
231 	if(g->dup != nil) {	// Window 5 damage
232 		if(diags)
233 			fprint(2, "%s: window 5 damage\n", argv0);
234 		freesect(g->dup);
235 		g->dup = nil;
236 	}
237 
238 	s = g->head;
239 	if(s != g->tail) {
240 		while(s->next != g->tail)
241 			s = s->next;
242 	}
243 
244 	b = sectbuff;
245 	op = -1;
246 	seq = -1;
247 	soff = 0;
248 	u = nil;
249 	t = s;
250 	for(;;) {
251 		readdata(t->sect, b, sectsize, 0);
252 		off = t->toff + 4;
253 		for(;;) {
254 			n = convM2J(&j, &b[off]);
255 			if(n < 0) {
256 				if(j.type != 0xFF) {
257 					if(debug)
258 						fprint(2, "s[%d] @ %d %ld\n", j.type, t->sect, off);
259 					damaged("bad Jrec");
260 				}
261 				break;
262 			}
263 			switch(j.type) {
264 			case FT_SUMMARY:
265 			case FT_SUMBEG:
266 				seq = j.seq;
267 			case FT_SUMEND:
268 				op = j.type;
269 				soff = off;
270 				u = t;
271 				break;
272 			case FT_WRITE:
273 			case FT_AWRITE:
274 				off += j.size;
275 			}
276 			off += n;
277 		}
278 		t = t->next;
279 		if(t == nil)
280 			break;
281 	}
282 
283 	if(op == FT_SUMBEG) {		// Window 1 damage
284 		if(diags)
285 			fprint(2, "%s: window 1 damage\n", argv0);
286 		if(u != s) {
287 			freesect(u);
288 			s->next = nil;
289 			g->tail = s;
290 		}
291 		s->coff = soff;
292 		dupsect(s, 0);
293 		seq--;
294 	}
295 
296 	if(seq >= 0) {
297 		g = &gens[0];
298 		if(seq > g->low)
299 			damaged("high sweep");
300 		if(seq == g->low) {	// Window 2 damage
301 			if(diags)
302 				fprint(2, "%s: window 2 damage\n", argv0);
303 			s = g->head;
304 			g->head = s->next;
305 			freesect(s);
306 			if(g->head == nil) {
307 				g->tail = nil;
308 				g->gnum += 2;
309 				newsum(g, 0);
310 				gswap();
311 			}
312 		}
313 	}
314 }
315 
316 void
317 load1(Sect *s, int parity)
318 {
319 	int n;
320 	Jrec j;
321 	uchar *b;
322 	char *err;
323 	Extent *x;
324 	Entry *d, *e;
325 	ulong ctime, off, mtime;
326 
327 	if(s->sect < 0 && readonly)	// readonly damaged
328 		return;
329 
330 	b = sectbuff;
331 	readdata(s->sect, b, sectsize, 0);
332 	s->sum = 0;
333 	off = s->toff;
334 	ctime = get4(&b[off]);
335 	off += 4;
336 
337 	for(;;) {
338 		n = convM2J(&j, &b[off]);
339 		if(n < 0) {
340 			if(j.type != 0xFF) {
341 				if(debug)
342 					fprint(2, "l[%d] @  %d %ld\n", j.type, s->sect, off);
343 				damaged("bad Jrec");
344 			}
345 			s->coff = off;
346 			break;
347 		}
348 		off += n;
349 		if(debug)
350 			fprint(2, "get %J\n", &j);
351 		switch(j.type) {
352 		case FT_create:
353 			ctime += j.mtime;
354 		create:
355 			d = elookup(j.parent);
356 			if(d == nil) {
357 				lddisc("parent");
358 				break;
359 			}
360 			d->ref++;
361 			e = ecreate(d, j.name, j.fnum, j.mode, ctime, &err);
362 			if(e == nil) {
363 				d->ref--;
364 				lddisc("create");
365 				break;
366 			}
367 			e->ref--;
368 			if(j.type == FT_trunc)
369 				e->mode = j.mode;
370 			break;
371 		case FT_chmod:
372 			e = elookup(j.fnum);
373 			if(e == nil) {
374 				lddisc("lookup");
375 				break;
376 			}
377 			echmod(e, j.mode, j.mnum);
378 			break;
379 		case FT_REMOVE:
380 			e = elookup(j.fnum);
381 			if(e == nil) {
382 				lddisc("lookup");
383 				break;
384 			}
385 			if(eremove(e) != nil) {
386 				lddisc("remove");
387 				break;
388 			}
389 			break;
390 		case FT_AWRITE:
391 			mtime = 0;
392 			goto write;
393 		case FT_WRITE:
394 			ctime += j.mtime;
395 			mtime = ctime;
396 		write:
397 			x = emalloc9p(sizeof(Extent));
398 			x->sect = s->sect;
399 			x->addr = off;
400 			x->off = j.offset;
401 			x->size = j.size;
402 			off += j.size;
403 			e = elookup(j.fnum);
404 			if(e == nil) {
405 				lddisc("lookup");
406 				break;
407 			}
408 			ewrite(e, x, parity, mtime);
409 			break;
410 		case FT_trunc:
411 			ctime += j.mtime;
412 			e = elookup(j.tnum);
413 			if(e == nil) {
414 				if(debug)
415 					fprint(2, "-> create\n");
416 				goto create;
417 			}
418 			etrunc(e, j.fnum, ctime);
419 			break;
420 		case FT_SUMMARY:
421 		case FT_SUMBEG:
422 		case FT_SUMEND:
423 			s->sum += n;
424 			break;
425 		default:
426 			damaged("load1 botch");
427 		}
428 	}
429 
430 	if(s->sum > Nsum)
431 		s->sum = Nsum;
432 
433 	s->time = ctime;
434 	if(ctime != NOTIME && ctime > ltime)
435 		ltime = ctime;
436 }
437 
438 void
439 load0(int parity)
440 {
441 	Sect *s;
442 
443 	if(debug)
444 		fprint(2, "load %d\n", parity);
445 	for(s = gens[parity].head; s != nil; s = s->next)
446 		load1(s, parity);
447 }
448 
449 void
450 loadfs(int ro)
451 {
452 	Gen *g;
453 	Sect *s;
454 	ulong u, v;
455 	int i, j, n;
456 	uchar hdr[MAXHDR];
457 
458 	readonly = ro;
459 	fmtinstall('J', Jconv);
460 
461 	for(i = 0; i < 2; i++) {
462 		g = &gens[i];
463 		g->gnum = -1;
464 		g->low = nsects;
465 		g->high = -1;
466 		g->count = 0;
467 		g->head = nil;
468 		g->tail = nil;
469 	}
470 
471 	for(i = 0; i < nsects; i++) {
472 		readdata(i, hdr, MAXHDR, 0);
473 		if(memcmp(hdr, magic, MAGSIZE) == 0) {
474 			n = MAGSIZE + getc3(&hdr[MAGSIZE], &u);
475 			getc3(&hdr[n], &v);
476 			if(debug)
477 				fprint(2, "%d: %ld %ld\n", i, u, v);
478 			for(j = 0; j < 2; j++) {
479 				g = &gens[j];
480 				if(g->gnum == u) {
481 					g->count++;
482 					if(v < g->low)
483 						g->low = v;
484 					if(v > g->high)
485 						g->high = v;
486 					break;
487 				}
488 				else if(g->gnum < 0) {
489 					g->gnum = u;
490 					g->count = 1;
491 					g->low = v;
492 					g->high = v;
493 					break;
494 				}
495 			}
496 			if(j == 2)
497 				damaged("unexpected generation");
498 		}
499 		else if(hdr[0] == 0xFF) {
500 			nfree++;
501 			s = emalloc9p(sizeof(Sect));
502 			s->sect = i;
503 			s->next = freehead;
504 			freehead = s;
505 			if(freetail == nil)
506 				freetail = s;
507 		}
508 		else
509 			nbad++;
510 	}
511 
512 	if(nbad > 0)
513 		damaged("bad sectors");
514 
515 	if(gens[0].gnum < 0)
516 		damaged("no data");
517 
518 	if(gens[1].gnum < 0) {		// Window 3 death
519 		if(diags)
520 			fprint(2, "%s: window 3 damage\n", argv0);
521 		g = &gens[1];
522 		g->gnum = gens[0].gnum + 1;
523 		g->low = 0;
524 		g->high = 0;
525 		g->count = 1;
526 		if(!readonly)
527 			newsum(g, 0);
528 	}
529 
530 	if(gens[0].gnum > gens[1].gnum)
531 		gswap();
532 
533 	for(i = 0; i < 2; i++) {
534 		g = &gens[i];
535 		n = g->count;
536 		if(n <= g->high - g->low)
537 			damaged("missing sectors");
538 		g->vect = emalloc9p(n * sizeof(Sect *));
539 		for(j = 0; j < n; j++) {
540 			s = emalloc9p(sizeof(Sect));
541 			s->sect = -1;
542 			g->vect[j] = s;
543 		}
544 	}
545 
546 	if(debug) {
547 		for(j = 0; j < 2; j++) {
548 			g = &gens[j];
549 			print("%d\t%d\t%d-%d\n", g->gnum, g->count, g->low, g->high);
550 		}
551 	}
552 
553 	for(i = 0; i < nsects; i++) {
554 		readdata(i, hdr, MAXHDR, 0);
555 		if(memcmp(hdr, magic, MAGSIZE) == 0) {
556 			n = MAGSIZE + getc3(&hdr[MAGSIZE], &u);
557 			n += getc3(&hdr[n], &v);
558 			g = nil;
559 			for(j = 0; j < 2; j++) {
560 				g = &gens[j];
561 				if(g->gnum == u)
562 					break;
563 			}
564 			if(j == 2)
565 				damaged("generation botch");
566 			s = g->vect[v - g->low];
567 			s->seq = v;
568 			s->toff = n;
569 			if(s->sect < 0)
570 				s->sect = i;
571 			else if(v == g->high && g->dup == nil) {
572 				s = emalloc9p(sizeof(Sect));
573 				s->sect = i;
574 				g->dup = s;
575 			}
576 			else
577 				damaged("dup block");
578 		}
579 	}
580 
581 	for(j = 0; j < 2; j++) {
582 		g = &gens[j];
583 		for(i = 0; i < g->count; i++) {
584 			s = g->vect[i];
585 			if(g->tail == nil)
586 				g->head = s;
587 			else
588 				g->tail->next = s;
589 			g->tail = s;
590 			s->next = nil;
591 		}
592 		free(g->vect);
593 	}
594 
595 	cursect = -1;
596 
597 	if(!readonly) {
598 		checkdata();
599 		checksweep();
600 	}
601 
602 	load0(1);
603 	load0(0);
604 
605 	if(ltime != 0) {
606 		u = now();
607 		if(u < ltime) {
608 			delta = ltime - u;
609 			if(diags)
610 				fprint(2, "%s: check clock: delta = %lud\n", argv0, delta);
611 		}
612 	}
613 
614 	limit = 80 * nsects * sectsize / 100;
615 	maxwrite = sectsize - MAXHDR - Nwrite - Nsum;
616 	if(maxwrite > WRSIZE)
617 		maxwrite = WRSIZE;
618 }
619 
620 static int
621 sputw(Sect *s, Jrec *j, int mtime, Extent *x, void *a)
622 {
623 	ulong t;
624 	int err, n, r;
625 	uchar buff[Nmax], type[1];
626 
627 	if(debug)
628 		fprint(2, "put %J\n", j);
629 
630 	if(mtime) {
631 		t = j->mtime;
632 		if(s->time == NOTIME) {
633 			put4(buff, t);
634 			if(!writedata(1, s->sect, buff, 4, s->toff)) {
635 				dupsect(s, 1);
636 				writedata(0, s->sect, buff, 4, s->toff);
637 			}
638 			s->time = t;
639 			j->mtime = 0;
640 		}
641 		else {
642 			j->mtime = t - s->time;
643 			s->time = t;
644 		}
645 	}
646 
647 	n = convJ2M(j, buff);
648 	if(n < 0)
649 		damaged("convJ2M");
650 
651 	// Window 4
652 	err = 2;
653 	for(;;) {
654 		err--;
655 		if(!err)
656 			dupsect(s, 1);	// Window 4 damage
657 		t = s->coff + 1;
658 		if(!writedata(err, s->sect, buff, n, t))
659 			continue;
660 
661 		t += n;
662 		r = n;
663 		if(x != nil) {
664 			x->sect = s->sect;
665 			x->addr = t;
666 			if(!writedata(err, s->sect, a, j->size, t))
667 				continue;
668 			t += j->size;
669 			r += j->size;
670 		}
671 
672 		type[0] = j->type;
673 		if(!writedata(err, s->sect, type, 1, s->coff))
674 			continue;
675 		r++;
676 		break;
677 	}
678 	s->coff = t;
679 	return r;
680 }
681 
682 static void
683 summarize(void)
684 {
685 	Gen *g;
686 	uchar *b;
687 	Entry *e;
688 	Extent *x;
689 	Jrec j, sum;
690 	Sect *s, *t;
691 	ulong off, ctime;
692 	int n, bytes, more, mtime, sumd;
693 
694 	g = &gens[eparity];
695 	s = g->head;
696 	g->head = s->next;
697 	if(g->head == nil)
698 		g->tail = nil;
699 	g = &gens[eparity^1];
700 	t = g->tail;
701 	b = sectbuff;
702 	x = nil;
703 
704 	if(debug)
705 		fprint(2, "summarize\n");
706 
707 	for(;;) {	// Window 1
708 		readdata(s->sect, b, sectsize, 0);
709 		off = s->toff;
710 		ctime = get4(&b[off]);
711 		off += 4;
712 		sumd = 0;
713 
714 		cursect = s->sect;
715 		while(b[off] != 0xFF) {
716 			n = convM2J(&j, &b[off]);
717 			if(n < 0)
718 				damaged("bad Jrec");
719 			if(debug)
720 				fprint(2, "read %J\n", &j);
721 			off += n;
722 			bytes = n;
723 			mtime = 0;
724 			switch(j.type) {
725 			case FT_create:
726 				ctime += j.mtime;
727 				mtime = 1;
728 			create:
729 				e = elookup(j.fnum);
730 				if(e == nil)
731 					continue;
732 				break;
733 			case FT_chmod:
734 				e = elookup(j.fnum);
735 				if(e == nil || j.mnum != e->mnum)
736 					continue;
737 				break;
738 			case FT_REMOVE:
739 				e = elookup(j.fnum);
740 				if(e == nil)
741 					continue;
742 				break;
743 			case FT_trunc:
744 				ctime += j.mtime;
745 				mtime = 1;
746 				e = elookup(j.tnum);
747 				if(e == nil) {
748 					if(debug)
749 						fprint(2, "-> create\n");
750 					j.type = FT_create;
751 					goto create;
752 				}
753 				break;
754 			case FT_AWRITE:
755 				goto write;
756 			case FT_WRITE:
757 				ctime += j.mtime;
758 				mtime = 1;
759 			write:
760 				e = elookup(j.fnum);
761 				if(e == nil) {
762 					off += j.size;
763 					continue;
764 				}
765 				x = esum(e, s->sect, off, &more);
766 				if(x == nil) {
767 					damaged("missing extent");
768 					off += j.size;
769 					continue;
770 				}
771 				if(more) {
772 					j.type = FT_AWRITE;
773 					mtime = 0;
774 				}
775 				bytes += j.size;
776 				break;
777 			case FT_SUMMARY:
778 			case FT_SUMBEG:
779 			case FT_SUMEND:
780 				continue;
781 			default:
782 				damaged("summarize botch");
783 			}
784 
785 			if(!sumd) {
786 				if(t->coff + Nsumbeg >= sectsize - 1)
787 					t = newsum(g, t->seq + 1);
788 				sum.type = FT_SUMBEG;
789 				sum.seq = s->seq;
790 				if(debug)
791 					fprint(2, "+ %J\n", &sum);
792 				t->sum += sputw(t, &sum, 0, nil, nil);
793 				if(t->sum >= Nsum)
794 					t->sum = Nsum;
795 				sumd = 1;
796 			}
797 
798 			if(t->coff + bytes >= sectsize - Nsum + t->sum - 1)
799 				t = newsum(g, t->seq + 1);
800 
801 			if(mtime)
802 				j.mtime = ctime;
803 
804 			switch(j.type) {
805 			case FT_AWRITE:
806 			case FT_WRITE:
807 				sputw(t, &j, mtime, x, &b[off]);
808 				off += j.size;
809 				break;
810 			default:
811 				sputw(t, &j, mtime, nil, nil);
812 			}
813 		}
814 		cursect = -1;
815 
816 		if(t->coff + Nsumbeg >= sectsize - 1)
817 			t = newsum(g, t->seq + 1);
818 		if(sumd)
819 			sum.type = FT_SUMEND;
820 		else {
821 			sum.type = FT_SUMMARY;
822 			sum.seq = s->seq;
823 		}
824 		if(debug)
825 			fprint(2, "+ %J\n", &sum);
826 		t->sum += sputw(t, &sum, 0, nil, nil);
827 		if(t->sum >= Nsum)
828 			t->sum = Nsum;
829 
830 		// Window 2
831 		freesect(s);
832 		g = &gens[eparity];
833 		s = g->head;
834 		if(s == nil) {
835 			// Window 3
836 			g->gnum += 2;
837 			newsum(g, 0);
838 			eparity ^= 1;
839 			return;
840 		}
841 
842 		if(nfree >= Nfree)
843 			return;
844 
845 		g->head = s->next;
846 		if(g->head == nil)
847 			g->tail = nil;
848 		g = &gens[eparity^1];
849 	}
850 }
851 
852 char *
853 need(int bytes)
854 {
855 	Gen *g;
856 	int sums;
857 	Sect *s, *t;
858 
859 	sums = 0;
860 	for(;;) {
861 		s = gens[eparity].tail;
862 		if(s->coff + bytes < sectsize - Nsum + s->sum - 1)
863 			return nil;
864 
865 		if(nfree >= Nfree)
866 			break;
867 
868 		if(sums == 2) {
869 			readonly = 1;
870 			return "generation full";
871 		}
872 
873 		summarize();
874 		sums++;
875 	}
876 
877 	g = &gens[eparity];
878 	t = getsect();
879 	t->seq = g->tail->seq + 1;
880 	newsect(g, t);
881 	return nil;
882 }
883 
884 void
885 putw(Jrec *j, int mtime, Extent *x, void *a)
886 {
887 	sputw(gens[eparity].tail, j, mtime, x, a);
888 }
889 
890 void
891 put(Jrec *j, int mtime)
892 {
893 	sputw(gens[eparity].tail, j, mtime, nil, nil);
894 }
895