xref: /inferno-os/os/port/devftl.c (revision 7ef44d652ae9e5e1f5b3465d73684e4a54de73c0)
1 /*
2  * basic Flash Translation Layer driver
3  *	see for instance the Intel technical paper
4  *	``Understanding the Flash Translation Layer (FTL) Specification''
5  *	Order number 297816-001 (online at www.intel.com)
6  *
7  * a public driver by David Hinds, dhinds@allegro.stanford.edu
8  * further helps with some details.
9  *
10  * this driver uses the common simplification of never storing
11  * the VBM on the medium (a waste of precious flash!) but
12  * rather building it on the fly as the block maps are read.
13  *
14  * Plan 9 driver (c) 1997 by C H Forsyth (forsyth@terzarima.net)
15  *	This driver may be used or adapted by anyone for any non-commercial purpose.
16  *
17  * adapted for Inferno 1998 by C H Forsyth, Vita Nuova Limited, York, England (forsyth@vitanuova.com)
18  *
19  * TO DO:
20  *	check error handling details for get/put flash
21  *	bad block handling
22  *	reserved space in formatted size
23  *	possibly block size as parameter
24  */
25 
26 #include "u.h"
27 #include "../port/lib.h"
28 #include "../port/error.h"
29 #include "mem.h"
30 #include "dat.h"
31 #include "fns.h"
32 
33 #include "kernel.h"
34 
35 typedef struct Ftl Ftl;
36 typedef struct Merase Merase;
37 typedef struct Terase Terase;
38 
39 enum {
40 	Eshift = 18,	/* 2^18=256k; log2(eraseunit) */
41 	Flashseg = 1<<Eshift,
42 	Bshift = 9,		/* 2^9=512 */
43 	Bsize = 1<<Bshift,
44 	BAMoffset = 0x100,
45 	Nolimit = ~0,
46 	USABLEPCT = 95,	/* release only this % to client */
47 
48 	FTLDEBUG = 0,
49 
50 	NPART = 4,
51 };
52 
53 /* erase unit header (defined by FTL specification) */
54 struct Merase {
55 	uchar	linktuple[5];
56 	uchar	orgtuple[10];
57 	uchar	nxfer;
58 	uchar	nerase[4];
59 	uchar	id[2];
60 	uchar	bshift;
61 	uchar	eshift;
62 	uchar	pstart[2];
63 	uchar	nunits[2];
64 	uchar	psize[4];
65 	uchar	vbmbase[4];
66 	uchar	nvbm[2];
67 	uchar	flags;
68 	uchar	code;
69 	uchar	serial[4];
70 	uchar	altoffset[4];
71 	uchar	bamoffset[4];
72 	uchar	rsv2[12];
73 };
74 #define	ERASEHDRLEN	64
75 
76 enum {
77 	/* special unit IDs */
78 	XferID = 0xffff,
79 	XferBusy = 0x7fff,
80 
81 	/* special BAM addresses */
82 	Bfree = 0xffffffff,
83 	Bwriting = 0xfffffffe,
84 	Bdeleted = 0,
85 
86 	/* block types */
87 	TypeShift = 7,
88 	BlockType = (1<<TypeShift)-1,
89 	ControlBlock = 0x30,
90 	DataBlock = 0x40,
91 	ReplacePage = 0x60,
92 	BadBlock = 0x70,
93 };
94 
95 #define	BTYPE(b)	((b) & BlockType)
96 #define	BADDR(b)	((b) & ~BlockType)
97 #define	BNO(va)	(((ulong)(va))>>Bshift)
98 #define	MKBAM(b,t)	(((b)<<Bshift)|(t))
99 
100 struct Terase {
101 	int	x;
102 	int	id;
103 	ulong	offset;
104 	ulong	bamoffset;
105 	ulong	nbam;
106 	ulong*	bam;
107 	ulong	bamx;
108 	ulong	nfree;
109 	ulong	nused;
110 	ulong	ndead;
111 	ulong	nbad;
112 	ulong	nerase;
113 };
114 
115 struct Ftl {
116 	QLock;
117 	Ref;
118 
119 	Chan*	flash;
120 	Chan*	flashctl;
121 	ulong	base;	/* base of flash region */
122 	ulong	size;	/* size of flash region */
123 	ulong	segsize;	/* size of flash segment (erase unit) */
124 	int	eshift;	/* log2(erase-unit-size) */
125 	int	bshift;	/* log2(bsize) */
126 	int	bsize;
127 	int	nunit;	/* number of segments (erase units) */
128 	Terase**	unit;
129 	int	lastx;	/* index in unit of last allocation */
130 	int	xfer;		/* index in unit of current transfer unit (-1 if none) */
131 	ulong	nfree;	/* total free space in blocks */
132 	ulong	nblock;	/* total space in blocks */
133 	ulong	rwlimit;	/* user-visible block limit (`formatted size') */
134 	ulong*	vbm;		/* virtual block map */
135 	ulong	fstart;	/* address of first block of data in a segment */
136 	int	trace;	/* (debugging) trace of read/write actions */
137 	int	detach;	/* free Ftl on last close */
138 
139 	/* scavenging variables */
140 	QLock	wantq;
141 	Rendez	wantr;
142 	Rendez	workr;
143 	int	needspace;
144 	int	hasproc;
145 	int	npart;		/* over and above ftldata */
146 	struct {
147 		ulong start, size;
148 		ulong rwlimit;
149 		char *name;	/* nil if slot unused */
150 	} part[NPART];
151 };
152 
153 enum {
154 	/* Ftl.detach */
155 	Detached = 1,	/* detach on close */
156 	Deferred	/* scavenger must free it */
157 };
158 
159 /* little endian */
160 #define	GET2(p)	(((p)[1]<<8)|(p)[0])
161 #define	GET4(p)	(((((((p)[3]<<8)|(p)[2])<<8)|(p)[1])<<8)|(p)[0])
162 #define	PUT2(p,v)	(((p)[1]=(v)>>8),((p)[0]=(v)))
163 #define	PUT4(p,v)	(((p)[3]=(v)>>24),((p)[2]=(v)>>16),((p)[1]=(v)>>8),((p)[0]=(v)))
164 
165 static	Lock	ftllock;
166 static	Ftl	*ftls;
167 static	int	ftlpct = USABLEPCT;
168 
169 static	ulong	allocblk(Ftl*);
170 static	int	erasedetect(Ftl *ftl, ulong base, ulong size, ushort *pstart, ushort *nunits);
171 static	void	eraseflash(Ftl*, ulong);
172 static	void	erasefree(Terase*);
173 static	void	eraseinit(Ftl*, ulong, int, int);
174 static	Terase*	eraseload(Ftl*, int, ulong);
175 static	void	ftlfree(Ftl*);
176 static	void	getflash(Ftl*, void*, ulong, long);
177 static	int	mapblk(Ftl*, ulong, Terase**, ulong*);
178 static	Ftl*	mkftl(char*, ulong, ulong, int, char*);
179 static	void	putbam(Ftl*, Terase*, int, ulong);
180 static	void	putflash(Ftl*, ulong, void*, long);
181 static	int	scavenge(Ftl*);
182 
183 enum {
184 	Qdir,
185 	Qctl,
186 	Qdata,
187 };
188 
189 #define DATAQID(q) ((q) >= Qdata && (q) <= Qdata + NPART)
190 
191 static void
192 ftlpartcmd(Ftl *ftl, char **fields, int nfields)
193 {
194 	ulong start, end;
195 	char *p;
196 	int n, newn;
197 
198 	/* name start [end] */
199 	if(nfields < 2 || nfields > 3)
200 		error(Ebadarg);
201 	if(ftl->npart >= NPART)
202 		error("table full");
203 	if(strcmp(fields[0], "ctl") == 0 || strcmp(fields[0], "data") == 0)
204 		error(Ebadarg);
205 	newn = -1;
206 	for(n = 0; n < NPART; n++){
207 		if(ftl->part[n].name == nil){
208 			if(newn < 0)
209 				newn = n;
210 			continue;
211 		}
212 		if(strcmp(fields[0], ftl->part[n].name + 3) == 0)
213 			error(Ebadarg);
214 	}
215 	start = strtoul(fields[1], 0, 0);
216 	if(nfields > 2)
217 		end = strtoul(fields[2], 0, 0);
218 	else
219 		end = ftl->rwlimit * Bsize;
220 	if(start >= end || start % Bsize || end % Bsize)
221 		error(Ebadarg);
222 	ftl->part[newn].start = start;
223 	ftl->part[newn].size = end - start;
224 	ftl->part[newn].rwlimit = end / Bsize;
225 	free(ftl->part[newn].name);
226 	p = malloc(strlen(fields[0]) + 3 + 1);
227 	strcpy(p, "ftl");
228 	strcat(p, fields[0]);
229 	ftl->part[newn].name = p;
230 	ftl->npart++;
231 }
232 
233 static void
234 ftldelpartcmd(Ftl *ftl, char **fields, int nfields)
235 {
236 	int n;
237 	// name
238 	if(nfields != 1)
239 		error(Ebadarg);
240 	for(n = 0; n < NPART; n++)
241 		if(strcmp(fields[0], ftl->part[n].name + 3) == 0){
242 			free(ftl->part[n].name);
243 			ftl->part[n].name = nil;
244 			ftl->npart--;
245 			return;
246 		}
247 	error(Ebadarg);
248 }
249 
250 static int
251 ftlgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
252 {
253 	int n;
254 	switch(i){
255 	case DEVDOTDOT:
256 		devdir(c, (Qid){Qdir, 0, QTDIR}, "#X", 0, eve, 0555, dp);
257 		break;
258 	case 0:
259 		devdir(c, (Qid){Qctl, 0, QTFILE}, "ftlctl", 0, eve, 0660, dp);
260 		break;
261 	case 1:
262 		devdir(c, (Qid){Qdata, 0, QTFILE}, "ftldata", ftls ? ftls->rwlimit * Bsize : 0, eve, 0660, dp);
263 		break;
264 	default:
265 		if(ftls == nil)
266 			return -1;
267 		i -= 2;
268 		if(i >= ftls->npart)
269 			return -1;
270 		for(n = 0; n < NPART; n++)
271 			if(ftls->part[n].name != nil){
272 				if(i == 0)
273 					break;
274 				i--;
275 			}
276 		if(i != 0){
277 			print("wierd\n");
278 			return -1;
279 		}
280 		devdir(c, (Qid){Qdata + 1 + n, 0, QTFILE}, ftls->part[n].name, ftls->part[n].size, eve, 0660, dp);
281 	}
282 	return 1;
283 }
284 
285 static Ftl *
286 ftlget(void)
287 {
288 	Ftl *ftl;
289 
290 	lock(&ftllock);
291 	ftl = ftls;
292 	if(ftl != nil)
293 		incref(ftl);
294 	unlock(&ftllock);
295 	return ftl;
296 }
297 
298 static void
299 ftlput(Ftl *ftl)
300 {
301 	if(ftl != nil){
302 		lock(&ftllock);
303 		if(decref(ftl) == 0 && ftl->detach == Detached){
304 			ftls = nil;
305 			if(ftl->hasproc){	/* no lock needed: can't change if ftl->ref==0 */
306 				ftl->detach = Deferred;
307 				wakeup(&ftl->workr);
308 			}else
309 				ftlfree(ftl);
310 		}
311 		unlock(&ftllock);
312 	}
313 }
314 
315 static Chan *
316 ftlattach(char *spec)
317 {
318 	return devattach('X', spec);
319 }
320 
321 static Walkqid*
322 ftlwalk(Chan *c, Chan *nc, char **name, int nname)
323 {
324 	Walkqid *wq;
325 
326 	wq = devwalk(c, nc, name, nname, 0, 0, ftlgen);
327 	if(wq != nil && wq->clone != nil && wq->clone != c)
328 		if(DATAQID(wq->clone->qid.path))
329 			wq->clone->aux = ftlget();
330 	return wq;
331 }
332 
333 static int
334 ftlstat(Chan *c, uchar *dp, int n)
335 {
336 	return devstat(c, dp, n, 0, 0, ftlgen);
337 }
338 
339 static Chan*
340 ftlopen(Chan *c, int omode)
341 {
342 	Ftl *ftl;
343 	omode = openmode(omode);
344 	if(DATAQID(c->qid.path)){
345 		ftl = ftls;
346 		if(ftl == nil)
347 			error(Enodev);
348 		if(strcmp(up->env->user, eve)!=0)
349 			error(Eperm);
350 	}
351 	else if(c->qid.path == Qctl){
352 		if(strcmp(up->env->user, eve)!=0)
353 			error(Eperm);
354 	}
355 	c = devopen(c, omode, 0, 0, ftlgen);
356 	if(DATAQID(c->qid.path)){
357 		c->aux = ftlget();
358 		if(c->aux == nil)
359 			error(Enodev);
360 	}
361 	return c;
362 }
363 
364 static void
365 ftlclose(Chan *c)
366 {
367 	if(DATAQID(c->qid.path) && (c->flag&COPEN) != 0)
368 		ftlput((Ftl*)c->aux);
369 }
370 
371 static long
372 ftlread(Chan *c, void *buf, long n, vlong offset)
373 {
374 	Ftl *ftl;
375 	Terase *e;
376 	int nb;
377 	uchar *a;
378 	ulong pb;
379 	if(c->qid.type & QTDIR)
380 		return devdirread(c, buf, n, 0, 0, ftlgen);
381 
382 	if(DATAQID(c->qid.path)){
383 		ulong rwlimit;
384 
385 		if(n <= 0 || n%Bsize || offset%Bsize)
386 			error(Eio);
387 		ftl = c->aux;
388 		if(c->qid.path > Qdata){
389 			int p = c->qid.path - Qdata - 1;
390 			offset += ftl->part[p].start;
391 			rwlimit = ftl->part[p].rwlimit;
392 		}
393 		else
394 			rwlimit = ftl->rwlimit;
395 		nb = n/Bsize;
396 		offset /= Bsize;
397 		if(offset >= rwlimit)
398 			return 0;
399 		if(offset+nb > rwlimit)
400 			nb = rwlimit - offset;
401 		a = buf;
402 		for(n = 0; n < nb; n++){
403 			qlock(ftl);
404 			if(waserror()){
405 				qunlock(ftl);
406 				nexterror();
407 			}
408 			if(mapblk(ftl, offset+n, &e, &pb))
409 				getflash(ftl, a, e->offset + pb*Bsize, Bsize);
410 			else
411 				memset(a, 0, Bsize);
412 			poperror();
413 			qunlock(ftl);
414 			a += Bsize;
415 		}
416 		return a-(uchar*)buf;
417 	}
418 
419 	if(c->qid.path != Qctl)
420 		error(Egreg);
421 
422 	return 0;
423 }
424 
425 static long
426 ftlwrite(Chan *c, void *buf, long n, vlong offset)
427 {
428 	char cmd[64], *fields[6];
429 	int ns, i, k, nb;
430 	uchar *a;
431 	Terase *e, *oe;
432 	ulong ob, v, base, size, segsize;
433 	Ftl *ftl;
434 
435 	if(n <= 0)
436 		return 0;
437 
438 	if(DATAQID(c->qid.path)){
439 		ulong rwlimit;
440 		ftl = c->aux;
441 		if(n <= 0 || n%Bsize || offset%Bsize)
442 			error(Eio);
443 		if(c->qid.path > Qdata){
444 			int p = c->qid.path - Qdata - 1;
445 			offset += ftl->part[p].start;
446 			rwlimit = ftl->part[p].rwlimit;
447 		}
448 		else
449 			rwlimit = ftl->rwlimit;
450 		nb = n/Bsize;
451 		offset /= Bsize;
452 		if(offset >= rwlimit)
453 			return 0;
454 		if(offset+nb > rwlimit)
455 			nb = rwlimit - offset;
456 		a = buf;
457 		for(n = 0; n < nb; n++){
458 			ns = 0;
459 			while((v = allocblk(ftl)) == 0)
460 				if(!scavenge(ftl) || ++ns > 3){
461 					static int stop;
462 
463 					if(stop < 3){
464 						stop++;
465 						print("ftl: flash memory full\n");
466 					}
467 					error("flash memory full");
468 				}
469 			qlock(ftl);
470 			if(waserror()){
471 				qunlock(ftl);
472 				nexterror();
473 			}
474 			if(!mapblk(ftl, offset+n, &oe, &ob))
475 				oe = nil;
476 			e = ftl->unit[v>>16];
477 			v &= 0xffff;
478 			putflash(ftl, e->offset + v*Bsize, a, Bsize);
479 			putbam(ftl, e, v, MKBAM(offset+n, DataBlock));
480 			/* both old and new block references exist in this window (can't be closed?) */
481 			ftl->vbm[offset+n] = (e->x<<16) | v;
482 			if(oe != nil){
483 				putbam(ftl, oe, ob, Bdeleted);
484 				oe->ndead++;
485 			}
486 			poperror();
487 			qunlock(ftl);
488 			a += Bsize;
489 		}
490 		return a-(uchar*)buf;
491 	}
492 	else if(c->qid.path == Qctl){
493 		if(n > sizeof(cmd)-1)
494 			n = sizeof(cmd)-1;
495 		memmove(cmd, buf, n);
496 		cmd[n] = 0;
497 		i = getfields(cmd, fields, 6, 1, " \t\n");
498 		if(i <= 0)
499 			error(Ebadarg);
500 		if(i >= 2 && (strcmp(fields[0], "init") == 0 || strcmp(fields[0], "format") == 0)){
501 			if(i > 2)
502 				base = strtoul(fields[2], nil, 0);
503 			else
504 				base = 0;	/* TO DO: hunt for signature */
505 			if(i > 3)
506 				size = strtoul(fields[3], nil, 0);
507 			else
508 				size = Nolimit;
509 			if(i > 4)
510 				segsize = strtoul(fields[4], nil, 0);
511 			else
512 				segsize = 0;
513 			/* segsize must be power of two and size and base must be multiples of it
514 			 * if segsize is zero, then use whatever the device returns
515 			 */
516 			if(segsize != 0 && (segsize > size
517 				|| segsize&(segsize-1)
518 				|| (base != Nolimit && base&(segsize-1))
519 				|| size == 0
520 				|| (size != Nolimit && size&(segsize-1))))
521 				error(Ebadarg);
522 			if(segsize == 0)
523 				k = 0;
524 			else {
525 				for(k=0; k<32 && (1<<k) != segsize; k++)
526 					;
527 			}
528 			if(ftls != nil)
529 				error(Einuse);
530 			ftls = mkftl(fields[1], base, size, k, fields[0]);
531 		}else if(strcmp(fields[0], "scavenge") == 0){
532 			if(ftls != nil)
533 				print("scavenge %d\n", scavenge(ftls));
534 		}else if(strcmp(fields[0], "trace") == 0){
535 			if(ftls != nil)
536 				ftls->trace = i>1? strtol(fields[1], nil, 0): 1;
537 		}else if(strcmp(fields[0], "detach") == 0){
538 			if((ftl = ftlget()) != nil){
539 				if(ftl->ref > 1){
540 					ftlput(ftl);
541 					error(Einuse);
542 				}
543 				ftl->detach = Detached;
544 				ftlput(ftl);
545 			}else
546 				error(Enodev);
547 		}else if(strcmp(fields[0], "part")==0){
548 			if((ftl = ftlget()) != nil){
549 				if(ftl->ref > 1){
550 					ftlput(ftl);
551 					error(Einuse);
552 				}
553 				if(waserror()){
554 					ftlput(ftl);
555 					nexterror();
556 				}
557 				ftlpartcmd(ftl, fields + 1, i - 1);
558 				poperror();
559 				ftlput(ftl);
560 			}else
561 				error(Enodev);
562 		}else if(strcmp(fields[0], "delpart")==0){
563 			if((ftl = ftlget()) != nil){
564 				if(ftl->ref > 1){
565 					ftlput(ftl);
566 					error(Einuse);
567 				}
568 				if(waserror()){
569 					ftlput(ftl);
570 					nexterror();
571 				}
572 				ftldelpartcmd(ftls, fields + 1, i - 1);
573 				poperror();
574 				ftlput(ftl);
575 			}else
576 				error(Enodev);
577 		}else if(i >= 2 && strcmp(fields[0], "pct")==0){
578 			v = strtoul(fields[1], nil, 0);
579 			if(v >= 50)
580 				ftlpct = v;
581 		}else
582 			error(Ebadarg);
583 		return n;
584 	}
585 	error(Egreg);
586 	return 0;		/* not reached */
587 }
588 
589 static Chan *
590 ftlkopen(char *name, char *suffix, int mode)
591 {
592 	Chan *c;
593 	char *fn;
594 	int fd;
595 
596 	if(suffix != nil && *suffix){
597 		fn = smalloc(strlen(name)+strlen(suffix)+1);
598 		if(fn == nil)
599 			return nil;
600 		strcpy(fn, name);
601 		strcat(fn, suffix);
602 		fd = kopen(fn, mode);
603 		free(fn);
604 	}else
605 		fd = kopen(name, mode);
606 	if(fd < 0)
607 		return nil;
608 	c = fdtochan(up->env->fgrp, fd, mode, 0, 1);
609 	kclose(fd);
610 	return c;
611 }
612 
613 static ulong
614 ftlfsize(Chan *c)
615 {
616 	uchar dbuf[STATFIXLEN+32*4];
617 	Dir d;
618 	int n;
619 
620 	n = devtab[c->type]->stat(c, dbuf, sizeof(dbuf));
621 	if(convM2D(dbuf, n, &d, nil) == 0)
622 		return 0;
623 	return d.length;
624 }
625 
626 static Ftl *
627 mkftl(char *fname, ulong base, ulong size, int eshift, char *op)
628 {
629 	int i, j, nov, segblocks, n, badseg, old, valid;
630 	ulong limit;
631 	Terase *e;
632 	Ftl *ftl;
633 	char buf[64], *fields[8];
634 	ulong ea;
635 	Chan *statfd;
636 
637 	ftl = malloc(sizeof(*ftl));
638 	if(ftl == nil)
639 		error(Enomem);
640 	e = nil;
641 	if(waserror()){
642 		ftlfree(ftl);
643 		if(e)
644 			free(e);
645 		nexterror();
646 	}
647 	ftl->lastx = 0;
648 	ftl->trace = 0;
649 	ftl->flash = ftlkopen(fname, "", ORDWR);
650 	if(ftl->flash == nil)
651 		error(up->env->errstr);
652 	ftl->flashctl = ftlkopen(fname, "ctl", ORDWR);
653 	if(ftl->flashctl == nil)
654 		error(up->env->errstr);
655 	old = 1;
656 	statfd = ftlkopen(fname, "stat", OREAD);	/* old scheme */
657 	if(statfd == nil){
658 		statfd = ftl->flashctl;	/* new just uses ctl */
659 		old = 0;
660 	}
661 	statfd->offset = 0;
662 	if((n = kchanio(statfd, buf, sizeof(buf), OREAD)) <= 0){
663 		print("ftl: read stat/ctl failed: %s\n", up->env->errstr);
664 		error(up->env->errstr);
665 	}
666 	if(n >= sizeof(buf))
667 		n = sizeof(buf)-1;
668 	buf[n] = 0;
669 	if(statfd != ftl->flashctl)
670 		cclose(statfd);
671 
672 	n = getfields(buf, fields, nelem(fields), 1, " \t\n");
673 	ea = 0;
674 	if(old){
675 		if(n >= 4)
676 			if((ea = strtoul(fields[3], nil, 16)) < 8*1024)
677 				ea = 0;	/* assume the format is wrong */
678 	}else{
679 		if(n >= 7)
680 			if((ea = strtoul(fields[6], nil, 0)) < 2*1024)
681 				ea = 0;	/* assume the format is wrong */
682 	}
683 	if(ea != 0){
684 		for(i=0; i < 32 && (1<<i) != ea; i++)
685 			;
686 		if(eshift && i != eshift)
687 			print("ftl: overriding erasesize %d with %d\n", 1 << eshift, 1 << i);
688 		eshift = i;
689 		if(FTLDEBUG)
690 			print("ftl: e=%lud eshift=%d\n", ea, i);
691 	}
692 	if(eshift == 0)
693 		error("no erasesize");
694 
695 	limit = ftlfsize(ftl->flash);
696 	if(limit == 0)
697 		error("no space for flash translation");
698 	ftl->segsize = 1 << eshift;
699 	if(base == Nolimit){
700 		ushort pstart, nunits;
701 		erasedetect(ftl, 0, limit, &pstart, &nunits);
702 		base = pstart * ftl->segsize;
703 		size = nunits * ftl->segsize;
704 		print("ftl: partition in %s at 0x%.8lux, length 0x%.8lux\n", fname, base, size);
705 	} else if(size == Nolimit)
706 		size = limit-base;
707 	if(base >= limit || size > limit || base+size > limit || eshift < 8 || (1<<eshift) > size){
708 		print("ftl: bad: base=%#lux limit=%#lux size=%ld eshift=%d\n", base, limit, size, eshift);
709 		error("bad flash space parameters");
710 	}
711 	if(FTLDEBUG)
712 		print("%s flash %s #%lux:#%lux limit #%lux\n", op, fname, base, size, limit);
713 	ftl->base = base;
714 	ftl->size = size;
715 	ftl->bshift = Bshift;
716 	ftl->bsize = Bsize;
717 	ftl->eshift = eshift;
718 	ftl->nunit = size>>eshift;
719 	nov = ((ftl->segsize/Bsize)*4 + BAMoffset + Bsize - 1)/Bsize;	/* number of overhead blocks per segment (header, and BAM itself) */
720 	ftl->fstart = nov;
721 	segblocks = ftl->segsize/Bsize - nov;
722 	ftl->nblock = ftl->nunit*segblocks;
723 	if(ftl->nblock > 0x10000){
724 		/* oops - too many blocks */
725 		ftl->nunit = 0x10000 / segblocks;
726 		ftl->nblock = ftl->nunit * segblocks;
727 		size = ftl->nunit * ftl->segsize;
728 		ftl->size = size;
729 		print("ftl: too many blocks - limiting to %ld bytes %d units %lud blocks\n",
730 		    size, ftl->nunit, ftl->nblock);
731 	}
732 	ftl->vbm = malloc(ftl->nblock*sizeof(*ftl->vbm));
733 	ftl->unit = malloc(ftl->nunit*sizeof(*ftl->unit));
734 	if(ftl->vbm == nil || ftl->unit == nil)
735 		error(Enomem);
736 	if(strcmp(op, "format") == 0){
737 		for(i=0; i<ftl->nunit-1; i++)
738 			eraseinit(ftl, i*ftl->segsize, i, 1);
739 		eraseinit(ftl, i*ftl->segsize, XferID, 1);
740 	}
741 	badseg = -1;
742 	ftl->xfer = -1;
743 	valid = 0;
744 	for(i=0; i<ftl->nunit; i++){
745 		e = eraseload(ftl, i, i*ftl->segsize);
746 		if(e == nil){
747 			print("ftl: logical segment %d: bad format\n", i);
748 			if(badseg == -1)
749 				badseg = i;
750 			else
751 				badseg = -2;
752 			continue;
753 		}
754 		if(e->id == XferBusy){
755 			e->nerase++;
756 			eraseinit(ftl, e->offset, XferID, e->nerase);
757 			e->id = XferID;
758 		}
759 		for(j=0; j<ftl->nunit; j++)
760 			if(ftl->unit[j] != nil && ftl->unit[j]->id == e->id){
761 				print("ftl: duplicate erase unit #%x\n", e->id);
762 				erasefree(e);
763 				e = nil;
764 				break;
765 			}
766 		if(e){
767 			valid++;
768 			ftl->unit[e->x] = e;
769 			if(e->id == XferID)
770 				ftl->xfer = e->x;
771 			if(FTLDEBUG)
772 				print("ftl: unit %d:#%x used %lud free %lud dead %lud bad %lud nerase %lud\n",
773 					e->x, e->id, e->nused, e->nfree, e->ndead, e->nbad, e->nerase);
774 			e = nil;
775 			USED(e);
776 		}
777 	}
778 	if(badseg >= 0){
779 		if(ftl->xfer >= 0)
780 			error("invalid ftl format");
781 		i = badseg;
782 		eraseinit(ftl, i*ftl->segsize, XferID, 1);
783 		e = eraseload(ftl, i, i*ftl->segsize);
784 		if(e == nil)
785 			error("bad ftl format");
786 		ftl->unit[e->x] = e;
787 		ftl->xfer = e->x;
788 		print("ftl: recovered transfer unit %d\n", e->x);
789 		valid++;
790 		e = nil;
791 		USED(e);
792 	}
793 	if(ftl->xfer < 0 && valid <= 0 || ftl->xfer >= 0 && valid <= 1)
794 		error("no valid flash data units");
795 	if(ftl->xfer < 0)
796 		error("ftl: no transfer unit: device is WORM\n");
797 	else
798 		ftl->nblock -= segblocks;	/* discount transfer segment */
799 	if(ftl->nblock >= 1000)
800 		ftl->rwlimit = ftl->nblock-100;	/* TO DO: variable reserve */
801 	else
802 		ftl->rwlimit = ftl->nblock*ftlpct/100;
803 	poperror();
804 	return ftl;
805 }
806 
807 static void
808 ftlfree(Ftl *ftl)
809 {
810 	int i, n;
811 
812 	if(ftl != nil){
813 		if(ftl->flashctl != nil)
814 			cclose(ftl->flashctl);
815 		if(ftl->flash != nil)
816 			cclose(ftl->flash);
817 
818 		if(ftl->unit){
819 			for(i = 0; i < ftl->nunit; i++)
820 				erasefree(ftl->unit[i]);
821 			free(ftl->unit);
822 		}
823 		free(ftl->vbm);
824 		for(n = 0; n < NPART; n++)
825 			free(ftl->part[n].name);
826 		free(ftl);
827 	}
828 }
829 
830 /*
831  * this simple greedy algorithm weighted by nerase does seem to lead
832  * to even wear of erase units (cf. the eNVy file system)
833  */
834 static Terase *
835 bestcopy(Ftl *ftl)
836 {
837 	Terase *e, *be;
838 	int i;
839 
840 	be = nil;
841 	for(i=0; i<ftl->nunit; i++)
842 		if((e = ftl->unit[i]) != nil && e->id != XferID && e->id != XferBusy && e->ndead+e->nbad &&
843 		    (be == nil || e->nerase <= be->nerase && e->ndead >= be->ndead))
844 			be = e;
845 	return be;
846 }
847 
848 static int
849 copyunit(Ftl *ftl, Terase *from, Terase *to)
850 {
851 	int i, nb;
852 	uchar id[2];
853 	ulong *bam;
854 	uchar *buf;
855 	ulong v, bno;
856 
857 	if(FTLDEBUG || ftl->trace)
858 		print("ftl: copying %d (#%lux) to #%lux\n", from->id, from->offset, to->offset);
859 	to->nbam = 0;
860 	free(to->bam);
861 	to->bam = nil;
862 	bam = nil;
863 	buf = malloc(Bsize);
864 	if(buf == nil)
865 		return 0;
866 	if(waserror()){
867 		free(buf);
868 		free(bam);
869 		return 0;
870 	}
871 	PUT2(id, XferBusy);
872 	putflash(ftl, to->offset+offsetof(Merase,id[0]), id, 2);
873 	/* make new BAM */
874 	nb = from->nbam*sizeof(*to->bam);
875 	bam = malloc(nb);
876 	if(bam == nil)
877 		error(Enomem);
878 	memmove(bam, from->bam, nb);
879 	to->nused = 0;
880 	to->nbad = 0;
881 	to->nfree = 0;
882 	to->ndead = 0;
883 	for(i = 0; i < from->nbam; i++)
884 		switch(bam[i]){
885 		case Bwriting:
886 		case Bdeleted:
887 		case Bfree:
888 			bam[i] = Bfree;
889 			to->nfree++;
890 			break;
891 		default:
892 			switch(bam[i]&BlockType){
893 			default:
894 			case BadBlock:	/* it isn't necessarily bad in this unit */
895 				to->nfree++;
896 				bam[i] = Bfree;
897 				break;
898 			case DataBlock:
899 			case ReplacePage:
900 				v = bam[i];
901 				bno = BNO(v & ~BlockType);
902 				if(i < ftl->fstart || bno >= ftl->nblock){
903 					print("ftl: unit %d:#%x bad bam[%d]=#%lux\n", from->x, from->id, i, v);
904 					to->nfree++;
905 					bam[i] = Bfree;
906 					break;
907 				}
908 				getflash(ftl, buf, from->offset+i*Bsize, Bsize);
909 				putflash(ftl, to->offset+i*Bsize, buf, Bsize);
910 				to->nused++;
911 				break;
912 			case ControlBlock:
913 				to->nused++;
914 				break;
915 			}
916 		}
917 	for(i=0; i<from->nbam; i++){
918 		uchar *p = (uchar*)&bam[i];
919 		v = bam[i];
920 		if(v != Bfree && ftl->trace > 1)
921 			print("to[%d]=#%lux\n", i, v);
922 		PUT4(p, v);
923 	}
924 	putflash(ftl, to->bamoffset, bam, nb);	/* BUG: PUT4 */
925 	for(i=0; i<from->nbam; i++){
926 		uchar *p = (uchar*)&bam[i];
927 		v = bam[i];
928 		PUT4(p, v);
929 	}
930 	to->id = from->id;
931 	PUT2(id, to->id);
932 	putflash(ftl, to->offset+offsetof(Merase,id[0]), id, 2);
933 	to->nbam = from->nbam;
934 	to->bam = bam;
935 	ftl->nfree += to->nfree - from->nfree;
936 	poperror();
937 	free(buf);
938 	return 1;
939 }
940 
941 static int
942 mustscavenge(void *a)
943 {
944 	return ((Ftl*)a)->needspace || ((Ftl*)a)->detach == Deferred;
945 }
946 
947 static int
948 donescavenge(void *a)
949 {
950 	return ((Ftl*)a)->needspace == 0;
951 }
952 
953 static void
954 scavengeproc(void *arg)
955 {
956 	Ftl *ftl;
957 	int i;
958 	Terase *e, *ne;
959 
960 	ftl = arg;
961 	if(waserror()){
962 		print("ftl: kproc noted\n");
963 		pexit("ftldeath", 0);
964 	}
965 	for(;;){
966 		sleep(&ftl->workr, mustscavenge, ftl);
967 		if(ftl->detach == Deferred){
968 			ftlfree(ftl);
969 			pexit("", 0);
970 		}
971 		if(FTLDEBUG || ftl->trace)
972 			print("ftl: scavenge %ld\n", ftl->nfree);
973 		qlock(ftl);
974 		if(waserror()){
975 			qunlock(ftl);
976 			nexterror();
977 		}
978 		e = bestcopy(ftl);
979 		if(e == nil || ftl->xfer < 0 || (ne = ftl->unit[ftl->xfer]) == nil || ne->id != XferID || e == ne)
980 			goto Fail;
981 		if(copyunit(ftl, e, ne)){
982 			i = ne->x; ne->x = e->x; e->x = i;
983 			ftl->unit[ne->x] = ne;
984 			ftl->unit[e->x] = e;
985 			ftl->xfer = e->x;
986 			e->id = XferID;
987 			e->nbam = 0;
988 			free(e->bam);
989 			e->bam = nil;
990 			e->bamx = 0;
991 			e->nerase++;
992 			eraseinit(ftl, e->offset, XferID, e->nerase);
993 		}
994 	Fail:
995 		if(FTLDEBUG || ftl->trace)
996 			print("ftl: end scavenge %ld\n", ftl->nfree);
997 		ftl->needspace = 0;
998 		wakeup(&ftl->wantr);
999 		poperror();
1000 		qunlock(ftl);
1001 	}
1002 }
1003 
1004 static int
1005 scavenge(Ftl *ftl)
1006 {
1007 	if(ftl->xfer < 0 || bestcopy(ftl) == nil)
1008 		return 0;	/* you worm! */
1009 
1010 	qlock(ftl);
1011 	if(waserror()){
1012 		qunlock(ftl);
1013 		return 0;
1014 	}
1015 	if(!ftl->hasproc){
1016 		ftl->hasproc = 1;
1017 		kproc("ftl.scavenge", scavengeproc, ftl, 0);
1018 	}
1019 	ftl->needspace = 1;
1020 	wakeup(&ftl->workr);
1021 	poperror();
1022 	qunlock(ftl);
1023 
1024 	qlock(&ftl->wantq);
1025 	if(waserror()){
1026 		qunlock(&ftl->wantq);
1027 		nexterror();
1028 	}
1029 	while(ftl->needspace)
1030 		sleep(&ftl->wantr, donescavenge, ftl);
1031 	poperror();
1032 	qunlock(&ftl->wantq);
1033 
1034 	return ftl->nfree;
1035 }
1036 
1037 static void
1038 putbam(Ftl *ftl, Terase *e, int n, ulong entry)
1039 {
1040 	uchar b[4];
1041 
1042 	e->bam[n] = entry;
1043 	PUT4(b, entry);
1044 	putflash(ftl, e->bamoffset + n*4, b, 4);
1045 }
1046 
1047 static ulong
1048 allocblk(Ftl *ftl)
1049 {
1050 	Terase *e;
1051 	int i, j;
1052 
1053 	qlock(ftl);
1054 	i = ftl->lastx;
1055 	do{
1056 		e = ftl->unit[i];
1057 		if(e != nil && e->id != XferID && e->nfree){
1058 			ftl->lastx = i;
1059 			for(j=e->bamx; j<e->nbam; j++)
1060 				if(e->bam[j] == Bfree){
1061 					putbam(ftl, e, j, Bwriting);
1062 					ftl->nfree--;
1063 					e->nfree--;
1064 					e->bamx = j+1;
1065 					qunlock(ftl);
1066 					return (e->x<<16) | j;
1067 				}
1068 			e->nfree = 0;
1069 			qunlock(ftl);
1070 			print("ftl: unit %d:#%x nfree %ld but not free in BAM\n", e->x, e->id, e->nfree);
1071 			qlock(ftl);
1072 		}
1073 		if(++i >= ftl->nunit)
1074 			i = 0;
1075 	}while(i != ftl->lastx);
1076 	qunlock(ftl);
1077 	return 0;
1078 }
1079 
1080 static int
1081 mapblk(Ftl *ftl, ulong bno, Terase **ep, ulong *bp)
1082 {
1083 	ulong v;
1084 	int x;
1085 
1086 	if(bno < ftl->nblock){
1087 		v = ftl->vbm[bno];
1088 		if(v == 0 || v == ~0)
1089 			return 0;
1090 		x = v>>16;
1091 		if(x >= ftl->nunit || x == ftl->xfer || ftl->unit[x] == nil){
1092 			print("ftl: corrupt format: bad block mapping %lud -> unit #%x\n", bno, x);
1093 			return 0;
1094 		}
1095 		*ep = ftl->unit[x];
1096 		*bp = v & 0xFFFF;
1097 		return 1;
1098 	}
1099 	return 0;
1100 }
1101 
1102 static void
1103 eraseinit(Ftl *ftl, ulong offset, int id, int nerase)
1104 {
1105 	union {
1106 		Merase;
1107 		uchar	block[ERASEHDRLEN];
1108 	} *m;
1109 	uchar *bam, *p;
1110 	int i, nov;
1111 
1112 	nov = ((ftl->segsize/Bsize)*4 + BAMoffset + Bsize - 1)/Bsize;	/* number of overhead blocks (header, and BAM itself) */
1113 	if(nov*Bsize >= ftl->segsize)
1114 		error("ftl -- too small for files");
1115 	eraseflash(ftl, offset);
1116 	m = malloc(sizeof(*m));
1117 	if(m == nil)
1118 		error(Enomem);
1119 	memset(m, 0xFF, sizeof(*m));
1120 	m->linktuple[0] = 0x13;
1121 	m->linktuple[1] = 0x3;
1122 	memmove(m->linktuple+2, "CIS", 3);
1123 	m->orgtuple[0] = 0x46;
1124 	m->orgtuple[1] = 0x57;
1125 	m->orgtuple[2] = 0x00;
1126 	memmove(m->orgtuple+3, "FTL100", 7);
1127 	m->nxfer = 1;
1128 	PUT4(m->nerase, nerase);
1129 	PUT2(m->id, id);
1130 	m->bshift = ftl->bshift;
1131 	m->eshift = ftl->eshift;
1132 	PUT2(m->pstart, ftl->base >> ftl->eshift);
1133 	PUT2(m->nunits, ftl->nunit);
1134 	PUT4(m->psize, ftl->size - nov*Bsize*ftl->nunit);
1135 	PUT4(m->vbmbase, 0xffffffff);	/* we always calculate the VBM */
1136 	PUT2(m->nvbm, 0);
1137 	m->flags = 0;
1138 	m->code = 0xFF;
1139 	memmove(m->serial, "Inf1", 4);
1140 	PUT4(m->altoffset, 0);
1141 	PUT4(m->bamoffset, BAMoffset);
1142 	putflash(ftl, offset, m, ERASEHDRLEN);
1143 	free(m);
1144 	if(id == XferID)
1145 		return;
1146 	nov *= 4;	/* now bytes of BAM */
1147 	bam = malloc(nov);
1148 	if(bam == nil)
1149 		error(Enomem);
1150 	for(i=0; i<nov; i += 4){
1151 		p = bam+i;
1152 		PUT4(p, ControlBlock);	/* reserve them */
1153 	}
1154 	putflash(ftl, offset+BAMoffset, bam, nov);
1155 	free(bam);
1156 }
1157 
1158 static int
1159 erasedetect(Ftl *ftl, ulong base, ulong size, ushort *pstart, ushort *nunits)
1160 {
1161 	ulong o;
1162 	int rv;
1163 
1164 	union {
1165 		Merase;
1166 		uchar	block[ERASEHDRLEN];
1167 	} *m;
1168 	m = malloc(sizeof(*m));
1169 	if(m == nil)
1170 		error(Enomem);
1171 	rv  = 0;
1172 	for(o = base; o < base + size; o += ftl->segsize){
1173 		if(waserror())
1174 			continue;
1175 		getflash(ftl, m, o, ERASEHDRLEN);
1176 		poperror();
1177 		if(memcmp(m->orgtuple + 3, "FTL100", 7) == 0
1178 		    && memcmp(m->serial, "Inf1", 4) == 0){
1179 			*pstart = GET2(m->pstart);
1180 			*nunits = GET2(m->nunits);
1181 			rv = 1;
1182 			break;
1183 		}
1184 	}
1185 	free(m);
1186 	return rv;
1187 }
1188 
1189 static Terase *
1190 eraseload(Ftl *ftl, int x, ulong offset)
1191 {
1192 	union {
1193 		Merase;
1194 		uchar	block[ERASEHDRLEN];
1195 	} *m;
1196 	Terase *e;
1197 	uchar *p;
1198 	int i, nbam;
1199 	ulong bno, v;
1200 
1201 	m = malloc(sizeof(*m));
1202 	if(m == nil)
1203 		error(Enomem);
1204 	if(waserror()){
1205 		free(m);
1206 		return nil;
1207 	}
1208 	getflash(ftl, m, offset, ERASEHDRLEN);
1209 	poperror();
1210 	if(memcmp(m->orgtuple+3, "FTL100", 7) != 0 ||
1211 	   memcmp(m->serial, "Inf1", 4) != 0){
1212 		free(m);
1213 print("%8.8lux: bad sig\n", offset);
1214 		return nil;
1215 	}
1216 	e = malloc(sizeof(*e));
1217 	if(e == nil){
1218 		free(m);
1219 		error(Enomem);
1220 	}
1221 	e->x = x;
1222 	e->id = GET2(m->id);
1223 	e->offset = offset;
1224 	e->bamoffset = GET4(m->bamoffset);
1225 	e->nerase = GET4(m->nerase);
1226 	free(m);
1227 	m = nil;
1228 	USED(m);
1229 	if(e->bamoffset != BAMoffset){
1230 		free(e);
1231 print("%8.8lux: bad bamoffset %8.8lux\n", offset, e->bamoffset);
1232 		return nil;
1233 	}
1234 	e->bamoffset += offset;
1235 	if(e->id == XferID || e->id == XferBusy){
1236 		e->bam = nil;
1237 		e->nbam = 0;
1238 		return e;
1239 	}
1240 	nbam = ftl->segsize/Bsize;
1241 	e->bam = malloc(nbam*sizeof(*e->bam));
1242 	e->nbam = nbam;
1243 	if(waserror()){
1244 		free(e);
1245 		nexterror();
1246 	}
1247 	getflash(ftl, e->bam, e->bamoffset, nbam*4);
1248 	poperror();
1249 	/* scan BAM to build VBM */
1250 	e->bamx = 0;
1251 	for(i=0; i<nbam; i++){
1252 		p = (uchar*)&e->bam[i];
1253 		e->bam[i] = v = GET4(p);
1254 		if(v == Bwriting || v == Bdeleted)
1255 			e->ndead++;
1256 		else if(v == Bfree){
1257 			if(e->bamx == 0)
1258 				e->bamx = i;
1259 			e->nfree++;
1260 			ftl->nfree++;
1261 		}else{
1262 			switch(v & BlockType){
1263 			case ControlBlock:
1264 				break;
1265 			case DataBlock:
1266 				/* add to VBM */
1267 				if(v & (1<<31))
1268 					break;	/* negative => VBM page, ignored */
1269 				bno = BNO(v & ~BlockType);
1270 				if(i < ftl->fstart || bno >= ftl->nblock){
1271 					print("ftl: unit %d:#%x bad bam[%d]=#%lux\n", e->x, e->id, i, v);
1272 					e->nbad++;
1273 					break;
1274 				}
1275 				ftl->vbm[bno] = (e->x<<16) | i;
1276 				e->nused++;
1277 				break;
1278 			case ReplacePage:
1279 				/* replacement VBM page; ignored */
1280 				break;
1281 			default:
1282 				print("ftl: unit %d:#%x bad bam[%d]=%lux\n", e->x, e->id, i, v);
1283 			case BadBlock:
1284 				e->nbad++;
1285 				break;
1286 			}
1287 		}
1288 	}
1289 	return e;
1290 }
1291 
1292 static void
1293 erasefree(Terase *e)
1294 {
1295 	if(e){
1296 		free(e->bam);
1297 		free(e);
1298 	}
1299 }
1300 
1301 static void
1302 eraseflash(Ftl *ftl, ulong offset)
1303 {
1304 	char cmd[40];
1305 
1306 	offset += ftl->base;
1307 	if(FTLDEBUG || ftl->trace)
1308 		print("ftl: erase seg @#%lux\n", offset);
1309 	snprint(cmd, sizeof(cmd), "erase 0x%8.8lux", offset);
1310 	if(kchanio(ftl->flashctl, cmd, strlen(cmd), OWRITE) <= 0){
1311 		print("ftl: erase failed: %s\n", up->env->errstr);
1312 		error(up->env->errstr);
1313 	}
1314 }
1315 
1316 static void
1317 putflash(Ftl *ftl, ulong offset, void *buf, long n)
1318 {
1319 	offset += ftl->base;
1320 	if(ftl->trace)
1321 		print("ftl: write(#%lux, %ld)\n", offset, n);
1322 	ftl->flash->offset = offset;
1323 	if(kchanio(ftl->flash, buf, n, OWRITE) != n){
1324 		print("ftl: flash write error: %s\n", up->env->errstr);
1325 		error(up->env->errstr);
1326 	}
1327 }
1328 
1329 static void
1330 getflash(Ftl *ftl, void *buf, ulong offset, long n)
1331 {
1332 	offset += ftl->base;
1333 	if(ftl->trace)
1334 		print("ftl: read(#%lux, %ld)\n", offset, n);
1335 	ftl->flash->offset = offset;
1336 	if(kchanio(ftl->flash, buf, n, OREAD) != n){
1337 		print("ftl: flash read error %s\n", up->env->errstr);
1338 		error(up->env->errstr);
1339 	}
1340 }
1341 
1342 Dev ftldevtab = {
1343 	'X',	/* TO DO */
1344 	"ftl",
1345 
1346 	devreset,
1347 	devinit,
1348 	devshutdown,
1349 	ftlattach,
1350 	ftlwalk,
1351 	ftlstat,
1352 	ftlopen,
1353 	devcreate,
1354 	ftlclose,
1355 	ftlread,
1356 	devbread,
1357 	ftlwrite,
1358 	devbwrite,
1359 	devremove,
1360 	devwstat,
1361 };
1362