xref: /inferno-os/os/port/devftl.c (revision ecc9caba0e344ed50c05ee8156b2734f4d76e463)
1 /*
2  * basic Flash Translation Layer driver
3  *	see for instance the Intel technical paper
4  *	``Understanding the Flash Translation Layer (FTL) Specification''
5  *	Order number 297816-001 (online at www.intel.com)
6  *
7  * a public driver by David Hinds, dhinds@allegro.stanford.edu
8  * further helps with some details.
9  *
10  * this driver uses the common simplification of never storing
11  * the VBM on the medium (a waste of precious flash!) but
12  * rather building it on the fly as the block maps are read.
13  *
14  * Plan 9 driver (c) 1997 by C H Forsyth (forsyth@terzarima.net)
15  *	This driver may be used or adapted by anyone for any non-commercial purpose.
16  *
17  * adapted for Inferno 1998 by C H Forsyth, Vita Nuova Limited, York, England (forsyth@vitanuova.com)
18  *
19  * TO DO:
20  *	check error handling details for get/put flash
21  *	bad block handling
22  *	reserved space in formatted size
23  *	possibly block size as parameter
24  */
25 
26 #include "u.h"
27 #include "../port/lib.h"
28 #include "../port/error.h"
29 #include "mem.h"
30 #include "dat.h"
31 #include "fns.h"
32 
33 #include "kernel.h"
34 
35 #ifndef offsetof
36 #define	offsetof(s, m)	(ulong)(&(((s*)0)->m))
37 #endif
38 
39 typedef struct Ftl Ftl;
40 typedef struct Merase Merase;
41 typedef struct Terase Terase;
42 
43 enum {
44 	Eshift = 18,	/* 2^18=256k; log2(eraseunit) */
45 	Flashseg = 1<<Eshift,
46 	Bshift = 9,		/* 2^9=512 */
47 	Bsize = 1<<Bshift,
48 	BAMoffset = 0x100,
49 	Nolimit = ~0,
50 	USABLEPCT = 95,	/* release only this % to client */
51 
52 	FTLDEBUG = 0,
53 
54 	NPART = 4,
55 };
56 
57 /* erase unit header (defined by FTL specification) */
58 struct Merase {
59 	uchar	linktuple[5];
60 	uchar	orgtuple[10];
61 	uchar	nxfer;
62 	uchar	nerase[4];
63 	uchar	id[2];
64 	uchar	bshift;
65 	uchar	eshift;
66 	uchar	pstart[2];
67 	uchar	nunits[2];
68 	uchar	psize[4];
69 	uchar	vbmbase[4];
70 	uchar	nvbm[2];
71 	uchar	flags;
72 	uchar	code;
73 	uchar	serial[4];
74 	uchar	altoffset[4];
75 	uchar	bamoffset[4];
76 	uchar	rsv2[12];
77 };
78 #define	ERASEHDRLEN	64
79 
80 enum {
81 	/* special unit IDs */
82 	XferID = 0xffff,
83 	XferBusy = 0x7fff,
84 
85 	/* special BAM addresses */
86 	Bfree = 0xffffffff,
87 	Bwriting = 0xfffffffe,
88 	Bdeleted = 0,
89 
90 	/* block types */
91 	TypeShift = 7,
92 	BlockType = (1<<TypeShift)-1,
93 	ControlBlock = 0x30,
94 	DataBlock = 0x40,
95 	ReplacePage = 0x60,
96 	BadBlock = 0x70,
97 };
98 
99 #define	BTYPE(b)	((b) & BlockType)
100 #define	BADDR(b)	((b) & ~BlockType)
101 #define	BNO(va)	(((ulong)(va))>>Bshift)
102 #define	MKBAM(b,t)	(((b)<<Bshift)|(t))
103 
104 struct Terase {
105 	int	x;
106 	int	id;
107 	ulong	offset;
108 	ulong	bamoffset;
109 	ulong	nbam;
110 	ulong*	bam;
111 	ulong	bamx;
112 	ulong	nfree;
113 	ulong	nused;
114 	ulong	ndead;
115 	ulong	nbad;
116 	ulong	nerase;
117 };
118 
119 struct Ftl {
120 	QLock;
121 	Ref;
122 
123 	Chan*	flash;
124 	Chan*	flashctl;
125 	ulong	base;	/* base of flash region */
126 	ulong	size;	/* size of flash region */
127 	ulong	segsize;	/* size of flash segment (erase unit) */
128 	int	eshift;	/* log2(erase-unit-size) */
129 	int	bshift;	/* log2(bsize) */
130 	int	bsize;
131 	int	nunit;	/* number of segments (erase units) */
132 	Terase**	unit;
133 	int	lastx;	/* index in unit of last allocation */
134 	int	xfer;		/* index in unit of current transfer unit (-1 if none) */
135 	ulong	nfree;	/* total free space in blocks */
136 	ulong	nblock;	/* total space in blocks */
137 	ulong	rwlimit;	/* user-visible block limit (`formatted size') */
138 	ulong*	vbm;		/* virtual block map */
139 	ulong	fstart;	/* address of first block of data in a segment */
140 	int	trace;	/* (debugging) trace of read/write actions */
141 	int	detach;	/* free Ftl on last close */
142 
143 	/* scavenging variables */
144 	QLock	wantq;
145 	Rendez	wantr;
146 	Rendez	workr;
147 	int	needspace;
148 	int	hasproc;
149 	int	npart;		/* over and above ftldata */
150 	struct {
151 		ulong start, size;
152 		ulong rwlimit;
153 		char *name;	/* nil if slot unused */
154 	} part[NPART];
155 };
156 
157 enum {
158 	/* Ftl.detach */
159 	Detached = 1,	/* detach on close */
160 	Deferred	/* scavenger must free it */
161 };
162 
163 /* little endian */
164 #define	GET2(p)	(((p)[1]<<8)|(p)[0])
165 #define	GET4(p)	(((((((p)[3]<<8)|(p)[2])<<8)|(p)[1])<<8)|(p)[0])
166 #define	PUT2(p,v)	(((p)[1]=(v)>>8),((p)[0]=(v)))
167 #define	PUT4(p,v)	(((p)[3]=(v)>>24),((p)[2]=(v)>>16),((p)[1]=(v)>>8),((p)[0]=(v)))
168 
169 static	Lock	ftllock;
170 static	Ftl	*ftls;
171 static	int	ftlpct = USABLEPCT;
172 
173 static	ulong	allocblk(Ftl*);
174 static	int	erasedetect(Ftl *ftl, ulong base, ulong size, ushort *pstart, ushort *nunits);
175 static	void	eraseflash(Ftl*, ulong);
176 static	void	erasefree(Terase*);
177 static	void	eraseinit(Ftl*, ulong, int, int);
178 static	Terase*	eraseload(Ftl*, int, ulong);
179 static	void	ftlfree(Ftl*);
180 static	void	getflash(Ftl*, void*, ulong, long);
181 static	int	mapblk(Ftl*, ulong, Terase**, ulong*);
182 static	Ftl*	mkftl(char*, ulong, ulong, int, char*);
183 static	void	putbam(Ftl*, Terase*, int, ulong);
184 static	void	putflash(Ftl*, ulong, void*, long);
185 static	int	scavenge(Ftl*);
186 
187 enum {
188 	Qdir,
189 	Qctl,
190 	Qdata,
191 };
192 
193 #define DATAQID(q) ((q) >= Qdata && (q) <= Qdata + NPART)
194 
195 static void
196 ftlpartcmd(Ftl *ftl, char **fields, int nfields)
197 {
198 	ulong start, end;
199 	char *p;
200 	int n, newn;
201 
202 	/* name start [end] */
203 	if(nfields < 2 || nfields > 3)
204 		error(Ebadarg);
205 	if(ftl->npart >= NPART)
206 		error("table full");
207 	if(strcmp(fields[0], "ctl") == 0 || strcmp(fields[0], "data") == 0)
208 		error(Ebadarg);
209 	newn = -1;
210 	for(n = 0; n < NPART; n++){
211 		if(ftl->part[n].name == nil){
212 			if(newn < 0)
213 				newn = n;
214 			continue;
215 		}
216 		if(strcmp(fields[0], ftl->part[n].name + 3) == 0)
217 			error(Ebadarg);
218 	}
219 	start = strtoul(fields[1], 0, 0);
220 	if(nfields > 2)
221 		end = strtoul(fields[2], 0, 0);
222 	else
223 		end = ftl->rwlimit * Bsize;
224 	if(start >= end || start % Bsize || end % Bsize)
225 		error(Ebadarg);
226 	ftl->part[newn].start = start;
227 	ftl->part[newn].size = end - start;
228 	ftl->part[newn].rwlimit = end / Bsize;
229 	free(ftl->part[newn].name);
230 	p = malloc(strlen(fields[0]) + 3 + 1);
231 	strcpy(p, "ftl");
232 	strcat(p, fields[0]);
233 	ftl->part[newn].name = p;
234 	ftl->npart++;
235 }
236 
237 static void
238 ftldelpartcmd(Ftl *ftl, char **fields, int nfields)
239 {
240 	int n;
241 	// name
242 	if(nfields != 1)
243 		error(Ebadarg);
244 	for(n = 0; n < NPART; n++)
245 		if(strcmp(fields[0], ftl->part[n].name + 3) == 0){
246 			free(ftl->part[n].name);
247 			ftl->part[n].name = nil;
248 			ftl->npart--;
249 			return;
250 		}
251 	error(Ebadarg);
252 }
253 
254 static int
255 ftlgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp)
256 {
257 	int n;
258 	switch(i){
259 	case DEVDOTDOT:
260 		devdir(c, (Qid){Qdir, 0, QTDIR}, "#X", 0, eve, 0555, dp);
261 		break;
262 	case 0:
263 		devdir(c, (Qid){Qctl, 0, QTFILE}, "ftlctl", 0, eve, 0660, dp);
264 		break;
265 	case 1:
266 		devdir(c, (Qid){Qdata, 0, QTFILE}, "ftldata", ftls ? ftls->rwlimit * Bsize : 0, eve, 0660, dp);
267 		break;
268 	default:
269 		if(ftls == nil)
270 			return -1;
271 		i -= 2;
272 		if(i >= ftls->npart)
273 			return -1;
274 		for(n = 0; n < NPART; n++)
275 			if(ftls->part[n].name != nil){
276 				if(i == 0)
277 					break;
278 				i--;
279 			}
280 		if(i != 0){
281 			print("wierd\n");
282 			return -1;
283 		}
284 		devdir(c, (Qid){Qdata + 1 + n, 0, QTFILE}, ftls->part[n].name, ftls->part[n].size, eve, 0660, dp);
285 	}
286 	return 1;
287 }
288 
289 static Ftl *
290 ftlget(void)
291 {
292 	Ftl *ftl;
293 
294 	lock(&ftllock);
295 	ftl = ftls;
296 	if(ftl != nil)
297 		incref(ftl);
298 	unlock(&ftllock);
299 	return ftl;
300 }
301 
302 static void
303 ftlput(Ftl *ftl)
304 {
305 	if(ftl != nil){
306 		lock(&ftllock);
307 		if(decref(ftl) == 0 && ftl->detach == Detached){
308 			ftls = nil;
309 			if(ftl->hasproc){	/* no lock needed: can't change if ftl->ref==0 */
310 				ftl->detach = Deferred;
311 				wakeup(&ftl->workr);
312 			}else
313 				ftlfree(ftl);
314 		}
315 		unlock(&ftllock);
316 	}
317 }
318 
319 static Chan *
320 ftlattach(char *spec)
321 {
322 	return devattach('X', spec);
323 }
324 
325 static Walkqid*
326 ftlwalk(Chan *c, Chan *nc, char **name, int nname)
327 {
328 	Walkqid *wq;
329 
330 	wq = devwalk(c, nc, name, nname, 0, 0, ftlgen);
331 	if(wq != nil && wq->clone != nil && wq->clone != c)
332 		if(DATAQID(wq->clone->qid.path))
333 			wq->clone->aux = ftlget();
334 	return wq;
335 }
336 
337 static int
338 ftlstat(Chan *c, uchar *dp, int n)
339 {
340 	return devstat(c, dp, n, 0, 0, ftlgen);
341 }
342 
343 static Chan*
344 ftlopen(Chan *c, int omode)
345 {
346 	Ftl *ftl;
347 	omode = openmode(omode);
348 	if(DATAQID(c->qid.path)){
349 		ftl = ftls;
350 		if(ftl == nil)
351 			error(Enodev);
352 		if(strcmp(up->env->user, eve)!=0)
353 			error(Eperm);
354 	}
355 	else if(c->qid.path == Qctl){
356 		if(strcmp(up->env->user, eve)!=0)
357 			error(Eperm);
358 	}
359 	c = devopen(c, omode, 0, 0, ftlgen);
360 	if(DATAQID(c->qid.path)){
361 		c->aux = ftlget();
362 		if(c->aux == nil)
363 			error(Enodev);
364 	}
365 	return c;
366 }
367 
368 static void
369 ftlclose(Chan *c)
370 {
371 	if(DATAQID(c->qid.path) && (c->flag&COPEN) != 0)
372 		ftlput((Ftl*)c->aux);
373 }
374 
375 static long
376 ftlread(Chan *c, void *buf, long n, vlong offset)
377 {
378 	Ftl *ftl;
379 	Terase *e;
380 	int nb;
381 	uchar *a;
382 	ulong pb;
383 	if(c->qid.type & QTDIR)
384 		return devdirread(c, buf, n, 0, 0, ftlgen);
385 
386 	if(DATAQID(c->qid.path)){
387 		ulong rwlimit;
388 
389 		if(n <= 0 || n%Bsize || offset%Bsize)
390 			error(Eio);
391 		ftl = c->aux;
392 		if(c->qid.path > Qdata){
393 			int p = c->qid.path - Qdata - 1;
394 			offset += ftl->part[p].start;
395 			rwlimit = ftl->part[p].rwlimit;
396 		}
397 		else
398 			rwlimit = ftl->rwlimit;
399 		nb = n/Bsize;
400 		offset /= Bsize;
401 		if(offset >= rwlimit)
402 			return 0;
403 		if(offset+nb > rwlimit)
404 			nb = rwlimit - offset;
405 		a = buf;
406 		for(n = 0; n < nb; n++){
407 			qlock(ftl);
408 			if(waserror()){
409 				qunlock(ftl);
410 				nexterror();
411 			}
412 			if(mapblk(ftl, offset+n, &e, &pb))
413 				getflash(ftl, a, e->offset + pb*Bsize, Bsize);
414 			else
415 				memset(a, 0, Bsize);
416 			poperror();
417 			qunlock(ftl);
418 			a += Bsize;
419 		}
420 		return a-(uchar*)buf;
421 	}
422 
423 	if(c->qid.path != Qctl)
424 		error(Egreg);
425 
426 	return 0;
427 }
428 
429 static long
430 ftlwrite(Chan *c, void *buf, long n, vlong offset)
431 {
432 	char cmd[64], *fields[6];
433 	int ns, i, k, nb;
434 	uchar *a;
435 	Terase *e, *oe;
436 	ulong ob, v, base, size, segsize;
437 	Ftl *ftl;
438 
439 	if(n <= 0)
440 		return 0;
441 
442 	if(DATAQID(c->qid.path)){
443 		ulong rwlimit;
444 		ftl = c->aux;
445 		if(n <= 0 || n%Bsize || offset%Bsize)
446 			error(Eio);
447 		if(c->qid.path > Qdata){
448 			int p = c->qid.path - Qdata - 1;
449 			offset += ftl->part[p].start;
450 			rwlimit = ftl->part[p].rwlimit;
451 		}
452 		else
453 			rwlimit = ftl->rwlimit;
454 		nb = n/Bsize;
455 		offset /= Bsize;
456 		if(offset >= rwlimit)
457 			return 0;
458 		if(offset+nb > rwlimit)
459 			nb = rwlimit - offset;
460 		a = buf;
461 		for(n = 0; n < nb; n++){
462 			ns = 0;
463 			while((v = allocblk(ftl)) == 0)
464 				if(!scavenge(ftl) || ++ns > 3){
465 					static int stop;
466 
467 					if(stop < 3){
468 						stop++;
469 						print("ftl: flash memory full\n");
470 					}
471 					error("flash memory full");
472 				}
473 			qlock(ftl);
474 			if(waserror()){
475 				qunlock(ftl);
476 				nexterror();
477 			}
478 			if(!mapblk(ftl, offset+n, &oe, &ob))
479 				oe = nil;
480 			e = ftl->unit[v>>16];
481 			v &= 0xffff;
482 			putflash(ftl, e->offset + v*Bsize, a, Bsize);
483 			putbam(ftl, e, v, MKBAM(offset+n, DataBlock));
484 			/* both old and new block references exist in this window (can't be closed?) */
485 			ftl->vbm[offset+n] = (e->x<<16) | v;
486 			if(oe != nil){
487 				putbam(ftl, oe, ob, Bdeleted);
488 				oe->ndead++;
489 			}
490 			poperror();
491 			qunlock(ftl);
492 			a += Bsize;
493 		}
494 		return a-(uchar*)buf;
495 	}
496 	else if(c->qid.path == Qctl){
497 		if(n > sizeof(cmd)-1)
498 			n = sizeof(cmd)-1;
499 		memmove(cmd, buf, n);
500 		cmd[n] = 0;
501 		i = getfields(cmd, fields, 6, 1, " \t\n");
502 		if(i <= 0)
503 			error(Ebadarg);
504 		if(i >= 2 && (strcmp(fields[0], "init") == 0 || strcmp(fields[0], "format") == 0)){
505 			if(i > 2)
506 				base = strtoul(fields[2], nil, 0);
507 			else
508 				base = 0;	/* TO DO: hunt for signature */
509 			if(i > 3)
510 				size = strtoul(fields[3], nil, 0);
511 			else
512 				size = Nolimit;
513 			if(i > 4)
514 				segsize = strtoul(fields[4], nil, 0);
515 			else
516 				segsize = 0;
517 			/* segsize must be power of two and size and base must be multiples of it
518 			 * if segsize is zero, then use whatever the device returns
519 			 */
520 			if(segsize != 0 && (segsize > size
521 				|| segsize&(segsize-1)
522 				|| (base != Nolimit && base&(segsize-1))
523 				|| size == 0
524 				|| (size != Nolimit && size&(segsize-1))))
525 				error(Ebadarg);
526 			if(segsize == 0)
527 				k = 0;
528 			else {
529 				for(k=0; k<32 && (1<<k) != segsize; k++)
530 					;
531 			}
532 			if(ftls != nil)
533 				error(Einuse);
534 			ftls = mkftl(fields[1], base, size, k, fields[0]);
535 		}else if(strcmp(fields[0], "scavenge") == 0){
536 			if(ftls != nil)
537 				print("scavenge %d\n", scavenge(ftls));
538 		}else if(strcmp(fields[0], "trace") == 0){
539 			if(ftls != nil)
540 				ftls->trace = i>1? strtol(fields[1], nil, 0): 1;
541 		}else if(strcmp(fields[0], "detach") == 0){
542 			if((ftl = ftlget()) != nil){
543 				if(ftl->ref > 1){
544 					ftlput(ftl);
545 					error(Einuse);
546 				}
547 				ftl->detach = Detached;
548 				ftlput(ftl);
549 			}else
550 				error(Enodev);
551 		}else if(strcmp(fields[0], "part")==0){
552 			if((ftl = ftlget()) != nil){
553 				if(ftl->ref > 1){
554 					ftlput(ftl);
555 					error(Einuse);
556 				}
557 				if(waserror()){
558 					ftlput(ftl);
559 					nexterror();
560 				}
561 				ftlpartcmd(ftl, fields + 1, i - 1);
562 				poperror();
563 				ftlput(ftl);
564 			}else
565 				error(Enodev);
566 		}else if(strcmp(fields[0], "delpart")==0){
567 			if((ftl = ftlget()) != nil){
568 				if(ftl->ref > 1){
569 					ftlput(ftl);
570 					error(Einuse);
571 				}
572 				if(waserror()){
573 					ftlput(ftl);
574 					nexterror();
575 				}
576 				ftldelpartcmd(ftls, fields + 1, i - 1);
577 				poperror();
578 				ftlput(ftl);
579 			}else
580 				error(Enodev);
581 		}else if(i >= 2 && strcmp(fields[0], "pct")==0){
582 			v = strtoul(fields[1], nil, 0);
583 			if(v >= 50)
584 				ftlpct = v;
585 		}else
586 			error(Ebadarg);
587 		return n;
588 	}
589 	error(Egreg);
590 	return 0;		/* not reached */
591 }
592 
593 static Chan *
594 ftlkopen(char *name, char *suffix, int mode)
595 {
596 	Chan *c;
597 	char *fn;
598 	int fd;
599 
600 	if(suffix != nil && *suffix){
601 		fn = smalloc(strlen(name)+strlen(suffix)+1);
602 		if(fn == nil)
603 			return nil;
604 		strcpy(fn, name);
605 		strcat(fn, suffix);
606 		fd = kopen(fn, mode);
607 		free(fn);
608 	}else
609 		fd = kopen(name, mode);
610 	if(fd < 0)
611 		return nil;
612 	c = fdtochan(up->env->fgrp, fd, mode, 0, 1);
613 	kclose(fd);
614 	return c;
615 }
616 
617 static ulong
618 ftlfsize(Chan *c)
619 {
620 	uchar dbuf[STATFIXLEN+32*4];
621 	Dir d;
622 	int n;
623 
624 	n = devtab[c->type]->stat(c, dbuf, sizeof(dbuf));
625 	if(convM2D(dbuf, n, &d, nil) == 0)
626 		return 0;
627 	return d.length;
628 }
629 
630 static Ftl *
631 mkftl(char *fname, ulong base, ulong size, int eshift, char *op)
632 {
633 	int i, j, nov, segblocks, n, badseg, old, valid;
634 	ulong limit;
635 	Terase *e;
636 	Ftl *ftl;
637 	char buf[64], *fields[8];
638 	ulong ea;
639 	Chan *statfd;
640 
641 	ftl = malloc(sizeof(*ftl));
642 	if(ftl == nil)
643 		error(Enomem);
644 	e = nil;
645 	if(waserror()){
646 		ftlfree(ftl);
647 		if(e)
648 			free(e);
649 		nexterror();
650 	}
651 	ftl->lastx = 0;
652 	ftl->trace = 0;
653 	ftl->flash = ftlkopen(fname, "", ORDWR);
654 	if(ftl->flash == nil)
655 		error(up->env->errstr);
656 	ftl->flashctl = ftlkopen(fname, "ctl", ORDWR);
657 	if(ftl->flashctl == nil)
658 		error(up->env->errstr);
659 	old = 1;
660 	statfd = ftlkopen(fname, "stat", OREAD);	/* old scheme */
661 	if(statfd == nil){
662 		statfd = ftl->flashctl;	/* new just uses ctl */
663 		old = 0;
664 	}
665 	statfd->offset = 0;
666 	if((n = kchanio(statfd, buf, sizeof(buf), OREAD)) <= 0){
667 		print("ftl: read stat/ctl failed: %s\n", up->env->errstr);
668 		error(up->env->errstr);
669 	}
670 	if(n >= sizeof(buf))
671 		n = sizeof(buf)-1;
672 	buf[n] = 0;
673 	if(statfd != ftl->flashctl)
674 		cclose(statfd);
675 
676 	n = getfields(buf, fields, nelem(fields), 1, " \t\n");
677 	ea = 0;
678 	if(old){
679 		if(n >= 4)
680 			if((ea = strtoul(fields[3], nil, 16)) < 8*1024)
681 				ea = 0;	/* assume the format is wrong */
682 	}else{
683 		if(n >= 7)
684 			if((ea = strtoul(fields[6], nil, 0)) < 2*1024)
685 				ea = 0;	/* assume the format is wrong */
686 	}
687 	if(ea != 0){
688 		for(i=0; i < 32 && (1<<i) != ea; i++)
689 			;
690 		if(eshift && i != eshift)
691 			print("ftl: overriding erasesize %d with %d\n", 1 << eshift, 1 << i);
692 		eshift = i;
693 		if(FTLDEBUG)
694 			print("ftl: e=%lud eshift=%d\n", ea, i);
695 	}
696 	if(eshift == 0)
697 		error("no erasesize");
698 
699 	limit = ftlfsize(ftl->flash);
700 	if(limit == 0)
701 		error("no space for flash translation");
702 	ftl->segsize = 1 << eshift;
703 	if(base == Nolimit){
704 		ushort pstart, nunits;
705 		erasedetect(ftl, 0, limit, &pstart, &nunits);
706 		base = pstart * ftl->segsize;
707 		size = nunits * ftl->segsize;
708 		print("ftl: partition in %s at 0x%.8lux, length 0x%.8lux\n", fname, base, size);
709 	} else if(size == Nolimit)
710 		size = limit-base;
711 	if(base >= limit || size > limit || base+size > limit || eshift < 8 || (1<<eshift) > size){
712 		print("ftl: bad: base=%#lux limit=%#lux size=%ld eshift=%d\n", base, limit, size, eshift);
713 		error("bad flash space parameters");
714 	}
715 	if(FTLDEBUG)
716 		print("%s flash %s #%lux:#%lux limit #%lux\n", op, fname, base, size, limit);
717 	ftl->base = base;
718 	ftl->size = size;
719 	ftl->bshift = Bshift;
720 	ftl->bsize = Bsize;
721 	ftl->eshift = eshift;
722 	ftl->nunit = size>>eshift;
723 	nov = ((ftl->segsize/Bsize)*4 + BAMoffset + Bsize - 1)/Bsize;	/* number of overhead blocks per segment (header, and BAM itself) */
724 	ftl->fstart = nov;
725 	segblocks = ftl->segsize/Bsize - nov;
726 	ftl->nblock = ftl->nunit*segblocks;
727 	if(ftl->nblock > 0x10000){
728 		/* oops - too many blocks */
729 		ftl->nunit = 0x10000 / segblocks;
730 		ftl->nblock = ftl->nunit * segblocks;
731 		size = ftl->nunit * ftl->segsize;
732 		ftl->size = size;
733 		print("ftl: too many blocks - limiting to %ld bytes %d units %lud blocks\n",
734 		    size, ftl->nunit, ftl->nblock);
735 	}
736 	ftl->vbm = malloc(ftl->nblock*sizeof(*ftl->vbm));
737 	ftl->unit = malloc(ftl->nunit*sizeof(*ftl->unit));
738 	if(ftl->vbm == nil || ftl->unit == nil)
739 		error(Enomem);
740 	if(strcmp(op, "format") == 0){
741 		for(i=0; i<ftl->nunit-1; i++)
742 			eraseinit(ftl, i*ftl->segsize, i, 1);
743 		eraseinit(ftl, i*ftl->segsize, XferID, 1);
744 	}
745 	badseg = -1;
746 	ftl->xfer = -1;
747 	valid = 0;
748 	for(i=0; i<ftl->nunit; i++){
749 		e = eraseload(ftl, i, i*ftl->segsize);
750 		if(e == nil){
751 			print("ftl: logical segment %d: bad format\n", i);
752 			if(badseg == -1)
753 				badseg = i;
754 			else
755 				badseg = -2;
756 			continue;
757 		}
758 		if(e->id == XferBusy){
759 			e->nerase++;
760 			eraseinit(ftl, e->offset, XferID, e->nerase);
761 			e->id = XferID;
762 		}
763 		for(j=0; j<ftl->nunit; j++)
764 			if(ftl->unit[j] != nil && ftl->unit[j]->id == e->id){
765 				print("ftl: duplicate erase unit #%x\n", e->id);
766 				erasefree(e);
767 				e = nil;
768 				break;
769 			}
770 		if(e){
771 			valid++;
772 			ftl->unit[e->x] = e;
773 			if(e->id == XferID)
774 				ftl->xfer = e->x;
775 			if(FTLDEBUG)
776 				print("ftl: unit %d:#%x used %lud free %lud dead %lud bad %lud nerase %lud\n",
777 					e->x, e->id, e->nused, e->nfree, e->ndead, e->nbad, e->nerase);
778 			e = nil;
779 			USED(e);
780 		}
781 	}
782 	if(badseg >= 0){
783 		if(ftl->xfer >= 0)
784 			error("invalid ftl format");
785 		i = badseg;
786 		eraseinit(ftl, i*ftl->segsize, XferID, 1);
787 		e = eraseload(ftl, i, i*ftl->segsize);
788 		if(e == nil)
789 			error("bad ftl format");
790 		ftl->unit[e->x] = e;
791 		ftl->xfer = e->x;
792 		print("ftl: recovered transfer unit %d\n", e->x);
793 		valid++;
794 		e = nil;
795 		USED(e);
796 	}
797 	if(ftl->xfer < 0 && valid <= 0 || ftl->xfer >= 0 && valid <= 1)
798 		error("no valid flash data units");
799 	if(ftl->xfer < 0)
800 		error("ftl: no transfer unit: device is WORM\n");
801 	else
802 		ftl->nblock -= segblocks;	/* discount transfer segment */
803 	if(ftl->nblock >= 1000)
804 		ftl->rwlimit = ftl->nblock-100;	/* TO DO: variable reserve */
805 	else
806 		ftl->rwlimit = ftl->nblock*ftlpct/100;
807 	poperror();
808 	return ftl;
809 }
810 
811 static void
812 ftlfree(Ftl *ftl)
813 {
814 	int i, n;
815 
816 	if(ftl != nil){
817 		if(ftl->flashctl != nil)
818 			cclose(ftl->flashctl);
819 		if(ftl->flash != nil)
820 			cclose(ftl->flash);
821 
822 		if(ftl->unit){
823 			for(i = 0; i < ftl->nunit; i++)
824 				erasefree(ftl->unit[i]);
825 			free(ftl->unit);
826 		}
827 		free(ftl->vbm);
828 		for(n = 0; n < NPART; n++)
829 			free(ftl->part[n].name);
830 		free(ftl);
831 	}
832 }
833 
834 /*
835  * this simple greedy algorithm weighted by nerase does seem to lead
836  * to even wear of erase units (cf. the eNVy file system)
837  */
838 static Terase *
839 bestcopy(Ftl *ftl)
840 {
841 	Terase *e, *be;
842 	int i;
843 
844 	be = nil;
845 	for(i=0; i<ftl->nunit; i++)
846 		if((e = ftl->unit[i]) != nil && e->id != XferID && e->id != XferBusy && e->ndead+e->nbad &&
847 		    (be == nil || e->nerase <= be->nerase && e->ndead >= be->ndead))
848 			be = e;
849 	return be;
850 }
851 
852 static int
853 copyunit(Ftl *ftl, Terase *from, Terase *to)
854 {
855 	int i, nb;
856 	uchar id[2];
857 	ulong *bam;
858 	uchar *buf;
859 	ulong v, bno;
860 
861 	if(FTLDEBUG || ftl->trace)
862 		print("ftl: copying %d (#%lux) to #%lux\n", from->id, from->offset, to->offset);
863 	to->nbam = 0;
864 	free(to->bam);
865 	to->bam = nil;
866 	bam = nil;
867 	buf = malloc(Bsize);
868 	if(buf == nil)
869 		return 0;
870 	if(waserror()){
871 		free(buf);
872 		free(bam);
873 		return 0;
874 	}
875 	PUT2(id, XferBusy);
876 	putflash(ftl, to->offset+offsetof(Merase,id[0]), id, 2);
877 	/* make new BAM */
878 	nb = from->nbam*sizeof(*to->bam);
879 	bam = malloc(nb);
880 	if(bam == nil)
881 		error(Enomem);
882 	memmove(bam, from->bam, nb);
883 	to->nused = 0;
884 	to->nbad = 0;
885 	to->nfree = 0;
886 	to->ndead = 0;
887 	for(i = 0; i < from->nbam; i++)
888 		switch(bam[i]){
889 		case Bwriting:
890 		case Bdeleted:
891 		case Bfree:
892 			bam[i] = Bfree;
893 			to->nfree++;
894 			break;
895 		default:
896 			switch(bam[i]&BlockType){
897 			default:
898 			case BadBlock:	/* it isn't necessarily bad in this unit */
899 				to->nfree++;
900 				bam[i] = Bfree;
901 				break;
902 			case DataBlock:
903 			case ReplacePage:
904 				v = bam[i];
905 				bno = BNO(v & ~BlockType);
906 				if(i < ftl->fstart || bno >= ftl->nblock){
907 					print("ftl: unit %d:#%x bad bam[%d]=#%lux\n", from->x, from->id, i, v);
908 					to->nfree++;
909 					bam[i] = Bfree;
910 					break;
911 				}
912 				getflash(ftl, buf, from->offset+i*Bsize, Bsize);
913 				putflash(ftl, to->offset+i*Bsize, buf, Bsize);
914 				to->nused++;
915 				break;
916 			case ControlBlock:
917 				to->nused++;
918 				break;
919 			}
920 		}
921 	for(i=0; i<from->nbam; i++){
922 		uchar *p = (uchar*)&bam[i];
923 		v = bam[i];
924 		if(v != Bfree && ftl->trace > 1)
925 			print("to[%d]=#%lux\n", i, v);
926 		PUT4(p, v);
927 	}
928 	putflash(ftl, to->bamoffset, bam, nb);	/* BUG: PUT4 */
929 	for(i=0; i<from->nbam; i++){
930 		uchar *p = (uchar*)&bam[i];
931 		v = bam[i];
932 		PUT4(p, v);
933 	}
934 	to->id = from->id;
935 	PUT2(id, to->id);
936 	putflash(ftl, to->offset+offsetof(Merase,id[0]), id, 2);
937 	to->nbam = from->nbam;
938 	to->bam = bam;
939 	ftl->nfree += to->nfree - from->nfree;
940 	poperror();
941 	free(buf);
942 	return 1;
943 }
944 
945 static int
946 mustscavenge(void *a)
947 {
948 	return ((Ftl*)a)->needspace || ((Ftl*)a)->detach == Deferred;
949 }
950 
951 static int
952 donescavenge(void *a)
953 {
954 	return ((Ftl*)a)->needspace == 0;
955 }
956 
957 static void
958 scavengeproc(void *arg)
959 {
960 	Ftl *ftl;
961 	int i;
962 	Terase *e, *ne;
963 
964 	ftl = arg;
965 	if(waserror()){
966 		print("ftl: kproc noted\n");
967 		pexit("ftldeath", 0);
968 	}
969 	for(;;){
970 		sleep(&ftl->workr, mustscavenge, ftl);
971 		if(ftl->detach == Deferred){
972 			ftlfree(ftl);
973 			pexit("", 0);
974 		}
975 		if(FTLDEBUG || ftl->trace)
976 			print("ftl: scavenge %ld\n", ftl->nfree);
977 		qlock(ftl);
978 		if(waserror()){
979 			qunlock(ftl);
980 			nexterror();
981 		}
982 		e = bestcopy(ftl);
983 		if(e == nil || ftl->xfer < 0 || (ne = ftl->unit[ftl->xfer]) == nil || ne->id != XferID || e == ne)
984 			goto Fail;
985 		if(copyunit(ftl, e, ne)){
986 			i = ne->x; ne->x = e->x; e->x = i;
987 			ftl->unit[ne->x] = ne;
988 			ftl->unit[e->x] = e;
989 			ftl->xfer = e->x;
990 			e->id = XferID;
991 			e->nbam = 0;
992 			free(e->bam);
993 			e->bam = nil;
994 			e->bamx = 0;
995 			e->nerase++;
996 			eraseinit(ftl, e->offset, XferID, e->nerase);
997 		}
998 	Fail:
999 		if(FTLDEBUG || ftl->trace)
1000 			print("ftl: end scavenge %ld\n", ftl->nfree);
1001 		ftl->needspace = 0;
1002 		wakeup(&ftl->wantr);
1003 		poperror();
1004 		qunlock(ftl);
1005 	}
1006 }
1007 
1008 static int
1009 scavenge(Ftl *ftl)
1010 {
1011 	if(ftl->xfer < 0 || bestcopy(ftl) == nil)
1012 		return 0;	/* you worm! */
1013 
1014 	qlock(ftl);
1015 	if(waserror()){
1016 		qunlock(ftl);
1017 		return 0;
1018 	}
1019 	if(!ftl->hasproc){
1020 		ftl->hasproc = 1;
1021 		kproc("ftl.scavenge", scavengeproc, ftl, 0);
1022 	}
1023 	ftl->needspace = 1;
1024 	wakeup(&ftl->workr);
1025 	poperror();
1026 	qunlock(ftl);
1027 
1028 	qlock(&ftl->wantq);
1029 	if(waserror()){
1030 		qunlock(&ftl->wantq);
1031 		nexterror();
1032 	}
1033 	while(ftl->needspace)
1034 		sleep(&ftl->wantr, donescavenge, ftl);
1035 	poperror();
1036 	qunlock(&ftl->wantq);
1037 
1038 	return ftl->nfree;
1039 }
1040 
1041 static void
1042 putbam(Ftl *ftl, Terase *e, int n, ulong entry)
1043 {
1044 	uchar b[4];
1045 
1046 	e->bam[n] = entry;
1047 	PUT4(b, entry);
1048 	putflash(ftl, e->bamoffset + n*4, b, 4);
1049 }
1050 
1051 static ulong
1052 allocblk(Ftl *ftl)
1053 {
1054 	Terase *e;
1055 	int i, j;
1056 
1057 	qlock(ftl);
1058 	i = ftl->lastx;
1059 	do{
1060 		e = ftl->unit[i];
1061 		if(e != nil && e->id != XferID && e->nfree){
1062 			ftl->lastx = i;
1063 			for(j=e->bamx; j<e->nbam; j++)
1064 				if(e->bam[j] == Bfree){
1065 					putbam(ftl, e, j, Bwriting);
1066 					ftl->nfree--;
1067 					e->nfree--;
1068 					e->bamx = j+1;
1069 					qunlock(ftl);
1070 					return (e->x<<16) | j;
1071 				}
1072 			e->nfree = 0;
1073 			qunlock(ftl);
1074 			print("ftl: unit %d:#%x nfree %ld but not free in BAM\n", e->x, e->id, e->nfree);
1075 			qlock(ftl);
1076 		}
1077 		if(++i >= ftl->nunit)
1078 			i = 0;
1079 	}while(i != ftl->lastx);
1080 	qunlock(ftl);
1081 	return 0;
1082 }
1083 
1084 static int
1085 mapblk(Ftl *ftl, ulong bno, Terase **ep, ulong *bp)
1086 {
1087 	ulong v;
1088 	int x;
1089 
1090 	if(bno < ftl->nblock){
1091 		v = ftl->vbm[bno];
1092 		if(v == 0 || v == ~0)
1093 			return 0;
1094 		x = v>>16;
1095 		if(x >= ftl->nunit || x == ftl->xfer || ftl->unit[x] == nil){
1096 			print("ftl: corrupt format: bad block mapping %lud -> unit #%x\n", bno, x);
1097 			return 0;
1098 		}
1099 		*ep = ftl->unit[x];
1100 		*bp = v & 0xFFFF;
1101 		return 1;
1102 	}
1103 	return 0;
1104 }
1105 
1106 static void
1107 eraseinit(Ftl *ftl, ulong offset, int id, int nerase)
1108 {
1109 	union {
1110 		Merase;
1111 		uchar	block[ERASEHDRLEN];
1112 	} *m;
1113 	uchar *bam, *p;
1114 	int i, nov;
1115 
1116 	nov = ((ftl->segsize/Bsize)*4 + BAMoffset + Bsize - 1)/Bsize;	/* number of overhead blocks (header, and BAM itself) */
1117 	if(nov*Bsize >= ftl->segsize)
1118 		error("ftl -- too small for files");
1119 	eraseflash(ftl, offset);
1120 	m = malloc(sizeof(*m));
1121 	if(m == nil)
1122 		error(Enomem);
1123 	memset(m, 0xFF, sizeof(*m));
1124 	m->linktuple[0] = 0x13;
1125 	m->linktuple[1] = 0x3;
1126 	memmove(m->linktuple+2, "CIS", 3);
1127 	m->orgtuple[0] = 0x46;
1128 	m->orgtuple[1] = 0x57;
1129 	m->orgtuple[2] = 0x00;
1130 	memmove(m->orgtuple+3, "FTL100", 7);
1131 	m->nxfer = 1;
1132 	PUT4(m->nerase, nerase);
1133 	PUT2(m->id, id);
1134 	m->bshift = ftl->bshift;
1135 	m->eshift = ftl->eshift;
1136 	PUT2(m->pstart, ftl->base >> ftl->eshift);
1137 	PUT2(m->nunits, ftl->nunit);
1138 	PUT4(m->psize, ftl->size - nov*Bsize*ftl->nunit);
1139 	PUT4(m->vbmbase, 0xffffffff);	/* we always calculate the VBM */
1140 	PUT2(m->nvbm, 0);
1141 	m->flags = 0;
1142 	m->code = 0xFF;
1143 	memmove(m->serial, "Inf1", 4);
1144 	PUT4(m->altoffset, 0);
1145 	PUT4(m->bamoffset, BAMoffset);
1146 	putflash(ftl, offset, m, ERASEHDRLEN);
1147 	free(m);
1148 	if(id == XferID)
1149 		return;
1150 	nov *= 4;	/* now bytes of BAM */
1151 	bam = malloc(nov);
1152 	if(bam == nil)
1153 		error(Enomem);
1154 	for(i=0; i<nov; i += 4){
1155 		p = bam+i;
1156 		PUT4(p, ControlBlock);	/* reserve them */
1157 	}
1158 	putflash(ftl, offset+BAMoffset, bam, nov);
1159 	free(bam);
1160 }
1161 
1162 static int
1163 erasedetect(Ftl *ftl, ulong base, ulong size, ushort *pstart, ushort *nunits)
1164 {
1165 	ulong o;
1166 	int rv;
1167 
1168 	union {
1169 		Merase;
1170 		uchar	block[ERASEHDRLEN];
1171 	} *m;
1172 	m = malloc(sizeof(*m));
1173 	if(m == nil)
1174 		error(Enomem);
1175 	rv  = 0;
1176 	for(o = base; o < base + size; o += ftl->segsize){
1177 		if(waserror())
1178 			continue;
1179 		getflash(ftl, m, o, ERASEHDRLEN);
1180 		poperror();
1181 		if(memcmp(m->orgtuple + 3, "FTL100", 7) == 0
1182 		    && memcmp(m->serial, "Inf1", 4) == 0){
1183 			*pstart = GET2(m->pstart);
1184 			*nunits = GET2(m->nunits);
1185 			rv = 1;
1186 			break;
1187 		}
1188 	}
1189 	free(m);
1190 	return rv;
1191 }
1192 
1193 static Terase *
1194 eraseload(Ftl *ftl, int x, ulong offset)
1195 {
1196 	union {
1197 		Merase;
1198 		uchar	block[ERASEHDRLEN];
1199 	} *m;
1200 	Terase *e;
1201 	uchar *p;
1202 	int i, nbam;
1203 	ulong bno, v;
1204 
1205 	m = malloc(sizeof(*m));
1206 	if(m == nil)
1207 		error(Enomem);
1208 	if(waserror()){
1209 		free(m);
1210 		return nil;
1211 	}
1212 	getflash(ftl, m, offset, ERASEHDRLEN);
1213 	poperror();
1214 	if(memcmp(m->orgtuple+3, "FTL100", 7) != 0 ||
1215 	   memcmp(m->serial, "Inf1", 4) != 0){
1216 		free(m);
1217 print("%8.8lux: bad sig\n", offset);
1218 		return nil;
1219 	}
1220 	e = malloc(sizeof(*e));
1221 	if(e == nil){
1222 		free(m);
1223 		error(Enomem);
1224 	}
1225 	e->x = x;
1226 	e->id = GET2(m->id);
1227 	e->offset = offset;
1228 	e->bamoffset = GET4(m->bamoffset);
1229 	e->nerase = GET4(m->nerase);
1230 	free(m);
1231 	m = nil;
1232 	USED(m);
1233 	if(e->bamoffset != BAMoffset){
1234 		free(e);
1235 print("%8.8lux: bad bamoffset %8.8lux\n", offset, e->bamoffset);
1236 		return nil;
1237 	}
1238 	e->bamoffset += offset;
1239 	if(e->id == XferID || e->id == XferBusy){
1240 		e->bam = nil;
1241 		e->nbam = 0;
1242 		return e;
1243 	}
1244 	nbam = ftl->segsize/Bsize;
1245 	e->bam = malloc(nbam*sizeof(*e->bam));
1246 	e->nbam = nbam;
1247 	if(waserror()){
1248 		free(e);
1249 		nexterror();
1250 	}
1251 	getflash(ftl, e->bam, e->bamoffset, nbam*4);
1252 	poperror();
1253 	/* scan BAM to build VBM */
1254 	e->bamx = 0;
1255 	for(i=0; i<nbam; i++){
1256 		p = (uchar*)&e->bam[i];
1257 		e->bam[i] = v = GET4(p);
1258 		if(v == Bwriting || v == Bdeleted)
1259 			e->ndead++;
1260 		else if(v == Bfree){
1261 			if(e->bamx == 0)
1262 				e->bamx = i;
1263 			e->nfree++;
1264 			ftl->nfree++;
1265 		}else{
1266 			switch(v & BlockType){
1267 			case ControlBlock:
1268 				break;
1269 			case DataBlock:
1270 				/* add to VBM */
1271 				if(v & (1<<31))
1272 					break;	/* negative => VBM page, ignored */
1273 				bno = BNO(v & ~BlockType);
1274 				if(i < ftl->fstart || bno >= ftl->nblock){
1275 					print("ftl: unit %d:#%x bad bam[%d]=#%lux\n", e->x, e->id, i, v);
1276 					e->nbad++;
1277 					break;
1278 				}
1279 				ftl->vbm[bno] = (e->x<<16) | i;
1280 				e->nused++;
1281 				break;
1282 			case ReplacePage:
1283 				/* replacement VBM page; ignored */
1284 				break;
1285 			default:
1286 				print("ftl: unit %d:#%x bad bam[%d]=%lux\n", e->x, e->id, i, v);
1287 			case BadBlock:
1288 				e->nbad++;
1289 				break;
1290 			}
1291 		}
1292 	}
1293 	return e;
1294 }
1295 
1296 static void
1297 erasefree(Terase *e)
1298 {
1299 	if(e){
1300 		free(e->bam);
1301 		free(e);
1302 	}
1303 }
1304 
1305 static void
1306 eraseflash(Ftl *ftl, ulong offset)
1307 {
1308 	char cmd[40];
1309 
1310 	offset += ftl->base;
1311 	if(FTLDEBUG || ftl->trace)
1312 		print("ftl: erase seg @#%lux\n", offset);
1313 	snprint(cmd, sizeof(cmd), "erase 0x%8.8lux", offset);
1314 	if(kchanio(ftl->flashctl, cmd, strlen(cmd), OWRITE) <= 0){
1315 		print("ftl: erase failed: %s\n", up->env->errstr);
1316 		error(up->env->errstr);
1317 	}
1318 }
1319 
1320 static void
1321 putflash(Ftl *ftl, ulong offset, void *buf, long n)
1322 {
1323 	offset += ftl->base;
1324 	if(ftl->trace)
1325 		print("ftl: write(#%lux, %ld)\n", offset, n);
1326 	ftl->flash->offset = offset;
1327 	if(kchanio(ftl->flash, buf, n, OWRITE) != n){
1328 		print("ftl: flash write error: %s\n", up->env->errstr);
1329 		error(up->env->errstr);
1330 	}
1331 }
1332 
1333 static void
1334 getflash(Ftl *ftl, void *buf, ulong offset, long n)
1335 {
1336 	offset += ftl->base;
1337 	if(ftl->trace)
1338 		print("ftl: read(#%lux, %ld)\n", offset, n);
1339 	ftl->flash->offset = offset;
1340 	if(kchanio(ftl->flash, buf, n, OREAD) != n){
1341 		print("ftl: flash read error %s\n", up->env->errstr);
1342 		error(up->env->errstr);
1343 	}
1344 }
1345 
1346 Dev ftldevtab = {
1347 	'X',	/* TO DO */
1348 	"ftl",
1349 
1350 	devreset,
1351 	devinit,
1352 	devshutdown,
1353 	ftlattach,
1354 	ftlwalk,
1355 	ftlstat,
1356 	ftlopen,
1357 	devcreate,
1358 	ftlclose,
1359 	ftlread,
1360 	devbread,
1361 	ftlwrite,
1362 	devbwrite,
1363 	devremove,
1364 	devwstat,
1365 };
1366