xref: /inferno-os/liblogfs/write.c (revision a6b3238f419e5f5ccc08dd86121c357264e54987)
1 #include "logfsos.h"
2 #include "logfs.h"
3 #include "local.h"
4 
5 typedef struct AllocState AllocState;
6 struct AllocState {
7 	long oldblock;
8 	int markbad;
9 };
10 
11 Pageset
12 logfsdatapagemask(int pages, int base)
13 {
14 	if(pages == BITSPERSET)
15 		return ~(Pageset)0;
16 	return (((Pageset)1 << pages) - 1) << (BITSPERSET - base - pages);
17 }
18 
19 static Pageset
20 fastgap(Pageset w, uint n)
21 {
22 	Pageset s;
23 //print("fastgap(0x%.8ux, %d)\n", w, n);
24 	if(w == 0 || n < 1 || n > BITSPERSET)
25 		return 0;
26 /*
27 #	unroll the following loop 5 times:
28 #		while(n > 1){
29 #			s := n >> 1;
30 #			w &= w<<s;
31 #			n -= s;
32 #		}
33 */
34 	s = n >> 1;
35 	w &= w << s;
36 	n -= s;
37 	s = n >> 1;
38 	w &= w << s;
39 	n -= s;
40 	s = n >> 1;
41 	w &= w << s;
42 	n -= s;
43 	s = n >> 1;
44 	w &= w << s;
45 	n -= s;
46 	s = n >> 1;
47 	if(BITSPERSET == 64){	/* extra time if 64 bits */
48 		w &= w << s;
49 		n -= s;
50 		s = n >> 1;
51 	}
52 	return w & (w << s);
53 }
54 
55 static int
56 nlz(Pageset x)
57 {
58 	int n, c;
59 
60 	if(x == 0)
61 		return BITSPERSET;
62 	if(x & PAGETOP)
63 		return 0;
64 	n = BITSPERSET;
65 	c = BITSPERSET/2;
66 	do {
67 		Pageset y;
68 		y = x >> c;
69 		if(y != 0) {
70 			n -= c;
71 			x = y;
72 		}
73 	} while((c >>= 1) != 0);
74 	return n - x;
75 }
76 
77 static Pageset
78 findgap(Pageset w, uint n)
79 {
80 	Pageset m;
81 
82 	do {
83 		m  = fastgap(w, n);
84 		if(m)
85 			break;
86 		n--;
87 	} while(n);
88 	if(n == 0)
89 		return 0;
90 	return logfsdatapagemask(n, nlz(m));
91 }
92 
93 static int
94 bitcount(Pageset mask)
95 {
96 	Pageset m;
97 	int rv;
98 
99 	rv = 0;
100 	for(m = PAGETOP; m != 0; m >>= 1)
101 		if(mask & m)
102 			rv++;
103 	return rv;
104 }
105 
106 static char *
107 allocdatapages(LogfsServer *server, u32int count, int *countp, long *blockindexp, int *pagep, u32int *flashaddr, AllocState *state)
108 {
109 	LogfsLowLevel *ll = server->ll;
110 	long b, blockindex;
111 	DataBlock *db;
112 	int pagebase;
113 	u32int pages = (count + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
114 	u32int gapmask;
115 	long bestfreeblockindex;
116 	int bestfree;
117 	int pagesperblock = 1 << ll->l2pagesperblock;
118 	int apages;
119 	char *errmsg;
120 	int didsomething;
121 
122 	state->oldblock = -1;
123 	state->markbad = 0;
124 	if(pages > pagesperblock)
125 		pages = pagesperblock;
126 	/*
127 	 * fill in gaps first
128 	 */
129 	bestfreeblockindex = -1;
130 	bestfree = 0;
131 	for(blockindex = 0; blockindex < server->ndatablocks; blockindex++) {
132 		db = server->datablock + blockindex;
133 		if(db->block < 0)
134 			continue;
135 		gapmask = findgap(db->free & ~db->dirty, pages);
136 //print("blockindex %ld free 0x%.8ux dirty 0x%.8ux gapmask %.8ux\n", blockindex, db->free, db->dirty, gapmask);
137 		if(gapmask != 0) {
138 			/*
139 			 * this is free and !dirty
140 			 */
141 			b = db->block;
142 			USED(b);
143 			goto done;
144 		}
145 		else {
146 			int free = bitcount(db->free & logfsdatapagemask(pagesperblock, 0));
147 			if(free > 0 && (bestfreeblockindex < 0 || free > bestfree)) {
148 				bestfreeblockindex = blockindex;
149 				bestfree = free;
150 			}
151 		}
152 	}
153 //print("out of space - need to clean up a data block\n");
154 	if(bestfreeblockindex >= 0) {
155 //print("best block index %ld (%ld) %d bits\n", bestfreeblockindex, server->datablock[bestfreeblockindex].block, bestfree);
156 		/*
157 		 * clean up data block
158 		 */
159 		b = logfsfindfreeblock(ll, AllocReasonTransfer);
160 		while(b >= 0) {
161 			char *errmsg;
162 			LogfsLowLevelReadResult llrr;
163 			long oldblock;
164 			int markedbad;
165 
166 			db = server->datablock + bestfreeblockindex;
167 			oldblock = db->block;
168 			errmsg = logfsservercopyactivedata(server, b, bestfreeblockindex, 0, &llrr, &markedbad);
169 			if(errmsg) {
170 				if(!markedbad)
171 					return errmsg;
172 				b = logfsfindfreeblock(ll, AllocReasonTransfer);
173 			}
174 			else {
175 				Pageset available;
176 				/*
177 				 * if page0 is free, then we must ensure that we use it otherwise
178 				 * in tagged storage such as nand, the block tag is not written
179 				 * in all cases, it is safer to erase the block afterwards to
180 				 * preserve the data for as long as possible (we could choose
181 				 * to erase the old block now if page0 has already been copied)
182 				 */
183 				blockindex = bestfreeblockindex;
184 				state->oldblock = oldblock;
185 				state->markbad = llrr != LogfsLowLevelReadResultOk;
186 				available = db->free & ~db->dirty;
187 				if(available & PAGETOP)
188 					available = logfsdatapagemask(nlz(~available), 0);
189 				gapmask = findgap(available, pages);
190 				goto done;
191 			}
192 		}
193 	}
194 	/*
195 	 * use already erased blocks, so long as there are a few free
196 	 */
197 	b = logfsfindfreeblock(ll, AllocReasonDataExtend);
198 	if(b >= 0) {
199 useerased:
200 		for(blockindex = 0, db = server->datablock; blockindex < server->ndatablocks; blockindex++, db++)
201 			if(db->block < 0)
202 				break;
203 		if(blockindex == server->ndatablocks)
204 			server->ndatablocks++;
205 		db->path = mkdatapath(blockindex, 0);
206 		db->block = b;
207 		(*ll->setblocktag)(ll, b, LogfsTdata);
208 		(*ll->setblockpath)(ll, b, db->path);
209 		db->free = logfsdatapagemask(pagesperblock, 0);
210 		db->dirty = 0;
211 		gapmask = db->free;
212 		goto done;
213 	}
214 	/*
215 	 * last resort; try to steal from log
216 	 */
217 //print("last resort\n");
218 	errmsg = logfsserverlogsweep(server, 0, &didsomething);
219 	if(errmsg)
220 		return errmsg;
221 	if(didsomething) {
222 		/*
223 		 * this can only create whole free blocks, so...
224 		 */
225 //print("findfree after last resort\n");
226 		b = logfsfindfreeblock(ll, AllocReasonDataExtend);
227 		if(b >= 0) {
228 //print("*********************************************************\n");
229 			goto useerased;
230 		}
231 	}
232 	*countp = 0;
233 	return nil;
234 done:
235 	/*
236 	 * common finish - needs gapmask, blockindex, db
237 	 */
238 	apages = bitcount(gapmask);
239 	pagebase = nlz(gapmask);
240 	if(apages > pages)
241 		apages = pages;
242 	gapmask = logfsdatapagemask(apages, pagebase);
243 	if(server->trace > 1)
244 		print("allocdatapages: block %ld(%ld) pages %d mask 0x%.8ux pagebase %d apages %d\n",
245 			blockindex, db->block, pages, gapmask, pagebase, apages);
246 	db->free &= ~gapmask;
247 	db->dirty |= gapmask;
248 	*pagep = pagebase;
249 	*blockindexp = blockindex;
250 	*flashaddr = logfsspo2flashaddr(server, blockindex, pagebase, 0);
251 	*countp = apages << ll->l2pagesize;
252 	if(*countp > count)
253 		*countp = count;
254 	return nil;
255 }
256 
257 typedef struct Page {
258 	u32int pageaddr;
259 	int ref;
260 } Page;
261 
262 typedef struct DataStructure {
263 	LogfsServer *server;
264 	int nentries;
265 	int maxentries;
266 	Page *array;
267 } DataStructure;
268 
269 static int
270 deltapage(DataStructure *ds, u32int pageaddr, int add, int delta)
271 {
272 	int i;
273 	for(i = 0; i < ds->nentries; i++)
274 		if(ds->array[i].pageaddr == pageaddr) {
275 			ds->array[i].ref += delta;
276 			return 1;
277 		}
278 	if(!add)
279 		return 1;
280 	if(ds->maxentries == 0) {
281 		ds->array = logfsrealloc(nil, sizeof(Page) * 100);
282 		if(ds->array == nil)
283 			return 0;
284 		ds->maxentries = 100;
285 	}
286 	else if(ds->nentries >= ds->maxentries) {
287 		void *a = logfsrealloc(ds->array, ds->maxentries * 2 * sizeof(Page));
288 		if(a == nil)
289 			return 0;
290 		ds->array = a;
291 		ds->maxentries *= 2;
292 	}
293 	ds->array[ds->nentries].pageaddr = pageaddr;
294 	ds->array[ds->nentries++].ref = delta;
295 	return 1;
296 }
297 
298 /*
299  * only called for data addresses
300  */
301 static int
302 deltapages(DataStructure *ds, LogfsLowLevel *ll, u32int baseflashaddr, int range, int add, int delta)
303 {
304 	long seq;
305 	int page, offset;
306 	int pages;
307 	u32int pageaddr;
308 	int x;
309 
310 //print("deltapages(%ud, %ud, %d, %d)\n", baseflashaddr, limitflashaddr, add, delta);
311 	logfsflashaddr2spo(ds->server, baseflashaddr, &seq, &page, &offset);
312 	pages = (offset + range + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
313 	pageaddr = (seq << ll->l2pagesperblock) + page;
314  	for(x = 0; x < pages; x++, pageaddr++)
315 		if(!deltapage(ds, pageaddr, add, delta))
316 			return 0;
317 	return 1;
318 }
319 
320 static int
321 findpageset(void *magic, u32int baseoffset, u32int limitoffset, Extent *e, u32int extentoffset)
322 {
323 	DataStructure *ds = magic;
324 	LogfsLowLevel *ll;
325 	u32int flashaddr;
326 	u32int range;
327 	u32int residue;
328 
329 	if(e == nil || (e->flashaddr & LogAddr) != 0)
330 		return 1;
331 	ll = ds->server->ll;
332 //print("baseoffset %ud limitoffset %ud min %ud max %ud\n", baseoffset, limitoffset, e->min, e->max);
333 	flashaddr = e->flashaddr;
334 	if(extentoffset)
335 		if(!deltapages(ds, ll, flashaddr, extentoffset, 1, 1))
336 			return -1;
337 	flashaddr += extentoffset;
338 	range = limitoffset - baseoffset;
339 	if(!deltapages(ds, ll, flashaddr, range, 1, -1))
340 		return -1;
341 	flashaddr += range;
342 	residue = e->max - e->min - (extentoffset + range);
343 	if(residue)
344 		if(!deltapages(ds, ll, flashaddr, residue, 1, 1))
345 			return -1;
346 	return 1;
347 }
348 
349 static int
350 addpagereferences(void *magic, Extent *e, int hole)
351 {
352 	DataStructure *ds = magic;
353 
354 	if(hole || (e->flashaddr & LogAddr) != 0)
355 		return 1;
356 	return deltapages(ds, ds->server->ll, e->flashaddr, e->max - e->min, 0, 1) ? 1 : -1;
357 }
358 
359 static char *
360 zappages(LogfsServer *server, Entry *e, u32int min, u32int max)
361 {
362 	DataStructure ds;
363 	long seq;
364 	int x, rv, page;
365 	Page *p;
366 
367 	if(min >= e->u.file.length)
368 		return nil;		/* no checks necessary */
369 	if(min == 0 && max >= e->u.file.length) {
370 		/* replacing entire file */
371 		logfsextentlistwalk(e->u.file.extent, logfsunconditionallymarkfreeanddirty, server);
372 		return nil;
373 	}
374 	/* hard after that - this will need to be improved */
375 	/*
376 	 * current algorithm
377 	 * build a list of all pages referenced by the extents being removed, and count the
378  	 * number of references
379 	 * then subtract the number of references to each page in entire file
380 	 * any pages with a reference count == 0 can be removed
381 	 */
382 	ds.server = server;
383 	ds.nentries = 0;
384 	ds.maxentries = 0;
385 	ds.array = nil;
386 	rv = logfsextentlistwalkrange(e->u.file.extent, findpageset, &ds, min, max);
387 	if(rv < 0 || ds.nentries == 0)
388 		goto Out;
389 	if(server->trace > 1){
390 		print("pass 1\n");
391 		for(x = 0; x < ds.nentries; x++){
392 			p = &ds.array[x];
393 			seq = p->pageaddr >> server->ll->l2pagesperblock;
394 			page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
395 			print("block %lud page %ud ref %d\n", seq, page, p->ref);
396 		}
397 		print("pass 2\n");
398 	}
399 	rv = logfsextentlistwalk(e->u.file.extent, addpagereferences, &ds);
400 	if(rv >= 0){
401 		for(x = 0; x < ds.nentries; x++){
402 			p = &ds.array[x];
403 			seq = p->pageaddr >> server->ll->l2pagesperblock;
404 			page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
405 			if(server->trace > 1)
406 				print("block %lud page %ud ref %d\n", seq, page, p->ref);
407 			if(p->ref == 0)
408 				logfsfreedatapages(server, seq, logfsdatapagemask(1, page));
409 		}
410 	}
411 Out:
412 	logfsfreemem(ds.array);
413 	return rv < 0 ? Enomem : nil;
414 }
415 
416 static void
417 disposeofoldblock(LogfsServer *server, AllocState *state)
418 {
419 	if(state->oldblock >= 0) {
420 		if(server->testflags & LogfsTestDontFettleDataBlock) {
421 			/* take the block out of commission */
422 			(*server->ll->setblocktag)(server->ll, state->oldblock, LogfsTworse);
423 			server->testflags &= ~LogfsTestDontFettleDataBlock;
424 		}
425 		else {
426 			if(state->markbad)
427 				(*server->ll->markblockbad)(server->ll, state->oldblock);
428 			else
429 				logfsbootfettleblock(server->lb, state->oldblock, LogfsTnone, ~0, nil);
430 		}
431 		state->oldblock = -1;
432 	}
433 }
434 
435 char *
436 logfsserverwrite(LogfsServer *server, u32int fid, u32int offset, u32int count, uchar *buf, u32int *rcount)
437 {
438 	Fid *f;
439 	Entry *e;
440 	u32int now;
441 	char *muid;
442 	int muidlen;
443 	LogfsLowLevel *ll = server->ll;
444 
445 	if(server->trace > 1)
446 		print("logfsserverwrite(%ud, %ud, %ud)\n", fid, offset, count);
447 	f = logfsfidmapfindentry(server->fidmap, fid);
448 	if(f == nil)
449 		return logfsebadfid;
450 	if(f->openmode < 0)
451 		return logfsefidnotopen;
452 	if((f->openmode & 3) == OREAD)
453 		return logfseaccess;
454 	e = f->entry;
455 	if(e->deadandgone)
456 		return Eio;
457 	if(e->qid.type & QTDIR)
458 		return Eperm;
459 	if(e->perm & DMAPPEND)
460 		offset = e->u.file.length;
461 	now = logfsnow();
462 	*rcount = count;
463 	muid = logfsisfindidfromname(server->is, f->uname);
464 	muidlen = strlen(muid);
465 	while(count) {
466 		Extent extent;
467 		int thistime;
468 		char *errmsg;
469 		thistime = lognicesizeforwrite(server, 1, count, muidlen);
470 		if(thistime == 0) {
471 			int p;
472 			u32int n;
473 			long blockindex;
474 			int pagebase;
475 			AllocState state;
476 			int pagesize = 1 << ll->l2pagesize;
477 		reallocate:
478 			errmsg = allocdatapages(server, count, &thistime, &blockindex, &pagebase, &extent.flashaddr, &state);
479 			if(errmsg)
480 				return errmsg;
481 			if(thistime == 0)
482 				return logfselogfull;
483 			for(p = pagebase, n = 0; n < thistime; p++, n += pagesize) {
484 				u32int mask;
485 				DataBlock *db = server->datablock + blockindex;
486 				errmsg = (*ll->writepage)(ll, buf + n, db->block, p);
487 				if(errmsg) {
488 					if(strcmp(errmsg, Eio) != 0) {
489 						/*
490 						 * something horrid happened down below
491 						 * recover without writing any more than we have to
492 						 */
493 						if(p != 0) {
494 							/*
495 							 * page 0 was either written already, or has been written in this loop
496 							 * thus the block referenced is valid on the media. all we need to do
497 							 * is lose the old block, mark the written pages as free (so they can
498 							 * be scavenged), and don't bother with the log message
499 							 */
500 							disposeofoldblock(server, &state);
501 							mask = logfsdatapagemask(p - pagebase - 1, pagebase);
502 							db->free |= mask;
503 							db->dirty |= mask;
504 							return errmsg;
505 						}
506 						/*
507 						 * page 0 failed to write (so nothing written at all)
508 						 * this is either an entirely free block (no erased block in savestate),
509 						 * or a copy of a scavenged block (erased block in savestate)
510 						 */
511 						if(state.oldblock < 0) {
512 							/*
513 							 * newly selected erased block (blockindex == server->ndatablocks - 1)
514 							 * mark it bad, lose it from the datablock table
515 							 */
516 							(*ll->markblockbad)(ll, db->block);
517 							db->block = -1;
518 							if(blockindex == server->ndatablocks - 1)
519 								server->ndatablocks--;
520 							return errmsg;
521 						}
522 						/*
523 						 * page 0 of a data scavenge copy
524 						 * mark it bad, restore state (old block)
525 						 */
526 						(*ll->markblockbad)(ll, db->block);
527 						db->block = state.oldblock;
528 						return errmsg;
529 					}
530 					/*
531 					 * write error on target block
532 					 *
533 					 * if it is a replacement (state saved)
534 					 *	mark the new block bad, restore state and try again
535 					 *
536 					 * if it is not replaced (no state saved)
537 					 *	replace block, and try again
538 					 */
539 					if(state.oldblock >= 0) {
540 						(*ll->markblockbad)(ll, db->block);
541 						db->block = state.oldblock;
542 					}
543 					else {
544 						errmsg = logfsserverreplacedatablock(server, blockindex);
545 						if(errmsg)
546 							return errmsg;
547 					}
548 					goto reallocate;
549 				}
550 				mask = logfsdatapagemask(1, p);
551 				db->free &= ~mask;
552 				db->dirty |= mask;
553 			}
554 			/* well, we managed to write the data out */
555 			errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
556 				muid, nil, &extent.flashaddr);
557 			/*
558 			 * now we can dispose of the original data block, if any
559 			 * this is regardless of whether we succeeded in writing a log message, as
560 			 * if this block is not erased, there will be a duplicate
561 			 */
562 			disposeofoldblock(server, &state);
563 		}
564 		else {
565 			if(thistime > count)
566 				thistime = count;
567 			errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
568 				muid, buf, &extent.flashaddr);
569 		}
570 		/*
571 		 * here if we failed to write the log message
572 		 */
573 		if(errmsg)
574 			return errmsg;
575 		if(server->trace > 1)
576 			print("logfsserverwrite: %d bytes at flashaddr 0x%.8ux\n", thistime, extent.flashaddr);
577 		extent.min = offset;
578 		extent.max = offset + thistime;
579 		errmsg = zappages(server, e, extent.min, extent.max);
580 		if(errmsg)
581 			return errmsg;
582 		errmsg = logfsextentlistinsert(e->u.file.extent, &extent, nil);
583 		if(errmsg)
584 			return errmsg;
585 		e->muid = muid;
586 		e->mtime = now;
587 		offset += thistime;
588 		if(e->u.file.length < offset)
589 			e->u.file.length = offset;
590 		count -= thistime;
591 		buf += thistime;
592 		e->qid.vers++;
593 	}
594 	return nil;
595 }
596