xref: /plan9/sys/src/9/port/swap.c (revision d1be6b086622eecc0da76db1fbd64349a5e85293)
1 #include	"u.h"
2 #include	"../port/lib.h"
3 #include	"mem.h"
4 #include	"dat.h"
5 #include	"fns.h"
6 #include	"../port/error.h"
7 
8 static int	canflush(Proc*, Segment*);
9 static void	executeio(void);
10 static int	needpages(void*);
11 static void	pageout(Proc*, Segment*);
12 static void	pagepte(int, Page**);
13 static void	pager(void*);
14 
15 Image 	swapimage;
16 
17 static 	int	swopen;
18 static	Page	**iolist;
19 static	int	ioptr;
20 
21 static	ulong	genage, genclock, gencount;
22 static	uvlong	gensum;
23 
24 static void
gentick(void)25 gentick(void)
26 {
27 	genclock++;
28 	if(gencount)
29 		genage = gensum / gencount;
30 	else
31 		genage = 0;
32 	gensum = gencount = 0;
33 }
34 
35 void
swapinit(void)36 swapinit(void)
37 {
38 	swapalloc.swmap = xalloc(conf.nswap);
39 	swapalloc.top = &swapalloc.swmap[conf.nswap];
40 	swapalloc.alloc = swapalloc.swmap;
41 	swapalloc.last = swapalloc.swmap;
42 	swapalloc.free = conf.nswap;
43 	iolist = xalloc(conf.nswppo*sizeof(Page*));
44 	if(swapalloc.swmap == 0 || iolist == 0)
45 		panic("swapinit: not enough memory");
46 
47 	swapimage.notext = 1;
48 }
49 
50 ulong
newswap(void)51 newswap(void)
52 {
53 	uchar *look;
54 
55 	lock(&swapalloc);
56 
57 	if(swapalloc.free == 0){
58 		unlock(&swapalloc);
59 		return ~0;
60 	}
61 
62 	look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
63 	if(look == 0)
64 		panic("inconsistent swap");
65 
66 	*look = 1;
67 	swapalloc.last = look;
68 	swapalloc.free--;
69 	unlock(&swapalloc);
70 	return (look-swapalloc.swmap) * BY2PG;
71 }
72 
73 void
putswap(Page * p)74 putswap(Page *p)
75 {
76 	uchar *idx;
77 
78 	lock(&swapalloc);
79 	idx = &swapalloc.swmap[((ulong)p)/BY2PG];
80 	if(--(*idx) == 0) {
81 		swapalloc.free++;
82 		if(idx < swapalloc.last)
83 			swapalloc.last = idx;
84 	}
85 	if(*idx >= 254)
86 		panic("putswap %#p == %ud", p, *idx);
87 	unlock(&swapalloc);
88 }
89 
90 void
dupswap(Page * p)91 dupswap(Page *p)
92 {
93 	lock(&swapalloc);
94 	if(++swapalloc.swmap[((ulong)p)/BY2PG] == 0)
95 		panic("dupswap");
96 	unlock(&swapalloc);
97 }
98 
99 int
swapcount(ulong daddr)100 swapcount(ulong daddr)
101 {
102 	return swapalloc.swmap[daddr/BY2PG];
103 }
104 
105 void
kickpager(void)106 kickpager(void)
107 {
108 	static int started;
109 
110 	if(started)
111 		wakeup(&swapalloc.r);
112 	else {
113 		kproc("pager", pager, 0);
114 		started = 1;
115 	}
116 }
117 
118 static void
pager(void * junk)119 pager(void *junk)
120 {
121 	int i;
122 	Segment *s;
123 	Proc *p, *ep;
124 
125 	if(waserror())
126 		panic("pager: os error");
127 
128 	p = proctab(0);
129 	ep = &p[conf.nproc];
130 
131 loop:
132 	up->psstate = "Idle";
133 	wakeup(&palloc.r);
134 	sleep(&swapalloc.r, needpages, 0);
135 
136 	while(needpages(junk)) {
137 		if(swapimage.c) {
138 			p++;
139 			if(p >= ep){
140 				p = proctab(0);
141 				gentick();
142 			}
143 
144 			if(p->state == Dead || p->noswap)
145 				continue;
146 
147 			if(!canqlock(&p->seglock))
148 				continue;		/* process changing its segments */
149 
150 			for(i = 0; i < NSEG; i++) {
151 				if(!needpages(junk)){
152 					qunlock(&p->seglock);
153 					goto loop;
154 				}
155 
156 				if(s = p->seg[i]) {
157 					switch(s->type&SG_TYPE) {
158 					default:
159 						break;
160 					case SG_TEXT:
161 						pageout(p, s);
162 						break;
163 					case SG_DATA:
164 					case SG_BSS:
165 					case SG_STACK:
166 					case SG_SHARED:
167 						up->psstate = "Pageout";
168 						pageout(p, s);
169 						if(ioptr != 0) {
170 							up->psstate = "I/O";
171 							executeio();
172 						}
173 						break;
174 					}
175 				}
176 			}
177 			qunlock(&p->seglock);
178 		} else {
179 			print("out of memory\n");
180 			killbig("out of memory");
181 			freebroken();		/* can use the memory */
182 
183 			/* Emulate the old system if no swap channel */
184 			if(!swapimage.c)
185 				tsleep(&up->sleep, return0, 0, 5000);
186 		}
187 	}
188 	goto loop;
189 }
190 
191 static void
pageout(Proc * p,Segment * s)192 pageout(Proc *p, Segment *s)
193 {
194 	int type, i, size;
195 	ulong age;
196 	Pte *l;
197 	Page **pg, *entry;
198 
199 	if(!canqlock(&s->lk))	/* We cannot afford to wait, we will surely deadlock */
200 		return;
201 
202 	if(s->steal) {		/* Protected by /dev/proc */
203 		qunlock(&s->lk);
204 		return;
205 	}
206 
207 	if(!canflush(p, s)) {	/* Able to invalidate all tlbs with references */
208 		qunlock(&s->lk);
209 		putseg(s);
210 		return;
211 	}
212 
213 	if(waserror()) {
214 		qunlock(&s->lk);
215 		putseg(s);
216 		return;
217 	}
218 
219 	/* Pass through the pte tables looking for memory pages to swap out */
220 	type = s->type&SG_TYPE;
221 	size = s->mapsize;
222 	for(i = 0; i < size; i++) {
223 		l = s->map[i];
224 		if(l == 0)
225 			continue;
226 		for(pg = l->first; pg < l->last; pg++) {
227 			entry = *pg;
228 			if(pagedout(entry))
229 				continue;
230 
231 			if(entry->modref & PG_REF) {
232 				entry->modref &= ~PG_REF;
233 				entry->gen = genclock;
234 			}
235 
236 			if(genclock < entry->gen)
237 				age = ~(entry->gen - genclock);
238 			else
239 				age = genclock - entry->gen;
240 			gensum += age;
241 			gencount++;
242 			if(age <= genage)
243 				continue;
244 
245 			pagepte(type, pg);
246 
247 			if(ioptr >= conf.nswppo)
248 				goto out;
249 		}
250 	}
251 out:
252 	poperror();
253 	qunlock(&s->lk);
254 	putseg(s);
255 }
256 
257 static int
canflush(Proc * p,Segment * s)258 canflush(Proc *p, Segment *s)
259 {
260 	int i;
261 	Proc *ep;
262 
263 	lock(s);
264 	if(s->ref == 1) {		/* Easy if we are the only user */
265 		s->ref++;
266 		unlock(s);
267 		return canpage(p);
268 	}
269 	s->ref++;
270 	unlock(s);
271 
272 	/* Now we must do hardwork to ensure all processes which have tlb
273 	 * entries for this segment will be flushed if we succeed in paging it out
274 	 */
275 	p = proctab(0);
276 	ep = &p[conf.nproc];
277 	while(p < ep) {
278 		if(p->state != Dead) {
279 			for(i = 0; i < NSEG; i++)
280 				if(p->seg[i] == s)
281 					if(!canpage(p))
282 						return 0;
283 		}
284 		p++;
285 	}
286 	return 1;
287 }
288 
289 static void
pagepte(int type,Page ** pg)290 pagepte(int type, Page **pg)
291 {
292 	ulong daddr;
293 	Page *outp;
294 
295 	outp = *pg;
296 	switch(type) {
297 	case SG_TEXT:				/* Revert to demand load */
298 		putpage(outp);
299 		*pg = 0;
300 		break;
301 
302 	case SG_DATA:
303 	case SG_BSS:
304 	case SG_STACK:
305 	case SG_SHARED:
306 		/*
307 		 *  get a new swap address and clear any pages
308 		 *  referring to it from the cache
309 		 */
310 		daddr = newswap();
311 		if(daddr == ~0)
312 			break;
313 		cachedel(&swapimage, daddr);
314 
315 		lock(outp);
316 
317 		/* forget anything that it used to cache */
318 		uncachepage(outp);
319 
320 		/*
321 		 *  incr the reference count to make sure it sticks around while
322 		 *  being written
323 		 */
324 		outp->ref++;
325 
326 		/*
327 		 *  enter it into the cache so that a fault happening
328 		 *  during the write will grab the page from the cache
329 		 *  rather than one partially written to the disk
330 		 */
331 		outp->daddr = daddr;
332 		cachepage(outp, &swapimage);
333 		*pg = (Page*)(daddr|PG_ONSWAP);
334 		unlock(outp);
335 
336 		/* Add page to IO transaction list */
337 		iolist[ioptr++] = outp;
338 		break;
339 	}
340 }
341 
342 void
pagersummary(void)343 pagersummary(void)
344 {
345 	print("%lud/%lud memory %lud/%lud swap %d iolist\n",
346 		palloc.user-palloc.freecount,
347 		palloc.user, conf.nswap-swapalloc.free, conf.nswap,
348 		ioptr);
349 }
350 
351 static int
pageiocomp(void * a,void * b)352 pageiocomp(void *a, void *b)
353 {
354 	Page *p1, *p2;
355 
356 	p1 = *(Page **)a;
357 	p2 = *(Page **)b;
358 	if(p1->daddr > p2->daddr)
359 		return 1;
360 	else
361 		return -1;
362 }
363 
364 static void
executeio(void)365 executeio(void)
366 {
367 	Page *out;
368 	int i, n;
369 	Chan *c;
370 	char *kaddr;
371 	KMap *k;
372 
373 	c = swapimage.c;
374 	qsort(iolist, ioptr, sizeof iolist[0], pageiocomp);
375 	for(i = 0; i < ioptr; i++) {
376 		if(ioptr > conf.nswppo)
377 			panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
378 		out = iolist[i];
379 		k = kmap(out);
380 		kaddr = (char*)VA(k);
381 
382 		if(waserror())
383 			panic("executeio: page out I/O error");
384 
385 		n = devtab[c->type]->write(c, kaddr, BY2PG, out->daddr);
386 		if(n != BY2PG)
387 			nexterror();
388 
389 		kunmap(k);
390 		poperror();
391 
392 		/* Free up the page after I/O */
393 		lock(out);
394 		out->ref--;
395 		unlock(out);
396 		putpage(out);
397 	}
398 	ioptr = 0;
399 }
400 
401 static int
needpages(void *)402 needpages(void*)
403 {
404 	return palloc.freecount < swapalloc.headroom;
405 }
406 
407 void
setswapchan(Chan * c)408 setswapchan(Chan *c)
409 {
410 	uchar dirbuf[sizeof(Dir)+100];
411 	Dir d;
412 	int n;
413 
414 	if(swapimage.c) {
415 		if(swapalloc.free != conf.nswap){
416 			cclose(c);
417 			error(Einuse);
418 		}
419 		cclose(swapimage.c);
420 	}
421 
422 	/*
423 	 *  if this isn't a file, set the swap space
424 	 *  to be at most the size of the partition
425 	 */
426 	if(devtab[c->type]->dc != L'M'){
427 		n = devtab[c->type]->stat(c, dirbuf, sizeof dirbuf);
428 		if(n <= 0){
429 			cclose(c);
430 			error("stat failed in setswapchan");
431 		}
432 		convM2D(dirbuf, n, &d, nil);
433 		if(d.length < conf.nswap*BY2PG){
434 			conf.nswap = d.length/BY2PG;
435 			swapalloc.top = &swapalloc.swmap[conf.nswap];
436 			swapalloc.free = conf.nswap;
437 		}
438 	}
439 
440 	swapimage.c = c;
441 }
442 
443 int
swapfull(void)444 swapfull(void)
445 {
446 	return swapalloc.free < conf.nswap/10;
447 }
448