xref: /plan9/sys/src/9/port/segment.c (revision 499069debb03e99ea217d35fd87fb49e96918a92)
1 #include	"u.h"
2 #include	"../port/lib.h"
3 #include	"mem.h"
4 #include	"dat.h"
5 #include	"fns.h"
6 #include	"../port/error.h"
7 
8 static void	imagereclaim(void);
9 static void	imagechanreclaim(void);
10 
11 #include "io.h"
12 
/*
 * Attachable segment types.
 * Null-terminated table searched by segattach(); addphysseg()
 * appends new entries, keeping the terminating null entry.
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0, 	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};
21 
static Lock physseglock;	/* guards additions to physseg[] */

#define NFREECHAN	64	/* initial capacity of the free-channel array */
#define IHASHSIZE	64	/* buckets in the image hash */
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]

/*
 * Allocator and cache of Image structures.  Live images are hashed
 * by qid.path; released images go on the free list.  Channels of
 * freed images are parked in freechan because cclose() can block
 * and so cannot be called while holding the spin lock.
 */
static struct Imagealloc
{
	Lock;
	Image	*free;
	Image	*hash[IHASHSIZE];
	QLock	ireclaim;	/* mutex on reclaiming free images */

	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

/* optional hook: if set, segattach() offers the name to it first */
Segment* (*_globalsegattach)(Proc*, char*);
41 
42 void
43 initseg(void)
44 {
45 	Image *i, *ie;
46 
47 	imagealloc.free = xalloc(conf.nimage*sizeof(Image));
48 	ie = &imagealloc.free[conf.nimage-1];
49 	for(i = imagealloc.free; i < ie; i++)
50 		i->next = i+1;
51 	i->next = 0;
52 	imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
53 	imagealloc.szfreechan = NFREECHAN;
54 }
55 
56 Segment *
57 newseg(int type, ulong base, ulong size)
58 {
59 	Segment *s;
60 	int mapsize;
61 
62 	if(size > (SEGMAPSIZE*PTEPERTAB))
63 		error(Enovmem);
64 
65 	if(swapfull())
66 		error(Enoswap);
67 	s = smalloc(sizeof(Segment));
68 	s->ref = 1;
69 	s->type = type;
70 	s->base = base;
71 	s->top = base+(size*BY2PG);
72 	s->size = size;
73 
74 	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
75 	if(mapsize > nelem(s->ssegmap)){
76 		mapsize *= 2;
77 		if(mapsize > (SEGMAPSIZE*PTEPERTAB))
78 			mapsize = (SEGMAPSIZE*PTEPERTAB);
79 		s->map = smalloc(mapsize*sizeof(Pte*));
80 		s->mapsize = mapsize;
81 	}
82 	else{
83 		s->map = s->ssegmap;
84 		s->mapsize = nelem(s->ssegmap);
85 	}
86 
87 	return s;
88 }
89 
/*
 * Drop a reference to s.  When the last reference goes, free the
 * segment: release its image reference, free every allocated Pte,
 * the map (if separately allocated) and the profile buffer.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		/* lock order: image before segment (matches attachimage) */
		lock(i);
		lock(s);
		/* detach the image's cached segment if ours is the last ref */
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	/* last reference: no one else can reach s now */
	qlock(&s->lk);
	if(i)
		putimage(i);

	/* free every allocated page table in the map */
	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
133 
134 void
135 relocateseg(Segment *s, ulong offset)
136 {
137 	Page **pg, *x;
138 	Pte *pte, **p, **endpte;
139 
140 	endpte = &s->map[s->mapsize];
141 	for(p = s->map; p < endpte; p++) {
142 		if(*p == 0)
143 			continue;
144 		pte = *p;
145 		for(pg = pte->first; pg <= pte->last; pg++) {
146 			if(x = *pg)
147 				x->va += offset;
148 		}
149 	}
150 }
151 
/*
 * Duplicate seg[segno] for fork.  Text, physical and (always)
 * shared segments are shared by reference; stack, private bss
 * and private data get a new segment with copied page tables.
 * A data segment duplicated as TSEG becomes a text segment
 * via data2txt.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);		/* silence used-before-set; every path below sets n or returns */
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		/* the copy demand-loads from the same image */
		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	/* copy the page tables into the new segment */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	/* other procs sharing s must reload their mappings */
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
218 
219 void
220 segpage(Segment *s, Page *p)
221 {
222 	Pte **pte;
223 	ulong off;
224 	Page **pg;
225 
226 	if(p->va < s->base || p->va >= s->top)
227 		panic("segpage");
228 
229 	off = p->va - s->base;
230 	pte = &s->map[off/PTEMAPMEM];
231 	if(*pte == 0)
232 		*pte = ptealloc();
233 
234 	pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
235 	*pg = p;
236 	if(pg < (*pte)->first)
237 		(*pte)->first = pg;
238 	if(pg > (*pte)->last)
239 		(*pte)->last = pg;
240 }
241 
/*
 * Find or create the Image cache entry for channel c and ensure it
 * has an attached segment of the given type/base/len (len in pages).
 * Returns with the Image locked; the caller must unlock it.
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	/* i stays locked through the return; see comment above */
	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}
312 
/* instrumentation counters for imagereclaim */
static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;
319 
/*
 * Uncache image-backed pages from the tail of the page free list
 * so that Image structures can be recycled.  Called by attachimage
 * when imagealloc.free is empty; frees at most 1000 pages per call.
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		/* canlock avoids deadlock; recheck ref once the page is locked */
		if(p->ref == 0 && canlock(p)) {
			if(p->ref == 0) {
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
358 
/*
 *  since close can block, this has to be called outside of
 *  spin locks.
 *
 *  Close the channels queued by putimage; at most one process
 *  drains the queue at a time (fcreclaim).
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);	/* may block; done outside the spin lock */
	}

	qunlock(&imagealloc.fcreclaim);
}
388 
/*
 * Drop a reference to image i.  On the last reference: unlink it
 * from the hash, return it to the free list, and queue its channel
 * for closing — cclose can block, so the close is deferred to
 * imagechanreclaim, which runs outside spin locks.
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		/* save the hash chain head before the qid is invalidated */
		l = &ihash(i->qid.path);
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unlink i from its hash chain */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
434 
/*
 * Implement brk: move the top of segment 'seg' of the current
 * process to PGROUND(addr).  addr==0 just reports the segment
 * base.  Shrinking frees the trimmed pages; growing checks swap,
 * overlap with other segments and the size limit, and enlarges
 * the Pte map if needed.  Returns 0 on success (base for a query);
 * raises an error otherwise.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	/* shrinking: free the pages above the new top */
	if(newtop < s->top) {
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	if(swapfull()){
		qunlock(&s->lk);
		error(Enoswap);
	}

	/* the new top must not run into another segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	/* grow the Pte map if the new size needs more tables */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}
506 
/*
 *  called with s->lk locked
 *
 *  Free up to 'pages' pages of s starting at virtual address
 *  'start'.  Page-table entries are zeroed first, other procs'
 *  TLBs are flushed, and only then are the pages actually freed.
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;	/* page index within the first table */

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		/* unallocated table: the whole span is already free */
		if(s->map[i] == 0) {
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
570 
571 Segment*
572 isoverlap(Proc *p, ulong va, int len)
573 {
574 	int i;
575 	Segment *ns;
576 	ulong newtop;
577 
578 	newtop = va+len;
579 	for(i = 0; i < NSEG; i++) {
580 		ns = p->seg[i];
581 		if(ns == 0)
582 			continue;
583 		if((newtop > ns->base && newtop <= ns->top) ||
584 		   (va >= ns->base && va < ns->top))
585 			return ns;
586 	}
587 	return nil;
588 }
589 
/*
 * Add a new attachable physical segment to the physseg table.
 * Returns 0 on success, -1 if the name is already present or
 * the table is full.
 */
int
addphysseg(Physseg* new)
{
	Physseg *ps;

	/*
	 * Check not already entered and there is room
	 * for a new entry and the terminating null entry.
	 */
	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, new->name) == 0){
			unlock(&physseglock);
			return -1;
		}
	}
	/* ps now points at the null terminator entry */
	if(ps-physseg >= nelem(physseg)-2){
		unlock(&physseglock);
		return -1;
	}

	*ps = *new;	/* the next slot remains the null terminator */
	unlock(&physseglock);

	return 0;
}
616 
617 int
618 isphysseg(char *name)
619 {
620 	Physseg *ps;
621 	int rv = 0;
622 
623 	lock(&physseglock);
624 	for(ps = physseg; ps->name; ps++){
625 		if(strcmp(ps->name, name) == 0){
626 			rv = 1;
627 			break;
628 		}
629 	}
630 	unlock(&physseglock);
631 	return rv;
632 }
633 
/*
 * Attach a named segment of 'len' bytes to process p at va
 * (va==0 means find a hole below the stack).  Global segments
 * are tried first via _globalsegattach; otherwise the name must
 * match an entry in physseg[].  Returns the base address of the
 * new segment; raises an error on bad args, overlap or no room.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	/* name comes from user space: validate and require a NUL */
	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	/* find a free segment slot (ESEG is reserved) */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 *  first look for a global segment with the
	 *  same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.
	 */
	if(va == 0) {
		va = p->seg[SSEG]->base - len;
		for(;;) {
			os = isoverlap(p, va, len);
			if(os == nil)
				break;
			va = os->base;
			if(len > va)
				error(Enovmem);
			va -= len;
		}
	}

	va = va&~(BY2PG-1);
	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;		/* Turn off what is not allowed */
	attr |= ps->attr;		/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
712 
713 void
714 pteflush(Pte *pte, int s, int e)
715 {
716 	int i;
717 	Page *p;
718 
719 	for(i = s; i < e; i++) {
720 		p = pte->pages[i];
721 		if(pagedout(p) == 0)
722 			memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
723 	}
724 }
725 
/*
 * System call: flush caches for the address range
 * [arg[0], arg[0]+arg[1]).  Marks every resident page in the
 * range PG_TXTFLUSH, sets flushme on each touched segment, and
 * flushes the MMU before returning.
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		/*
		 * NOTE(review): seg() is called with its lock flag set and
		 * appears to return with s->lk held — the qunlocks below
		 * rely on that; confirm against seg()'s definition.
		 */
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		/* clip the chunk to the top of this segment */
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		/* ps/pe are byte offsets within one Pte table's span */
		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		/* more of this segment left: continue without relocking */
		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
776 
777 void
778 segclock(ulong pc)
779 {
780 	Segment *s;
781 
782 	s = up->seg[TSEG];
783 	if(s == 0 || s->profile == 0)
784 		return;
785 
786 	s->profile[0] += TK2MS(1);
787 	if(pc >= s->base && pc < s->top) {
788 		pc -= s->base;
789 		s->profile[pc>>LRESPROF] += TK2MS(1);
790 	}
791 }
792