xref: /plan9/sys/src/9/port/segment.c (revision d1be6b086622eecc0da76db1fbd64349a5e85293)
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"

static void	imagereclaim(void);
static void	imagechanreclaim(void);

#include "io.h"

/*
 * Attachable segment types
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};

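/*
 * The table above leaves room for more classes: addphysseg (below)
 * fills the free slots at boot, always keeping the terminating null
 * entry intact.
 */
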
static Lock physseglock;

#define NFREECHAN	64
#define IHASHSIZE	64
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]
static struct Imagealloc
{
	Lock;
	Image	*free;
	Image	*hash[IHASHSIZE];
	QLock	ireclaim;	/* mutex on reclaiming free images */

	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

Segment* (*_globalsegattach)(Proc*, char*);

void
initseg(void)
{
	Image *i, *ie;

	imagealloc.free = xalloc(conf.nimage*sizeof(Image));
	if(imagealloc.free == nil)
		panic("initseg: no memory");
	ie = &imagealloc.free[conf.nimage-1];
	for(i = imagealloc.free; i < ie; i++)
		i->next = i+1;
	i->next = 0;
	imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
	imagealloc.szfreechan = NFREECHAN;
}

Segment *
newseg(int type, ulong base, ulong size)
{
	Segment *s;
	int mapsize;

	if(size > (SEGMAPSIZE*PTEPERTAB))
		error(Enovmem);

	s = smalloc(sizeof(Segment));
	s->ref = 1;
	s->type = type;
	s->base = base;
	s->top = base+(size*BY2PG);
	s->size = size;
	s->sema.prev = &s->sema;
	s->sema.next = &s->sema;

	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
	if(mapsize > nelem(s->ssegmap)){
		mapsize *= 2;
		if(mapsize > (SEGMAPSIZE*PTEPERTAB))
			mapsize = (SEGMAPSIZE*PTEPERTAB);
		s->map = smalloc(mapsize*sizeof(Pte*));
		s->mapsize = mapsize;
	}
	else{
		s->map = s->ssegmap;
		s->mapsize = nelem(s->ssegmap);
	}

	return s;
}

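/*
 * A sketch of the sizing arithmetic above, assuming the pc port's
 * constants (BY2PG 4K, PTEMAPMEM 1MB, so PTEPERTAB = 256 and
 * nelem(ssegmap) = SSEGMAPSIZE = 16): a 5000-page (~20MB) segment
 * needs ROUND(5000, 256)/256 = 20 Ptes, too many for the built-in
 * ssegmap, so a doubled 40-entry map is smalloc'ed, leaving headroom
 * for later growth via ibrk.
 */
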
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		lock(i);
		lock(s);
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}

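/*
 * Note the locking in putseg above: the Image lock is taken before
 * the Segment lock so that i->s can be cleared consistently with the
 * final reference going away.
 */
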
void
relocateseg(Segment *s, ulong offset)
{
	Page **pg, *x;
	Pte *pte, **p, **endpte;

	endpte = &s->map[s->mapsize];
	for(p = s->map; p < endpte; p++) {
		if(*p == 0)
			continue;
		pte = *p;
		for(pg = pte->first; pg <= pte->last; pg++) {
			if(x = *pg)
				x->va += offset;
		}
	}
}

Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}

void
segpage(Segment *s, Page *p)
{
	Pte **pte;
	ulong off;
	Page **pg;

	if(p->va < s->base || p->va >= s->top)
		panic("segpage");

	off = p->va - s->base;
	pte = &s->map[off/PTEMAPMEM];
	if(*pte == 0)
		*pte = ptealloc();

	pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
	*pg = p;
	if(pg < (*pte)->first)
		(*pte)->first = pg;
	if(pg > (*pte)->last)
		(*pte)->last = pg;
}

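/*
 * Worked example of segpage's two-level lookup, again assuming 4K
 * pages and a 1MB PTEMAPMEM: for a page 5MB+12KB into the segment,
 * off/PTEMAPMEM = 5 picks the sixth Pte in s->map and
 * (off&(PTEMAPMEM-1))/BY2PG = 3 picks the fourth page slot in it;
 * first and last bracket the occupied slots so later walks over the
 * Pte stay short.
 */
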
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}

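/*
 * attachimage returns with the Image still locked; the caller (exec,
 * for instance) reads i->s out of it and then unlocks it.
 */
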
static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;

static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			if(p->ref == 0) {	/* re-check the ref under the Page lock */
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}

/*
 *  since close can block, this has to be called outside of
 *  spin locks.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);
	}

	qunlock(&imagealloc.fcreclaim);
}

void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing the channel till we're out of spin locks */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}

long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
		 */
		if(s->ref > 1){
			qunlock(&s->lk);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}

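/*
 * ibrk does the work for the brk/segbrk system calls.  Growing a
 * segment only widens the Pte map; no pages are allocated here.
 * They arrive later, one at a time, from the fault handler.
 */
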
/*
 *  called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->pages[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}

Segment*
isoverlap(Proc *p, ulong va, int len)
{
	int i;
	Segment *ns;
	ulong newtop;

	newtop = va+len;
	for(i = 0; i < NSEG; i++) {
		ns = p->seg[i];
		if(ns == 0)
			continue;
		if((newtop > ns->base && newtop <= ns->top) ||
		   (va >= ns->base && va < ns->top))
			return ns;
	}
	return nil;
}

int
addphysseg(Physseg* new)
{
	Physseg *ps;

	/*
	 * Check not already entered and there is room
	 * for a new entry and the terminating null entry.
	 */
	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, new->name) == 0){
			unlock(&physseglock);
			return -1;
		}
	}
	if(ps-physseg >= nelem(physseg)-2){
		unlock(&physseglock);
		return -1;
	}

	*ps = *new;
	unlock(&physseglock);

	return 0;
}

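/*
 * Illustrative only: a port or driver could register a device window
 * at boot roughly as below; the name and numbers are hypothetical.
 * The fields are attr, name, pa, size (in pages), pgalloc and pgfree,
 * as in the builtin table at the top of this file.
 *
 *	Physseg fbseg = {
 *		SG_PHYSICAL, "fb", 0xd0000000, 0x800000/BY2PG, 0, 0,
 *	};
 *
 *	if(addphysseg(&fbseg) < 0)
 *		print("fb: physseg already registered\n");
 */
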
int
isphysseg(char *name)
{
	Physseg *ps;
	int rv = 0;

	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, name) == 0){
			rv = 1;
			break;
		}
	}
	unlock(&physseglock);
	return rv;
}

ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 *  first look for a global segment with the
	 *  same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.  Ensure that we don't
	 * map the zero page.
	 */
	if(va == 0) {
		for(os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
			va = os->base;
			if(len >= va)
				error(Enovmem);
			va -= len;
		}
		va &= ~(BY2PG-1);
	} else {
		va &= ~(BY2PG-1);
		if(va == 0 || va >= USTKTOP)
			error(Ebadarg);
	}

	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;		/* Turn off what is not allowed */
	attr |= ps->attr;		/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}

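/*
 * From user code this is reached through segattach(2).  A minimal
 * sketch, attaching the builtin "shared" class and letting the
 * kernel pick the address:
 *
 *	ulong *v;
 *
 *	v = segattach(0, "shared", 0, 1024*1024);
 *	if(v == (void*)-1)
 *		sysfatal("segattach: %r");
 */
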
void
pteflush(Pte *pte, int s, int e)
{
	int i;
	Page *p;

	for(i = s; i < e; i++) {
		p = pte->pages[i];
		if(pagedout(p) == 0)
			memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
	}
}

long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}

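/*
 * syssegflush implements segflush(2): marking the pages PG_TXTFLUSH
 * tells the mmu code to invalidate any cached instructions for them
 * when they are next mapped, which is what a program that generates
 * code at run time needs before jumping to it.
 */
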
void
segclock(ulong pc)
{
	Segment *s;

	s = up->seg[TSEG];
	if(s == 0 || s->profile == 0)
		return;

	s->profile[0] += TK2MS(1);
	if(pc >= s->base && pc < s->top) {
		pc -= s->base;
		s->profile[pc>>LRESPROF] += TK2MS(1);
	}
}