xref: /plan9/sys/src/9/port/segment.c (revision f9e1cf08d3be51592e03e639fc848a68dc31a55e)
1 #include	"u.h"
2 #include	"../port/lib.h"
3 #include	"mem.h"
4 #include	"dat.h"
5 #include	"fns.h"
6 #include	"../port/error.h"
7 
8 static void	imagereclaim(void);
9 static void	imagechanreclaim(void);
10 
11 #include "io.h"
12 
/*
 * Attachable segment types
 *
 * Table is terminated by a null entry; addphysseg() may append
 * further entries at run time under physseglock.
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0, 	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};

static Lock physseglock;	/* guards additions to physseg */
23 
#define NFREECHAN	64	/* initial size of the deferred-close channel array */
#define IHASHSIZE	64	/* number of image hash buckets */
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]

/* allocator and hash cache for Image structures (cached program texts) */
static struct Imagealloc
{
	Lock;
	Image	*free;			/* free list, filled by initseg */
	Image	*hash[IHASHSIZE];	/* in-use images, hashed by qid.path */
	QLock	ireclaim;	/* mutex on reclaiming free images */

	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;
39 
/* optional hook: look up a global segment by name (used by segattach; nil if unset) */
Segment* (*_globalsegattach)(Proc*, char*);
41 
42 void
43 initseg(void)
44 {
45 	Image *i, *ie;
46 
47 	imagealloc.free = xalloc(conf.nimage*sizeof(Image));
48 	if (imagealloc.free == nil)
49 		panic("initseg: no memory");
50 	ie = &imagealloc.free[conf.nimage-1];
51 	for(i = imagealloc.free; i < ie; i++)
52 		i->next = i+1;
53 	i->next = 0;
54 	imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
55 	imagealloc.szfreechan = NFREECHAN;
56 }
57 
58 Segment *
59 newseg(int type, ulong base, ulong size)
60 {
61 	Segment *s;
62 	int mapsize;
63 
64 	if(size > (SEGMAPSIZE*PTEPERTAB))
65 		error(Enovmem);
66 
67 	if(swapfull())
68 		error(Enoswap);
69 	s = smalloc(sizeof(Segment));
70 	s->ref = 1;
71 	s->type = type;
72 	s->base = base;
73 	s->top = base+(size*BY2PG);
74 	s->size = size;
75 	s->sema.prev = &s->sema;
76 	s->sema.next = &s->sema;
77 
78 	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
79 	if(mapsize > nelem(s->ssegmap)){
80 		mapsize *= 2;
81 		if(mapsize > (SEGMAPSIZE*PTEPERTAB))
82 			mapsize = (SEGMAPSIZE*PTEPERTAB);
83 		s->map = smalloc(mapsize*sizeof(Pte*));
84 		s->mapsize = mapsize;
85 	}
86 	else{
87 		s->map = s->ssegmap;
88 		s->mapsize = nelem(s->ssegmap);
89 	}
90 
91 	return s;
92 }
93 
/*
 * Release a reference to s; on the last reference, free the page
 * tables, drop the image reference and free the segment itself.
 * A nil s is ignored.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		/* image lock is taken before the segment lock */
		lock(i);
		lock(s);
		/* detach the image's cached segment if ours is the last ref */
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	/* last reference gone: tear the segment down */
	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
137 
138 void
139 relocateseg(Segment *s, ulong offset)
140 {
141 	Page **pg, *x;
142 	Pte *pte, **p, **endpte;
143 
144 	endpte = &s->map[s->mapsize];
145 	for(p = s->map; p < endpte; p++) {
146 		if(*p == 0)
147 			continue;
148 		pte = *p;
149 		for(pg = pte->first; pg <= pte->last; pg++) {
150 			if(x = *pg)
151 				x->va += offset;
152 		}
153 	}
154 }
155 
/*
 * Duplicate seg[segno] for a new process.  Text, shared and
 * physical segments are shared by reference; stack is copied;
 * bss and data are copied (or shared when `share' is set).
 * A data-typed segment sitting in the TSEG slot is converted
 * with data2txt instead.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);		/* silence used-but-not-set; every non-shared case assigns n */
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		/* the copy demand-loads from the same image */
		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	/* duplicate the page tables via ptecpy */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	/* other processes using s must reload their mappings */
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
222 
223 void
224 segpage(Segment *s, Page *p)
225 {
226 	Pte **pte;
227 	ulong off;
228 	Page **pg;
229 
230 	if(p->va < s->base || p->va >= s->top)
231 		panic("segpage");
232 
233 	off = p->va - s->base;
234 	pte = &s->map[off/PTEMAPMEM];
235 	if(*pte == 0)
236 		*pte = ptealloc();
237 
238 	pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
239 	*pg = p;
240 	if(pg < (*pte)->first)
241 		(*pte)->first = pg;
242 	if(pg > (*pte)->last)
243 		(*pte)->last = pg;
244 }
245 
/*
 * Find or create the Image caching the text served by channel c.
 * On return the image is locked and i->s holds a referenced
 * segment of the given type/base/len; the caller must unlock i.
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			/* hash hit; confirm the full identity under the image lock */
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	/* i stays locked through the return; the caller unlocks it */
	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}
316 
/* instrumentation counters for imagereclaim() */
static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;
323 
/*
 * Evict image-backed pages from the tail of the free page list
 * so their Image structures can be recycled.  At most 1000 pages
 * are uncached per call; concurrent callers are serialized by
 * ireclaim and simply return if reclamation is already running.
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			/* recheck ref now that we hold the page lock */
			if(p->ref == 0) {
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
362 
/*
 *  since close can block, this has to be called outside of
 *  spin locks.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);	/* may block; imagealloc is not held here */
	}

	qunlock(&imagealloc.fcreclaim);
}
392 
/*
 * Release a reference to image i.  On the last reference the
 * image is unhashed and returned to the free list, and its
 * channel is queued for deferred cclose — close can block, and
 * spin locks are held here (see imagechanreclaim).
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		/* poison the qid so cache lookups can no longer match */
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unlink i from its hash chain */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			/* grow the deferred-close array by NFREECHAN entries */
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
438 
/*
 * Resize segment `seg' of the current process so that it ends at
 * PGROUND(addr).  addr==0 returns the segment base (query).
 * Shrinking frees the vacated pages; growing checks for overlap
 * with the other segments and enlarges the Pte map as needed.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	/* shrinking: release the pages above the new top */
	if(newtop < s->top) {
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	if(swapfull()){
		qunlock(&s->lk);
		error(Enoswap);
	}

	/* growing: the new top must not land inside another segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	/* enlarge the Pte map when the new size needs more entries */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}
510 
/*
 *  called with s->lk locked
 *
 *  Free `pages' pages of s starting at virtual address `start':
 *  map entries are zeroed first, other processes' TLBs flushed,
 *  and only then are the collected pages released.
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;	/* page index within the first Pte */

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			/* nothing mapped in this Pte; skip its remaining pages */
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
574 
575 Segment*
576 isoverlap(Proc *p, ulong va, int len)
577 {
578 	int i;
579 	Segment *ns;
580 	ulong newtop;
581 
582 	newtop = va+len;
583 	for(i = 0; i < NSEG; i++) {
584 		ns = p->seg[i];
585 		if(ns == 0)
586 			continue;
587 		if((newtop > ns->base && newtop <= ns->top) ||
588 		   (va >= ns->base && va < ns->top))
589 			return ns;
590 	}
591 	return nil;
592 }
593 
594 int
595 addphysseg(Physseg* new)
596 {
597 	Physseg *ps;
598 
599 	/*
600 	 * Check not already entered and there is room
601 	 * for a new entry and the terminating null entry.
602 	 */
603 	lock(&physseglock);
604 	for(ps = physseg; ps->name; ps++){
605 		if(strcmp(ps->name, new->name) == 0){
606 			unlock(&physseglock);
607 			return -1;
608 		}
609 	}
610 	if(ps-physseg >= nelem(physseg)-2){
611 		unlock(&physseglock);
612 		return -1;
613 	}
614 
615 	*ps = *new;
616 	unlock(&physseglock);
617 
618 	return 0;
619 }
620 
621 int
622 isphysseg(char *name)
623 {
624 	Physseg *ps;
625 	int rv = 0;
626 
627 	lock(&physseglock);
628 	for(ps = physseg; ps->name; ps++){
629 		if(strcmp(ps->name, name) == 0){
630 			rv = 1;
631 			break;
632 		}
633 	}
634 	unlock(&physseglock);
635 	return rv;
636 }
637 
/*
 * Attach the named physical segment of len bytes to process p at
 * va, or — when va is 0 — at a hole found below the stack segment.
 * Returns the base address of the attached segment.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	/* name comes from user space: validate it and require a NUL */
	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	/* find a free segment slot; ESEG is skipped (reserved) */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 *  first look for a global segment with the
	 *  same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.
	 */
	if(va == 0) {
		va = p->seg[SSEG]->base - len;
		for(;;) {
			os = isoverlap(p, va, len);
			if(os == nil)
				break;
			va = os->base;
			if(len > va)
				error(Enovmem);
			va -= len;
		}
	}

	/* page-align the caller's address and check it one last time */
	va = va&~(BY2PG-1);
	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;		/* Turn off what is not allowed */
	attr |= ps->attr;		/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
716 
717 void
718 pteflush(Pte *pte, int s, int e)
719 {
720 	int i;
721 	Page *p;
722 
723 	for(i = s; i < e; i++) {
724 		p = pte->pages[i];
725 		if(pagedout(p) == 0)
726 			memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
727 	}
728 }
729 
/*
 * System call: flush the cached text for [arg[0], arg[0]+arg[1]),
 * marking the covered resident pages PG_TXTFLUSH and flushing
 * the MMU.  Errors if any part of the range is unmapped.
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		/*
		 * NOTE(review): seg(..., 1) appears to return with s->lk
		 * held — the qunlocks below rely on it; confirm in seg().
		 */
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		/* clip the remaining length to this segment */
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		/* byte range [ps, pe) within the covering Pte */
		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);	/* round up to a page boundary */
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		/* more of this segment left: continue without re-looking it up */
		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
780 
781 void
782 segclock(ulong pc)
783 {
784 	Segment *s;
785 
786 	s = up->seg[TSEG];
787 	if(s == 0 || s->profile == 0)
788 		return;
789 
790 	s->profile[0] += TK2MS(1);
791 	if(pc >= s->base && pc < s->top) {
792 		pc -= s->base;
793 		s->profile[pc>>LRESPROF] += TK2MS(1);
794 	}
795 }
796 
797