1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7
8 static void imagereclaim(void);
9 static void imagechanreclaim(void);
10
11 #include "io.h"
12
/*
 * Attachable segment types.  The table is terminated by an
 * all-zero entry; addphysseg() appends further entries at boot.
 */
static Physseg physseg[10] = {
	{ SG_SHARED, "shared", 0, SEGMAXSIZE, 0, 0 },
	{ SG_BSS, "memory", 0, SEGMAXSIZE, 0, 0 },
	{ 0, 0, 0, 0, 0, 0 },
};

/* guards scans and updates of physseg[] */
static Lock physseglock;
23
#define NFREECHAN 64
#define IHASHSIZE 64
#define ihash(s) imagealloc.hash[s%IHASHSIZE]
/*
 * Allocator and cache of Image structures, hashed by qid path.
 * The anonymous Lock protects free, hash[] and the freechan
 * fields; the two QLocks serialize the reclaim paths.
 */
static struct Imagealloc
{
	Lock;
	Image *free;		/* list of unused Image structures */
	Image *hash[IHASHSIZE];	/* cache of images, chained by Image.hash */
	QLock ireclaim;	/* mutex on reclaiming free images */

	Chan **freechan;	/* free image channels */
	int nfreechan;	/* number of free channels */
	int szfreechan;	/* size of freechan array */
	QLock fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

/* hook for attaching global segments by name; set at boot if configured */
Segment* (*_globalsegattach)(Proc*, char*);
41
42 void
initseg(void)43 initseg(void)
44 {
45 Image *i, *ie;
46
47 imagealloc.free = xalloc(conf.nimage*sizeof(Image));
48 if (imagealloc.free == nil)
49 panic("initseg: no memory");
50 ie = &imagealloc.free[conf.nimage-1];
51 for(i = imagealloc.free; i < ie; i++)
52 i->next = i+1;
53 i->next = 0;
54 imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
55 imagealloc.szfreechan = NFREECHAN;
56 }
57
58 Segment *
newseg(int type,ulong base,ulong size)59 newseg(int type, ulong base, ulong size)
60 {
61 Segment *s;
62 int mapsize;
63
64 if(size > (SEGMAPSIZE*PTEPERTAB))
65 error(Enovmem);
66
67 s = smalloc(sizeof(Segment));
68 s->ref = 1;
69 s->type = type;
70 s->base = base;
71 s->top = base+(size*BY2PG);
72 s->size = size;
73 s->sema.prev = &s->sema;
74 s->sema.next = &s->sema;
75
76 mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
77 if(mapsize > nelem(s->ssegmap)){
78 mapsize *= 2;
79 if(mapsize > SEGMAPSIZE)
80 mapsize = SEGMAPSIZE;
81 s->map = smalloc(mapsize*sizeof(Pte*));
82 s->mapsize = mapsize;
83 }
84 else{
85 s->map = s->ssegmap;
86 s->mapsize = nelem(s->ssegmap);
87 }
88
89 return s;
90 }
91
/*
 * Drop a reference to s; on the last reference free the Ptes, the
 * map (if separately allocated) and the profile buffer.  Lock
 * order is image before segment, matching the rest of this file.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		lock(i);
		lock(s);
		/* detach the image's cached segment if ours is the last ref */
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	qlock(&s->lk);
	if(i)
		putimage(i);

	/* free every allocated page table in the map */
	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
135
136 void
relocateseg(Segment * s,ulong offset)137 relocateseg(Segment *s, ulong offset)
138 {
139 Page **pg, *x;
140 Pte *pte, **p, **endpte;
141
142 endpte = &s->map[s->mapsize];
143 for(p = s->map; p < endpte; p++) {
144 if(*p == 0)
145 continue;
146 pte = *p;
147 for(pg = pte->first; pg <= pte->last; pg++) {
148 if(x = *pg)
149 x->va += offset;
150 }
151 }
152 }
153
/*
 * Duplicate (or share) seg[segno] for a forking process.
 * share!=0 requests sharing where the type allows it: text,
 * shared and physical segments always share the Pte set; stack
 * segments are always copied; bss/data are copy-on-write unless
 * shared.  A data segment in the TSEG slot is converted to text.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			/* exec path: reuse the data image as text */
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		/* the copy demand-loads from the same file image */
		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	/* share the page tables copy-on-write */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	/* other procs map s too: make them reload the now-COW entries */
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
220
221 void
segpage(Segment * s,Page * p)222 segpage(Segment *s, Page *p)
223 {
224 Pte **pte;
225 ulong off;
226 Page **pg;
227
228 if(p->va < s->base || p->va >= s->top)
229 panic("segpage");
230
231 off = p->va - s->base;
232 pte = &s->map[off/PTEMAPMEM];
233 if(*pte == 0)
234 *pte = ptealloc();
235
236 pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
237 *pg = p;
238 if(pg < (*pte)->first)
239 (*pte)->first = pg;
240 if(pg > (*pte)->last)
241 (*pte)->last = pg;
242 }
243
/*
 * Find or create the Image cache entry for channel c and make sure
 * it has a segment of the given type/base/len attached.  Returns
 * the Image still locked; the caller must unlock it.
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	/* i is not yet visible on a hash chain, so no contention here */
	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}
314
/* instrumentation counters for imagereclaim() */
static struct {
	int calls;			/* times imagereclaim was called */
	int loops;			/* times the main loop was run */
	uvlong ticks;			/* total time in the main loop */
	uvlong maxt;			/* longest time in main loop */
} irstats;
321
/*
 * Evict image-backed pages from the tail of the free page list so
 * their Image structures become reclaimable.  Best effort: returns
 * immediately if another proc is already at it, and examines at
 * most 1000 pages per call to bound time under palloc.
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			/* recheck ref now that the page lock is held */
			if(p->ref == 0) {
				n++;
				uncachepage(p);
				/* move to the head: no longer image-backed */
				pageunchain(p);
				pagechainhead(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
362
/*
 * Close the channels queued on imagealloc.freechan by putimage().
 * since close can block, this has to be called outside of
 * spin locks.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);	/* may block; done outside the spin lock */
	}

	qunlock(&imagealloc.fcreclaim);
}
392
/*
 * Drop a reference to image i.  On the last reference the image is
 * unhashed and returned to the free list; its channel is queued on
 * freechan for imagechanreclaim(), since cclose can block and spin
 * locks are held here.
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	/* images with no backing text are never freed */
	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		/* poison the qid so cache lookups can no longer match i */
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unlink i from its hash chain */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
438
/*
 * Grow or shrink segment `seg' of the current process so it ends
 * at the page-rounded `addr'.  addr==0 just reports the segment
 * base.  Returns 0 on success; raises an error on bad segment,
 * overlap, lack of memory, or shrinking a shared segment.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
		 */
		if(s->ref > 1){
			qunlock(&s->lk);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	/* growing: check the new top against every other segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop > ns->base && s->base < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	/* enlarge the Pte map if the new size no longer fits */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}
514
/*
 * Free `pages' pages of s starting at address `start'.
 * called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;	/* page index within the first Pte */

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			/* nothing mapped here: account for the skipped slots */
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
578
579 Segment*
isoverlap(Proc * p,ulong va,int len)580 isoverlap(Proc *p, ulong va, int len)
581 {
582 int i;
583 Segment *ns;
584 ulong newtop;
585
586 newtop = va+len;
587 for(i = 0; i < NSEG; i++) {
588 ns = p->seg[i];
589 if(ns == 0)
590 continue;
591 if(newtop > ns->base && va < ns->top)
592 return ns;
593 }
594 return nil;
595 }
596
597 int
addphysseg(Physseg * new)598 addphysseg(Physseg* new)
599 {
600 Physseg *ps;
601
602 /*
603 * Check not already entered and there is room
604 * for a new entry and the terminating null entry.
605 */
606 lock(&physseglock);
607 for(ps = physseg; ps->name; ps++){
608 if(strcmp(ps->name, new->name) == 0){
609 unlock(&physseglock);
610 return -1;
611 }
612 }
613 if(ps-physseg >= nelem(physseg)-2){
614 unlock(&physseglock);
615 return -1;
616 }
617
618 *ps = *new;
619 unlock(&physseglock);
620
621 return 0;
622 }
623
624 int
isphysseg(char * name)625 isphysseg(char *name)
626 {
627 Physseg *ps;
628 int rv = 0;
629
630 lock(&physseglock);
631 for(ps = physseg; ps->name; ps++){
632 if(strcmp(ps->name, name) == 0){
633 rv = 1;
634 break;
635 }
636 }
637 unlock(&physseglock);
638 return rv;
639 }
640
/*
 * Attach the physical segment `name' to process p at va (0 means
 * choose an address below the stack).  attr supplies SG_ flag
 * bits; the type bits come from the Physseg entry.  Returns the
 * base address of the new segment; raises an error on bad
 * arguments, overlap or exhaustion.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	/* name comes from user space: validate it and require a NUL */
	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	/* find a free segment slot (ESEG is reserved for exec) */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 * first look for a global segment with the
	 * same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted. Ensure that we don't
	 * map the zero page.
	 */
	if(va == 0) {
		for (os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
			va = os->base;
			if(len >= va)
				error(Enovmem);
			va -= len;
		}
		va &= ~(BY2PG-1);
	} else {
		va &= ~(BY2PG-1);
		if(va == 0 || va >= USTKTOP)
			error(Ebadarg);
	}

	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;	/* Turn off what is not allowed */
	attr |= ps->attr;	/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
720
721 void
pteflush(Pte * pte,int s,int e)722 pteflush(Pte *pte, int s, int e)
723 {
724 int i;
725 Page *p;
726
727 for(i = s; i < e; i++) {
728 p = pte->pages[i];
729 if(pagedout(p) == 0)
730 memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
731 }
732 }
733
/*
 * segflush system call: flush the instruction cache for the range
 * [arg[0], arg[0]+arg[1]), marking each covered page PG_TXTFLUSH
 * and each touched segment flushme, then flush the MMU.
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		/* NOTE(review): seg() appears to return with s->lk held
		 * (we qunlock it below) — confirm against seg()'s source */
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		/* clip this pass to the end of the current segment */
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		/* ps..pe: byte offsets within the covering Pte's span */
		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		/* more of this segment left to flush? */
		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
784
785 void
segclock(ulong pc)786 segclock(ulong pc)
787 {
788 Segment *s;
789
790 s = up->seg[TSEG];
791 if(s == 0 || s->profile == 0)
792 return;
793
794 s->profile[0] += TK2MS(1);
795 if(pc >= s->base && pc < s->top) {
796 pc -= s->base;
797 s->profile[pc>>LRESPROF] += TK2MS(1);
798 }
799 }
800
801