1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7
8 static void imagereclaim(void);
9 static void imagechanreclaim(void);
10
11 #include "io.h"
12
13 /*
14 * Attachable segment types
15 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },	/* null terminator; addphysseg fills in the free slots */
};

static Lock physseglock;	/* guards additions to and scans of physseg[] */

#define NFREECHAN 64		/* initial capacity of the deferred-close channel list */
#define IHASHSIZE 64		/* image hash buckets; must divide qid paths evenly enough */
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]
/* allocator and cache of Image structures, hashed by qid path */
static struct Imagealloc
{
	Lock;			/* spin lock on free list, hash chains and freechan */
	Image	*free;		/* free Image structures, linked by ->next */
	Image	*hash[IHASHSIZE];
	QLock	ireclaim;	/* mutex on reclaiming free images */

	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

/* if set, consulted by segattach before the physseg table (global segments) */
Segment* (*_globalsegattach)(Proc*, char*);
41
42 void
initseg(void)43 initseg(void)
44 {
45 Image *i, *ie;
46
47 imagealloc.free = xalloc(conf.nimage*sizeof(Image));
48 if (imagealloc.free == nil)
49 panic("initseg: no memory");
50 ie = &imagealloc.free[conf.nimage-1];
51 for(i = imagealloc.free; i < ie; i++)
52 i->next = i+1;
53 i->next = 0;
54 imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
55 imagealloc.szfreechan = NFREECHAN;
56 }
57
58 Segment *
newseg(int type,ulong base,ulong size)59 newseg(int type, ulong base, ulong size)
60 {
61 Segment *s;
62 int mapsize;
63
64 if(size > (SEGMAPSIZE*PTEPERTAB))
65 error(Enovmem);
66
67 s = smalloc(sizeof(Segment));
68 s->ref = 1;
69 s->type = type;
70 s->base = base;
71 s->top = base+(size*BY2PG);
72 s->size = size;
73 s->sema.prev = &s->sema;
74 s->sema.next = &s->sema;
75
76 mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
77 if(mapsize > nelem(s->ssegmap)){
78 mapsize *= 2;
79 if(mapsize > (SEGMAPSIZE*PTEPERTAB))
80 mapsize = (SEGMAPSIZE*PTEPERTAB);
81 s->map = smalloc(mapsize*sizeof(Pte*));
82 s->mapsize = mapsize;
83 }
84 else{
85 s->map = s->ssegmap;
86 s->mapsize = nelem(s->ssegmap);
87 }
88
89 return s;
90 }
91
/*
 * Release a reference to s.  The last reference frees the pte
 * maps, any profile buffer, and the segment itself, and drops
 * the segment's image reference.
 */
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	/*
	 * lock ordering: Image before Segment.  Take both so the
	 * image's cached segment pointer can be cleared atomically
	 * with observing the last reference.
	 */
	i = s->image;
	if(i != 0) {
		lock(i);
		lock(s);
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	/* last ref gone; serialize against faulting procs via s->lk */
	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}
135
136 void
relocateseg(Segment * s,ulong offset)137 relocateseg(Segment *s, ulong offset)
138 {
139 Page **pg, *x;
140 Pte *pte, **p, **endpte;
141
142 endpte = &s->map[s->mapsize];
143 for(p = s->map; p < endpte; p++) {
144 if(*p == 0)
145 continue;
146 pte = *p;
147 for(pg = pte->first; pg <= pte->last; pg++) {
148 if(x = *pg)
149 x->va += offset;
150 }
151 }
152 }
153
/*
 * Duplicate seg[segno] for a new process.  Shared segment types
 * (and BSS/DATA when share is set) just gain a reference; the
 * others get a new segment with copy-on-write copies of the ptes.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			/* data segment used as text: convert instead of copying */
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		/* the copy demand-loads from the same image */
		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	/* copy-on-write duplicate of every populated pte */
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	/* other procs share s: make them refault so COW takes effect */
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}
220
221 void
segpage(Segment * s,Page * p)222 segpage(Segment *s, Page *p)
223 {
224 Pte **pte;
225 ulong off;
226 Page **pg;
227
228 if(p->va < s->base || p->va >= s->top)
229 panic("segpage");
230
231 off = p->va - s->base;
232 pte = &s->map[off/PTEMAPMEM];
233 if(*pte == 0)
234 *pte = ptealloc();
235
236 pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
237 *pg = p;
238 if(pg < (*pte)->first)
239 (*pte)->first = pg;
240 if(pg > (*pte)->last)
241 (*pte)->last = pg;
242 }
243
/*
 * Find or create the cached Image for text file c, giving it a
 * segment of the requested type/base/len if it lacks one.
 * The Image is returned locked; the caller unlocks it.
 */
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				/* cache hit: i stays locked through the return */
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	/* i remains locked from here until the caller releases it */
	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}
314
/* instrumentation counters for imagereclaim, kept for debugging */
static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;
321
/*
 * Uncache image-backed pages from the tail of the free page list
 * so their Image structures can be freed.  Best effort, bounded
 * to 1000 pages per call; bails out if another proc is already at it.
 */
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			/* recheck ref under the page lock before uncaching */
			if(p->ref == 0) {
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}
360
361 /*
362 * since close can block, this has to be called outside of
363 * spin locks.
364 */
/*
 * Close the channels queued in imagealloc.freechan by putimage.
 * Single-threaded via fcreclaim; the spin lock is dropped around
 * each cclose because close can block.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		/* may block; must be outside the spin lock */
		cclose(c);
	}

	qunlock(&imagealloc.fcreclaim);
}
390
/*
 * Release a reference to image i.  The last reference unhashes
 * the image, returns it to the free list, and queues its channel
 * for closing later (cclose can block; we hold spin locks here).
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	/* images with no backing text are never cached or freed */
	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		/* smash the qid so concurrent cache searches can't match i */
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		/* unchain i from its hash bucket */
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
436
/*
 * Resize segment seg of the current process so it ends at
 * PGROUND(addr).  addr==0 is a query returning the segment base.
 * Returns 0 on success; raises an error on overlap or no memory.
 */
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	/* shrinking: free the pages above the new top */
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
		 */
		if(s->ref > 1){
			qunlock(&s->lk);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	/* growing: the new top must not run into another segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	/* grow the pte map when the new size needs more entries */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}
512
513 /*
514 * called with s->lk locked
515 */
/*
 * Free up to pages pages of s starting at address start,
 * flushing other processes' TLBs before releasing the pages.
 * Called with s->lk held (see comment above).
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	/* soff: byte offset into the segment; j: page index in first pte */
	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			/* no pte here: whole chunk already unmapped, skip it */
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}
576
577 Segment*
isoverlap(Proc * p,ulong va,int len)578 isoverlap(Proc *p, ulong va, int len)
579 {
580 int i;
581 Segment *ns;
582 ulong newtop;
583
584 newtop = va+len;
585 for(i = 0; i < NSEG; i++) {
586 ns = p->seg[i];
587 if(ns == 0)
588 continue;
589 if((newtop > ns->base && newtop <= ns->top) ||
590 (va >= ns->base && va < ns->top))
591 return ns;
592 }
593 return nil;
594 }
595
596 int
addphysseg(Physseg * new)597 addphysseg(Physseg* new)
598 {
599 Physseg *ps;
600
601 /*
602 * Check not already entered and there is room
603 * for a new entry and the terminating null entry.
604 */
605 lock(&physseglock);
606 for(ps = physseg; ps->name; ps++){
607 if(strcmp(ps->name, new->name) == 0){
608 unlock(&physseglock);
609 return -1;
610 }
611 }
612 if(ps-physseg >= nelem(physseg)-2){
613 unlock(&physseglock);
614 return -1;
615 }
616
617 *ps = *new;
618 unlock(&physseglock);
619
620 return 0;
621 }
622
623 int
isphysseg(char * name)624 isphysseg(char *name)
625 {
626 Physseg *ps;
627 int rv = 0;
628
629 lock(&physseglock);
630 for(ps = physseg; ps->name; ps++){
631 if(strcmp(ps->name, name) == 0){
632 rv = 1;
633 break;
634 }
635 }
636 unlock(&physseglock);
637 return rv;
638 }
639
/*
 * Attach a named segment of length len to process p at va
 * (when va==0, at a hole found working down from the stack).
 * Returns the base address of the new segment.
 */
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	/* name is a user pointer: validate it and require a terminator */
	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	/* find a free segment slot; the ESEG slot is never handed out here */
	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 * first look for a global segment with the
	 * same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted. Ensure that we don't
	 * map the zero page.
	 */
	if(va == 0) {
		for (os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
			va = os->base;
			if(len >= va)
				error(Enovmem);
			va -= len;
		}
		va &= ~(BY2PG-1);
	} else {
		/* page-align the requested address and recheck its range */
		va &= ~(BY2PG-1);
		if(va == 0 || va >= USTKTOP)
			error(Ebadarg);
	}

	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	/* the name must match a registered physical segment */
	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;		/* Turn off what is not allowed */
	attr |= ps->attr;		/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}
719
720 void
pteflush(Pte * pte,int s,int e)721 pteflush(Pte *pte, int s, int e)
722 {
723 int i;
724 Page *p;
725
726 for(i = s; i < e; i++) {
727 p = pte->pages[i];
728 if(pagedout(p) == 0)
729 memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
730 }
731 }
732
/*
 * System call: flush the instruction cache/TLB for the address
 * range [arg[0], arg[0]+arg[1]), walking it one segment and one
 * pte chunk at a time.
 */
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		/* NOTE(review): seg(..., 1) appears to return with s->lk held
		 * (it is qunlocked below) — confirm against seg()'s definition */
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		/* clamp the span to the top of this segment */
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		/* ps/pe: byte offsets of the span within one pte chunk */
		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		/* same segment still covers the rest: continue the walk */
		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}
783
784 void
segclock(ulong pc)785 segclock(ulong pc)
786 {
787 Segment *s;
788
789 s = up->seg[TSEG];
790 if(s == 0 || s->profile == 0)
791 return;
792
793 s->profile[0] += TK2MS(1);
794 if(pc >= s->base && pc < s->top) {
795 pc -= s->base;
796 s->profile[pc>>LRESPROF] += TK2MS(1);
797 }
798 }
799
800