#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"

static void	imagereclaim(void);
static void	imagechanreclaim(void);

#include "io.h"

/*
 * Attachable segment types
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};

static Lock physseglock;

#define NFREECHAN	64
#define IHASHSIZE	64
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]
static struct Imagealloc
{
	Lock;
	Image	*free;
	Image	*hash[IHASHSIZE];
	QLock	ireclaim;	/* mutex on reclaiming free images */

	Chan	**freechan;	/* free image channels */
	int	nfreechan;	/* number of free channels */
	int	szfreechan;	/* size of freechan array */
	QLock	fcreclaim;	/* mutex on reclaiming free channels */
}imagealloc;

Segment* (*_globalsegattach)(Proc*, char*);

void
initseg(void)
{
	Image *i, *ie;

	imagealloc.free = xalloc(conf.nimage*sizeof(Image));
	if(imagealloc.free == nil)
		panic("initseg: no memory");
	ie = &imagealloc.free[conf.nimage-1];
	for(i = imagealloc.free; i < ie; i++)
		i->next = i+1;
	i->next = 0;
	imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
	imagealloc.szfreechan = NFREECHAN;
}

Segment *
newseg(int type, ulong base, ulong size)
{
	Segment *s;
	int mapsize;

	if(size > (SEGMAPSIZE*PTEPERTAB))
		error(Enovmem);

	s = smalloc(sizeof(Segment));
	s->ref = 1;
	s->type = type;
	s->base = base;
	s->top = base+(size*BY2PG);
	s->size = size;
	s->sema.prev = &s->sema;
	s->sema.next = &s->sema;

	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
	if(mapsize > nelem(s->ssegmap)){
		mapsize *= 2;
		if(mapsize > SEGMAPSIZE)
			mapsize = SEGMAPSIZE;
		s->map = smalloc(mapsize*sizeof(Pte*));
		s->mapsize = mapsize;
	}
	else{
		s->map = s->ssegmap;
		s->mapsize = nelem(s->ssegmap);
	}

	return s;
}

void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		lock(i);
		lock(s);
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}

void
relocateseg(Segment *s, ulong offset)
{
	Page **pg, *x;
	Pte *pte, **p, **endpte;

	endpte = &s->map[s->mapsize];
	for(p = s->map; p < endpte; p++) {
		if(*p == 0)
			continue;
		pte = *p;
		for(pg = pte->first; pg <= pte->last; pg++) {
			if(x = *pg)
				x->va += offset;
		}
	}
}

Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
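		/*
		 * not shared: make a private segment.  The Pte maps are
		 * duplicated below with ptecpy, so the pages themselves
		 * remain shared copy-on-write with the parent.
		 */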
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			poperror();
			qunlock(&s->lk);
			return data2txt(s);
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}

void
segpage(Segment *s, Page *p)
{
	Pte **pte;
	ulong off;
	Page **pg;

	if(p->va < s->base || p->va >= s->top)
		panic("segpage");

	off = p->va - s->base;
	pte = &s->map[off/PTEMAPMEM];
	if(*pte == 0)
		*pte = ptealloc();

	pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
	*pg = p;
	if(pg < (*pte)->first)
		(*pte)->first = pg;
	if(pg > (*pte)->last)
		(*pte)->last = pg;
}

Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	lock(i);
	incref(c);
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}

static struct {
	int	calls;	/* times imagereclaim was called */
	int	loops;	/* times the main loop was run */
	uvlong	ticks;	/* total time in the main loop */
	uvlong	maxt;	/* longest time in main loop */
} irstats;

static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
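	 * A page's ref is tested again after canlock(p): the page
	 * may have been reallocated between the unlocked test and
	 * the acquisition of its lock.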
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			if(p->ref == 0) {
				n++;
				uncachepage(p);
				pageunchain(p);
				pagechainhead(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}

/*
 * since close can block, this has to be called outside of
 * spin locks.
 */
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		cclose(c);
	}

	qunlock(&imagealloc.fcreclaim);
}

void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;

		/* defer freeing channel till we're out of spin locks */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}

long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
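		 * A segment used by this proc alone can be shrunk safely;
		 * mfreeseg releases the pages and flushmmu below discards
		 * any stale translations.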
		 */
		if(s->ref > 1){
			qunlock(&s->lk);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop > ns->base && s->base < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}

/*
 * called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}

Segment*
isoverlap(Proc *p, ulong va, int len)
{
	int i;
	Segment *ns;
	ulong newtop;

	newtop = va+len;
	for(i = 0; i < NSEG; i++) {
		ns = p->seg[i];
		if(ns == 0)
			continue;
		if(newtop > ns->base && va < ns->top)
			return ns;
	}
	return nil;
}

int
addphysseg(Physseg* new)
{
	Physseg *ps;

	/*
	 * Check not already entered and there is room
	 * for a new entry and the terminating null entry.
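	 * The scan and the insertion are both serialised by
	 * physseglock, so concurrent callers cannot add duplicate
	 * entries.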
	 */
	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, new->name) == 0){
			unlock(&physseglock);
			return -1;
		}
	}
	if(ps-physseg >= nelem(physseg)-2){
		unlock(&physseglock);
		return -1;
	}

	*ps = *new;
	unlock(&physseglock);

	return 0;
}

int
isphysseg(char *name)
{
	Physseg *ps;
	int rv = 0;

	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, name) == 0){
			rv = 1;
			break;
		}
	}
	unlock(&physseglock);
	return rv;
}

ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 * first look for a global segment with the
	 * same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.  Ensure that we don't
	 * map the zero page.
	 */
	if(va == 0) {
		for(os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
			va = os->base;
			if(len >= va)
				error(Enovmem);
			va -= len;
		}
		va &= ~(BY2PG-1);
	} else {
		va &= ~(BY2PG-1);
		if(va == 0 || va >= USTKTOP)
			error(Ebadarg);
	}

	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;	/* Turn off what is not allowed */
	attr |= ps->attr;	/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}

void
pteflush(Pte *pte, int s, int e)
{
	int i;
	Page *p;

	for(i = s; i < e; i++) {
		p = pte->pages[i];
		if(pagedout(p) == 0)
			memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
	}
}

long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}

void
segclock(ulong pc)
{
	Segment *s;

	s = up->seg[TSEG];
	if(s == 0 || s->profile == 0)
		return;

	s->profile[0] += TK2MS(1);
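	/*
	 * profile[0] accumulates total time; ticks taken inside the
	 * text segment are also charged to the bucket covering pc,
	 * at 1<<LRESPROF bytes per bucket.
	 */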
	if(pc >= s->base && pc < s->top) {
		pc -= s->base;
		s->profile[pc>>LRESPROF] += TK2MS(1);
	}
}