10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* 23*428Ssl108498 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <sys/mman.h> 300Sstevel@tonic-gate #include <sys/param.h> 310Sstevel@tonic-gate #include <sys/stat.h> 320Sstevel@tonic-gate #include <sys/types.h> 330Sstevel@tonic-gate #include <assert.h> 340Sstevel@tonic-gate #include <errno.h> 350Sstevel@tonic-gate #include <fcntl.h> 360Sstevel@tonic-gate #include <libproc.h> 370Sstevel@tonic-gate #include <limits.h> 380Sstevel@tonic-gate #include <procfs.h> 390Sstevel@tonic-gate #include <stdio.h> 400Sstevel@tonic-gate #include <stdlib.h> 410Sstevel@tonic-gate #include <strings.h> 420Sstevel@tonic-gate #include <time.h> 430Sstevel@tonic-gate #include <unistd.h> 440Sstevel@tonic-gate #include "rcapd.h" 450Sstevel@tonic-gate #include "rcapd_rfd.h" 460Sstevel@tonic-gate #include "rcapd_mapping.h" 470Sstevel@tonic-gate #include "utils.h" 480Sstevel@tonic-gate 490Sstevel@tonic-gate static int lpc_xmap_update(lprocess_t *); 500Sstevel@tonic-gate #ifdef DEBUG 510Sstevel@tonic-gate extern int lmapping_dump_diff(lmapping_t *lm1, lmapping_t *lm2); 520Sstevel@tonic-gate #endif /* DEBUG */ 530Sstevel@tonic-gate 540Sstevel@tonic-gate /* 550Sstevel@tonic-gate * The number of file descriptors required to grab a process and create an 560Sstevel@tonic-gate * agent in it. 570Sstevel@tonic-gate */ 580Sstevel@tonic-gate #define PGRAB_FD_COUNT 10 590Sstevel@tonic-gate 600Sstevel@tonic-gate /* 610Sstevel@tonic-gate * Record a position in an address space as it corresponds to a prpageheader_t 620Sstevel@tonic-gate * and affiliated structures. 630Sstevel@tonic-gate */ 640Sstevel@tonic-gate typedef struct prpageheader_cur { 650Sstevel@tonic-gate int pr_nmap; /* number of mappings in address space */ 660Sstevel@tonic-gate int pr_map; /* number of this mapping */ 670Sstevel@tonic-gate uint64_t pr_pgoff; /* page offset into mapping */ 680Sstevel@tonic-gate uint64_t pr_npage; /* number of pages in mapping */ 690Sstevel@tonic-gate uint64_t pr_pagesize; /* page size of mapping */ 700Sstevel@tonic-gate uintptr_t pr_addr; /* base of mapping */ 710Sstevel@tonic-gate prpageheader_t *pr_prpageheader; /* associated page header */ 720Sstevel@tonic-gate void *pr_pdaddr; /* address of page's byte in pagedata */ 730Sstevel@tonic-gate prxmap_t *pr_xmap; /* array containing per-segment information */ 740Sstevel@tonic-gate int pr_nxmap; /* number of xmaps in array */ 750Sstevel@tonic-gate int64_t pr_rss; /* number of resident pages in mapping, */ 760Sstevel@tonic-gate /* or -1 if xmap is out of sync */ 770Sstevel@tonic-gate int64_t pr_pg_rss; /* number of pageable pages in mapping, or -1 */ 780Sstevel@tonic-gate } prpageheader_cur_t; 790Sstevel@tonic-gate 800Sstevel@tonic-gate static struct ps_prochandle *scan_pr; /* currently-scanned process's handle */ 810Sstevel@tonic-gate 820Sstevel@tonic-gate typedef enum { 830Sstevel@tonic-gate STDL_NORMAL, 840Sstevel@tonic-gate STDL_HIGH 850Sstevel@tonic-gate } st_debug_level_t; 860Sstevel@tonic-gate 870Sstevel@tonic-gate /* 880Sstevel@tonic-gate * Output a scanning-related debug message. 890Sstevel@tonic-gate */ 900Sstevel@tonic-gate /*PRINTFLIKE3*/ /*ARGSUSED*/ 910Sstevel@tonic-gate static void 920Sstevel@tonic-gate st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...) 930Sstevel@tonic-gate { 940Sstevel@tonic-gate #ifdef DEBUG_MSG 950Sstevel@tonic-gate va_list alist; 960Sstevel@tonic-gate char *buf; 970Sstevel@tonic-gate size_t len; 980Sstevel@tonic-gate 990Sstevel@tonic-gate if (get_message_priority() < ((level == STDL_HIGH) ? RCM_DEBUG_HIGH 1000Sstevel@tonic-gate : RCM_DEBUG)) 1010Sstevel@tonic-gate return; 1020Sstevel@tonic-gate 1030Sstevel@tonic-gate len = strlen(msg) + LINELEN; 1040Sstevel@tonic-gate buf = malloc(len); 1050Sstevel@tonic-gate if (buf == NULL) 1060Sstevel@tonic-gate return; 1070Sstevel@tonic-gate (void) snprintf(buf, len, "%s %s scanner %s", rcfg.rcfg_mode_name, 1080Sstevel@tonic-gate lcol->lcol_name, msg); 1090Sstevel@tonic-gate 1100Sstevel@tonic-gate va_start(alist, msg); 1110Sstevel@tonic-gate vdprintfe(RCM_DEBUG, buf, alist); 1120Sstevel@tonic-gate va_end(alist); 1130Sstevel@tonic-gate 1140Sstevel@tonic-gate free(buf); 1150Sstevel@tonic-gate #endif /* DEBUG_MSG */ 1160Sstevel@tonic-gate } 1170Sstevel@tonic-gate 1180Sstevel@tonic-gate /* 1190Sstevel@tonic-gate * Determine the collection's current victim, based on its last. The last will 1200Sstevel@tonic-gate * be returned, or, if invalid, any other valid process, if the collection has 1210Sstevel@tonic-gate * any. 1220Sstevel@tonic-gate */ 1230Sstevel@tonic-gate static lprocess_t * 1240Sstevel@tonic-gate get_valid_victim(lcollection_t *lcol, lprocess_t *lpc) 1250Sstevel@tonic-gate { 1260Sstevel@tonic-gate if (lpc == NULL || !lcollection_member(lcol, lpc)) 1270Sstevel@tonic-gate lpc = lcol->lcol_lprocess; 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* 1300Sstevel@tonic-gate * Find the next scannable process, and make it the victim. 1310Sstevel@tonic-gate */ 1320Sstevel@tonic-gate while (lpc != NULL && lpc->lpc_unscannable != 0) 1330Sstevel@tonic-gate lpc = lpc->lpc_next; 1340Sstevel@tonic-gate 1350Sstevel@tonic-gate return (lpc); 1360Sstevel@tonic-gate } 1370Sstevel@tonic-gate 1380Sstevel@tonic-gate /* 1390Sstevel@tonic-gate * Get a process's combined current pagedata (per-page referenced and modified 1400Sstevel@tonic-gate * bits) and set the supplied pointer to it. The caller is responsible for 1410Sstevel@tonic-gate * freeing the data. If the pagedata is unreadable, a nonzero value is 1420Sstevel@tonic-gate * returned, and errno is set. Otherwise, 0 is returned. 1430Sstevel@tonic-gate */ 1440Sstevel@tonic-gate static int 1450Sstevel@tonic-gate get_pagedata(prpageheader_t **pghpp, int fd) 1460Sstevel@tonic-gate { 1470Sstevel@tonic-gate int res; 1480Sstevel@tonic-gate struct stat st; 1490Sstevel@tonic-gate 1500Sstevel@tonic-gate redo: 1510Sstevel@tonic-gate errno = 0; 1520Sstevel@tonic-gate if (fstat(fd, &st) != 0) { 1530Sstevel@tonic-gate debug("cannot stat pagedata\n"); 1540Sstevel@tonic-gate return (-1); 1550Sstevel@tonic-gate } 1560Sstevel@tonic-gate 1570Sstevel@tonic-gate errno = 0; 1580Sstevel@tonic-gate *pghpp = malloc(st.st_size); 1590Sstevel@tonic-gate if (*pghpp == NULL) { 1600Sstevel@tonic-gate debug("cannot malloc() %ld bytes for pagedata", st.st_size); 1610Sstevel@tonic-gate return (-1); 1620Sstevel@tonic-gate } 1630Sstevel@tonic-gate (void) bzero(*pghpp, st.st_size); 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate errno = 0; 1660Sstevel@tonic-gate if ((res = read(fd, *pghpp, st.st_size)) != st.st_size) { 1670Sstevel@tonic-gate free(*pghpp); 1680Sstevel@tonic-gate *pghpp = NULL; 1690Sstevel@tonic-gate if (res > 0 || errno == E2BIG) { 1700Sstevel@tonic-gate debug("pagedata changed size, retrying\n"); 1710Sstevel@tonic-gate goto redo; 1720Sstevel@tonic-gate } else { 1730Sstevel@tonic-gate debug("cannot read pagedata"); 1740Sstevel@tonic-gate return (-1); 1750Sstevel@tonic-gate } 1760Sstevel@tonic-gate } 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate return (0); 1790Sstevel@tonic-gate } 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate /* 1820Sstevel@tonic-gate * Return the count of kilobytes of pages represented by the given pagedata 1830Sstevel@tonic-gate * which meet the given criteria, having pages which are in all of the states 1840Sstevel@tonic-gate * specified by the mask, and in none of the states in the notmask. If the 1850Sstevel@tonic-gate * CP_CLEAR flag is set, the pagedata will also be cleared. 1860Sstevel@tonic-gate */ 1870Sstevel@tonic-gate #define CP_CLEAR 1 1880Sstevel@tonic-gate static uint64_t 1890Sstevel@tonic-gate count_pages(prpageheader_t *pghp, int flags, int mask, int notmask) 1900Sstevel@tonic-gate { 1910Sstevel@tonic-gate int map; 1920Sstevel@tonic-gate caddr_t cur, end; 1930Sstevel@tonic-gate prpageheader_t pgh = *pghp; 1940Sstevel@tonic-gate prasmap_t *asmapp; 1950Sstevel@tonic-gate uint64_t count = 0; 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate cur = (caddr_t)pghp + sizeof (*pghp); 1980Sstevel@tonic-gate for (map = 0; map < pgh.pr_nmap; map++) { 1990Sstevel@tonic-gate asmapp = (prasmap_t *)(uintptr_t)cur; 2000Sstevel@tonic-gate cur += sizeof (*asmapp); 2010Sstevel@tonic-gate end = cur + asmapp->pr_npage; 2020Sstevel@tonic-gate while (cur < end) { 2030Sstevel@tonic-gate if ((*cur & mask) == mask && (*cur & notmask) == 0) 2040Sstevel@tonic-gate count += asmapp->pr_pagesize / 1024; 2050Sstevel@tonic-gate if ((flags & CP_CLEAR) != 0) 2060Sstevel@tonic-gate *cur = 0; 2070Sstevel@tonic-gate cur++; 2080Sstevel@tonic-gate } 2090Sstevel@tonic-gate 2100Sstevel@tonic-gate /* 2110Sstevel@tonic-gate * Skip to next 64-bit-aligned address to get the next 2120Sstevel@tonic-gate * prasmap_t. 2130Sstevel@tonic-gate */ 2140Sstevel@tonic-gate cur = (caddr_t)((intptr_t)(cur + 7) & ~7); 2150Sstevel@tonic-gate } 2160Sstevel@tonic-gate 2170Sstevel@tonic-gate return (count); 2180Sstevel@tonic-gate } 2190Sstevel@tonic-gate 2200Sstevel@tonic-gate /* 2210Sstevel@tonic-gate * Return the amount of memory (in kilobytes) that hasn't been referenced or 2220Sstevel@tonic-gate * modified, which memory which will be paged out first. Should be written to 2230Sstevel@tonic-gate * exclude nonresident pages when sufficient interfaces exist. 2240Sstevel@tonic-gate */ 2250Sstevel@tonic-gate static uint64_t 2260Sstevel@tonic-gate unrm_size(lprocess_t *lpc) 2270Sstevel@tonic-gate { 2280Sstevel@tonic-gate return (count_pages(lpc->lpc_prpageheader, CP_CLEAR, 2290Sstevel@tonic-gate 0, PG_MODIFIED | PG_REFERENCED)); 2300Sstevel@tonic-gate } 2310Sstevel@tonic-gate 2320Sstevel@tonic-gate /* 2330Sstevel@tonic-gate * Advance a prpageheader_cur_t to the address space's next mapping, returning 2340Sstevel@tonic-gate * its address, or NULL if there is none. Any known nonpageable or nonresident 2350Sstevel@tonic-gate * mappings will be skipped over. 2360Sstevel@tonic-gate */ 2370Sstevel@tonic-gate static uintptr_t 2380Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(prpageheader_cur_t *pcp) 2390Sstevel@tonic-gate { 2400Sstevel@tonic-gate prasmap_t *pap; 2410Sstevel@tonic-gate int i; 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate next: 2440Sstevel@tonic-gate ASSERT(pcp->pr_map < pcp->pr_nmap); 2450Sstevel@tonic-gate if ((pcp->pr_map + 1) == pcp->pr_nmap) 2460Sstevel@tonic-gate return (NULL); 2470Sstevel@tonic-gate pcp->pr_map++; 2480Sstevel@tonic-gate if (pcp->pr_pgoff < pcp->pr_npage) { 249*428Ssl108498 pcp->pr_pdaddr = (caddr_t)(uintptr_t) 250*428Ssl108498 ((uintptr_t)pcp->pr_pdaddr + 2510Sstevel@tonic-gate (pcp->pr_npage - pcp->pr_pgoff)); 2520Sstevel@tonic-gate pcp->pr_pgoff = pcp->pr_npage; 2530Sstevel@tonic-gate } 2540Sstevel@tonic-gate /* 2550Sstevel@tonic-gate * Skip to next 64-bit-aligned address to get the next prasmap_t. 2560Sstevel@tonic-gate */ 2570Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)(((uintptr_t)pcp->pr_pdaddr + 7) & ~7); 2580Sstevel@tonic-gate pap = (prasmap_t *)pcp->pr_pdaddr; 2590Sstevel@tonic-gate pcp->pr_pgoff = 0; 2600Sstevel@tonic-gate pcp->pr_npage = pap->pr_npage; 2610Sstevel@tonic-gate pcp->pr_pagesize = pap->pr_pagesize; 2620Sstevel@tonic-gate pcp->pr_addr = pap->pr_vaddr; 2630Sstevel@tonic-gate pcp->pr_pdaddr = pap + 1; 2640Sstevel@tonic-gate 2650Sstevel@tonic-gate /* 2660Sstevel@tonic-gate * Skip any known nonpageable mappings. Currently, the only one 2670Sstevel@tonic-gate * detected is the schedctl page. 2680Sstevel@tonic-gate */ 2690Sstevel@tonic-gate if ((pap->pr_mflags ^ (MA_SHARED | MA_READ | MA_WRITE | MA_EXEC | 2700Sstevel@tonic-gate MA_ANON)) == 0 && pap->pr_npage == 1) { 2710Sstevel@tonic-gate debug("identified nonpageable schedctl mapping at %p\n", 2720Sstevel@tonic-gate (void *)pcp->pr_addr); 2730Sstevel@tonic-gate goto next; 2740Sstevel@tonic-gate } 2750Sstevel@tonic-gate 2760Sstevel@tonic-gate /* 2770Sstevel@tonic-gate * Skip mappings with no resident pages. If the xmap does not 2780Sstevel@tonic-gate * correspond to the pagedata for any reason, it will be ignored. 2790Sstevel@tonic-gate */ 2800Sstevel@tonic-gate pcp->pr_rss = -1; 2810Sstevel@tonic-gate pcp->pr_pg_rss = -1; 2820Sstevel@tonic-gate for (i = 0; i < pcp->pr_nxmap; i++) { 2830Sstevel@tonic-gate prxmap_t *xmap = &pcp->pr_xmap[i]; 2840Sstevel@tonic-gate 2850Sstevel@tonic-gate if (pcp->pr_addr == xmap->pr_vaddr && xmap->pr_size == 2860Sstevel@tonic-gate (pcp->pr_npage * pcp->pr_pagesize)) { 2870Sstevel@tonic-gate pcp->pr_rss = xmap->pr_rss; 2880Sstevel@tonic-gate /* 2890Sstevel@tonic-gate * Remove COW pages from the pageable RSS count. 2900Sstevel@tonic-gate */ 2910Sstevel@tonic-gate if ((xmap->pr_mflags & MA_SHARED) == 0) 2920Sstevel@tonic-gate pcp->pr_pg_rss = xmap->pr_anon; 2930Sstevel@tonic-gate break; 2940Sstevel@tonic-gate } 2950Sstevel@tonic-gate } 2960Sstevel@tonic-gate if (pcp->pr_rss == 0) { 2970Sstevel@tonic-gate debug("identified nonresident mapping at 0x%p\n", 2980Sstevel@tonic-gate (void *)pcp->pr_addr); 2990Sstevel@tonic-gate goto next; 3000Sstevel@tonic-gate } else if (pcp->pr_pg_rss == 0) { 3010Sstevel@tonic-gate debug("identified unpageable mapping at 0x%p\n", 3020Sstevel@tonic-gate (void *)pcp->pr_addr); 3030Sstevel@tonic-gate goto next; 3040Sstevel@tonic-gate } 3050Sstevel@tonic-gate 3060Sstevel@tonic-gate return (pcp->pr_addr); 3070Sstevel@tonic-gate } 3080Sstevel@tonic-gate 3090Sstevel@tonic-gate /* 3100Sstevel@tonic-gate * Advance a prpageheader_cur_t to the mapping's next page, returning its 3110Sstevel@tonic-gate * address, or NULL if there is none. 3120Sstevel@tonic-gate */ 3130Sstevel@tonic-gate static void * 3140Sstevel@tonic-gate advance_prpageheader_cur(prpageheader_cur_t *pcp) 3150Sstevel@tonic-gate { 3160Sstevel@tonic-gate ASSERT(pcp->pr_pgoff < pcp->pr_npage); 3170Sstevel@tonic-gate if ((pcp->pr_pgoff + 1) == pcp->pr_npage) 3180Sstevel@tonic-gate return (NULL); 3190Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + 1; 3200Sstevel@tonic-gate pcp->pr_pgoff++; 3210Sstevel@tonic-gate 3220Sstevel@tonic-gate ASSERT((*(char *)pcp->pr_pdaddr & ~(PG_MODIFIED | PG_REFERENCED)) == 0); 3230Sstevel@tonic-gate return ((caddr_t)pcp->pr_addr + pcp->pr_pgoff * pcp->pr_pagesize); 3240Sstevel@tonic-gate } 3250Sstevel@tonic-gate 3260Sstevel@tonic-gate /* 3270Sstevel@tonic-gate * Initialize a prpageheader_cur_t, positioned at the first page of the mapping 3280Sstevel@tonic-gate * of an address space. 3290Sstevel@tonic-gate */ 3300Sstevel@tonic-gate static void * 3310Sstevel@tonic-gate set_prpageheader_cur(prpageheader_cur_t *pcp, prpageheader_t *php, 3320Sstevel@tonic-gate prxmap_t *xmap, int nxmap) 3330Sstevel@tonic-gate { 3340Sstevel@tonic-gate bzero(pcp, sizeof (*pcp)); 3350Sstevel@tonic-gate pcp->pr_nmap = php->pr_nmap; 3360Sstevel@tonic-gate pcp->pr_map = -1; 3370Sstevel@tonic-gate pcp->pr_prpageheader = php; 3380Sstevel@tonic-gate pcp->pr_xmap = xmap; 3390Sstevel@tonic-gate pcp->pr_nxmap = nxmap; 3400Sstevel@tonic-gate pcp->pr_pdaddr = (prpageheader_t *)php + 1; 3410Sstevel@tonic-gate 3420Sstevel@tonic-gate return ((void *)advance_prpageheader_cur_nextmapping(pcp)); 3430Sstevel@tonic-gate } 3440Sstevel@tonic-gate 3450Sstevel@tonic-gate /* 3460Sstevel@tonic-gate * Position a prpageheader_cur_t to the mapped address greater or equal to the 3470Sstevel@tonic-gate * given value. 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate static void * 3500Sstevel@tonic-gate set_prpageheader_cur_addr(prpageheader_cur_t *pcp, prpageheader_t *php, 3510Sstevel@tonic-gate prxmap_t *xmap, int nxmap, void *naddr) 3520Sstevel@tonic-gate { 3530Sstevel@tonic-gate void *addr = set_prpageheader_cur(pcp, php, xmap, nxmap); 3540Sstevel@tonic-gate 3550Sstevel@tonic-gate while (addr != NULL && addr <= naddr) 3560Sstevel@tonic-gate if (naddr < (void *)((caddr_t)pcp->pr_addr + 3570Sstevel@tonic-gate pcp->pr_pagesize * pcp->pr_npage)) { 3580Sstevel@tonic-gate uint64_t pgdiff = ((uintptr_t)naddr - 3590Sstevel@tonic-gate (uintptr_t)pcp->pr_addr) / pcp->pr_pagesize; 3600Sstevel@tonic-gate pcp->pr_pgoff += pgdiff; 3610Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + pgdiff; 3620Sstevel@tonic-gate addr = (caddr_t)pcp->pr_addr + pcp->pr_pagesize * 3630Sstevel@tonic-gate pcp->pr_pgoff; 3640Sstevel@tonic-gate break; 3650Sstevel@tonic-gate } else 3660Sstevel@tonic-gate addr = 3670Sstevel@tonic-gate (void *)advance_prpageheader_cur_nextmapping(pcp); 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate return (addr); 3700Sstevel@tonic-gate } 3710Sstevel@tonic-gate 3720Sstevel@tonic-gate static void 3730Sstevel@tonic-gate revoke_pagedata(rfd_t *rfd) 3740Sstevel@tonic-gate { 3750Sstevel@tonic-gate lprocess_t *lpc = rfd->rfd_data; 3760Sstevel@tonic-gate 3770Sstevel@tonic-gate st_debug(STDL_NORMAL, lpc->lpc_collection, "revoking pagedata for" 3780Sstevel@tonic-gate " process %d\n", (int)lpc->lpc_pid); 3790Sstevel@tonic-gate ASSERT(lpc->lpc_pgdata_fd != -1); 3800Sstevel@tonic-gate lpc->lpc_pgdata_fd = -1; 3810Sstevel@tonic-gate } 3820Sstevel@tonic-gate 3830Sstevel@tonic-gate #ifdef DEBUG 3840Sstevel@tonic-gate static void 3850Sstevel@tonic-gate mklmapping(lmapping_t **lm, prpageheader_t *pgh) 3860Sstevel@tonic-gate { 3870Sstevel@tonic-gate prpageheader_cur_t cur; 3880Sstevel@tonic-gate void *addr; 3890Sstevel@tonic-gate 3900Sstevel@tonic-gate addr = set_prpageheader_cur(&cur, pgh, NULL, -1); 3910Sstevel@tonic-gate ASSERT(*lm == NULL); 3920Sstevel@tonic-gate while (addr != NULL) { 3930Sstevel@tonic-gate (void) lmapping_insert(lm, cur.pr_addr, cur.pr_npage * 3940Sstevel@tonic-gate cur.pr_pagesize); 3950Sstevel@tonic-gate addr = (void *)advance_prpageheader_cur_nextmapping(&cur); 3960Sstevel@tonic-gate } 3970Sstevel@tonic-gate } 3980Sstevel@tonic-gate 3990Sstevel@tonic-gate static void 4000Sstevel@tonic-gate lmapping_dump(lmapping_t *lm) 4010Sstevel@tonic-gate { 4020Sstevel@tonic-gate debug("lm: %p\n", (void *)lm); 4030Sstevel@tonic-gate while (lm != NULL) { 4040Sstevel@tonic-gate debug("\t(%p, %llx\n", (void *)lm->lm_addr, 4050Sstevel@tonic-gate (unsigned long long)lm->lm_size); 4060Sstevel@tonic-gate lm = lm->lm_next; 4070Sstevel@tonic-gate } 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate #endif /* DEBUG */ 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate /* 4120Sstevel@tonic-gate * OR two prpagedata_t which are supposedly snapshots of the same address 4130Sstevel@tonic-gate * space. Intersecting mappings with different page sizes are tolerated but 4140Sstevel@tonic-gate * not normalized (not accurate). If the mappings of the two snapshots differ 4150Sstevel@tonic-gate * in any regard, the supplied mappings_changed flag will be set. 4160Sstevel@tonic-gate */ 4170Sstevel@tonic-gate static void 4180Sstevel@tonic-gate OR_pagedata(prpageheader_t *src, prpageheader_t *dst, int *mappings_changedp) 4190Sstevel@tonic-gate { 4200Sstevel@tonic-gate prpageheader_cur_t src_cur; 4210Sstevel@tonic-gate prpageheader_cur_t dst_cur; 4220Sstevel@tonic-gate uintptr_t src_addr; 4230Sstevel@tonic-gate uintptr_t dst_addr; 4240Sstevel@tonic-gate int mappings_changed = 0; 4250Sstevel@tonic-gate 4260Sstevel@tonic-gate /* 4270Sstevel@tonic-gate * OR source pagedata with the destination, for pages of intersecting 4280Sstevel@tonic-gate * mappings. 4290Sstevel@tonic-gate */ 4300Sstevel@tonic-gate src_addr = (uintptr_t)set_prpageheader_cur(&src_cur, src, NULL, -1); 4310Sstevel@tonic-gate dst_addr = (uintptr_t)set_prpageheader_cur(&dst_cur, dst, NULL, -1); 4320Sstevel@tonic-gate while (src_addr != NULL && dst_addr != NULL) { 4330Sstevel@tonic-gate while (src_addr == dst_addr && src_addr != NULL) { 4340Sstevel@tonic-gate *(char *)dst_cur.pr_pdaddr |= 4350Sstevel@tonic-gate *(char *)src_cur.pr_pdaddr; 4360Sstevel@tonic-gate src_addr = (uintptr_t)advance_prpageheader_cur( 4370Sstevel@tonic-gate &src_cur); 4380Sstevel@tonic-gate dst_addr = (uintptr_t)advance_prpageheader_cur( 4390Sstevel@tonic-gate &dst_cur); 4400Sstevel@tonic-gate } 4410Sstevel@tonic-gate if (src_addr != dst_addr) 4420Sstevel@tonic-gate mappings_changed = 1; 4430Sstevel@tonic-gate src_addr = advance_prpageheader_cur_nextmapping(&src_cur); 4440Sstevel@tonic-gate dst_addr = advance_prpageheader_cur_nextmapping(&dst_cur); 4450Sstevel@tonic-gate while (src_addr != dst_addr && src_addr != NULL && dst_addr != 4460Sstevel@tonic-gate NULL) { 4470Sstevel@tonic-gate mappings_changed = 1; 4480Sstevel@tonic-gate if (src_addr < dst_addr) 4490Sstevel@tonic-gate src_addr = advance_prpageheader_cur_nextmapping( 4500Sstevel@tonic-gate &src_cur); 4510Sstevel@tonic-gate else 4520Sstevel@tonic-gate dst_addr = advance_prpageheader_cur_nextmapping( 4530Sstevel@tonic-gate &dst_cur); 4540Sstevel@tonic-gate } 4550Sstevel@tonic-gate } 4560Sstevel@tonic-gate 4570Sstevel@tonic-gate *mappings_changedp = mappings_changed; 4580Sstevel@tonic-gate } 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate /* 4610Sstevel@tonic-gate * Merge the current pagedata with that on hand. If the pagedata is 4620Sstevel@tonic-gate * unretrievable for any reason, such as the process having exited or being a 4630Sstevel@tonic-gate * zombie, a nonzero value is returned, the process should be marked 4640Sstevel@tonic-gate * unscannable, and future attempts to scan it should be avoided, since the 4650Sstevel@tonic-gate * symptom is probably permament. If the mappings of either pagedata 4660Sstevel@tonic-gate * differ in any respect, the supplied callback will be invoked once. 4670Sstevel@tonic-gate */ 4680Sstevel@tonic-gate static int 4690Sstevel@tonic-gate merge_current_pagedata(lprocess_t *lpc, 4700Sstevel@tonic-gate void(*mappings_changed_cb) (lprocess_t *)) 4710Sstevel@tonic-gate { 4720Sstevel@tonic-gate prpageheader_t *pghp; 4730Sstevel@tonic-gate int mappings_changed = 0; 4740Sstevel@tonic-gate 4750Sstevel@tonic-gate if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 4760Sstevel@tonic-gate 0) { 4770Sstevel@tonic-gate char pathbuf[PROC_PATH_MAX]; 4780Sstevel@tonic-gate 4790Sstevel@tonic-gate (void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/pagedata", 4800Sstevel@tonic-gate (int)lpc->lpc_pid); 4810Sstevel@tonic-gate if ((lpc->lpc_pgdata_fd = rfd_open(pathbuf, 1, RFD_PAGEDATA, 4820Sstevel@tonic-gate revoke_pagedata, lpc, O_RDONLY, 0)) < 0 || 4830Sstevel@tonic-gate get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 0) 4840Sstevel@tonic-gate return (-1); 4850Sstevel@tonic-gate debug("starting/resuming pagedata collection for %d\n", 4860Sstevel@tonic-gate (int)lpc->lpc_pid); 4870Sstevel@tonic-gate } 4880Sstevel@tonic-gate debug("process %d: %llu/%llukB r/m'd since last read\n", 4890Sstevel@tonic-gate (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0, 4900Sstevel@tonic-gate PG_MODIFIED | PG_REFERENCED, 0), (unsigned long long)lpc->lpc_rss); 4910Sstevel@tonic-gate if (lpc->lpc_prpageheader != NULL) { 4920Sstevel@tonic-gate /* 4930Sstevel@tonic-gate * OR the two snapshots. 4940Sstevel@tonic-gate */ 4950Sstevel@tonic-gate #ifdef DEBUG 4960Sstevel@tonic-gate lmapping_t *old = NULL; 4970Sstevel@tonic-gate lmapping_t *new = NULL; 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate mklmapping(&new, pghp); 5000Sstevel@tonic-gate mklmapping(&old, lpc->lpc_prpageheader); 5010Sstevel@tonic-gate #endif /* DEBUG */ 5020Sstevel@tonic-gate OR_pagedata(lpc->lpc_prpageheader, pghp, &mappings_changed); 5030Sstevel@tonic-gate #ifdef DEBUG 5040Sstevel@tonic-gate if (((mappings_changed != 0) ^ 5050Sstevel@tonic-gate (lmapping_dump_diff(old, new) != 0))) { 5060Sstevel@tonic-gate debug("lmapping_changed inconsistent with lmapping\n"); 5070Sstevel@tonic-gate debug("old\n"); 5080Sstevel@tonic-gate lmapping_dump(old); 5090Sstevel@tonic-gate debug("new\n"); 5100Sstevel@tonic-gate lmapping_dump(new); 5110Sstevel@tonic-gate debug("ignored\n"); 5120Sstevel@tonic-gate lmapping_dump(lpc->lpc_ignore); 5130Sstevel@tonic-gate ASSERT(0); 5140Sstevel@tonic-gate } 5150Sstevel@tonic-gate lmapping_free(&new); 5160Sstevel@tonic-gate lmapping_free(&old); 5170Sstevel@tonic-gate #endif /* DEBUG */ 5180Sstevel@tonic-gate free(lpc->lpc_prpageheader); 5190Sstevel@tonic-gate } else 5200Sstevel@tonic-gate mappings_changed = 1; 5210Sstevel@tonic-gate lpc->lpc_prpageheader = pghp; 5220Sstevel@tonic-gate debug("process %d: %llu/%llukB r/m'd since hand swept\n", 5230Sstevel@tonic-gate (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0, 5240Sstevel@tonic-gate PG_MODIFIED | PG_REFERENCED, 0), 5250Sstevel@tonic-gate (unsigned long long)lpc->lpc_rss); 5260Sstevel@tonic-gate if (mappings_changed != 0) { 5270Sstevel@tonic-gate debug("process %d: mappings changed\n", (int)lpc->lpc_pid); 5280Sstevel@tonic-gate if (mappings_changed_cb != NULL) 5290Sstevel@tonic-gate mappings_changed_cb(lpc); 5300Sstevel@tonic-gate } 5310Sstevel@tonic-gate return (0); 5320Sstevel@tonic-gate } 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate /* 5350Sstevel@tonic-gate * Attempt to page out a region of the given process's address space. May 5360Sstevel@tonic-gate * return nonzero if not all of the pages may are pageable, for any reason. 5370Sstevel@tonic-gate */ 5380Sstevel@tonic-gate static int 5390Sstevel@tonic-gate pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end) 5400Sstevel@tonic-gate { 5410Sstevel@tonic-gate int res; 5420Sstevel@tonic-gate 5430Sstevel@tonic-gate if (end <= start) 5440Sstevel@tonic-gate return (0); 5450Sstevel@tonic-gate 5460Sstevel@tonic-gate errno = 0; 5470Sstevel@tonic-gate res = pr_memcntl(Pr, start, (end - start), MC_SYNC, 5480Sstevel@tonic-gate (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0); 5490Sstevel@tonic-gate debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res); 5500Sstevel@tonic-gate 5510Sstevel@tonic-gate /* 5520Sstevel@tonic-gate * EBUSY indicates none of the pages have backing store allocated, or 5530Sstevel@tonic-gate * some pages were locked, which are less interesting than other 5540Sstevel@tonic-gate * conditions, which are noted. 5550Sstevel@tonic-gate */ 5560Sstevel@tonic-gate if (res != 0) 5570Sstevel@tonic-gate if (errno == EBUSY) 5580Sstevel@tonic-gate res = 0; 5590Sstevel@tonic-gate else 5600Sstevel@tonic-gate debug("%d: can't pageout %p+%llx (errno %d)", (int)pid, 5610Sstevel@tonic-gate (void *)start, (long long)(end - start), errno); 5620Sstevel@tonic-gate 5630Sstevel@tonic-gate return (res); 5640Sstevel@tonic-gate } 5650Sstevel@tonic-gate 5660Sstevel@tonic-gate /* 5670Sstevel@tonic-gate * Compute the delta of the victim process's RSS since the last call. If the 5680Sstevel@tonic-gate * psinfo cannot be obtained, no work is done, and no error is returned; it is 5690Sstevel@tonic-gate * up to the caller to detect the process' termination via other means. 5700Sstevel@tonic-gate */ 5710Sstevel@tonic-gate static int64_t 5720Sstevel@tonic-gate rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic) 5730Sstevel@tonic-gate { 5740Sstevel@tonic-gate int64_t d_rss = 0; 5750Sstevel@tonic-gate 5760Sstevel@tonic-gate if (get_psinfo(vic->lpc_pid, new_psinfo, vic->lpc_psinfo_fd, 5770Sstevel@tonic-gate lprocess_update_psinfo_fd_cb, vic, vic) == 0) { 5780Sstevel@tonic-gate d_rss = (int64_t)new_psinfo->pr_rssize - 5790Sstevel@tonic-gate (int64_t)old_psinfo->pr_rssize; 5800Sstevel@tonic-gate if (d_rss < 0) 5810Sstevel@tonic-gate vic->lpc_collection->lcol_stat.lcols_pg_eff += 5820Sstevel@tonic-gate (- d_rss); 5830Sstevel@tonic-gate *old_psinfo = *new_psinfo; 5840Sstevel@tonic-gate } 5850Sstevel@tonic-gate 5860Sstevel@tonic-gate return (d_rss); 5870Sstevel@tonic-gate } 5880Sstevel@tonic-gate 5890Sstevel@tonic-gate static void 5900Sstevel@tonic-gate unignore_mappings(lprocess_t *lpc) 5910Sstevel@tonic-gate { 5920Sstevel@tonic-gate debug("clearing ignored set\n"); 5930Sstevel@tonic-gate lmapping_free(&lpc->lpc_ignore); 5940Sstevel@tonic-gate } 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate static void 5970Sstevel@tonic-gate unignore_referenced_mappings(lprocess_t *lpc) 5980Sstevel@tonic-gate { 5990Sstevel@tonic-gate prpageheader_cur_t cur; 6000Sstevel@tonic-gate void *vicaddr; 6010Sstevel@tonic-gate 6020Sstevel@tonic-gate vicaddr = set_prpageheader_cur(&cur, lpc->lpc_prpageheader, NULL, -1); 6030Sstevel@tonic-gate while (vicaddr != NULL) { 6040Sstevel@tonic-gate if (((*(char *)cur.pr_pdaddr) & (PG_REFERENCED | PG_MODIFIED)) 6050Sstevel@tonic-gate != 0) { 6060Sstevel@tonic-gate if (lmapping_remove(&lpc->lpc_ignore, cur.pr_addr, 6070Sstevel@tonic-gate cur.pr_npage * cur.pr_pagesize) == 0) 6080Sstevel@tonic-gate debug("removed mapping 0x%p+0t%llukB from" 6090Sstevel@tonic-gate " ignored set\n", (void *)cur.pr_addr, 6100Sstevel@tonic-gate (unsigned long long)(cur.pr_npage * 6110Sstevel@tonic-gate cur.pr_pagesize / 1024)); 6120Sstevel@tonic-gate vicaddr = (void *)advance_prpageheader_cur_nextmapping( 6130Sstevel@tonic-gate &cur); 6140Sstevel@tonic-gate } else if ((vicaddr = advance_prpageheader_cur(&cur)) == NULL) 6150Sstevel@tonic-gate vicaddr = (void *)advance_prpageheader_cur_nextmapping( 6160Sstevel@tonic-gate &cur); 6170Sstevel@tonic-gate } 6180Sstevel@tonic-gate } 6190Sstevel@tonic-gate 6200Sstevel@tonic-gate /* 6210Sstevel@tonic-gate * Resume scanning, starting with the last victim, if it is still valid, or any 6220Sstevel@tonic-gate * other one, otherwise. 6230Sstevel@tonic-gate */ 6240Sstevel@tonic-gate void 6250Sstevel@tonic-gate scan(lcollection_t *lcol, int64_t excess) 6260Sstevel@tonic-gate { 6270Sstevel@tonic-gate lprocess_t *vic, *lpc; 6280Sstevel@tonic-gate void *vicaddr, *endaddr, *nvicaddr; 6290Sstevel@tonic-gate prpageheader_cur_t cur; 6300Sstevel@tonic-gate psinfo_t old_psinfo, new_psinfo; 6310Sstevel@tonic-gate hrtime_t scan_start; 6320Sstevel@tonic-gate int res, resumed; 6330Sstevel@tonic-gate uint64_t col_unrm_size; 6340Sstevel@tonic-gate 6350Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "starting to scan, excess %lldk\n", 6360Sstevel@tonic-gate (long long)excess); 6370Sstevel@tonic-gate 6380Sstevel@tonic-gate /* 6390Sstevel@tonic-gate * Determine the address to start scanning at, depending on whether 6400Sstevel@tonic-gate * scanning can be resumed. 6410Sstevel@tonic-gate */ 6420Sstevel@tonic-gate endaddr = NULL; 6430Sstevel@tonic-gate if ((vic = get_valid_victim(lcol, lcol->lcol_victim)) == 6440Sstevel@tonic-gate lcol->lcol_victim && lcol->lcol_resaddr != NULL) { 6450Sstevel@tonic-gate vicaddr = lcol->lcol_resaddr; 6460Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "resuming process %d\n", 6470Sstevel@tonic-gate (int)vic->lpc_pid); 6480Sstevel@tonic-gate resumed = 1; 6490Sstevel@tonic-gate } else { 6500Sstevel@tonic-gate vicaddr = NULL; 6510Sstevel@tonic-gate resumed = 0; 6520Sstevel@tonic-gate } 6530Sstevel@tonic-gate 6540Sstevel@tonic-gate scan_start = gethrtime(); 6550Sstevel@tonic-gate /* 6560Sstevel@tonic-gate * Obtain the most current pagedata for the processes that might be 6570Sstevel@tonic-gate * scanned, and remove from the ignored set any mappings which have 6580Sstevel@tonic-gate * referenced or modified pages (in the hopes that the pageability of 6590Sstevel@tonic-gate * the mapping's pages may have changed). Determine if the 6600Sstevel@tonic-gate * unreferenced and unmodified portion is impossibly small to suffice 6610Sstevel@tonic-gate * to reduce the excess completely. If so, ignore these bits so that 6620Sstevel@tonic-gate * even working set will be paged out. 6630Sstevel@tonic-gate */ 6640Sstevel@tonic-gate col_unrm_size = 0; 6650Sstevel@tonic-gate lpc = vic; 6660Sstevel@tonic-gate while (lpc != NULL && should_run) { 6670Sstevel@tonic-gate if (merge_current_pagedata(lpc, unignore_mappings) != 0) { 6680Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "process %d:" 6690Sstevel@tonic-gate " exited/temporarily unscannable", 6700Sstevel@tonic-gate (int)lpc->lpc_pid); 6710Sstevel@tonic-gate goto next; 6720Sstevel@tonic-gate } 6730Sstevel@tonic-gate debug("process %d: %llu/%llukB scannable\n", (int)lpc->lpc_pid, 6740Sstevel@tonic-gate (unsigned long long)(lpc->lpc_unrm = unrm_size(lpc)), 6750Sstevel@tonic-gate (unsigned long long)lpc->lpc_size); 6760Sstevel@tonic-gate col_unrm_size += lpc->lpc_unrm = unrm_size(lpc); 6770Sstevel@tonic-gate 6780Sstevel@tonic-gate if ((lcol->lcol_stat.lcols_scan_count % 6790Sstevel@tonic-gate RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) { 6800Sstevel@tonic-gate /* 6810Sstevel@tonic-gate * Periodically clear the set of ignored mappings. 6820Sstevel@tonic-gate * This will allow processes whose ignored segments' 6830Sstevel@tonic-gate * pageability have changed (without a corresponding 6840Sstevel@tonic-gate * reference or modification to a page) to be 6850Sstevel@tonic-gate * recognized. 6860Sstevel@tonic-gate */ 6870Sstevel@tonic-gate if (lcol->lcol_stat.lcols_scan_count > 0) 6880Sstevel@tonic-gate unignore_mappings(lpc); 6890Sstevel@tonic-gate } else { 6900Sstevel@tonic-gate /* 6910Sstevel@tonic-gate * Ensure mappings with referenced or modified pages 6920Sstevel@tonic-gate * are not in the ignored set. Their usage might mean 6930Sstevel@tonic-gate * the condition which made them unpageable is gone. 6940Sstevel@tonic-gate */ 6950Sstevel@tonic-gate unignore_referenced_mappings(lpc); 6960Sstevel@tonic-gate } 6970Sstevel@tonic-gate next: 6980Sstevel@tonic-gate lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol, 6990Sstevel@tonic-gate lpc->lpc_next) : NULL; 7000Sstevel@tonic-gate } 7010Sstevel@tonic-gate if (col_unrm_size < excess) { 7020Sstevel@tonic-gate lpc = vic; 7030Sstevel@tonic-gate debug("will not reduce excess with only unreferenced pages\n"); 7040Sstevel@tonic-gate while (lpc != NULL && should_run) { 7050Sstevel@tonic-gate if (lpc->lpc_prpageheader != NULL) { 7060Sstevel@tonic-gate (void) count_pages(lpc->lpc_prpageheader, 7070Sstevel@tonic-gate CP_CLEAR, 0, 0); 7080Sstevel@tonic-gate if (lpc->lpc_pgdata_fd >= 0) { 7090Sstevel@tonic-gate if (rfd_close(lpc->lpc_pgdata_fd) != 0) 7100Sstevel@tonic-gate debug("coud not close %d" 7110Sstevel@tonic-gate " lpc_pgdata_fd %d", 7120Sstevel@tonic-gate (int)lpc->lpc_pid, 7130Sstevel@tonic-gate lpc->lpc_pgdata_fd); 7140Sstevel@tonic-gate lpc->lpc_pgdata_fd = -1; 7150Sstevel@tonic-gate } 7160Sstevel@tonic-gate } 7170Sstevel@tonic-gate lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol, 7180Sstevel@tonic-gate lpc->lpc_next) : NULL; 7190Sstevel@tonic-gate } 7200Sstevel@tonic-gate } 7210Sstevel@tonic-gate 7220Sstevel@tonic-gate /* 7230Sstevel@tonic-gate * Examine each process for pages to remove until the excess is 7240Sstevel@tonic-gate * reduced. 7250Sstevel@tonic-gate */ 7260Sstevel@tonic-gate while (vic != NULL && excess > 0 && should_run) { 7270Sstevel@tonic-gate /* 7280Sstevel@tonic-gate * Skip processes whose death was reported when the merging of 7290Sstevel@tonic-gate * pagedata was attempted. 7300Sstevel@tonic-gate */ 7310Sstevel@tonic-gate if (vic->lpc_prpageheader == NULL) 7320Sstevel@tonic-gate goto nextproc; 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate /* 7350Sstevel@tonic-gate * Obtain optional segment residency information. 7360Sstevel@tonic-gate */ 7370Sstevel@tonic-gate if (lpc_xmap_update(vic) != 0) 7380Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "process %d: xmap" 7390Sstevel@tonic-gate " unreadable; ignoring", (int)vic->lpc_pid); 7400Sstevel@tonic-gate 7410Sstevel@tonic-gate #ifdef DEBUG_MSG 7420Sstevel@tonic-gate { 7430Sstevel@tonic-gate void *ovicaddr = vicaddr; 7440Sstevel@tonic-gate #endif /* DEBUG_MSG */ 7450Sstevel@tonic-gate vicaddr = set_prpageheader_cur_addr(&cur, vic->lpc_prpageheader, 7460Sstevel@tonic-gate vic->lpc_xmap, vic->lpc_nxmap, vicaddr); 7470Sstevel@tonic-gate #ifdef DEBUG_MSG 7480Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "trying to resume from" 7490Sstevel@tonic-gate " 0x%p, next 0x%p\n", ovicaddr, vicaddr); 7500Sstevel@tonic-gate } 7510Sstevel@tonic-gate #endif /* DEBUG_MSG */ 7520Sstevel@tonic-gate 7530Sstevel@tonic-gate /* 7540Sstevel@tonic-gate * Take control of the victim. 7550Sstevel@tonic-gate */ 7560Sstevel@tonic-gate if (get_psinfo(vic->lpc_pid, &old_psinfo, 7570Sstevel@tonic-gate vic->lpc_psinfo_fd, lprocess_update_psinfo_fd_cb, 7580Sstevel@tonic-gate vic, vic) != 0) { 7590Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "cannot get %d psinfo", 7600Sstevel@tonic-gate (int)vic->lpc_pid); 7610Sstevel@tonic-gate goto nextproc; 7620Sstevel@tonic-gate } 7630Sstevel@tonic-gate (void) rfd_reserve(PGRAB_FD_COUNT); 7640Sstevel@tonic-gate if ((scan_pr = Pgrab(vic->lpc_pid, 0, &res)) == NULL) { 7650Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "cannot grab %d (%d)", 7660Sstevel@tonic-gate (int)vic->lpc_pid, res); 7670Sstevel@tonic-gate goto nextproc; 7680Sstevel@tonic-gate } 7690Sstevel@tonic-gate if (Pcreate_agent(scan_pr) != 0) { 7700Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "cannot control %d", 7710Sstevel@tonic-gate (int)vic->lpc_pid); 7720Sstevel@tonic-gate goto nextproc; 7730Sstevel@tonic-gate } 7740Sstevel@tonic-gate /* 7750Sstevel@tonic-gate * Be very pessimistic about the state of the agent LWP -- 7760Sstevel@tonic-gate * verify it's actually stopped. 7770Sstevel@tonic-gate */ 7780Sstevel@tonic-gate errno = 0; 7790Sstevel@tonic-gate while (Pstate(scan_pr) == PS_RUN) 7800Sstevel@tonic-gate (void) Pwait(scan_pr, 0); 7810Sstevel@tonic-gate if (Pstate(scan_pr) != PS_STOP) { 7820Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "agent not in expected" 7830Sstevel@tonic-gate " state (%d)", Pstate(scan_pr)); 7840Sstevel@tonic-gate goto nextproc; 7850Sstevel@tonic-gate } 7860Sstevel@tonic-gate 7870Sstevel@tonic-gate /* 7880Sstevel@tonic-gate * Within the victim's address space, find contiguous ranges of 7890Sstevel@tonic-gate * unreferenced pages to page out. 7900Sstevel@tonic-gate */ 7910Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "paging out process %d\n", 7920Sstevel@tonic-gate (int)vic->lpc_pid); 7930Sstevel@tonic-gate while (excess > 0 && vicaddr != NULL && should_run) { 7940Sstevel@tonic-gate /* 7950Sstevel@tonic-gate * Skip mappings in the ignored set. Mappings get 7960Sstevel@tonic-gate * placed in the ignored set when all their resident 7970Sstevel@tonic-gate * pages are unreference and unmodified, yet unpageable 7980Sstevel@tonic-gate * -- such as when they are locked, or involved in 7990Sstevel@tonic-gate * asynchronous I/O. They will be scanned again when 8000Sstevel@tonic-gate * some page is referenced or modified. 8010Sstevel@tonic-gate */ 8020Sstevel@tonic-gate if (lmapping_contains(vic->lpc_ignore, cur.pr_addr, 8030Sstevel@tonic-gate cur.pr_npage * cur.pr_pagesize)) { 8040Sstevel@tonic-gate debug("ignored mapping at 0x%p\n", 8050Sstevel@tonic-gate (void *)cur.pr_addr); 8060Sstevel@tonic-gate /* 8070Sstevel@tonic-gate * Update statistics. 8080Sstevel@tonic-gate */ 8090Sstevel@tonic-gate lcol->lcol_stat.lcols_pg_att += 8100Sstevel@tonic-gate cur.pr_npage * cur.pr_pagesize / 1024; 8110Sstevel@tonic-gate 8120Sstevel@tonic-gate vicaddr = (void *) 8130Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(&cur); 8140Sstevel@tonic-gate continue; 8150Sstevel@tonic-gate } 8160Sstevel@tonic-gate 8170Sstevel@tonic-gate /* 8180Sstevel@tonic-gate * Determine a range of unreferenced pages to page out, 8190Sstevel@tonic-gate * and clear the R/M bits in the preceding referenced 8200Sstevel@tonic-gate * range. 8210Sstevel@tonic-gate */ 8220Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "start from mapping at 0x%p," 8230Sstevel@tonic-gate " npage %llu\n", vicaddr, 8240Sstevel@tonic-gate (unsigned long long)cur.pr_npage); 8250Sstevel@tonic-gate while (vicaddr != NULL && 8260Sstevel@tonic-gate *(caddr_t)cur.pr_pdaddr != 0) { 8270Sstevel@tonic-gate *(caddr_t)cur.pr_pdaddr = 0; 8280Sstevel@tonic-gate vicaddr = advance_prpageheader_cur(&cur); 8290Sstevel@tonic-gate } 8300Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "advance, vicaddr %p, pdaddr" 8310Sstevel@tonic-gate " %p\n", vicaddr, cur.pr_pdaddr); 8320Sstevel@tonic-gate if (vicaddr == NULL) { 8330Sstevel@tonic-gate /* 8340Sstevel@tonic-gate * The end of mapping was reached before any 8350Sstevel@tonic-gate * unreferenced pages were seen. 8360Sstevel@tonic-gate */ 8370Sstevel@tonic-gate vicaddr = (void *) 8380Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(&cur); 8390Sstevel@tonic-gate continue; 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate do 8420Sstevel@tonic-gate endaddr = advance_prpageheader_cur(&cur); 8430Sstevel@tonic-gate while (endaddr != NULL && 8440Sstevel@tonic-gate *(caddr_t)cur.pr_pdaddr == 0 && 8450Sstevel@tonic-gate (((intptr_t)endaddr - (intptr_t)vicaddr) / 8460Sstevel@tonic-gate 1024) < excess); 8470Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "endaddr %p, *cur %d\n", 8480Sstevel@tonic-gate endaddr, *(caddr_t)cur.pr_pdaddr); 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate /* 8510Sstevel@tonic-gate * Page out from vicaddr to the end of the mapping, or 8520Sstevel@tonic-gate * endaddr if set, then continue scanning after 8530Sstevel@tonic-gate * endaddr, or the next mapping, if not set. 8540Sstevel@tonic-gate */ 8550Sstevel@tonic-gate nvicaddr = endaddr; 8560Sstevel@tonic-gate if (endaddr == NULL) 8570Sstevel@tonic-gate endaddr = (caddr_t)cur.pr_addr + 8580Sstevel@tonic-gate cur.pr_pagesize * cur.pr_npage; 8590Sstevel@tonic-gate if (pageout(vic->lpc_pid, scan_pr, vicaddr, endaddr) == 8600Sstevel@tonic-gate 0) { 8610Sstevel@tonic-gate int64_t d_rss, att; 8620Sstevel@tonic-gate int willignore = 0; 8630Sstevel@tonic-gate 8640Sstevel@tonic-gate excess += (d_rss = rss_delta( 8650Sstevel@tonic-gate &new_psinfo, &old_psinfo, vic)); 8660Sstevel@tonic-gate 8670Sstevel@tonic-gate /* 8680Sstevel@tonic-gate * If this pageout attempt was unsuccessful 8690Sstevel@tonic-gate * (the resident portion was not affected), and 8700Sstevel@tonic-gate * was for the whole mapping, put it in the 8710Sstevel@tonic-gate * ignored set, so it will not be scanned again 8720Sstevel@tonic-gate * until some page is referenced or modified. 8730Sstevel@tonic-gate */ 8740Sstevel@tonic-gate if (d_rss >= 0 && (void *)cur.pr_addr == 8750Sstevel@tonic-gate vicaddr && (cur.pr_pagesize * cur.pr_npage) 8760Sstevel@tonic-gate == ((uintptr_t)endaddr - 8770Sstevel@tonic-gate (uintptr_t)vicaddr)) { 8780Sstevel@tonic-gate if (lmapping_insert( 8790Sstevel@tonic-gate &vic->lpc_ignore, 8800Sstevel@tonic-gate cur.pr_addr, 8810Sstevel@tonic-gate cur.pr_pagesize * 8820Sstevel@tonic-gate cur.pr_npage) != 0) 8830Sstevel@tonic-gate debug("not enough memory to add" 8840Sstevel@tonic-gate " mapping at %p to ignored" 8850Sstevel@tonic-gate " set\n", 8860Sstevel@tonic-gate (void *)cur.pr_addr); 8870Sstevel@tonic-gate willignore = 1; 8880Sstevel@tonic-gate } 8890Sstevel@tonic-gate 8900Sstevel@tonic-gate /* 8910Sstevel@tonic-gate * Update statistics. 8920Sstevel@tonic-gate */ 8930Sstevel@tonic-gate lcol->lcol_stat.lcols_pg_att += (att = 8940Sstevel@tonic-gate ((intptr_t)endaddr - (intptr_t)vicaddr) / 8950Sstevel@tonic-gate 1024); 8960Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "paged out 0x%p" 8970Sstevel@tonic-gate "+0t(%llu/%llu)kB%s\n", vicaddr, 8980Sstevel@tonic-gate (unsigned long long)((d_rss < 8990Sstevel@tonic-gate 0) ? - d_rss : 0), (unsigned long long)att, 9000Sstevel@tonic-gate willignore ? " (will ignore)" : ""); 9010Sstevel@tonic-gate } else { 9020Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, 9030Sstevel@tonic-gate "process %d: exited/unscannable\n", 9040Sstevel@tonic-gate (int)vic->lpc_pid); 9050Sstevel@tonic-gate vic->lpc_unscannable = 1; 9060Sstevel@tonic-gate goto nextproc; 9070Sstevel@tonic-gate } 9080Sstevel@tonic-gate 9090Sstevel@tonic-gate /* 9100Sstevel@tonic-gate * Update the statistics file, if it's time. 9110Sstevel@tonic-gate */ 9120Sstevel@tonic-gate check_update_statistics(); 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate vicaddr = (nvicaddr != NULL) ? nvicaddr : (void 9150Sstevel@tonic-gate *)advance_prpageheader_cur_nextmapping(&cur); 9160Sstevel@tonic-gate } 9170Sstevel@tonic-gate excess += rss_delta(&new_psinfo, &old_psinfo, vic); 9180Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "done, excess %lld\n", 9190Sstevel@tonic-gate (long long)excess); 9200Sstevel@tonic-gate nextproc: 9210Sstevel@tonic-gate /* 9220Sstevel@tonic-gate * If a process was grabbed, release it, destroying its agent. 9230Sstevel@tonic-gate */ 9240Sstevel@tonic-gate if (scan_pr != NULL) { 9250Sstevel@tonic-gate (void) Prelease(scan_pr, 0); 9260Sstevel@tonic-gate scan_pr = NULL; 9270Sstevel@tonic-gate } 9280Sstevel@tonic-gate lcol->lcol_victim = vic; 9290Sstevel@tonic-gate /* 9300Sstevel@tonic-gate * Scan the collection at most once. Only if scanning was not 9310Sstevel@tonic-gate * aborted for any reason, and the end of lprocess has not been 9320Sstevel@tonic-gate * reached, determine the next victim and scan it. 9330Sstevel@tonic-gate */ 9340Sstevel@tonic-gate if (vic != NULL) { 9350Sstevel@tonic-gate if (vic->lpc_next != NULL) { 9360Sstevel@tonic-gate /* 9370Sstevel@tonic-gate * Determine the next process to be scanned. 9380Sstevel@tonic-gate */ 9390Sstevel@tonic-gate if (excess > 0) { 9400Sstevel@tonic-gate vic = get_valid_victim(lcol, 9410Sstevel@tonic-gate vic->lpc_next); 9420Sstevel@tonic-gate vicaddr = 0; 9430Sstevel@tonic-gate } 9440Sstevel@tonic-gate } else { 9450Sstevel@tonic-gate /* 9460Sstevel@tonic-gate * A complete scan of the collection was made, 9470Sstevel@tonic-gate * so tick the scan counter and stop scanning 9480Sstevel@tonic-gate * until the next request. 9490Sstevel@tonic-gate */ 9500Sstevel@tonic-gate lcol->lcol_stat.lcols_scan_count++; 9510Sstevel@tonic-gate lcol->lcol_stat.lcols_scan_time_complete 9520Sstevel@tonic-gate = lcol->lcol_stat.lcols_scan_time; 9530Sstevel@tonic-gate /* 9540Sstevel@tonic-gate * If an excess still exists, tick the 9550Sstevel@tonic-gate * "ineffective scan" counter, signalling that 9560Sstevel@tonic-gate * the cap may be uneforceable. 9570Sstevel@tonic-gate */ 9580Sstevel@tonic-gate if (resumed == 0 && excess > 0) 9590Sstevel@tonic-gate lcol->lcol_stat 9600Sstevel@tonic-gate .lcols_scan_ineffective++; 9610Sstevel@tonic-gate /* 9620Sstevel@tonic-gate * Scanning should start at the beginning of 9630Sstevel@tonic-gate * the process list at the next request. 9640Sstevel@tonic-gate */ 9650Sstevel@tonic-gate if (excess > 0) 9660Sstevel@tonic-gate vic = NULL; 9670Sstevel@tonic-gate } 9680Sstevel@tonic-gate } 9690Sstevel@tonic-gate } 9700Sstevel@tonic-gate lcol->lcol_stat.lcols_scan_time += (gethrtime() - scan_start); 9710Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "done scanning; excess %lld\n", 9720Sstevel@tonic-gate (long long)excess); 9730Sstevel@tonic-gate 9740Sstevel@tonic-gate lcol->lcol_resaddr = vicaddr; 9750Sstevel@tonic-gate if (lcol->lcol_resaddr == NULL && lcol->lcol_victim != NULL) { 9760Sstevel@tonic-gate lcol->lcol_victim = get_valid_victim(lcol, 9770Sstevel@tonic-gate lcol->lcol_victim->lpc_next); 9780Sstevel@tonic-gate } 9790Sstevel@tonic-gate } 9800Sstevel@tonic-gate 9810Sstevel@tonic-gate /* 9820Sstevel@tonic-gate * Abort the scan in progress, and destroy the agent LWP of any grabbed 9830Sstevel@tonic-gate * processes. 9840Sstevel@tonic-gate */ 9850Sstevel@tonic-gate void 9860Sstevel@tonic-gate scan_abort(void) 9870Sstevel@tonic-gate { 9880Sstevel@tonic-gate if (scan_pr != NULL) 9890Sstevel@tonic-gate (void) Prelease(scan_pr, NULL); 9900Sstevel@tonic-gate } 9910Sstevel@tonic-gate 9920Sstevel@tonic-gate static void 9930Sstevel@tonic-gate revoke_xmap(rfd_t *rfd) 9940Sstevel@tonic-gate { 9950Sstevel@tonic-gate lprocess_t *lpc = rfd->rfd_data; 9960Sstevel@tonic-gate 9970Sstevel@tonic-gate debug("revoking xmap for process %d\n", (int)lpc->lpc_pid); 9980Sstevel@tonic-gate ASSERT(lpc->lpc_xmap_fd != -1); 9990Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 10000Sstevel@tonic-gate } 10010Sstevel@tonic-gate 10020Sstevel@tonic-gate /* 10030Sstevel@tonic-gate * Retrieve the process's current xmap , which is used to determine the size of 10040Sstevel@tonic-gate * the resident portion of its segments. Return zero if successful. 10050Sstevel@tonic-gate */ 10060Sstevel@tonic-gate static int 10070Sstevel@tonic-gate lpc_xmap_update(lprocess_t *lpc) 10080Sstevel@tonic-gate { 10090Sstevel@tonic-gate int res; 10100Sstevel@tonic-gate struct stat st; 10110Sstevel@tonic-gate 10120Sstevel@tonic-gate free(lpc->lpc_xmap); 10130Sstevel@tonic-gate lpc->lpc_xmap = NULL; 10140Sstevel@tonic-gate lpc->lpc_nxmap = -1; 10150Sstevel@tonic-gate 10160Sstevel@tonic-gate if (lpc->lpc_xmap_fd == -1) { 10170Sstevel@tonic-gate char pathbuf[PROC_PATH_MAX]; 10180Sstevel@tonic-gate 10190Sstevel@tonic-gate (void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/xmap", 10200Sstevel@tonic-gate (int)lpc->lpc_pid); 10210Sstevel@tonic-gate if ((lpc->lpc_xmap_fd = rfd_open(pathbuf, 1, RFD_XMAP, 10220Sstevel@tonic-gate revoke_xmap, lpc, O_RDONLY, 0)) < 0) 10230Sstevel@tonic-gate return (-1); 10240Sstevel@tonic-gate } 10250Sstevel@tonic-gate 10260Sstevel@tonic-gate redo: 10270Sstevel@tonic-gate errno = 0; 10280Sstevel@tonic-gate if (fstat(lpc->lpc_xmap_fd, &st) != 0) { 10290Sstevel@tonic-gate debug("cannot stat xmap\n"); 10300Sstevel@tonic-gate (void) rfd_close(lpc->lpc_xmap_fd); 10310Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 10320Sstevel@tonic-gate return (-1); 10330Sstevel@tonic-gate } 10340Sstevel@tonic-gate 10350Sstevel@tonic-gate if ((st.st_size % sizeof (*lpc->lpc_xmap)) != 0) { 10360Sstevel@tonic-gate debug("xmap wrong size\n"); 10370Sstevel@tonic-gate (void) rfd_close(lpc->lpc_xmap_fd); 10380Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 10390Sstevel@tonic-gate return (-1); 10400Sstevel@tonic-gate } 10410Sstevel@tonic-gate 10420Sstevel@tonic-gate lpc->lpc_xmap = malloc(st.st_size); 10430Sstevel@tonic-gate if (lpc->lpc_xmap == NULL) { 10440Sstevel@tonic-gate debug("cannot malloc() %ld bytes for xmap", st.st_size); 10450Sstevel@tonic-gate (void) rfd_close(lpc->lpc_xmap_fd); 10460Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 10470Sstevel@tonic-gate return (-1); 10480Sstevel@tonic-gate } 10490Sstevel@tonic-gate 10500Sstevel@tonic-gate if ((res = pread(lpc->lpc_xmap_fd, lpc->lpc_xmap, st.st_size, 0)) != 10510Sstevel@tonic-gate st.st_size) { 10520Sstevel@tonic-gate free(lpc->lpc_xmap); 10530Sstevel@tonic-gate lpc->lpc_xmap = NULL; 10540Sstevel@tonic-gate if (res > 0) { 10550Sstevel@tonic-gate debug("xmap changed size, retrying\n"); 10560Sstevel@tonic-gate goto redo; 10570Sstevel@tonic-gate } else { 10580Sstevel@tonic-gate debug("cannot read xmap"); 10590Sstevel@tonic-gate return (-1); 10600Sstevel@tonic-gate } 10610Sstevel@tonic-gate } 10620Sstevel@tonic-gate lpc->lpc_nxmap = st.st_size / sizeof (*lpc->lpc_xmap); 10630Sstevel@tonic-gate 10640Sstevel@tonic-gate return (0); 10650Sstevel@tonic-gate } 1066