10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51914Scasper * Common Development and Distribution License (the "License").
61914Scasper * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
21*13093SRoger.Faulkner@Oracle.COM
220Sstevel@tonic-gate /*
23*13093SRoger.Faulkner@Oracle.COM * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate * rcapd is a long-running daemon enforcing project-based resource caps (see
280Sstevel@tonic-gate * rcapd(1M)). Each instance of a process aggregate (project or, generically,
290Sstevel@tonic-gate * "collection") may have a memory cap. A single thread monitors the resource
300Sstevel@tonic-gate * utilization of capped collections, enforces caps when they are exceeded (and
310Sstevel@tonic-gate * other conditions are met), and incorporates changes in configuration or
320Sstevel@tonic-gate * caps. Each of these actions occurs not more frequently than the rate
330Sstevel@tonic-gate * specified with rcapadm(1M).
340Sstevel@tonic-gate */
350Sstevel@tonic-gate
360Sstevel@tonic-gate #include <sys/priocntl.h>
370Sstevel@tonic-gate #include <sys/proc.h>
380Sstevel@tonic-gate #include <sys/resource.h>
390Sstevel@tonic-gate #include <sys/sysinfo.h>
400Sstevel@tonic-gate #include <sys/stat.h>
410Sstevel@tonic-gate #include <sys/sysmacros.h>
420Sstevel@tonic-gate #include <sys/time.h>
430Sstevel@tonic-gate #include <sys/types.h>
440Sstevel@tonic-gate #include <dirent.h>
450Sstevel@tonic-gate #include <errno.h>
460Sstevel@tonic-gate #include <fcntl.h>
470Sstevel@tonic-gate #include <kstat.h>
480Sstevel@tonic-gate #include <libintl.h>
490Sstevel@tonic-gate #include <limits.h>
500Sstevel@tonic-gate #include <locale.h>
510Sstevel@tonic-gate #include <priv.h>
520Sstevel@tonic-gate #include <signal.h>
530Sstevel@tonic-gate #include <stdarg.h>
540Sstevel@tonic-gate #include <stdio.h>
551914Scasper #include <stdio_ext.h>
560Sstevel@tonic-gate #include <stdlib.h>
574119Stn143363 #include <libscf.h>
580Sstevel@tonic-gate #include <strings.h>
590Sstevel@tonic-gate #include <time.h>
600Sstevel@tonic-gate #include <unistd.h>
610Sstevel@tonic-gate #include <zone.h>
620Sstevel@tonic-gate #include <assert.h>
633247Sgjelinek #include <sys/vm_usage.h>
640Sstevel@tonic-gate #include "rcapd.h"
650Sstevel@tonic-gate #include "rcapd_mapping.h"
660Sstevel@tonic-gate #include "rcapd_rfd.h"
670Sstevel@tonic-gate #include "rcapd_stat.h"
680Sstevel@tonic-gate #include "utils.h"
690Sstevel@tonic-gate
/*
 * The smaller of x and y, except that a value which is not positive is
 * ignored in favour of the other one.
 */
#define	POSITIVE_MIN(x, y) \
	(((x) <= 0) ? (y) : ((y) <= 0) ? (x) : MIN(x, y))

/*
 * Absolute time (in nanoseconds) of an event due "seconds" after "base", or
 * zero if the interval is not positive.  Both arguments are fully
 * parenthesized so that compound expressions expand correctly.
 */
#define	NEXT_EVENT_TIME(base, seconds) \
	(((int)(seconds) > 0) ? \
	((base) + (hrtime_t)(seconds) * (hrtime_t)NANOSEC) : (hrtime_t)0)

/*
 * Time of the next statistics report, or zero when no statistics file is
 * configured.  Note that "base" is accepted for symmetry with
 * NEXT_EVENT_TIME() but is not used: the interval is always measured from a
 * fresh gethrtime() reading.
 */
#define	NEXT_REPORT_EVENT_TIME(base, seconds) \
	((rcfg.rcfg_stat_file[0] != 0) ? \
	NEXT_EVENT_TIME(gethrtime(), seconds) : (hrtime_t)0)

/*
 * True when "time" has passed "eventtime"; an eventtime of zero never fires.
 */
#define	EVENT_TIME(time, eventtime) \
	(((time) > (eventtime)) && (eventtime) != 0)

#define	STAT_TEMPLATE_SUFFIX	".XXXXXX"	/* suffix of mkstemp() arg */
#define	DAEMON_UID		1		/* uid to use */

/* Bits describing which kinds of capped collections are configured. */
#define	CAPPED_PROJECT	0x01
#define	CAPPED_ZONE	0x02
853247Sgjelinek
860Sstevel@tonic-gate typedef struct soft_scan_arg {
870Sstevel@tonic-gate uint64_t ssa_sum_excess;
880Sstevel@tonic-gate int64_t ssa_scan_goal;
893247Sgjelinek boolean_t ssa_project_over_cap;
900Sstevel@tonic-gate } soft_scan_arg_t;
910Sstevel@tonic-gate
923247Sgjelinek typedef struct sample_col_arg {
933247Sgjelinek boolean_t sca_any_over_cap;
943247Sgjelinek boolean_t sca_project_over_cap;
953247Sgjelinek } sample_col_arg_t;
963247Sgjelinek
973247Sgjelinek
980Sstevel@tonic-gate static int debug_mode = 0; /* debug mode flag */
990Sstevel@tonic-gate static pid_t rcapd_pid; /* rcapd's pid to ensure it's not */
1000Sstevel@tonic-gate /* scanned */
1010Sstevel@tonic-gate static kstat_ctl_t *kctl; /* kstat chain */
1020Sstevel@tonic-gate static int memory_pressure = 0; /* physical memory utilization (%) */
1030Sstevel@tonic-gate static int memory_pressure_sample = 0; /* count of samples */
1043247Sgjelinek static long page_size_kb = 0; /* system page size in KB */
1053247Sgjelinek static size_t nvmu_vals = 0; /* # of kernel RSS/swap vals in array */
1063247Sgjelinek static size_t vmu_vals_len = 0; /* size of RSS/swap vals array */
1073247Sgjelinek static vmusage_t *vmu_vals = NULL; /* snapshot of kernel RSS/swap values */
1080Sstevel@tonic-gate static hrtime_t next_report; /* time of next report */
1090Sstevel@tonic-gate static int termination_signal = 0; /* terminating signal */
1103247Sgjelinek static zoneid_t my_zoneid = (zoneid_t)-1;
1113247Sgjelinek static lcollection_t *gz_col; /* global zone collection */
1120Sstevel@tonic-gate
1130Sstevel@tonic-gate rcfg_t rcfg;
1143247Sgjelinek /*
1153247Sgjelinek * Updated when we re-read the collection configurations if this rcapd instance
1163247Sgjelinek * is running in the global zone and the global zone is capped.
1173247Sgjelinek */
1183247Sgjelinek boolean_t gz_capped = B_FALSE;
1190Sstevel@tonic-gate
1200Sstevel@tonic-gate /*
1210Sstevel@tonic-gate * Flags.
1220Sstevel@tonic-gate */
1230Sstevel@tonic-gate static int ever_ran;
1240Sstevel@tonic-gate int should_run;
1250Sstevel@tonic-gate static int should_reconfigure;
1260Sstevel@tonic-gate
1270Sstevel@tonic-gate static int verify_statistics(void);
1280Sstevel@tonic-gate static int update_statistics(void);
1290Sstevel@tonic-gate
1300Sstevel@tonic-gate /*
1313247Sgjelinek * Checks if a process is marked 'system'. Returns FALSE only when it is not.
1320Sstevel@tonic-gate */
1333247Sgjelinek static boolean_t
proc_issystem(pid_t pid)1340Sstevel@tonic-gate proc_issystem(pid_t pid)
1350Sstevel@tonic-gate {
1360Sstevel@tonic-gate char pc_clname[PC_CLNMSZ];
1370Sstevel@tonic-gate
1380Sstevel@tonic-gate if (priocntl(P_PID, pid, PC_GETXPARMS, NULL, PC_KY_CLNAME, pc_clname,
1390Sstevel@tonic-gate PC_KY_NULL) != -1) {
1400Sstevel@tonic-gate return (strcmp(pc_clname, "SYS") == 0);
1410Sstevel@tonic-gate } else {
1420Sstevel@tonic-gate debug("cannot get class-specific scheduling parameters; "
1433247Sgjelinek "assuming system process\n");
1443247Sgjelinek return (B_TRUE);
1450Sstevel@tonic-gate }
1460Sstevel@tonic-gate }
1470Sstevel@tonic-gate
1480Sstevel@tonic-gate static void
lprocess_insert_mark(psinfo_t * psinfop)1493247Sgjelinek lprocess_insert_mark(psinfo_t *psinfop)
1500Sstevel@tonic-gate {
1513247Sgjelinek pid_t pid = psinfop->pr_pid;
1523247Sgjelinek /* flag indicating whether the process should be scanned. */
1533247Sgjelinek int unscannable = psinfop->pr_nlwp == 0;
1543247Sgjelinek rcid_t colid;
1550Sstevel@tonic-gate lcollection_t *lcol;
1560Sstevel@tonic-gate lprocess_t *lproc;
1570Sstevel@tonic-gate
1583247Sgjelinek /*
1593247Sgjelinek * Determine which collection to put this process into. We only have
1603247Sgjelinek * to worry about tracking both zone and project capped processes if
1613247Sgjelinek * this rcapd instance is running in the global zone, since we'll only
1623247Sgjelinek * see processes in our own projects in a non-global zone. In the
1633247Sgjelinek * global zone, if the process belongs to a non-global zone, we only
1643247Sgjelinek * need to track it for the capped non-global zone collection. For
1653247Sgjelinek * global zone processes, we first attempt to put the process into a
1663247Sgjelinek * capped project collection. On the second pass into this function
1673247Sgjelinek * the projid will be cleared so we will just track the process for the
1683247Sgjelinek * global zone collection as a whole.
1693247Sgjelinek */
1703247Sgjelinek if (psinfop->pr_zoneid == my_zoneid && psinfop->pr_projid != -1) {
1713247Sgjelinek colid.rcid_type = RCIDT_PROJECT;
1723247Sgjelinek colid.rcid_val = psinfop->pr_projid;
1733247Sgjelinek } else {
1743247Sgjelinek /* try to add to zone collection */
1753247Sgjelinek colid.rcid_type = RCIDT_ZONE;
1763247Sgjelinek colid.rcid_val = psinfop->pr_zoneid;
1773247Sgjelinek }
1783247Sgjelinek
1793247Sgjelinek if ((lcol = lcollection_find(&colid)) == NULL)
1800Sstevel@tonic-gate return;
1810Sstevel@tonic-gate
1820Sstevel@tonic-gate /*
1830Sstevel@tonic-gate * If the process is already being tracked, update the unscannable flag,
1840Sstevel@tonic-gate * as determined by the caller, from the process's psinfo.
1850Sstevel@tonic-gate */
1860Sstevel@tonic-gate lproc = lcol->lcol_lprocess;
1870Sstevel@tonic-gate while (lproc != NULL) {
1880Sstevel@tonic-gate if (lproc->lpc_pid == pid) {
1890Sstevel@tonic-gate lproc->lpc_mark = 1;
1900Sstevel@tonic-gate if (unscannable != 0 && lproc->lpc_unscannable == 0) {
1910Sstevel@tonic-gate debug("process %d: became unscannable\n",
1920Sstevel@tonic-gate (int)lproc->lpc_pid);
1930Sstevel@tonic-gate lproc->lpc_unscannable = 1;
1940Sstevel@tonic-gate }
1950Sstevel@tonic-gate return;
1960Sstevel@tonic-gate }
1970Sstevel@tonic-gate lproc = lproc->lpc_next;
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate
2000Sstevel@tonic-gate /*
2010Sstevel@tonic-gate * We've fallen off the list without finding our current process;
2020Sstevel@tonic-gate * insert it at the list head.
2030Sstevel@tonic-gate */
2040Sstevel@tonic-gate if ((lproc = malloc(sizeof (*lproc))) == NULL)
2050Sstevel@tonic-gate debug("insufficient memory to track new process %d", (int)pid);
2060Sstevel@tonic-gate else {
2070Sstevel@tonic-gate (void) bzero(lproc, sizeof (*lproc));
2080Sstevel@tonic-gate lproc->lpc_pid = pid;
2090Sstevel@tonic-gate lproc->lpc_mark = 1;
2100Sstevel@tonic-gate lproc->lpc_collection = lcol;
2110Sstevel@tonic-gate lproc->lpc_psinfo_fd = -1;
2120Sstevel@tonic-gate lproc->lpc_pgdata_fd = -1;
2130Sstevel@tonic-gate lproc->lpc_xmap_fd = -1;
2140Sstevel@tonic-gate
2150Sstevel@tonic-gate /*
2160Sstevel@tonic-gate * If the caller didn't flag this process as unscannable
2170Sstevel@tonic-gate * already, do some more checking.
2180Sstevel@tonic-gate */
2190Sstevel@tonic-gate lproc->lpc_unscannable = unscannable || proc_issystem(pid);
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate #ifdef DEBUG
2220Sstevel@tonic-gate /*
2230Sstevel@tonic-gate * Verify the sanity of lprocess. It should not contain the
2240Sstevel@tonic-gate * process we are about to prepend.
2250Sstevel@tonic-gate */
2260Sstevel@tonic-gate if (lcollection_member(lcol, lproc)) {
2270Sstevel@tonic-gate lprocess_t *cur = lcol->lcol_lprocess;
2280Sstevel@tonic-gate debug("The collection %lld already has these members, "
2293247Sgjelinek "including me, %d!\n",
2303247Sgjelinek (long long)lcol->lcol_id.rcid_val,
2310Sstevel@tonic-gate (int)lproc->lpc_pid);
2320Sstevel@tonic-gate while (cur != NULL) {
2330Sstevel@tonic-gate debug("\t%d\n", (int)cur->lpc_pid);
2340Sstevel@tonic-gate cur = cur->lpc_next;
2350Sstevel@tonic-gate }
2360Sstevel@tonic-gate info(gettext("process already on lprocess\n"));
2370Sstevel@tonic-gate abort();
2380Sstevel@tonic-gate }
2390Sstevel@tonic-gate #endif /* DEBUG */
2400Sstevel@tonic-gate lproc->lpc_next = lcol->lcol_lprocess;
2410Sstevel@tonic-gate if (lproc->lpc_next != NULL)
2420Sstevel@tonic-gate lproc->lpc_next->lpc_prev = lproc;
2430Sstevel@tonic-gate lproc->lpc_prev = NULL;
2440Sstevel@tonic-gate lcol->lcol_lprocess = lproc;
2450Sstevel@tonic-gate
2463247Sgjelinek debug("tracking %s %ld %d %s%s\n",
2473247Sgjelinek (colid.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
2483247Sgjelinek (long)colid.rcid_val,
2493247Sgjelinek (int)pid, psinfop->pr_psargs,
2500Sstevel@tonic-gate (lproc->lpc_unscannable != 0) ? " (not scannable)" : "");
2510Sstevel@tonic-gate lcol->lcol_stat.lcols_proc_in++;
2520Sstevel@tonic-gate }
2530Sstevel@tonic-gate }
2540Sstevel@tonic-gate
2550Sstevel@tonic-gate static int
list_walk_process_cb(lcollection_t * lcol,void * arg)2560Sstevel@tonic-gate list_walk_process_cb(lcollection_t *lcol, void *arg)
2570Sstevel@tonic-gate {
2580Sstevel@tonic-gate int (*cb)(lcollection_t *, lprocess_t *) =
2590Sstevel@tonic-gate (int(*)(lcollection_t *, lprocess_t *))arg;
2600Sstevel@tonic-gate lprocess_t *member;
2610Sstevel@tonic-gate lprocess_t *next;
2620Sstevel@tonic-gate
2630Sstevel@tonic-gate member = lcol->lcol_lprocess;
2640Sstevel@tonic-gate while (member != NULL) {
2650Sstevel@tonic-gate pid_t pid = member->lpc_pid;
2660Sstevel@tonic-gate next = member->lpc_next;
2670Sstevel@tonic-gate
2680Sstevel@tonic-gate debug_high("list_walk_all lpc %d\n", (int)pid);
2690Sstevel@tonic-gate if (cb(lcol, member) != 0) {
2700Sstevel@tonic-gate debug_high("list_walk_all aborted at lpc %d\n",
2710Sstevel@tonic-gate (int)pid);
2720Sstevel@tonic-gate return (1);
2730Sstevel@tonic-gate }
2740Sstevel@tonic-gate member = next;
2750Sstevel@tonic-gate }
2760Sstevel@tonic-gate
2770Sstevel@tonic-gate return (0);
2780Sstevel@tonic-gate }
2790Sstevel@tonic-gate
2800Sstevel@tonic-gate /*
2810Sstevel@tonic-gate * Invoke the given callback for each process in each collection. Callbacks
2820Sstevel@tonic-gate * are allowed to change the linkage of the process on which they act.
2830Sstevel@tonic-gate */
2840Sstevel@tonic-gate static void
list_walk_all(int (* cb)(lcollection_t *,lprocess_t *))2850Sstevel@tonic-gate list_walk_all(int (*cb)(lcollection_t *, lprocess_t *))
2860Sstevel@tonic-gate {
2870Sstevel@tonic-gate list_walk_collection(list_walk_process_cb, (void *)cb);
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate
2900Sstevel@tonic-gate static void
revoke_psinfo(rfd_t * rfd)2910Sstevel@tonic-gate revoke_psinfo(rfd_t *rfd)
2920Sstevel@tonic-gate {
2930Sstevel@tonic-gate lprocess_t *lpc = (lprocess_t *)rfd->rfd_data;
2940Sstevel@tonic-gate
2950Sstevel@tonic-gate if (lpc != NULL) {
2960Sstevel@tonic-gate debug("revoking psinfo fd for process %d\n", (int)lpc->lpc_pid);
2970Sstevel@tonic-gate ASSERT(lpc->lpc_psinfo_fd != -1);
2980Sstevel@tonic-gate lpc->lpc_psinfo_fd = -1;
2990Sstevel@tonic-gate } else
3000Sstevel@tonic-gate debug("revoking psinfo fd for unknown process\n");
3010Sstevel@tonic-gate }
3020Sstevel@tonic-gate
3030Sstevel@tonic-gate /*
3040Sstevel@tonic-gate * Retrieve a process's psinfo via an already-opened or new file descriptor.
3050Sstevel@tonic-gate * The supplied descriptor will be closed on failure. An optional callback
3060Sstevel@tonic-gate * will be invoked with the last descriptor tried, and a supplied callback
3070Sstevel@tonic-gate * argument, as its arguments, such that the new descriptor may be cached, or
3080Sstevel@tonic-gate * an old one may be invalidated. If the result of the callback is zero, the
3090Sstevel@tonic-gate * the caller is to assume responsibility for the file descriptor, to close it
3100Sstevel@tonic-gate * with rfd_close().
3110Sstevel@tonic-gate *
3120Sstevel@tonic-gate * On failure, a nonzero value is returned.
3130Sstevel@tonic-gate */
3140Sstevel@tonic-gate int
get_psinfo(pid_t pid,psinfo_t * psinfo,int cached_fd,int (* fd_update_cb)(void *,int),void * arg,lprocess_t * lpc)3150Sstevel@tonic-gate get_psinfo(pid_t pid, psinfo_t *psinfo, int cached_fd,
3160Sstevel@tonic-gate int(*fd_update_cb)(void *, int), void *arg, lprocess_t *lpc)
3170Sstevel@tonic-gate {
3180Sstevel@tonic-gate int fd;
3190Sstevel@tonic-gate int can_try_uncached;
3200Sstevel@tonic-gate
3210Sstevel@tonic-gate ASSERT(!(cached_fd > 0 && fd_update_cb == NULL));
3220Sstevel@tonic-gate
3230Sstevel@tonic-gate do {
3240Sstevel@tonic-gate if (cached_fd >= 0) {
3250Sstevel@tonic-gate fd = cached_fd;
3260Sstevel@tonic-gate can_try_uncached = 1;
3270Sstevel@tonic-gate debug_high("%d/psinfo, trying cached fd %d\n",
3280Sstevel@tonic-gate (int)pid, fd);
3290Sstevel@tonic-gate } else {
3300Sstevel@tonic-gate char pathbuf[PROC_PATH_MAX];
3310Sstevel@tonic-gate
3320Sstevel@tonic-gate can_try_uncached = 0;
3330Sstevel@tonic-gate (void) snprintf(pathbuf, sizeof (pathbuf),
3340Sstevel@tonic-gate "/proc/%d/psinfo", (int)pid);
3350Sstevel@tonic-gate if ((fd = rfd_open(pathbuf, 1, RFD_PSINFO,
3360Sstevel@tonic-gate revoke_psinfo, lpc, O_RDONLY, 0000)) < 0) {
3370Sstevel@tonic-gate debug("cannot open %s", pathbuf);
3380Sstevel@tonic-gate break;
3390Sstevel@tonic-gate } else
3400Sstevel@tonic-gate debug_high("opened %s, fd %d\n", pathbuf, fd);
3410Sstevel@tonic-gate }
3420Sstevel@tonic-gate
3430Sstevel@tonic-gate if (pread(fd, psinfo, sizeof (*psinfo), 0) ==
3440Sstevel@tonic-gate sizeof (*psinfo) && psinfo->pr_pid == pid)
3450Sstevel@tonic-gate break;
3460Sstevel@tonic-gate else {
3470Sstevel@tonic-gate debug_high("closed fd %d\n", fd);
3480Sstevel@tonic-gate if (rfd_close(fd) != 0)
3490Sstevel@tonic-gate debug("could not close fd %d", fd);
3500Sstevel@tonic-gate fd = cached_fd = -1;
3510Sstevel@tonic-gate }
3520Sstevel@tonic-gate } while (can_try_uncached == 1);
3530Sstevel@tonic-gate
3540Sstevel@tonic-gate if (fd_update_cb == NULL || fd_update_cb(arg, fd) != 0)
3550Sstevel@tonic-gate if (fd >= 0) {
3560Sstevel@tonic-gate debug_high("closed %s fd %d\n", fd_update_cb == NULL ?
3570Sstevel@tonic-gate "uncached" : "cached", fd);
3580Sstevel@tonic-gate if (rfd_close(fd) != 0)
3590Sstevel@tonic-gate debug("could not close fd %d", fd);
3600Sstevel@tonic-gate }
3610Sstevel@tonic-gate
3620Sstevel@tonic-gate debug_high("get_psinfo ret %d, fd %d, %s\n", ((fd >= 0) ? 0 : -1), fd,
3630Sstevel@tonic-gate fd_update_cb != NULL ? "cached" : "uncached");
3640Sstevel@tonic-gate return ((fd >= 0) ? 0 : -1);
3650Sstevel@tonic-gate }
3660Sstevel@tonic-gate
3670Sstevel@tonic-gate /*
3683247Sgjelinek * Retrieve the collection membership of all processes and update the psinfo of
3693247Sgjelinek * those non-system, non-zombie ones in collections. For global zone processes,
3703247Sgjelinek * we first attempt to put the process into a capped project collection. We
3713247Sgjelinek * also want to track the process for the global zone collection as a whole.
3720Sstevel@tonic-gate */
3730Sstevel@tonic-gate static void
proc_cb(const pid_t pid)3740Sstevel@tonic-gate proc_cb(const pid_t pid)
3750Sstevel@tonic-gate {
3760Sstevel@tonic-gate psinfo_t psinfo;
3770Sstevel@tonic-gate
3783247Sgjelinek if (get_psinfo(pid, &psinfo, -1, NULL, NULL, NULL) == 0) {
3793247Sgjelinek lprocess_insert_mark(&psinfo);
3803247Sgjelinek if (gz_capped && psinfo.pr_zoneid == GLOBAL_ZONEID) {
3813247Sgjelinek /*
3823247Sgjelinek * We also want to track this process for the global
3833247Sgjelinek * zone as a whole so add it to the global zone
3843247Sgjelinek * collection as well.
3853247Sgjelinek */
3863247Sgjelinek psinfo.pr_projid = -1;
3873247Sgjelinek lprocess_insert_mark(&psinfo);
3883247Sgjelinek }
3893247Sgjelinek }
3900Sstevel@tonic-gate }
3910Sstevel@tonic-gate
3920Sstevel@tonic-gate /*
3930Sstevel@tonic-gate * Cache the process' psinfo fd, taking responsibility for freeing it.
3940Sstevel@tonic-gate */
3950Sstevel@tonic-gate int
lprocess_update_psinfo_fd_cb(void * arg,int fd)3960Sstevel@tonic-gate lprocess_update_psinfo_fd_cb(void *arg, int fd)
3970Sstevel@tonic-gate {
3980Sstevel@tonic-gate lprocess_t *lpc = arg;
3990Sstevel@tonic-gate
4000Sstevel@tonic-gate lpc->lpc_psinfo_fd = fd;
4010Sstevel@tonic-gate return (0);
4020Sstevel@tonic-gate }
4030Sstevel@tonic-gate
4040Sstevel@tonic-gate /*
4053247Sgjelinek * Get the system pagesize.
4060Sstevel@tonic-gate */
4073247Sgjelinek static void
get_page_size(void)4083247Sgjelinek get_page_size(void)
4090Sstevel@tonic-gate {
4103247Sgjelinek page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
4113247Sgjelinek debug("physical page size: %luKB\n", page_size_kb);
4123247Sgjelinek }
4133247Sgjelinek
4143247Sgjelinek static void
tm_fmt(char * msg,hrtime_t t1,hrtime_t t2)4153247Sgjelinek tm_fmt(char *msg, hrtime_t t1, hrtime_t t2)
4163247Sgjelinek {
4173247Sgjelinek hrtime_t diff = t2 - t1;
4180Sstevel@tonic-gate
4193247Sgjelinek if (diff < MILLISEC)
4203247Sgjelinek debug("%s: %lld nanoseconds\n", msg, diff);
4213247Sgjelinek else if (diff < MICROSEC)
4223247Sgjelinek debug("%s: %.2f microseconds\n", msg, (float)diff / MILLISEC);
4233247Sgjelinek else if (diff < NANOSEC)
4243247Sgjelinek debug("%s: %.2f milliseconds\n", msg, (float)diff / MICROSEC);
4253247Sgjelinek else
4263247Sgjelinek debug("%s: %.2f seconds\n", msg, (float)diff / NANOSEC);
4273247Sgjelinek }
4283247Sgjelinek
4293247Sgjelinek /*
4303247Sgjelinek * Get the zone's & project's RSS from the kernel.
4313247Sgjelinek */
4323247Sgjelinek static void
rss_sample(boolean_t my_zone_only,uint_t col_types)4333247Sgjelinek rss_sample(boolean_t my_zone_only, uint_t col_types)
4343247Sgjelinek {
4353247Sgjelinek size_t nres;
4363247Sgjelinek size_t i;
4373247Sgjelinek uint_t flags;
4383247Sgjelinek hrtime_t t1, t2;
4393247Sgjelinek
4403247Sgjelinek if (my_zone_only) {
4413247Sgjelinek flags = VMUSAGE_ZONE;
4420Sstevel@tonic-gate } else {
4433247Sgjelinek flags = 0;
4443247Sgjelinek if (col_types & CAPPED_PROJECT)
4453247Sgjelinek flags |= VMUSAGE_PROJECTS;
4463247Sgjelinek if (col_types & CAPPED_ZONE && my_zoneid == GLOBAL_ZONEID)
4473247Sgjelinek flags |= VMUSAGE_ALL_ZONES;
4480Sstevel@tonic-gate }
4490Sstevel@tonic-gate
4503247Sgjelinek debug("vmusage sample flags 0x%x\n", flags);
4513247Sgjelinek if (flags == 0)
4523247Sgjelinek return;
4533247Sgjelinek
4543247Sgjelinek again:
4553247Sgjelinek /* try the current buffer to see if the list will fit */
4563247Sgjelinek nres = vmu_vals_len;
4573247Sgjelinek t1 = gethrtime();
4583247Sgjelinek if (getvmusage(flags, my_zone_only ? 0 : rcfg.rcfg_rss_sample_interval,
4593247Sgjelinek vmu_vals, &nres) != 0) {
4603247Sgjelinek if (errno != EOVERFLOW) {
4613247Sgjelinek warn(gettext("can't read RSS from kernel\n"));
4623247Sgjelinek return;
4633247Sgjelinek }
4643247Sgjelinek }
4653247Sgjelinek t2 = gethrtime();
4663247Sgjelinek tm_fmt("getvmusage time", t1, t2);
4673247Sgjelinek
4683247Sgjelinek debug("kernel nres %lu\n", (ulong_t)nres);
4693247Sgjelinek
4703247Sgjelinek if (nres > vmu_vals_len) {
4713247Sgjelinek /* array size is now too small, increase it and try again */
4723247Sgjelinek free(vmu_vals);
4733247Sgjelinek
4743247Sgjelinek if ((vmu_vals = (vmusage_t *)calloc(nres,
4753247Sgjelinek sizeof (vmusage_t))) == NULL) {
4763247Sgjelinek warn(gettext("out of memory: could not read RSS from "
4773247Sgjelinek "kernel\n"));
4783247Sgjelinek vmu_vals_len = nvmu_vals = 0;
4793247Sgjelinek return;
4803247Sgjelinek }
4813247Sgjelinek vmu_vals_len = nres;
4823247Sgjelinek goto again;
4833247Sgjelinek }
4843247Sgjelinek
4853247Sgjelinek nvmu_vals = nres;
4863247Sgjelinek
4873247Sgjelinek debug("vmusage_sample\n");
4883247Sgjelinek for (i = 0; i < nvmu_vals; i++) {
4893247Sgjelinek debug("%d: id: %d, type: 0x%x, rss_all: %llu (%lluKB), "
4903247Sgjelinek "swap: %llu\n", (int)i, (int)vmu_vals[i].vmu_id,
4913247Sgjelinek vmu_vals[i].vmu_type,
4923247Sgjelinek (unsigned long long)vmu_vals[i].vmu_rss_all,
4933247Sgjelinek (unsigned long long)vmu_vals[i].vmu_rss_all / 1024,
4943247Sgjelinek (unsigned long long)vmu_vals[i].vmu_swap_all);
4953247Sgjelinek }
4963247Sgjelinek }
4973247Sgjelinek
4983247Sgjelinek static void
update_col_rss(lcollection_t * lcol)4993247Sgjelinek update_col_rss(lcollection_t *lcol)
5003247Sgjelinek {
5013247Sgjelinek int i;
5023247Sgjelinek
5033247Sgjelinek lcol->lcol_rss = 0;
5043247Sgjelinek lcol->lcol_image_size = 0;
5053247Sgjelinek
5063247Sgjelinek for (i = 0; i < nvmu_vals; i++) {
5073247Sgjelinek if (vmu_vals[i].vmu_id != lcol->lcol_id.rcid_val)
5083247Sgjelinek continue;
5093247Sgjelinek
5103247Sgjelinek if (vmu_vals[i].vmu_type == VMUSAGE_ZONE &&
5113247Sgjelinek lcol->lcol_id.rcid_type != RCIDT_ZONE)
5123247Sgjelinek continue;
5133247Sgjelinek
5143247Sgjelinek if (vmu_vals[i].vmu_type == VMUSAGE_PROJECTS &&
5153247Sgjelinek lcol->lcol_id.rcid_type != RCIDT_PROJECT)
5163247Sgjelinek continue;
5173247Sgjelinek
5183247Sgjelinek /* we found the right RSS entry, update the collection vals */
5193247Sgjelinek lcol->lcol_rss = vmu_vals[i].vmu_rss_all / 1024;
5203247Sgjelinek lcol->lcol_image_size = vmu_vals[i].vmu_swap_all / 1024;
5213247Sgjelinek break;
5223247Sgjelinek }
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate
5250Sstevel@tonic-gate /*
5260Sstevel@tonic-gate * Sample the collection RSS, updating the collection's statistics with the
5273247Sgjelinek * results. Also, sum the rss of all capped projects & return true if
5283247Sgjelinek * the collection is over cap.
5290Sstevel@tonic-gate */
5300Sstevel@tonic-gate static int
rss_sample_col_cb(lcollection_t * lcol,void * arg)5310Sstevel@tonic-gate rss_sample_col_cb(lcollection_t *lcol, void *arg)
5320Sstevel@tonic-gate {
5330Sstevel@tonic-gate int64_t excess;
5340Sstevel@tonic-gate uint64_t rss;
5353247Sgjelinek sample_col_arg_t *col_argp = (sample_col_arg_t *)arg;
5360Sstevel@tonic-gate
5373247Sgjelinek update_col_rss(lcol);
5380Sstevel@tonic-gate
5390Sstevel@tonic-gate lcol->lcol_stat.lcols_rss_sample++;
5400Sstevel@tonic-gate rss = lcol->lcol_rss;
5413247Sgjelinek excess = rss - lcol->lcol_rss_cap;
5423247Sgjelinek if (excess > 0) {
5430Sstevel@tonic-gate lcol->lcol_stat.lcols_rss_act_sum += rss;
5443247Sgjelinek col_argp->sca_any_over_cap = B_TRUE;
5453247Sgjelinek if (lcol->lcol_id.rcid_type == RCIDT_PROJECT)
5463247Sgjelinek col_argp->sca_project_over_cap = B_TRUE;
5473247Sgjelinek }
5480Sstevel@tonic-gate lcol->lcol_stat.lcols_rss_sum += rss;
5490Sstevel@tonic-gate
5500Sstevel@tonic-gate if (lcol->lcol_stat.lcols_min_rss > rss)
5510Sstevel@tonic-gate lcol->lcol_stat.lcols_min_rss = rss;
5520Sstevel@tonic-gate if (lcol->lcol_stat.lcols_max_rss < rss)
5530Sstevel@tonic-gate lcol->lcol_stat.lcols_max_rss = rss;
5540Sstevel@tonic-gate
5550Sstevel@tonic-gate return (0);
5560Sstevel@tonic-gate }
5570Sstevel@tonic-gate
5580Sstevel@tonic-gate /*
5593247Sgjelinek * Determine if we have capped projects, capped zones or both.
5603247Sgjelinek */
5613247Sgjelinek static int
col_type_cb(lcollection_t * lcol,void * arg)5623247Sgjelinek col_type_cb(lcollection_t *lcol, void *arg)
5633247Sgjelinek {
5643247Sgjelinek uint_t *col_type = (uint_t *)arg;
5653247Sgjelinek
5663247Sgjelinek /* skip uncapped collections */
5673247Sgjelinek if (lcol->lcol_rss_cap == 0)
5683247Sgjelinek return (1);
5693247Sgjelinek
5703247Sgjelinek if (lcol->lcol_id.rcid_type == RCIDT_PROJECT)
5713247Sgjelinek *col_type |= CAPPED_PROJECT;
5723247Sgjelinek else
5733247Sgjelinek *col_type |= CAPPED_ZONE;
5743247Sgjelinek
5753247Sgjelinek /* once we know everything is capped, we can stop looking */
5763247Sgjelinek if ((*col_type & CAPPED_ZONE) && (*col_type & CAPPED_PROJECT))
5773247Sgjelinek return (1);
5783247Sgjelinek
5793247Sgjelinek return (0);
5803247Sgjelinek }
5813247Sgjelinek
5823247Sgjelinek /*
5830Sstevel@tonic-gate * Open /proc and walk entries.
5840Sstevel@tonic-gate */
5850Sstevel@tonic-gate static void
proc_walk_all(void (* cb)(const pid_t))5860Sstevel@tonic-gate proc_walk_all(void (*cb)(const pid_t))
5870Sstevel@tonic-gate {
5880Sstevel@tonic-gate DIR *pdir;
5890Sstevel@tonic-gate struct dirent *dirent;
5900Sstevel@tonic-gate pid_t pid;
5910Sstevel@tonic-gate
5920Sstevel@tonic-gate (void) rfd_reserve(1);
5930Sstevel@tonic-gate if ((pdir = opendir("/proc")) == NULL)
5940Sstevel@tonic-gate die(gettext("couldn't open /proc!"));
5950Sstevel@tonic-gate
5960Sstevel@tonic-gate while ((dirent = readdir(pdir)) != NULL) {
5970Sstevel@tonic-gate if (strcmp(".", dirent->d_name) == 0 ||
5980Sstevel@tonic-gate strcmp("..", dirent->d_name) == 0)
5990Sstevel@tonic-gate continue;
6000Sstevel@tonic-gate pid = atoi(dirent->d_name);
6010Sstevel@tonic-gate ASSERT(pid != 0 || strcmp(dirent->d_name, "0") == 0);
6020Sstevel@tonic-gate if (pid == rcapd_pid)
6030Sstevel@tonic-gate continue;
6040Sstevel@tonic-gate else
6050Sstevel@tonic-gate cb(pid);
6060Sstevel@tonic-gate }
6070Sstevel@tonic-gate (void) closedir(pdir);
6080Sstevel@tonic-gate }
6090Sstevel@tonic-gate
6100Sstevel@tonic-gate /*
6110Sstevel@tonic-gate * Clear unmarked callback.
6120Sstevel@tonic-gate */
6130Sstevel@tonic-gate /*ARGSUSED*/
6140Sstevel@tonic-gate static int
sweep_process_cb(lcollection_t * lcol,lprocess_t * lpc)6150Sstevel@tonic-gate sweep_process_cb(lcollection_t *lcol, lprocess_t *lpc)
6160Sstevel@tonic-gate {
6170Sstevel@tonic-gate if (lpc->lpc_mark) {
6180Sstevel@tonic-gate lpc->lpc_mark = 0;
6190Sstevel@tonic-gate } else {
6200Sstevel@tonic-gate debug("process %d finished\n", (int)lpc->lpc_pid);
6210Sstevel@tonic-gate lprocess_free(lpc);
6220Sstevel@tonic-gate }
6230Sstevel@tonic-gate
6240Sstevel@tonic-gate return (0);
6250Sstevel@tonic-gate }
6260Sstevel@tonic-gate
6270Sstevel@tonic-gate /*
6280Sstevel@tonic-gate * Print, for debugging purposes, a collection's recently-sampled RSS and
6290Sstevel@tonic-gate * excess.
6300Sstevel@tonic-gate */
6310Sstevel@tonic-gate /*ARGSUSED*/
6320Sstevel@tonic-gate static int
excess_print_cb(lcollection_t * lcol,void * arg)6330Sstevel@tonic-gate excess_print_cb(lcollection_t *lcol, void *arg)
6340Sstevel@tonic-gate {
6350Sstevel@tonic-gate int64_t excess = lcol->lcol_rss - lcol->lcol_rss_cap;
6360Sstevel@tonic-gate
6370Sstevel@tonic-gate debug("%s %s rss/cap: %llu/%llu, excess = %lld kB\n",
6383247Sgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
6393247Sgjelinek lcol->lcol_name,
6400Sstevel@tonic-gate (unsigned long long)lcol->lcol_rss,
6410Sstevel@tonic-gate (unsigned long long)lcol->lcol_rss_cap,
6420Sstevel@tonic-gate (long long)excess);
6430Sstevel@tonic-gate
6440Sstevel@tonic-gate return (0);
6450Sstevel@tonic-gate }
6460Sstevel@tonic-gate
6470Sstevel@tonic-gate /*
6480Sstevel@tonic-gate * Scan those collections which have exceeded their caps.
6493247Sgjelinek *
6503247Sgjelinek * If we're running in the global zone it might have a cap. We don't want to
6513247Sgjelinek * do any capping for the global zone yet since we might get under the cap by
6523247Sgjelinek * just capping the projects in the global zone.
6530Sstevel@tonic-gate */
6540Sstevel@tonic-gate /*ARGSUSED*/
6550Sstevel@tonic-gate static int
scan_cb(lcollection_t * lcol,void * arg)6560Sstevel@tonic-gate scan_cb(lcollection_t *lcol, void *arg)
6570Sstevel@tonic-gate {
6580Sstevel@tonic-gate int64_t excess;
6590Sstevel@tonic-gate
6603247Sgjelinek /* skip over global zone collection for now but keep track for later */
6613247Sgjelinek if (lcol->lcol_id.rcid_type == RCIDT_ZONE &&
6623247Sgjelinek lcol->lcol_id.rcid_val == GLOBAL_ZONEID) {
6633247Sgjelinek gz_col = lcol;
6643247Sgjelinek return (0);
6653247Sgjelinek }
6663247Sgjelinek
6670Sstevel@tonic-gate if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) {
6680Sstevel@tonic-gate scan(lcol, excess);
6690Sstevel@tonic-gate lcol->lcol_stat.lcols_scan++;
6700Sstevel@tonic-gate }
6710Sstevel@tonic-gate
6720Sstevel@tonic-gate return (0);
6730Sstevel@tonic-gate }
6740Sstevel@tonic-gate
6750Sstevel@tonic-gate /*
6763247Sgjelinek * Scan the global zone collection and see if it still exceeds its cap.
6773247Sgjelinek * We take into account the effects of capping any global zone projects here.
6783247Sgjelinek */
6793247Sgjelinek static void
scan_gz(lcollection_t * lcol,boolean_t project_over_cap)6803247Sgjelinek scan_gz(lcollection_t *lcol, boolean_t project_over_cap)
6813247Sgjelinek {
6823247Sgjelinek int64_t excess;
6833247Sgjelinek
6843247Sgjelinek /*
6853247Sgjelinek * If we had projects over their cap and the global zone was also over
6863247Sgjelinek * its cap then we need to get the up-to-date global zone rss to
6873247Sgjelinek * determine if we are still over the global zone cap. We might have
6883247Sgjelinek * gone under while we scanned the capped projects. If there were no
6893247Sgjelinek * projects over cap then we can use the rss value we already have for
6903247Sgjelinek * the global zone.
6913247Sgjelinek */
6923247Sgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap;
6933247Sgjelinek if (project_over_cap && excess > 0) {
6943247Sgjelinek rss_sample(B_TRUE, CAPPED_ZONE);
6953247Sgjelinek update_col_rss(lcol);
6963247Sgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap;
6973247Sgjelinek }
6983247Sgjelinek
6993247Sgjelinek if (excess > 0) {
7003247Sgjelinek debug("global zone excess %lldKB\n", (long long)excess);
7013247Sgjelinek scan(lcol, excess);
7023247Sgjelinek lcol->lcol_stat.lcols_scan++;
7033247Sgjelinek }
7043247Sgjelinek }
7053247Sgjelinek
7063247Sgjelinek /*
7070Sstevel@tonic-gate * Do a soft scan of those collections which have excesses. A soft scan is one
7080Sstevel@tonic-gate * in which the cap enforcement pressure is taken into account. The difference
7090Sstevel@tonic-gate * between the utilized physical memory and the cap enforcement pressure will
7100Sstevel@tonic-gate * be scanned-for, and each collection will be scanned proportionally by their
7110Sstevel@tonic-gate * present excesses.
7120Sstevel@tonic-gate */
7130Sstevel@tonic-gate static int
soft_scan_cb(lcollection_t * lcol,void * a)7140Sstevel@tonic-gate soft_scan_cb(lcollection_t *lcol, void *a)
7150Sstevel@tonic-gate {
7160Sstevel@tonic-gate int64_t excess;
7170Sstevel@tonic-gate soft_scan_arg_t *arg = a;
7180Sstevel@tonic-gate
7193247Sgjelinek /* skip over global zone collection for now but keep track for later */
7203247Sgjelinek if (lcol->lcol_id.rcid_type == RCIDT_ZONE &&
7213247Sgjelinek lcol->lcol_id.rcid_val == GLOBAL_ZONEID) {
7223247Sgjelinek gz_col = lcol;
7233247Sgjelinek return (0);
7243247Sgjelinek }
7253247Sgjelinek
7260Sstevel@tonic-gate if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) {
7273247Sgjelinek int64_t adjusted_excess =
7283247Sgjelinek excess * arg->ssa_scan_goal / arg->ssa_sum_excess;
7293247Sgjelinek
7303247Sgjelinek debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
7313247Sgjelinek "scanning %lld\n",
7323247Sgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
7333247Sgjelinek "project" : "zone"),
7343247Sgjelinek (long)lcol->lcol_id.rcid_val,
7350Sstevel@tonic-gate (long long)excess, (long long)arg->ssa_scan_goal,
7360Sstevel@tonic-gate (unsigned long long)arg->ssa_sum_excess,
7373247Sgjelinek (long long)adjusted_excess);
7380Sstevel@tonic-gate
7393247Sgjelinek scan(lcol, adjusted_excess);
7400Sstevel@tonic-gate lcol->lcol_stat.lcols_scan++;
7410Sstevel@tonic-gate }
7420Sstevel@tonic-gate
7430Sstevel@tonic-gate return (0);
7440Sstevel@tonic-gate }
7450Sstevel@tonic-gate
7463247Sgjelinek static void
soft_scan_gz(lcollection_t * lcol,void * a)7473247Sgjelinek soft_scan_gz(lcollection_t *lcol, void *a)
7483247Sgjelinek {
7493247Sgjelinek int64_t excess;
7503247Sgjelinek soft_scan_arg_t *arg = a;
7513247Sgjelinek
7523247Sgjelinek /*
7533247Sgjelinek * If we had projects over their cap and the global zone was also over
7543247Sgjelinek * its cap then we need to get the up-to-date global zone rss to
7553247Sgjelinek * determine if we are still over the global zone cap. We might have
7563247Sgjelinek * gone under while we scanned the capped projects. If there were no
7573247Sgjelinek * projects over cap then we can use the rss value we already have for
7583247Sgjelinek * the global zone.
7593247Sgjelinek */
7603247Sgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap;
7613247Sgjelinek if (arg->ssa_project_over_cap && excess > 0) {
7623247Sgjelinek rss_sample(B_TRUE, CAPPED_ZONE);
7633247Sgjelinek update_col_rss(lcol);
7643247Sgjelinek excess = lcol->lcol_rss - lcol->lcol_rss_cap;
7653247Sgjelinek }
7663247Sgjelinek
7673247Sgjelinek if (excess > 0) {
7683247Sgjelinek int64_t adjusted_excess =
7693247Sgjelinek excess * arg->ssa_scan_goal / arg->ssa_sum_excess;
7703247Sgjelinek
7713247Sgjelinek debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
7723247Sgjelinek "scanning %lld\n",
7733247Sgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
7743247Sgjelinek "project" : "zone"),
7753247Sgjelinek (long)lcol->lcol_id.rcid_val,
7763247Sgjelinek (long long)excess, (long long)arg->ssa_scan_goal,
7773247Sgjelinek (unsigned long long)arg->ssa_sum_excess,
7783247Sgjelinek (long long)adjusted_excess);
7793247Sgjelinek
7803247Sgjelinek scan(lcol, adjusted_excess);
7813247Sgjelinek lcol->lcol_stat.lcols_scan++;
7823247Sgjelinek }
7833247Sgjelinek }
7843247Sgjelinek
7850Sstevel@tonic-gate /*
7860Sstevel@tonic-gate * When a scan could happen, but caps aren't enforced tick the
7870Sstevel@tonic-gate * lcols_unenforced_cap counter.
7880Sstevel@tonic-gate */
7890Sstevel@tonic-gate /*ARGSUSED*/
7900Sstevel@tonic-gate static int
unenforced_cap_cb(lcollection_t * lcol,void * arg)7910Sstevel@tonic-gate unenforced_cap_cb(lcollection_t *lcol, void *arg)
7920Sstevel@tonic-gate {
7930Sstevel@tonic-gate lcol->lcol_stat.lcols_unenforced_cap++;
7940Sstevel@tonic-gate
7950Sstevel@tonic-gate return (0);
7960Sstevel@tonic-gate }
7970Sstevel@tonic-gate
7980Sstevel@tonic-gate /*
7990Sstevel@tonic-gate * Update the count of physically installed memory.
8000Sstevel@tonic-gate */
8010Sstevel@tonic-gate static void
update_phys_total(void)8020Sstevel@tonic-gate update_phys_total(void)
8030Sstevel@tonic-gate {
8040Sstevel@tonic-gate uint64_t old_phys_total;
8050Sstevel@tonic-gate
8060Sstevel@tonic-gate old_phys_total = phys_total;
8073247Sgjelinek phys_total = (uint64_t)sysconf(_SC_PHYS_PAGES) * page_size_kb;
8080Sstevel@tonic-gate if (phys_total != old_phys_total)
8090Sstevel@tonic-gate debug("physical memory%s: %lluM\n", (old_phys_total == 0 ?
8100Sstevel@tonic-gate "" : " adjusted"), (unsigned long long)(phys_total / 1024));
8110Sstevel@tonic-gate }
8120Sstevel@tonic-gate
8130Sstevel@tonic-gate /*
8140Sstevel@tonic-gate * Unlink a process from its collection, updating relevant statistics, and
8150Sstevel@tonic-gate * freeing its associated memory.
8160Sstevel@tonic-gate */
8170Sstevel@tonic-gate void
lprocess_free(lprocess_t * lpc)8180Sstevel@tonic-gate lprocess_free(lprocess_t *lpc)
8190Sstevel@tonic-gate {
8200Sstevel@tonic-gate pid_t pid;
8210Sstevel@tonic-gate
8220Sstevel@tonic-gate lpc->lpc_collection->lcol_stat.lcols_proc_out++;
8230Sstevel@tonic-gate
8240Sstevel@tonic-gate if (lpc->lpc_prev != NULL)
8250Sstevel@tonic-gate lpc->lpc_prev->lpc_next = lpc->lpc_next;
8260Sstevel@tonic-gate if (lpc->lpc_next != NULL)
8270Sstevel@tonic-gate lpc->lpc_next->lpc_prev = lpc->lpc_prev;
8280Sstevel@tonic-gate if (lpc->lpc_collection->lcol_lprocess == lpc)
8290Sstevel@tonic-gate lpc->lpc_collection->lcol_lprocess = (lpc->lpc_next !=
8300Sstevel@tonic-gate lpc ? lpc->lpc_next : NULL);
8310Sstevel@tonic-gate lpc->lpc_next = lpc->lpc_prev = NULL;
8320Sstevel@tonic-gate
8330Sstevel@tonic-gate if (lpc->lpc_prpageheader != NULL)
8340Sstevel@tonic-gate free(lpc->lpc_prpageheader);
8350Sstevel@tonic-gate if (lpc->lpc_xmap != NULL)
8360Sstevel@tonic-gate free(lpc->lpc_xmap);
8370Sstevel@tonic-gate if (lpc->lpc_psinfo_fd >= 0) {
8380Sstevel@tonic-gate if (rfd_close(lpc->lpc_psinfo_fd) != 0)
8390Sstevel@tonic-gate debug("could not close %d lpc_psinfo_fd %d",
8400Sstevel@tonic-gate (int)lpc->lpc_pid, lpc->lpc_psinfo_fd);
8410Sstevel@tonic-gate lpc->lpc_psinfo_fd = -1;
8420Sstevel@tonic-gate }
8430Sstevel@tonic-gate if (lpc->lpc_pgdata_fd >= 0) {
8440Sstevel@tonic-gate if (rfd_close(lpc->lpc_pgdata_fd) != 0)
8450Sstevel@tonic-gate debug("could not close %d lpc_pgdata_fd %d",
8460Sstevel@tonic-gate (int)lpc->lpc_pid, lpc->lpc_pgdata_fd);
8470Sstevel@tonic-gate lpc->lpc_pgdata_fd = -1;
8480Sstevel@tonic-gate }
8490Sstevel@tonic-gate if (lpc->lpc_xmap_fd >= 0) {
8500Sstevel@tonic-gate if (rfd_close(lpc->lpc_xmap_fd) != 0)
8510Sstevel@tonic-gate debug("could not close %d lpc_xmap_fd %d",
8520Sstevel@tonic-gate (int)lpc->lpc_pid, lpc->lpc_xmap_fd);
8530Sstevel@tonic-gate lpc->lpc_xmap_fd = -1;
8540Sstevel@tonic-gate }
8550Sstevel@tonic-gate if (lpc->lpc_ignore != NULL)
8560Sstevel@tonic-gate lmapping_free(&lpc->lpc_ignore);
8570Sstevel@tonic-gate pid = lpc->lpc_pid;
8580Sstevel@tonic-gate free(lpc);
8590Sstevel@tonic-gate debug_high("process %d freed\n", (int)pid);
8600Sstevel@tonic-gate }
8610Sstevel@tonic-gate
8620Sstevel@tonic-gate /*
8630Sstevel@tonic-gate * Collection clear callback.
8640Sstevel@tonic-gate */
8650Sstevel@tonic-gate /*ARGSUSED*/
8660Sstevel@tonic-gate static int
collection_clear_cb(lcollection_t * lcol,void * arg)8670Sstevel@tonic-gate collection_clear_cb(lcollection_t *lcol, void *arg)
8680Sstevel@tonic-gate {
8690Sstevel@tonic-gate lcol->lcol_mark = 0;
8700Sstevel@tonic-gate
8710Sstevel@tonic-gate return (0);
8720Sstevel@tonic-gate }
8730Sstevel@tonic-gate
8740Sstevel@tonic-gate /*
8750Sstevel@tonic-gate * Respond to a terminating signal by setting a termination flag.
8760Sstevel@tonic-gate */
8770Sstevel@tonic-gate /*ARGSUSED*/
8780Sstevel@tonic-gate static void
terminate_signal(int signal)8790Sstevel@tonic-gate terminate_signal(int signal)
8800Sstevel@tonic-gate {
8810Sstevel@tonic-gate if (termination_signal == 0)
8820Sstevel@tonic-gate termination_signal = signal;
8830Sstevel@tonic-gate should_run = 0;
8840Sstevel@tonic-gate }
8850Sstevel@tonic-gate
/*
 * Handler for synchronous or asynchronous signals that would ordinarily make
 * the process abort.  The scanner is first given a last-ditch chance, through
 * scan_abort(), to resume any processes it has stopped; then the process
 * aborts.
 */
/*ARGSUSED*/
static void
abort_signal(int signal)
{
	scan_abort();	/* let the scanner resume stopped processes */
	abort();
}
9010Sstevel@tonic-gate
9020Sstevel@tonic-gate /*
9030Sstevel@tonic-gate * Clean up collections which have been removed due to configuration. Unlink
9040Sstevel@tonic-gate * the collection from lcollection and free it.
9050Sstevel@tonic-gate */
9060Sstevel@tonic-gate /*ARGSUSED*/
9070Sstevel@tonic-gate static int
collection_sweep_cb(lcollection_t * lcol,void * arg)9080Sstevel@tonic-gate collection_sweep_cb(lcollection_t *lcol, void *arg)
9090Sstevel@tonic-gate {
9100Sstevel@tonic-gate if (lcol->lcol_mark == 0) {
9113247Sgjelinek debug("freeing %s %s\n",
9123247Sgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
9133247Sgjelinek "project" : "zone"), lcol->lcol_name);
9140Sstevel@tonic-gate lcollection_free(lcol);
9150Sstevel@tonic-gate }
9160Sstevel@tonic-gate
9170Sstevel@tonic-gate return (0);
9180Sstevel@tonic-gate }
9190Sstevel@tonic-gate
9200Sstevel@tonic-gate /*
9210Sstevel@tonic-gate * Set those variables which depend on the global configuration.
9220Sstevel@tonic-gate */
9230Sstevel@tonic-gate static void
finish_configuration(void)9240Sstevel@tonic-gate finish_configuration(void)
9250Sstevel@tonic-gate {
9260Sstevel@tonic-gate /*
9270Sstevel@tonic-gate * Warn that any lnode (or non-project) mode specification (by an SRM
9280Sstevel@tonic-gate * 1.3 configuration file, for example) is ignored.
9290Sstevel@tonic-gate */
9300Sstevel@tonic-gate if (strcmp(rcfg.rcfg_mode_name, "project") != 0) {
9310Sstevel@tonic-gate warn(gettext("%s mode specification ignored -- using project"
9320Sstevel@tonic-gate " mode\n"), rcfg.rcfg_mode_name);
9330Sstevel@tonic-gate rcfg.rcfg_mode_name = "project";
9340Sstevel@tonic-gate rcfg.rcfg_mode = rctype_project;
9350Sstevel@tonic-gate }
9360Sstevel@tonic-gate }
9370Sstevel@tonic-gate
9380Sstevel@tonic-gate /*
9394119Stn143363 * Cause the configuration to be reread and applied.
9400Sstevel@tonic-gate */
9410Sstevel@tonic-gate static void
reread_configuration(void)9424119Stn143363 reread_configuration(void)
9430Sstevel@tonic-gate {
9440Sstevel@tonic-gate rcfg_t rcfg_new;
9450Sstevel@tonic-gate
9464119Stn143363 if (rcfg_read(&rcfg_new, update_statistics) != E_SUCCESS) {
9474119Stn143363 warn(gettext("can't reread configuration \n"));
9484119Stn143363 exit(SMF_EXIT_ERR_CONFIG);
9494119Stn143363 } else {
9500Sstevel@tonic-gate /*
9514119Stn143363 * Done reading configuration. Remove existing
9520Sstevel@tonic-gate * collections in case there is a change in collection type.
9530Sstevel@tonic-gate */
9540Sstevel@tonic-gate if (rcfg.rcfg_mode != rcfg_new.rcfg_mode) {
9550Sstevel@tonic-gate list_walk_collection(collection_clear_cb, NULL);
9560Sstevel@tonic-gate list_walk_collection(collection_sweep_cb, NULL);
9570Sstevel@tonic-gate }
9580Sstevel@tonic-gate
9590Sstevel@tonic-gate /*
9600Sstevel@tonic-gate * Make the newly-read configuration the global one, and update
9610Sstevel@tonic-gate * any variables that depend on it.
9620Sstevel@tonic-gate */
9630Sstevel@tonic-gate rcfg = rcfg_new;
9640Sstevel@tonic-gate finish_configuration();
9650Sstevel@tonic-gate }
9660Sstevel@tonic-gate }
9670Sstevel@tonic-gate
9680Sstevel@tonic-gate /*
9694119Stn143363 * First, examine changes, additions, and deletions to cap definitions.
9704119Stn143363 * Then, set the next event time.
9710Sstevel@tonic-gate */
9720Sstevel@tonic-gate static void
reconfigure(hrtime_t now,hrtime_t * next_configuration,hrtime_t * next_proc_walk,hrtime_t * next_rss_sample)9733247Sgjelinek reconfigure(hrtime_t now, hrtime_t *next_configuration,
9743247Sgjelinek hrtime_t *next_proc_walk, hrtime_t *next_rss_sample)
9750Sstevel@tonic-gate {
9760Sstevel@tonic-gate debug("reconfigure...\n");
9770Sstevel@tonic-gate
9780Sstevel@tonic-gate /*
9790Sstevel@tonic-gate * Walk the lcollection, marking active collections so inactive ones
9800Sstevel@tonic-gate * can be freed.
9810Sstevel@tonic-gate */
9820Sstevel@tonic-gate list_walk_collection(collection_clear_cb, NULL);
9830Sstevel@tonic-gate lcollection_update(LCU_ACTIVE_ONLY); /* mark */
9840Sstevel@tonic-gate list_walk_collection(collection_sweep_cb, NULL);
9853247Sgjelinek
9863247Sgjelinek *next_configuration = NEXT_EVENT_TIME(now,
9873247Sgjelinek rcfg.rcfg_reconfiguration_interval);
9883247Sgjelinek
9893247Sgjelinek /*
9903247Sgjelinek * Reset each event time to the shorter of the previous and new
9913247Sgjelinek * intervals.
9923247Sgjelinek */
9933247Sgjelinek if (next_report == 0 && rcfg.rcfg_report_interval > 0)
9943247Sgjelinek next_report = now;
9953247Sgjelinek else
9963247Sgjelinek next_report = POSITIVE_MIN(next_report,
9973247Sgjelinek NEXT_REPORT_EVENT_TIME(now, rcfg.rcfg_report_interval));
9983247Sgjelinek
9993247Sgjelinek if (*next_proc_walk == 0 && rcfg.rcfg_proc_walk_interval > 0)
10003247Sgjelinek *next_proc_walk = now;
10013247Sgjelinek else
10023247Sgjelinek *next_proc_walk = POSITIVE_MIN(*next_proc_walk,
10033247Sgjelinek NEXT_EVENT_TIME(now, rcfg.rcfg_proc_walk_interval));
10043247Sgjelinek
10053247Sgjelinek if (*next_rss_sample == 0 && rcfg.rcfg_rss_sample_interval > 0)
10063247Sgjelinek *next_rss_sample = now;
10073247Sgjelinek else
10083247Sgjelinek *next_rss_sample = POSITIVE_MIN(*next_rss_sample,
10093247Sgjelinek NEXT_EVENT_TIME(now, rcfg.rcfg_rss_sample_interval));
10100Sstevel@tonic-gate }
10110Sstevel@tonic-gate
10120Sstevel@tonic-gate /*
10134119Stn143363 * Respond to SIGHUP by triggering the rereading the configuration and cap
10140Sstevel@tonic-gate * definitions.
10150Sstevel@tonic-gate */
10160Sstevel@tonic-gate /*ARGSUSED*/
10170Sstevel@tonic-gate static void
sighup(int signal)10180Sstevel@tonic-gate sighup(int signal)
10190Sstevel@tonic-gate {
10200Sstevel@tonic-gate should_reconfigure = 1;
10210Sstevel@tonic-gate }
10220Sstevel@tonic-gate
10230Sstevel@tonic-gate /*
10240Sstevel@tonic-gate * Print, for debugging purposes, each collection's interval statistics.
10250Sstevel@tonic-gate */
10260Sstevel@tonic-gate /*ARGSUSED*/
10270Sstevel@tonic-gate static int
simple_report_collection_cb(lcollection_t * lcol,void * arg)10280Sstevel@tonic-gate simple_report_collection_cb(lcollection_t *lcol, void *arg)
10290Sstevel@tonic-gate {
10300Sstevel@tonic-gate #define DELTA(field) \
10313247Sgjelinek (unsigned long long)( \
10320Sstevel@tonic-gate (lcol->lcol_stat.field - lcol->lcol_stat_old.field))
10330Sstevel@tonic-gate
10340Sstevel@tonic-gate debug("%s %s status: succeeded/attempted (k): %llu/%llu, "
10350Sstevel@tonic-gate "ineffective/scans/unenforced/samplings: %llu/%llu/%llu/%llu, RSS "
10360Sstevel@tonic-gate "min/max (k): %llu/%llu, cap %llu kB, processes/thpt: %llu/%llu, "
10373247Sgjelinek "%llu scans over %llu ms\n",
10383247Sgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
10393247Sgjelinek lcol->lcol_name,
10400Sstevel@tonic-gate DELTA(lcols_pg_eff), DELTA(lcols_pg_att),
10410Sstevel@tonic-gate DELTA(lcols_scan_ineffective), DELTA(lcols_scan),
10420Sstevel@tonic-gate DELTA(lcols_unenforced_cap), DELTA(lcols_rss_sample),
10433247Sgjelinek (unsigned long long)lcol->lcol_stat.lcols_min_rss,
10443247Sgjelinek (unsigned long long)lcol->lcol_stat.lcols_max_rss,
10450Sstevel@tonic-gate (unsigned long long)lcol->lcol_rss_cap,
10460Sstevel@tonic-gate (unsigned long long)(lcol->lcol_stat.lcols_proc_in -
10470Sstevel@tonic-gate lcol->lcol_stat.lcols_proc_out), DELTA(lcols_proc_out),
10480Sstevel@tonic-gate DELTA(lcols_scan_count), DELTA(lcols_scan_time_complete) / (NANOSEC
10490Sstevel@tonic-gate / MILLISEC));
10500Sstevel@tonic-gate
10510Sstevel@tonic-gate #undef DELTA
10520Sstevel@tonic-gate
10530Sstevel@tonic-gate return (0);
10540Sstevel@tonic-gate }
10550Sstevel@tonic-gate
10560Sstevel@tonic-gate /*
10570Sstevel@tonic-gate * Record each collection's interval statistics in the statistics file.
10580Sstevel@tonic-gate */
10590Sstevel@tonic-gate static int
report_collection_cb(lcollection_t * lcol,void * arg)10600Sstevel@tonic-gate report_collection_cb(lcollection_t *lcol, void *arg)
10610Sstevel@tonic-gate {
10620Sstevel@tonic-gate lcollection_report_t dc;
10630Sstevel@tonic-gate int fd = (intptr_t)arg;
10640Sstevel@tonic-gate
10650Sstevel@tonic-gate /*
10660Sstevel@tonic-gate * Copy the relevant fields to the collection's record.
10670Sstevel@tonic-gate */
10680Sstevel@tonic-gate bzero(&dc, sizeof (dc));
10690Sstevel@tonic-gate dc.lcol_id = lcol->lcol_id;
10700Sstevel@tonic-gate (void) strcpy(dc.lcol_name, lcol->lcol_name);
10710Sstevel@tonic-gate dc.lcol_rss = lcol->lcol_rss;
10720Sstevel@tonic-gate dc.lcol_image_size = lcol->lcol_image_size;
10730Sstevel@tonic-gate dc.lcol_rss_cap = lcol->lcol_rss_cap;
10740Sstevel@tonic-gate dc.lcol_stat = lcol->lcol_stat;
10750Sstevel@tonic-gate
10760Sstevel@tonic-gate if (write(fd, &dc, sizeof (dc)) == sizeof (dc)) {
10773247Sgjelinek lcol->lcol_stat_old = lcol->lcol_stat;
10780Sstevel@tonic-gate } else {
10793247Sgjelinek debug("can't write %s %s statistics",
10803247Sgjelinek (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
10813247Sgjelinek "project" : "zone"),
10820Sstevel@tonic-gate lcol->lcol_name);
10830Sstevel@tonic-gate }
10840Sstevel@tonic-gate
10850Sstevel@tonic-gate return (0);
10860Sstevel@tonic-gate }
10870Sstevel@tonic-gate
10880Sstevel@tonic-gate /*
10890Sstevel@tonic-gate * Determine the count of pages scanned by the global page scanner, obtained
10900Sstevel@tonic-gate * from the cpu_stat:*::scan kstats. Return zero on success.
10910Sstevel@tonic-gate */
10920Sstevel@tonic-gate static int
get_globally_scanned_pages(uint64_t * scannedp)10930Sstevel@tonic-gate get_globally_scanned_pages(uint64_t *scannedp)
10940Sstevel@tonic-gate {
10950Sstevel@tonic-gate kstat_t *ksp;
10960Sstevel@tonic-gate uint64_t scanned = 0;
10970Sstevel@tonic-gate
10980Sstevel@tonic-gate if (kstat_chain_update(kctl) == -1) {
10990Sstevel@tonic-gate warn(gettext("can't update kstat chain"));
11000Sstevel@tonic-gate return (0);
11010Sstevel@tonic-gate }
11020Sstevel@tonic-gate
11030Sstevel@tonic-gate for (ksp = kctl->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
11040Sstevel@tonic-gate if (strcmp(ksp->ks_module, "cpu_stat") == 0) {
11050Sstevel@tonic-gate if (kstat_read(kctl, ksp, NULL) != -1) {
11060Sstevel@tonic-gate scanned += ((cpu_stat_t *)
11070Sstevel@tonic-gate ksp->ks_data)->cpu_vminfo.scan;
11083247Sgjelinek } else {
11090Sstevel@tonic-gate return (-1);
11103247Sgjelinek }
11110Sstevel@tonic-gate }
11120Sstevel@tonic-gate }
11130Sstevel@tonic-gate
11140Sstevel@tonic-gate *scannedp = scanned;
11150Sstevel@tonic-gate return (0);
11160Sstevel@tonic-gate }
11170Sstevel@tonic-gate
11180Sstevel@tonic-gate /*
11193247Sgjelinek * Determine if the global page scanner is running, during which no memory
11203247Sgjelinek * caps should be enforced, to prevent interference with the global page
11213247Sgjelinek * scanner.
11223247Sgjelinek */
11233247Sgjelinek static boolean_t
is_global_scanner_running()11243247Sgjelinek is_global_scanner_running()
11253247Sgjelinek {
11263247Sgjelinek /* measure delta in page scan count */
11273247Sgjelinek static uint64_t new_sp = 0;
11283247Sgjelinek static uint64_t old_sp = 0;
11293247Sgjelinek boolean_t res = B_FALSE;
11303247Sgjelinek
11313247Sgjelinek if (get_globally_scanned_pages(&new_sp) == 0) {
11323247Sgjelinek if (old_sp != 0 && (new_sp - old_sp) > 0) {
11333247Sgjelinek debug("global memory pressure detected (%llu "
11343247Sgjelinek "pages scanned since last interval)\n",
11353247Sgjelinek (unsigned long long)(new_sp - old_sp));
11363247Sgjelinek res = B_TRUE;
11373247Sgjelinek }
11383247Sgjelinek old_sp = new_sp;
11393247Sgjelinek } else {
11403247Sgjelinek warn(gettext("unable to read cpu statistics"));
11413247Sgjelinek new_sp = old_sp;
11423247Sgjelinek }
11433247Sgjelinek
11443247Sgjelinek return (res);
11453247Sgjelinek }
11463247Sgjelinek
11473247Sgjelinek /*
11483247Sgjelinek * If soft caps are in use, determine if global memory pressure exceeds the
11493247Sgjelinek * configured maximum above which soft caps are enforced.
11503247Sgjelinek */
11513247Sgjelinek static boolean_t
must_enforce_soft_caps()11523247Sgjelinek must_enforce_soft_caps()
11533247Sgjelinek {
11543247Sgjelinek /*
11553247Sgjelinek * Check for changes to the amount of installed physical memory, to
11563247Sgjelinek * compute the current memory pressure.
11573247Sgjelinek */
11583247Sgjelinek update_phys_total();
11593247Sgjelinek
11603247Sgjelinek memory_pressure = 100 - (int)((sysconf(_SC_AVPHYS_PAGES) * page_size_kb)
11613247Sgjelinek * 100.0 / phys_total);
11623247Sgjelinek memory_pressure_sample++;
11633247Sgjelinek if (rcfg.rcfg_memory_cap_enforcement_pressure > 0 &&
11643247Sgjelinek memory_pressure > rcfg.rcfg_memory_cap_enforcement_pressure) {
11653247Sgjelinek return (B_TRUE);
11663247Sgjelinek }
11673247Sgjelinek
11683247Sgjelinek return (B_FALSE);
11693247Sgjelinek }
11703247Sgjelinek
11713247Sgjelinek /*
11720Sstevel@tonic-gate * Update the shared statistics file with each collection's current statistics.
11730Sstevel@tonic-gate * Return zero on success.
11740Sstevel@tonic-gate */
11750Sstevel@tonic-gate static int
update_statistics(void)11760Sstevel@tonic-gate update_statistics(void)
11770Sstevel@tonic-gate {
11780Sstevel@tonic-gate int fd, res;
11790Sstevel@tonic-gate static char template[LINELEN];
11800Sstevel@tonic-gate
11810Sstevel@tonic-gate /*
1182442Sgm149974 * Try to create a directory irrespective of whether it is existing
1183442Sgm149974 * or not. If it is not there then it will create. Otherwise any way
1184442Sgm149974 * it will fail at mkstemp call below.
1185442Sgm149974 */
1186442Sgm149974 (void) mkdir(STAT_FILE_DIR, 0755);
1187442Sgm149974
1188442Sgm149974 /*
11890Sstevel@tonic-gate * Create a temporary file.
11900Sstevel@tonic-gate */
11910Sstevel@tonic-gate if (sizeof (template) < (strlen(rcfg.rcfg_stat_file) +
11920Sstevel@tonic-gate strlen(STAT_TEMPLATE_SUFFIX) + 1)) {
11930Sstevel@tonic-gate debug("temporary file template size too small\n");
11940Sstevel@tonic-gate return (-1);
11950Sstevel@tonic-gate }
11960Sstevel@tonic-gate (void) strcpy(template, rcfg.rcfg_stat_file);
11970Sstevel@tonic-gate (void) strcat(template, STAT_TEMPLATE_SUFFIX);
11980Sstevel@tonic-gate (void) rfd_reserve(1);
11990Sstevel@tonic-gate fd = mkstemp(template);
12000Sstevel@tonic-gate
12010Sstevel@tonic-gate /*
12020Sstevel@tonic-gate * Write the header and per-collection statistics.
12030Sstevel@tonic-gate */
12040Sstevel@tonic-gate if (fd >= 0) {
12050Sstevel@tonic-gate rcapd_stat_hdr_t rs;
12060Sstevel@tonic-gate
12070Sstevel@tonic-gate rs.rs_pid = rcapd_pid;
12080Sstevel@tonic-gate rs.rs_time = gethrtime();
12090Sstevel@tonic-gate ASSERT(sizeof (rs.rs_mode) > strlen(rcfg.rcfg_mode_name));
12100Sstevel@tonic-gate (void) strcpy(rs.rs_mode, rcfg.rcfg_mode_name);
12110Sstevel@tonic-gate rs.rs_pressure_cur = memory_pressure;
12120Sstevel@tonic-gate rs.rs_pressure_cap = rcfg.rcfg_memory_cap_enforcement_pressure;
12130Sstevel@tonic-gate rs.rs_pressure_sample = memory_pressure_sample;
12140Sstevel@tonic-gate
12150Sstevel@tonic-gate if (fchmod(fd, 0644) == 0 && write(fd, &rs, sizeof (rs)) ==
12160Sstevel@tonic-gate sizeof (rs)) {
12170Sstevel@tonic-gate list_walk_collection(report_collection_cb,
1218*13093SRoger.Faulkner@Oracle.COM (void *)(intptr_t)fd);
12190Sstevel@tonic-gate /*
12200Sstevel@tonic-gate * Replace the existing statistics file with this new
12210Sstevel@tonic-gate * one.
12220Sstevel@tonic-gate */
12230Sstevel@tonic-gate res = rename(template, rcfg.rcfg_stat_file);
12240Sstevel@tonic-gate } else
12250Sstevel@tonic-gate res = -1;
12260Sstevel@tonic-gate (void) close(fd);
12270Sstevel@tonic-gate } else
12280Sstevel@tonic-gate res = -1;
12290Sstevel@tonic-gate
12300Sstevel@tonic-gate return (res);
12310Sstevel@tonic-gate }
12320Sstevel@tonic-gate
12330Sstevel@tonic-gate /*
12340Sstevel@tonic-gate * Verify the statistics file can be created and written to, and die if an
12350Sstevel@tonic-gate * existing file may be in use by another rcapd.
12360Sstevel@tonic-gate */
12370Sstevel@tonic-gate static int
verify_statistics(void)12380Sstevel@tonic-gate verify_statistics(void)
12390Sstevel@tonic-gate {
12400Sstevel@tonic-gate pid_t pid;
12410Sstevel@tonic-gate
12420Sstevel@tonic-gate /*
12430Sstevel@tonic-gate * Warn if another instance of rcapd might be active.
12440Sstevel@tonic-gate */
12450Sstevel@tonic-gate (void) rfd_reserve(1);
12460Sstevel@tonic-gate pid = stat_get_rcapd_pid(rcfg.rcfg_stat_file);
12470Sstevel@tonic-gate if (pid != rcapd_pid && pid != -1)
12480Sstevel@tonic-gate die(gettext("%s exists; rcapd may already be active\n"),
12490Sstevel@tonic-gate rcfg.rcfg_stat_file);
12500Sstevel@tonic-gate
12510Sstevel@tonic-gate return (update_statistics());
12520Sstevel@tonic-gate }
12530Sstevel@tonic-gate
12540Sstevel@tonic-gate static int
sum_excess_cb(lcollection_t * lcol,void * arg)12550Sstevel@tonic-gate sum_excess_cb(lcollection_t *lcol, void *arg)
12560Sstevel@tonic-gate {
12570Sstevel@tonic-gate uint64_t *sum_excess = arg;
12580Sstevel@tonic-gate
12590Sstevel@tonic-gate *sum_excess += MAX((int64_t)0, (int64_t)(lcol->lcol_rss -
12600Sstevel@tonic-gate lcol->lcol_rss_cap));
12610Sstevel@tonic-gate return (0);
12620Sstevel@tonic-gate }
12630Sstevel@tonic-gate
12643247Sgjelinek /*
12653247Sgjelinek * Compute the quantity of memory (in kilobytes) above the cap enforcement
12663247Sgjelinek * pressure. Set the scan goal to that quantity (or at most the excess).
12673247Sgjelinek */
12683247Sgjelinek static void
compute_soft_scan_goal(soft_scan_arg_t * argp)12693247Sgjelinek compute_soft_scan_goal(soft_scan_arg_t *argp)
12703247Sgjelinek {
12713247Sgjelinek /*
12723247Sgjelinek * Compute the sum of the collections' excesses, which will be the
12733247Sgjelinek * denominator.
12743247Sgjelinek */
12753247Sgjelinek argp->ssa_sum_excess = 0;
12763247Sgjelinek list_walk_collection(sum_excess_cb, &(argp->ssa_sum_excess));
12773247Sgjelinek
12783247Sgjelinek argp->ssa_scan_goal = MIN((sysconf(_SC_PHYS_PAGES) *
12793247Sgjelinek (100 - rcfg.rcfg_memory_cap_enforcement_pressure) / 100 -
12803247Sgjelinek sysconf(_SC_AVPHYS_PAGES)) * page_size_kb,
12813247Sgjelinek argp->ssa_sum_excess);
12823247Sgjelinek }
12833247Sgjelinek
/*
 * Emit the command-line usage summary through info().
 */
static void
rcapd_usage(void)
{
	info(gettext("usage: rcapd [-d]\n"));
}
12890Sstevel@tonic-gate
12900Sstevel@tonic-gate void
check_update_statistics(void)12910Sstevel@tonic-gate check_update_statistics(void)
12920Sstevel@tonic-gate {
12930Sstevel@tonic-gate hrtime_t now = gethrtime();
12940Sstevel@tonic-gate
12950Sstevel@tonic-gate if (EVENT_TIME(now, next_report)) {
12960Sstevel@tonic-gate debug("updating statistics...\n");
12970Sstevel@tonic-gate list_walk_collection(simple_report_collection_cb, NULL);
12980Sstevel@tonic-gate if (update_statistics() != 0)
12990Sstevel@tonic-gate debug("couldn't update statistics");
13000Sstevel@tonic-gate next_report = NEXT_REPORT_EVENT_TIME(now,
13010Sstevel@tonic-gate rcfg.rcfg_report_interval);
13020Sstevel@tonic-gate }
13030Sstevel@tonic-gate }
13040Sstevel@tonic-gate
13050Sstevel@tonic-gate static void
verify_and_set_privileges(void)13060Sstevel@tonic-gate verify_and_set_privileges(void)
13070Sstevel@tonic-gate {
13080Sstevel@tonic-gate priv_set_t *required =
13090Sstevel@tonic-gate priv_str_to_set("zone,sys_resource,proc_owner", ",", NULL);
13100Sstevel@tonic-gate
13110Sstevel@tonic-gate /*
13120Sstevel@tonic-gate * Ensure the required privileges, suitable for controlling processes,
13130Sstevel@tonic-gate * are possessed.
13140Sstevel@tonic-gate */
13150Sstevel@tonic-gate if (setppriv(PRIV_SET, PRIV_PERMITTED, required) != 0 || setppriv(
13160Sstevel@tonic-gate PRIV_SET, PRIV_EFFECTIVE, required) != 0)
13170Sstevel@tonic-gate die(gettext("can't set requisite privileges"));
13180Sstevel@tonic-gate
13190Sstevel@tonic-gate /*
13200Sstevel@tonic-gate * Ensure access to /var/run/daemon.
13210Sstevel@tonic-gate */
13220Sstevel@tonic-gate if (setreuid(DAEMON_UID, DAEMON_UID) != 0)
13230Sstevel@tonic-gate die(gettext("cannot become user daemon"));
13240Sstevel@tonic-gate
13250Sstevel@tonic-gate priv_freeset(required);
13260Sstevel@tonic-gate }
13270Sstevel@tonic-gate
13283247Sgjelinek /*
13293247Sgjelinek * This function does the top-level work to determine if we should do any
13303247Sgjelinek * memory capping, and if so, it invokes the right call-backs to do the work.
13313247Sgjelinek */
13323247Sgjelinek static void
do_capping(hrtime_t now,hrtime_t * next_proc_walk)13333247Sgjelinek do_capping(hrtime_t now, hrtime_t *next_proc_walk)
13343247Sgjelinek {
13353247Sgjelinek boolean_t enforce_caps;
13363247Sgjelinek /* soft cap enforcement flag, depending on memory pressure */
13373247Sgjelinek boolean_t enforce_soft_caps;
13383247Sgjelinek /* avoid interference with kernel's page scanner */
13393247Sgjelinek boolean_t global_scanner_running;
13403247Sgjelinek sample_col_arg_t col_arg;
13413247Sgjelinek soft_scan_arg_t arg;
13423247Sgjelinek uint_t col_types = 0;
13433247Sgjelinek
13443247Sgjelinek /* check what kind of collections (project/zone) are capped */
13453247Sgjelinek list_walk_collection(col_type_cb, &col_types);
13463247Sgjelinek debug("collection types: 0x%x\n", col_types);
13473247Sgjelinek
13483247Sgjelinek /* no capped collections, skip checking rss */
13493247Sgjelinek if (col_types == 0)
13503247Sgjelinek return;
13513247Sgjelinek
13523247Sgjelinek /* Determine if soft caps are enforced. */
13533247Sgjelinek enforce_soft_caps = must_enforce_soft_caps();
13543247Sgjelinek
13553247Sgjelinek /* Determine if the global page scanner is running. */
13563247Sgjelinek global_scanner_running = is_global_scanner_running();
13573247Sgjelinek
13583247Sgjelinek /*
13593247Sgjelinek * Sample collections' member processes RSSes and recompute
13603247Sgjelinek * collections' excess.
13613247Sgjelinek */
13623247Sgjelinek rss_sample(B_FALSE, col_types);
13633247Sgjelinek
13643247Sgjelinek col_arg.sca_any_over_cap = B_FALSE;
13653247Sgjelinek col_arg.sca_project_over_cap = B_FALSE;
13663247Sgjelinek list_walk_collection(rss_sample_col_cb, &col_arg);
13673247Sgjelinek list_walk_collection(excess_print_cb, NULL);
13683247Sgjelinek debug("any collection/project over cap = %d, %d\n",
13693247Sgjelinek col_arg.sca_any_over_cap, col_arg.sca_project_over_cap);
13703247Sgjelinek
13713247Sgjelinek if (enforce_soft_caps)
13723247Sgjelinek debug("memory pressure %d%%\n", memory_pressure);
13733247Sgjelinek
13743247Sgjelinek /*
13753247Sgjelinek * Cap enforcement is determined by the previous conditions.
13763247Sgjelinek */
13773247Sgjelinek enforce_caps = !global_scanner_running && col_arg.sca_any_over_cap &&
13783247Sgjelinek (rcfg.rcfg_memory_cap_enforcement_pressure == 0 ||
13793247Sgjelinek enforce_soft_caps);
13803247Sgjelinek
13813247Sgjelinek debug("%senforcing caps\n", enforce_caps ? "" : "not ");
13823247Sgjelinek
13833247Sgjelinek /*
13843247Sgjelinek * If soft caps are in use, determine the size of the portion from each
13853247Sgjelinek * collection to scan for.
13863247Sgjelinek */
13873247Sgjelinek if (enforce_caps && enforce_soft_caps)
13883247Sgjelinek compute_soft_scan_goal(&arg);
13893247Sgjelinek
13903247Sgjelinek /*
13913247Sgjelinek * Victimize offending collections.
13923247Sgjelinek */
13933247Sgjelinek if (enforce_caps && (!enforce_soft_caps ||
13943247Sgjelinek (arg.ssa_scan_goal > 0 && arg.ssa_sum_excess > 0))) {
13953247Sgjelinek
13963247Sgjelinek /*
13973247Sgjelinek * Since at least one collection is over its cap & needs
13983247Sgjelinek * enforcing, check if it is at least time for a process walk
13993247Sgjelinek * (we could be well past time since we only walk /proc when
14003247Sgjelinek * we need to) and if so, update each collections process list
14013247Sgjelinek * in a single pass through /proc.
14023247Sgjelinek */
14033247Sgjelinek if (EVENT_TIME(now, *next_proc_walk)) {
14043247Sgjelinek debug("scanning process list...\n");
14053247Sgjelinek proc_walk_all(proc_cb); /* insert & mark */
14063247Sgjelinek list_walk_all(sweep_process_cb); /* free dead procs */
14073247Sgjelinek *next_proc_walk = NEXT_EVENT_TIME(now,
14083247Sgjelinek rcfg.rcfg_proc_walk_interval);
14093247Sgjelinek }
14103247Sgjelinek
14113247Sgjelinek gz_col = NULL;
14123247Sgjelinek if (enforce_soft_caps) {
14133247Sgjelinek debug("scan goal is %lldKB\n",
14143247Sgjelinek (long long)arg.ssa_scan_goal);
14153247Sgjelinek list_walk_collection(soft_scan_cb, &arg);
14163247Sgjelinek if (gz_capped && gz_col != NULL) {
14173247Sgjelinek /* process global zone */
14183247Sgjelinek arg.ssa_project_over_cap =
14193247Sgjelinek col_arg.sca_project_over_cap;
14203247Sgjelinek soft_scan_gz(gz_col, &arg);
14213247Sgjelinek }
14223247Sgjelinek } else {
14233247Sgjelinek list_walk_collection(scan_cb, NULL);
14243247Sgjelinek if (gz_capped && gz_col != NULL) {
14253247Sgjelinek /* process global zone */
14263247Sgjelinek scan_gz(gz_col, col_arg.sca_project_over_cap);
14273247Sgjelinek }
14283247Sgjelinek }
14293247Sgjelinek } else if (col_arg.sca_any_over_cap) {
14303247Sgjelinek list_walk_collection(unenforced_cap_cb, NULL);
14313247Sgjelinek }
14323247Sgjelinek }
14333247Sgjelinek
14340Sstevel@tonic-gate int
main(int argc,char * argv[])14350Sstevel@tonic-gate main(int argc, char *argv[])
14360Sstevel@tonic-gate {
14370Sstevel@tonic-gate int res;
14380Sstevel@tonic-gate int should_fork = 1; /* fork flag */
14390Sstevel@tonic-gate hrtime_t now; /* current time */
14400Sstevel@tonic-gate hrtime_t next; /* time of next event */
14410Sstevel@tonic-gate int sig; /* signal iteration */
14420Sstevel@tonic-gate struct rlimit rl;
14430Sstevel@tonic-gate hrtime_t next_proc_walk; /* time of next /proc scan */
14440Sstevel@tonic-gate hrtime_t next_configuration; /* time of next configuration */
14450Sstevel@tonic-gate hrtime_t next_rss_sample; /* (latest) time of next RSS sample */
14460Sstevel@tonic-gate
14470Sstevel@tonic-gate (void) set_message_priority(RCM_INFO);
1448*13093SRoger.Faulkner@Oracle.COM (void) setpname("rcapd");
14490Sstevel@tonic-gate rcapd_pid = getpid();
14500Sstevel@tonic-gate (void) chdir("/");
14510Sstevel@tonic-gate should_run = 1;
14520Sstevel@tonic-gate ever_ran = 0;
14530Sstevel@tonic-gate
14540Sstevel@tonic-gate (void) setlocale(LC_ALL, "");
14550Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN);
14560Sstevel@tonic-gate
14570Sstevel@tonic-gate /*
14580Sstevel@tonic-gate * Parse command-line options.
14590Sstevel@tonic-gate */
14600Sstevel@tonic-gate while ((res = getopt(argc, argv, "dF")) > 0)
14610Sstevel@tonic-gate switch (res) {
14620Sstevel@tonic-gate case 'd':
14630Sstevel@tonic-gate should_fork = 0;
14640Sstevel@tonic-gate if (debug_mode == 0) {
14650Sstevel@tonic-gate debug_mode = 1;
14660Sstevel@tonic-gate (void) set_message_priority(RCM_DEBUG);
14670Sstevel@tonic-gate } else
14680Sstevel@tonic-gate (void) set_message_priority(RCM_DEBUG_HIGH);
14690Sstevel@tonic-gate break;
14700Sstevel@tonic-gate case 'F':
14710Sstevel@tonic-gate should_fork = 0;
14720Sstevel@tonic-gate break;
14730Sstevel@tonic-gate default:
14740Sstevel@tonic-gate rcapd_usage();
14750Sstevel@tonic-gate return (E_USAGE);
14760Sstevel@tonic-gate /*NOTREACHED*/
14770Sstevel@tonic-gate }
14780Sstevel@tonic-gate
14790Sstevel@tonic-gate /*
14804119Stn143363 * Read the configuration.
14814119Stn143363 */
14824119Stn143363 if (rcfg_read(&rcfg, verify_statistics) != E_SUCCESS) {
14834119Stn143363 warn(gettext("resource caps not configured\n"));
14844119Stn143363 return (SMF_EXIT_ERR_CONFIG);
14854119Stn143363 }
14864119Stn143363
14874119Stn143363 /*
14880Sstevel@tonic-gate * If not debugging, fork and continue operating, changing the
14890Sstevel@tonic-gate * destination of messages to syslog().
14900Sstevel@tonic-gate */
14910Sstevel@tonic-gate if (should_fork == 1) {
14920Sstevel@tonic-gate pid_t child;
14930Sstevel@tonic-gate debug("forking\n");
14940Sstevel@tonic-gate child = fork();
14950Sstevel@tonic-gate if (child == -1)
14960Sstevel@tonic-gate die(gettext("cannot fork"));
14970Sstevel@tonic-gate if (child > 0)
14980Sstevel@tonic-gate return (0);
14990Sstevel@tonic-gate else {
15000Sstevel@tonic-gate rcapd_pid = getpid();
15010Sstevel@tonic-gate (void) set_message_destination(RCD_SYSLOG);
15020Sstevel@tonic-gate (void) fclose(stdin);
15030Sstevel@tonic-gate (void) fclose(stdout);
15040Sstevel@tonic-gate (void) fclose(stderr);
15050Sstevel@tonic-gate }
15060Sstevel@tonic-gate /*
15070Sstevel@tonic-gate * Start a new session and detatch from the controlling tty.
15080Sstevel@tonic-gate */
15090Sstevel@tonic-gate if (setsid() == (pid_t)-1)
15100Sstevel@tonic-gate debug(gettext("setsid() failed; cannot detach from "
15110Sstevel@tonic-gate "terminal"));
15120Sstevel@tonic-gate }
15130Sstevel@tonic-gate
15140Sstevel@tonic-gate finish_configuration();
15150Sstevel@tonic-gate should_reconfigure = 0;
15160Sstevel@tonic-gate
15170Sstevel@tonic-gate /*
15180Sstevel@tonic-gate * Check that required privileges are possessed.
15190Sstevel@tonic-gate */
15200Sstevel@tonic-gate verify_and_set_privileges();
15210Sstevel@tonic-gate
15220Sstevel@tonic-gate now = next_report = next_proc_walk = next_rss_sample = gethrtime();
15230Sstevel@tonic-gate next_configuration = NEXT_EVENT_TIME(gethrtime(),
15240Sstevel@tonic-gate rcfg.rcfg_reconfiguration_interval);
15250Sstevel@tonic-gate
15260Sstevel@tonic-gate /*
15270Sstevel@tonic-gate * Open the kstat chain.
15280Sstevel@tonic-gate */
15290Sstevel@tonic-gate kctl = kstat_open();
15300Sstevel@tonic-gate if (kctl == NULL)
15310Sstevel@tonic-gate die(gettext("can't open kstats"));
15320Sstevel@tonic-gate
15330Sstevel@tonic-gate /*
15340Sstevel@tonic-gate * Set RLIMIT_NOFILE as high as practical, so roughly 10K processes can
15350Sstevel@tonic-gate * be effectively managed without revoking descriptors (at 3 per
15360Sstevel@tonic-gate * process).
15370Sstevel@tonic-gate */
15380Sstevel@tonic-gate rl.rlim_cur = 32 * 1024;
15390Sstevel@tonic-gate rl.rlim_max = 32 * 1024;
15400Sstevel@tonic-gate if (setrlimit(RLIMIT_NOFILE, &rl) != 0 &&
15410Sstevel@tonic-gate getrlimit(RLIMIT_NOFILE, &rl) == 0) {
15420Sstevel@tonic-gate rl.rlim_cur = rl.rlim_max;
15430Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &rl);
15440Sstevel@tonic-gate }
15451914Scasper (void) enable_extended_FILE_stdio(-1, -1);
15461914Scasper
15470Sstevel@tonic-gate if (getrlimit(RLIMIT_NOFILE, &rl) == 0)
15480Sstevel@tonic-gate debug("fd limit: %lu\n", rl.rlim_cur);
15490Sstevel@tonic-gate else
15500Sstevel@tonic-gate debug("fd limit: unknown\n");
15510Sstevel@tonic-gate
15523247Sgjelinek get_page_size();
15533247Sgjelinek my_zoneid = getzoneid();
15543247Sgjelinek
15550Sstevel@tonic-gate /*
15560Sstevel@tonic-gate * Handle those signals whose (default) exit disposition
15570Sstevel@tonic-gate * prevents rcapd from finishing scanning before terminating.
15580Sstevel@tonic-gate */
15590Sstevel@tonic-gate (void) sigset(SIGINT, terminate_signal);
15600Sstevel@tonic-gate (void) sigset(SIGQUIT, abort_signal);
15610Sstevel@tonic-gate (void) sigset(SIGILL, abort_signal);
15620Sstevel@tonic-gate (void) sigset(SIGEMT, abort_signal);
15630Sstevel@tonic-gate (void) sigset(SIGFPE, abort_signal);
15640Sstevel@tonic-gate (void) sigset(SIGBUS, abort_signal);
15650Sstevel@tonic-gate (void) sigset(SIGSEGV, abort_signal);
15660Sstevel@tonic-gate (void) sigset(SIGSYS, abort_signal);
15670Sstevel@tonic-gate (void) sigset(SIGPIPE, terminate_signal);
15680Sstevel@tonic-gate (void) sigset(SIGALRM, terminate_signal);
15690Sstevel@tonic-gate (void) sigset(SIGTERM, terminate_signal);
15700Sstevel@tonic-gate (void) sigset(SIGUSR1, terminate_signal);
15710Sstevel@tonic-gate (void) sigset(SIGUSR2, terminate_signal);
15720Sstevel@tonic-gate (void) sigset(SIGPOLL, terminate_signal);
15730Sstevel@tonic-gate (void) sigset(SIGVTALRM, terminate_signal);
15740Sstevel@tonic-gate (void) sigset(SIGXCPU, abort_signal);
15750Sstevel@tonic-gate (void) sigset(SIGXFSZ, abort_signal);
15760Sstevel@tonic-gate for (sig = SIGRTMIN; sig <= SIGRTMAX; sig++)
15770Sstevel@tonic-gate (void) sigset(sig, terminate_signal);
15780Sstevel@tonic-gate
15790Sstevel@tonic-gate /*
15800Sstevel@tonic-gate * Install a signal handler for reconfiguration processing.
15810Sstevel@tonic-gate */
15820Sstevel@tonic-gate (void) sigset(SIGHUP, sighup);
15830Sstevel@tonic-gate
15840Sstevel@tonic-gate /*
15850Sstevel@tonic-gate * Determine which process collections to cap.
15860Sstevel@tonic-gate */
15870Sstevel@tonic-gate lcollection_update(LCU_COMPLETE);
15880Sstevel@tonic-gate
15890Sstevel@tonic-gate /*
15900Sstevel@tonic-gate * Loop forever, monitoring collections' resident set sizes and
15913247Sgjelinek * enforcing their caps. Look for changes in caps as well as
15923247Sgjelinek * responding to requests to reread the configuration. Update
15933247Sgjelinek * per-collection statistics periodically.
15940Sstevel@tonic-gate */
15950Sstevel@tonic-gate while (should_run != 0) {
15960Sstevel@tonic-gate struct timespec ts;
15970Sstevel@tonic-gate
15980Sstevel@tonic-gate /*
15990Sstevel@tonic-gate * Announce that rcapd is starting.
16000Sstevel@tonic-gate */
16010Sstevel@tonic-gate if (ever_ran == 0) {
16020Sstevel@tonic-gate info(gettext("starting\n"));
16030Sstevel@tonic-gate ever_ran = 1;
16040Sstevel@tonic-gate }
16050Sstevel@tonic-gate
16060Sstevel@tonic-gate /*
16073247Sgjelinek * Check the configuration at every next_configuration interval.
16083247Sgjelinek * Update the rss data once every next_rss_sample interval.
16093247Sgjelinek * The condition of global memory pressure is also checked at
16103247Sgjelinek * the same frequency, if strict caps are in use.
16110Sstevel@tonic-gate */
16120Sstevel@tonic-gate now = gethrtime();
16130Sstevel@tonic-gate
16140Sstevel@tonic-gate /*
16154119Stn143363 * Detect configuration and cap changes only when SIGHUP
16164119Stn143363 * is received. Call reconfigure to apply new configuration
16174119Stn143363 * parameters.
16180Sstevel@tonic-gate */
16194119Stn143363 if (should_reconfigure == 1) {
16204119Stn143363 reread_configuration();
16214119Stn143363 should_reconfigure = 0;
16223247Sgjelinek reconfigure(now, &next_configuration, &next_proc_walk,
16233247Sgjelinek &next_rss_sample);
16244119Stn143363 }
16254119Stn143363
16264119Stn143363 if (EVENT_TIME(now, next_configuration)) {
16274119Stn143363 reconfigure(now, &next_configuration, &next_proc_walk,
16284119Stn143363 &next_rss_sample);
16290Sstevel@tonic-gate }
16300Sstevel@tonic-gate
16313247Sgjelinek /*
16323247Sgjelinek * Do the main work for enforcing caps.
16333247Sgjelinek */
16340Sstevel@tonic-gate if (EVENT_TIME(now, next_rss_sample)) {
16353247Sgjelinek do_capping(now, &next_proc_walk);
16360Sstevel@tonic-gate
16370Sstevel@tonic-gate next_rss_sample = NEXT_EVENT_TIME(now,
16380Sstevel@tonic-gate rcfg.rcfg_rss_sample_interval);
16390Sstevel@tonic-gate }
16400Sstevel@tonic-gate
16410Sstevel@tonic-gate /*
16420Sstevel@tonic-gate * Update the statistics file, if it's time.
16430Sstevel@tonic-gate */
16440Sstevel@tonic-gate check_update_statistics();
16450Sstevel@tonic-gate
16460Sstevel@tonic-gate /*
16470Sstevel@tonic-gate * Sleep for some time before repeating.
16480Sstevel@tonic-gate */
16490Sstevel@tonic-gate now = gethrtime();
16500Sstevel@tonic-gate next = next_configuration;
16510Sstevel@tonic-gate next = POSITIVE_MIN(next, next_report);
16520Sstevel@tonic-gate next = POSITIVE_MIN(next, next_rss_sample);
16530Sstevel@tonic-gate if (next > now && should_run != 0) {
16540Sstevel@tonic-gate debug("sleeping %-4.2f seconds\n", (float)(next -
16550Sstevel@tonic-gate now) / (float)NANOSEC);
16560Sstevel@tonic-gate hrt2ts(next - now, &ts);
16570Sstevel@tonic-gate (void) nanosleep(&ts, NULL);
16580Sstevel@tonic-gate }
16590Sstevel@tonic-gate }
16600Sstevel@tonic-gate if (termination_signal != 0)
16610Sstevel@tonic-gate debug("exiting due to signal %d\n", termination_signal);
16620Sstevel@tonic-gate if (ever_ran != 0)
16630Sstevel@tonic-gate info(gettext("exiting\n"));
16640Sstevel@tonic-gate
16650Sstevel@tonic-gate /*
16660Sstevel@tonic-gate * Unlink the statistics file before exiting.
16670Sstevel@tonic-gate */
16680Sstevel@tonic-gate if (rcfg.rcfg_stat_file[0] != 0)
16690Sstevel@tonic-gate (void) unlink(rcfg.rcfg_stat_file);
16700Sstevel@tonic-gate
16710Sstevel@tonic-gate return (E_SUCCESS);
16720Sstevel@tonic-gate }
1673