/* MIB service - proc.c - functionality based on service process tables */
/* Eventually, the CTL_PROC subtree might end up here as well. */

#include "mib.h"

#include <sys/exec.h>
#include <minix/sysinfo.h>

#include <machine/archtypes.h>
#include "kernel/proc.h"
#include "servers/pm/mproc.h"
#include "servers/vfs/const.h"
#include "servers/vfs/fproc.h"

/* ixfer-typed aliases of the remote table structures, for safe IPC copies. */
typedef struct proc ixfer_proc_t;
typedef struct mproc ixfer_mproc_t;
typedef struct fproc ixfer_fproc_t;

/* Local snapshots of the kernel, PM, and VFS process tables. */
static ixfer_proc_t proc_tab[NR_TASKS + NR_PROCS];
static ixfer_mproc_t mproc_tab[NR_PROCS];
static ixfer_fproc_t fproc_tab[NR_PROCS];

/*
 * The number of processes added to the current number of processes when doing
 * a size estimation, so that the actual data retrieval does not end up with
 * too little space if new processes have forked between the two calls.  We do
 * a process table update only once per clock tick, which means that typically
 * no update will take place between the user process's size estimation request
 * and its subsequent data retrieval request.  On the other hand, if we do
 * update process tables in between, quite a bit might have changed.
 */
#define EXTRA_PROCS	8

#define HASH_SLOTS	(NR_PROCS / 4)	/* expected nr. of processes in use */
#define NO_SLOT		(-1)
static int hash_tab[HASH_SLOTS];	/* hash table mapping from PID.. */
static int hnext_tab[NR_PROCS];		/* ..to PM process slot */

static clock_t tabs_updated = 0;	/* when the tables were last updated */
static int tabs_valid = TRUE;		/* FALSE if obtaining tables failed */

/*
 * Update the process tables by pulling in new copies from the kernel, PM, and
 * VFS, but only every so often and only if it has not failed before.  Return
 * TRUE iff the tables are now valid.
 */
static int
update_tables(void)
{
	clock_t now;
	pid_t pid;
	int r, kslot, mslot, hslot;

	/*
	 * If retrieving the tables failed at some point, do not keep trying
	 * all the time.  Such a failure is very unlikely to be transient.
	 */
	if (tabs_valid == FALSE)
		return FALSE;

	/*
	 * Update the tables once per clock tick at most.  The update operation
	 * is rather heavy, transferring several hundreds of kilobytes between
	 * servers.  Userland should be able to live with information that is
	 * outdated by at most one clock tick.
	 */
	now = getticks();

	if (tabs_updated != 0 && tabs_updated == now)
		return TRUE;

	/* Perform an actual update now. */
	tabs_valid = FALSE;

	/* Retrieve and check the kernel process table. */
	if ((r = sys_getproctab(proc_tab)) != OK) {
		printf("MIB: unable to obtain kernel process table (%d)\n", r);

		return FALSE;
	}

	/* A bad magic number indicates a structure layout mismatch. */
	for (kslot = 0; kslot < NR_TASKS + NR_PROCS; kslot++) {
		if (proc_tab[kslot].p_magic != PMAGIC) {
			printf("MIB: kernel process table mismatch\n");

			return FALSE;
		}
	}

	/* Retrieve and check the PM process table. */
	r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc_tab, sizeof(mproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain PM process table (%d)\n", r);

		return FALSE;
	}

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_magic != MP_MAGIC) {
			printf("MIB: PM process table mismatch\n");

			return FALSE;
		}
	}

	/* Retrieve the VFS process table, which has no magic number. */
	r = getsysinfo(VFS_PROC_NR, SI_PROC_TAB, fproc_tab, sizeof(fproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain VFS process table (%d)\n", r);

		return FALSE;
	}

	tabs_valid = TRUE;
	tabs_updated = now;

	/*
	 * Build a hash table mapping from process IDs to slot numbers, for
	 * fast access.  TODO: decide if this is better done on demand only.
	 */
	for (hslot = 0; hslot < HASH_SLOTS; hslot++)
		hash_tab[hslot] = NO_SLOT;

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_flags & IN_USE) {
			/* Skip entries without a valid (positive) PID. */
			if ((pid = mproc_tab[mslot].mp_pid) <= 0)
				continue;

			hslot = mproc_tab[mslot].mp_pid % HASH_SLOTS;

			/* Prepend the slot to its bucket's chain. */
			hnext_tab[mslot] = hash_tab[hslot];
			hash_tab[hslot] = mslot;
		}
	}

	return TRUE;
}

/*
 * Return the PM slot number for the given PID, or NO_SLOT if the PID is not in
 * use by
 * a process.
 */
static int
get_mslot(pid_t pid)
{
	int mslot;

	/* PID 0 identifies the kernel; checking this is up to the caller. */
	if (pid <= 0)
		return NO_SLOT;

	/* Walk the hash chain of the PID's bucket until a match is found. */
	for (mslot = hash_tab[pid % HASH_SLOTS]; mslot != NO_SLOT;
	    mslot = hnext_tab[mslot])
		if (mproc_tab[mslot].mp_pid == pid)
			break;

	/* Either the matching slot, or NO_SLOT if the chain ran out. */
	return mslot;
}

/*
 * Store the given number of clock ticks as a timeval structure.
 */
static void
ticks_to_timeval(struct timeval * tv, clock_t ticks)
{
	clock_t hz;

	hz = sys_hz();

	tv->tv_sec = ticks / hz;
	/* Widen to 64 bits before scaling, to avoid intermediate overflow. */
	tv->tv_usec = (long)((ticks % hz) * 1000000ULL / hz);
}

/*
 * Generate a wchan message text for the cases that the process is blocked on
 * IPC with another process, of which the endpoint is given as 'endpt' here.
 * The name of the other process is to be stored in 'wmesg', which is a buffer
 * of size 'wmsz'.  The result should be null terminated.  If 'ipc' is set, the
 * process is blocked on a direct IPC call, in which case the name of the other
 * process is enclosed in parentheses.  If 'ipc' is not set, the call is made
 * indirectly through VFS, and the name of the other process should not be
 * enclosed in parentheses.  If no name can be obtained, we use the endpoint of
 * the other process instead.
 */
static void
fill_wmesg(char * wmesg, size_t wmsz, endpoint_t endpt, int ipc)
{
	const char *name;
	int mslot;

	switch (endpt) {
	case ANY:
		name = "any";
		break;
	case SELF:
		name = "self";
		break;
	case NONE:
		name = "none";
		break;
	default:
		/*
		 * A regular endpoint: map it to a kernel process slot, and
		 * use the process name if the slot is valid and in use.
		 * Negative slots are kernel tasks, which are always in use.
		 */
		mslot = _ENDPOINT_P(endpt);
		if (mslot >= -NR_TASKS && mslot < NR_PROCS &&
		    (mslot < 0 || (mproc_tab[mslot].mp_flags & IN_USE)))
			name = proc_tab[NR_TASKS + mslot].p_name;
		else
			name = NULL;
	}

	if (name != NULL)
		snprintf(wmesg, wmsz, "%s%s%s",
		    ipc ? "(" : "", name, ipc ? ")" : "");
	else
		/* No name available; fall back to the numeric endpoint. */
		snprintf(wmesg, wmsz, "%s%d%s",
		    ipc ? "(" : "", endpt, ipc ? ")" : "");
}

/*
 * Return the LWP status of a process, along with additional information in
 * case the process is sleeping (LSSLEEP): a wchan value and text to indicate
 * what the process is sleeping on, and possibly a flag field modification to
 * indicate that the sleep is interruptible.
 */
static int
get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
	int32_t * flag)
{
	struct mproc *mp;
	struct fproc *fp;
	struct proc *kp;
	const char *wmesg;
	uint64_t wchan;
	endpoint_t endpt;

	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];
	kp = &proc_tab[NR_TASKS + mslot];

	/*
	 * First cover all the cases that the process is not sleeping.
In 241*25d39513SDavid van Moolenbroek * those cases, we need not return additional sleep information either. 242*25d39513SDavid van Moolenbroek */ 243*25d39513SDavid van Moolenbroek if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE)) 244*25d39513SDavid van Moolenbroek return LSZOMB; 245*25d39513SDavid van Moolenbroek 246*25d39513SDavid van Moolenbroek if (mp->mp_flags & EXITING) 247*25d39513SDavid van Moolenbroek return LSDEAD; 248*25d39513SDavid van Moolenbroek 249*25d39513SDavid van Moolenbroek if ((mp->mp_flags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP)) 250*25d39513SDavid van Moolenbroek return LSSTOP; 251*25d39513SDavid van Moolenbroek 252*25d39513SDavid van Moolenbroek if (proc_is_runnable(kp)) 253*25d39513SDavid van Moolenbroek return LSRUN; 254*25d39513SDavid van Moolenbroek 255*25d39513SDavid van Moolenbroek /* 256*25d39513SDavid van Moolenbroek * The process is sleeping. In that case, we must also figure out why, 257*25d39513SDavid van Moolenbroek * and return an appropriate wchan value and human-readable wmesg text. 258*25d39513SDavid van Moolenbroek * 259*25d39513SDavid van Moolenbroek * The process can be blocked on either a known sleep state in PM or 260*25d39513SDavid van Moolenbroek * VFS, or otherwise on IPC communication with another process, or 261*25d39513SDavid van Moolenbroek * otherwise on a kernel RTS flag. In each case, decide what to use as 262*25d39513SDavid van Moolenbroek * wchan value and wmesg text, and whether the sleep is interruptible. 263*25d39513SDavid van Moolenbroek * 264*25d39513SDavid van Moolenbroek * The wchan value should be unique for the sleep reason. 
We use its 265*25d39513SDavid van Moolenbroek * lower eight bits to indicate a class: 266*25d39513SDavid van Moolenbroek * 0x00 = kernel task 267*25d39513SDavid van Moolenbroek * 0x01 = kerel RTS block 268*25d39513SDavid van Moolenbroek * 0x02 = PM call 269*25d39513SDavid van Moolenbroek * 0x03 = VFS call 270*25d39513SDavid van Moolenbroek * 0x04 = MIB call 271*25d39513SDavid van Moolenbroek * 0xff = blocked on process 272*25d39513SDavid van Moolenbroek * The upper bits are used for class-specific information. The actual 273*25d39513SDavid van Moolenbroek * value does not really matter, as long as it is nonzero and there is 274*25d39513SDavid van Moolenbroek * no overlap between the different values. 275*25d39513SDavid van Moolenbroek */ 276*25d39513SDavid van Moolenbroek wchan = 0; 277*25d39513SDavid van Moolenbroek wmesg = NULL; 278*25d39513SDavid van Moolenbroek 279*25d39513SDavid van Moolenbroek /* 280*25d39513SDavid van Moolenbroek * First see if the process is marked as blocked in the tables of PM or 281*25d39513SDavid van Moolenbroek * VFS. Such a block reason is always an interruptible sleep. Note 282*25d39513SDavid van Moolenbroek * that we do not use the kernel table at all in this case: each of the 283*25d39513SDavid van Moolenbroek * three tables is consistent within itself, but not necessarily 284*25d39513SDavid van Moolenbroek * consistent with any of the other tables, so we avoid internal 285*25d39513SDavid van Moolenbroek * mismatches if we can. 
286*25d39513SDavid van Moolenbroek */ 287*25d39513SDavid van Moolenbroek if (mp->mp_flags & WAITING) { 288*25d39513SDavid van Moolenbroek wchan = 0x102; 289*25d39513SDavid van Moolenbroek wmesg = "wait"; 290*25d39513SDavid van Moolenbroek } else if (mp->mp_flags & SIGSUSPENDED) { 291*25d39513SDavid van Moolenbroek wchan = 0x202; 292*25d39513SDavid van Moolenbroek wmesg = "pause"; 293*25d39513SDavid van Moolenbroek } else if (fp->fp_blocked_on != FP_BLOCKED_ON_NONE) { 294*25d39513SDavid van Moolenbroek wchan = (fp->fp_blocked_on << 8) | 0x03; 295*25d39513SDavid van Moolenbroek switch (fp->fp_blocked_on) { 296*25d39513SDavid van Moolenbroek case FP_BLOCKED_ON_PIPE: 297*25d39513SDavid van Moolenbroek wmesg = "pipe"; 298*25d39513SDavid van Moolenbroek break; 299*25d39513SDavid van Moolenbroek case FP_BLOCKED_ON_LOCK: 300*25d39513SDavid van Moolenbroek wmesg = "lock"; 301*25d39513SDavid van Moolenbroek break; 302*25d39513SDavid van Moolenbroek case FP_BLOCKED_ON_POPEN: 303*25d39513SDavid van Moolenbroek wmesg = "popen"; 304*25d39513SDavid van Moolenbroek break; 305*25d39513SDavid van Moolenbroek case FP_BLOCKED_ON_SELECT: 306*25d39513SDavid van Moolenbroek wmesg = "select"; 307*25d39513SDavid van Moolenbroek break; 308*25d39513SDavid van Moolenbroek case FP_BLOCKED_ON_OTHER: 309*25d39513SDavid van Moolenbroek /* 310*25d39513SDavid van Moolenbroek * Add the task (= character driver) endpoint to the 311*25d39513SDavid van Moolenbroek * wchan value, and use the driver's process name, 312*25d39513SDavid van Moolenbroek * without parentheses, as wmesg text. 313*25d39513SDavid van Moolenbroek */ 314*25d39513SDavid van Moolenbroek wchan |= (uint64_t)fp->fp_task << 16; 315*25d39513SDavid van Moolenbroek fill_wmesg(wmptr, wmsz, fp->fp_task, FALSE /*ipc*/); 316*25d39513SDavid van Moolenbroek break; 317*25d39513SDavid van Moolenbroek default: 318*25d39513SDavid van Moolenbroek /* A newly added flag we don't yet know about? 
*/ 319*25d39513SDavid van Moolenbroek wmesg = "???"; 320*25d39513SDavid van Moolenbroek break; 321*25d39513SDavid van Moolenbroek } 322*25d39513SDavid van Moolenbroek } 323*25d39513SDavid van Moolenbroek if (wchan != 0) { 324*25d39513SDavid van Moolenbroek *wcptr = wchan; 325*25d39513SDavid van Moolenbroek if (wmesg != NULL) /* NULL means "already set" here */ 326*25d39513SDavid van Moolenbroek strlcpy(wmptr, wmesg, wmsz); 327*25d39513SDavid van Moolenbroek *flag |= L_SINTR; 328*25d39513SDavid van Moolenbroek } 329*25d39513SDavid van Moolenbroek 330*25d39513SDavid van Moolenbroek /* 331*25d39513SDavid van Moolenbroek * See if the process is blocked on sending or receiving. If not, then 332*25d39513SDavid van Moolenbroek * use one of the kernel RTS flags as reason. 333*25d39513SDavid van Moolenbroek */ 334*25d39513SDavid van Moolenbroek endpt = P_BLOCKEDON(kp); 335*25d39513SDavid van Moolenbroek 336*25d39513SDavid van Moolenbroek switch (endpt) { 337*25d39513SDavid van Moolenbroek case MIB_PROC_NR: 338*25d39513SDavid van Moolenbroek /* This is really just aesthetics. */ 339*25d39513SDavid van Moolenbroek wchan = 0x04; 340*25d39513SDavid van Moolenbroek wmesg = "sysctl"; 341*25d39513SDavid van Moolenbroek break; 342*25d39513SDavid van Moolenbroek case NONE: 343*25d39513SDavid van Moolenbroek /* 344*25d39513SDavid van Moolenbroek * The process is not running, but also not blocked on IPC with 345*25d39513SDavid van Moolenbroek * another process. This means it must be stopped on a kernel 346*25d39513SDavid van Moolenbroek * RTS flag. 
347*25d39513SDavid van Moolenbroek */ 348*25d39513SDavid van Moolenbroek wchan = ((uint64_t)kp->p_rts_flags << 8) | 0x01; 349*25d39513SDavid van Moolenbroek if (RTS_ISSET(kp, RTS_PROC_STOP)) 350*25d39513SDavid van Moolenbroek wmesg = "kstop"; 351*25d39513SDavid van Moolenbroek else if (RTS_ISSET(kp, RTS_SIGNALED) || 352*25d39513SDavid van Moolenbroek RTS_ISSET(kp, RTS_SIGNALED)) 353*25d39513SDavid van Moolenbroek wmesg = "ksignal"; 354*25d39513SDavid van Moolenbroek else if (RTS_ISSET(kp, RTS_NO_PRIV)) 355*25d39513SDavid van Moolenbroek wmesg = "knopriv"; 356*25d39513SDavid van Moolenbroek else if (RTS_ISSET(kp, RTS_PAGEFAULT) || 357*25d39513SDavid van Moolenbroek RTS_ISSET(kp, RTS_VMREQTARGET)) 358*25d39513SDavid van Moolenbroek wmesg = "fault"; 359*25d39513SDavid van Moolenbroek else if (RTS_ISSET(kp, RTS_NO_QUANTUM)) 360*25d39513SDavid van Moolenbroek wmesg = "sched"; 361*25d39513SDavid van Moolenbroek else 362*25d39513SDavid van Moolenbroek wmesg = "kflag"; 363*25d39513SDavid van Moolenbroek break; 364*25d39513SDavid van Moolenbroek case ANY: 365*25d39513SDavid van Moolenbroek /* 366*25d39513SDavid van Moolenbroek * If the process is blocked receiving from ANY, mark it as 367*25d39513SDavid van Moolenbroek * being in an interruptible sleep. This looks nicer, even 368*25d39513SDavid van Moolenbroek * though "interruptible" is not applicable to services at all. 369*25d39513SDavid van Moolenbroek */ 370*25d39513SDavid van Moolenbroek *flag |= L_SINTR; 371*25d39513SDavid van Moolenbroek break; 372*25d39513SDavid van Moolenbroek } 373*25d39513SDavid van Moolenbroek 374*25d39513SDavid van Moolenbroek /* 375*25d39513SDavid van Moolenbroek * If at this point wchan is still zero, the process is blocked sending 376*25d39513SDavid van Moolenbroek * or receiving. Use a wchan value based on the target endpoint, and 377*25d39513SDavid van Moolenbroek * use "(procname)" as wmesg text. 
	 */
	if (wchan == 0) {
		*wcptr = ((uint64_t)endpt << 8) | 0xff;
		fill_wmesg(wmptr, wmsz, endpt, TRUE /*ipc*/);
	} else {
		*wcptr = wchan;
		if (wmesg != NULL)	/* NULL means "already set" here */
			strlcpy(wmptr, wmesg, wmsz);
	}

	return LSSLEEP;
}


/*
 * Fill the part of a LWP structure that is common between kernel tasks and
 * user processes.  Also return a CPU estimate in 'estcpu', because we generate
 * the value as a side effect here, and the LWP structure has no estcpu field.
 */
static void
fill_lwp_common(struct kinfo_lwp * l, int kslot, uint32_t * estcpu)
{
	struct proc *kp;
	struct timeval tv;
	clock_t uptime;
	uint32_t hz;

	kp = &proc_tab[kslot];

	uptime = getticks();
	hz = sys_hz();

	/*
	 * We use the process endpoint as the LWP ID.  Not only does this allow
	 * users to obtain process endpoints with "ps -s" (thus replacing the
	 * MINIX3 ps(1)'s "ps -E"), but if we ever do implement kernel threads,
	 * this is probably still going to be accurate.
	 */
	l->l_lid = kp->p_endpoint;

	/*
	 * The time during which the process has not been swapped in or out is
	 * not applicable for us, and thus, we set it to the time the process
	 * has been running (in seconds).  This value is relevant mostly for
	 * ps(1)'s CPU usage correction for processes that have just started.
	 */
	if (kslot >= NR_TASKS)
		l->l_swtime = uptime - mproc_tab[kslot - NR_TASKS].mp_started;
	else
		l->l_swtime = uptime;	/* kernel tasks run since boot */
	l->l_swtime /= hz;

	/*
	 * Sleep (dequeue) times are not maintained for kernel tasks, so
	 * pretend they are never asleep (which is pretty accurate).
	 */
	if (kslot < NR_TASKS)
		l->l_slptime = 0;
	else
		l->l_slptime = (uptime - kp->p_dequeued) / hz;

	l->l_priority = kp->p_priority;
	l->l_usrpri = kp->p_priority;
	l->l_cpuid = kp->p_cpu;
	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
	l->l_rtime_sec = tv.tv_sec;
	l->l_rtime_usec = tv.tv_usec;

	/*
	 * Obtain CPU usage percentages and estimates through library code
	 * shared between the kernel and this service; see its source for
	 * details.  We note that the produced estcpu value is rather different
	 * from the one produced by NetBSD, but this should not be a problem.
	 */
	l->l_pctcpu = cpuavg_getstats(&kp->p_cpuavg, &l->l_cpticks, estcpu,
	    uptime, hz);
}

/*
 * Fill a LWP structure for a kernel task.  Each kernel task has its own LWP,
 * and all of them have negative PIDs.
 */
static void
fill_lwp_kern(struct kinfo_lwp * l, int kslot)
{
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	l->l_flag = L_INMEM | L_SINTR | L_SYSTEM;
	l->l_stat = LSSLEEP;
	l->l_pid = kslot - NR_TASKS;	/* yields a negative task PID */

	/*
	 * When showing LWP entries, ps(1) uses the process name rather than
	 * the LWP name.  All kernel tasks are therefore shown as "[kernel]"
	 * anyway.  We use the wmesg field to show the actual kernel task name.
	 */
	l->l_wchan = ((uint64_t)(l->l_pid) << 8) | 0x00;
	strlcpy(l->l_wmesg, proc_tab[kslot].p_name, sizeof(l->l_wmesg));
	strlcpy(l->l_name, "kernel", sizeof(l->l_name));

	/* The computed estcpu value is not used for kernel tasks. */
	fill_lwp_common(l, kslot, &estcpu);
}

/*
 * Fill a LWP structure for a user process.
 */
static void
fill_lwp_user(struct kinfo_lwp * l, int mslot)
{
	struct mproc *mp;
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	mp = &mproc_tab[mslot];

	l->l_flag = L_INMEM;
	/* get_lwp_stat() also fills in wchan/wmesg and may add L_SINTR. */
	l->l_stat = get_lwp_stat(mslot, &l->l_wchan, l->l_wmesg,
	    sizeof(l->l_wmesg), &l->l_flag);
	l->l_pid = mp->mp_pid;
	strlcpy(l->l_name, mp->mp_name, sizeof(l->l_name));

	/* The computed estcpu value is not used here. */
	fill_lwp_common(l, NR_TASKS + mslot, &estcpu);
}

/*
 * Implementation of CTL_KERN KERN_LWP.
 */
ssize_t
mib_kern_lwp(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_lwp lwp;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	pid_t pid;
	int r, elsz, elmax, kslot, mslot, last_mslot;

	/* The caller must provide exactly {pid, elemsize, elemcount}. */
	if (call->call_namelen != 3)
		return EINVAL;

	pid = (pid_t)call->call_name[0];
	elsz = call->call_name[1];
	elmax = call->call_name[2]; /* redundant with the given oldlen.. */

	if (pid < -1 || elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	/* Copy out at most the caller's element size per entry. */
	copysz = MIN((size_t)elsz, sizeof(lwp));

	/*
	 * We model kernel tasks as LWP threads of the kernel (with PID 0).
	 * Modeling the kernel tasks as processes with negative PIDs, like
	 * ProcFS does, conflicts with the KERN_LWP API here: a PID of -1
	 * indicates that the caller wants a full listing of LWPs.
	 */
	if (pid <= 0) {
		for (kslot = 0; kslot < NR_TASKS; kslot++) {
			if (mib_inrange(oldp, off) && elmax > 0) {
				fill_lwp_kern(&lwp, kslot);
				if ((r = mib_copyout(oldp, off, &lwp,
				    copysz)) < 0)
					return r;
				elmax--;
			}
			off += elsz;
		}

		/* No need to add extra space here: NR_TASKS is static. */
		if (pid == 0)
			return off;
	}

	/*
	 * With PID 0 out of the way: the user requested the LWP for either a
	 * specific user process (pid > 0), or for all processes (pid < 0).
	 */
	if (pid > 0) {
		if ((mslot = get_mslot(pid)) == NO_SLOT ||
		    (mproc_tab[mslot].mp_flags & (TRACE_ZOMBIE | ZOMBIE)))
			return ESRCH;
		last_mslot = mslot;
	} else {
		mslot = 0;
		last_mslot = NR_PROCS - 1;
	}

	for (; mslot <= last_mslot; mslot++) {
		mp = &mproc_tab[mslot];

		/* Skip free slots and zombies. */
		if ((mp->mp_flags & (IN_USE | TRACE_ZOMBIE | ZOMBIE)) !=
		    IN_USE)
			continue;

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_lwp_user(&lwp, mslot);
			if ((r = mib_copyout(oldp, off, &lwp, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	/*
	 * On a size estimation (oldp == NULL) for a full listing, reserve
	 * room for processes that may be created before the retrieval call.
	 */
	if (oldp == NULL && pid < 0)
		off += EXTRA_PROCS * elsz;

	return off;
}


/*
 * Fill the part of a process structure that is common between kernel tasks and
 * user processes.
598*25d39513SDavid van Moolenbroek */ 599*25d39513SDavid van Moolenbroek static void 600*25d39513SDavid van Moolenbroek fill_proc2_common(struct kinfo_proc2 * p, int kslot) 601*25d39513SDavid van Moolenbroek { 602*25d39513SDavid van Moolenbroek struct vm_usage_info vui; 603*25d39513SDavid van Moolenbroek struct timeval tv; 604*25d39513SDavid van Moolenbroek struct proc *kp; 605*25d39513SDavid van Moolenbroek struct kinfo_lwp l; 606*25d39513SDavid van Moolenbroek 607*25d39513SDavid van Moolenbroek kp = &proc_tab[kslot]; 608*25d39513SDavid van Moolenbroek 609*25d39513SDavid van Moolenbroek /* 610*25d39513SDavid van Moolenbroek * Much of the information in the LWP structure also ends up in the 611*25d39513SDavid van Moolenbroek * process structure. In order to avoid duplication of some important 612*25d39513SDavid van Moolenbroek * code, first generate LWP values and then copy it them into the 613*25d39513SDavid van Moolenbroek * process structure. 614*25d39513SDavid van Moolenbroek */ 615*25d39513SDavid van Moolenbroek memset(&l, 0, sizeof(l)); 616*25d39513SDavid van Moolenbroek fill_lwp_common(&l, kslot, &p->p_estcpu); 617*25d39513SDavid van Moolenbroek 618*25d39513SDavid van Moolenbroek /* Obtain memory usage information from VM. Ignore failures. 
*/ 619*25d39513SDavid van Moolenbroek memset(&vui, 0, sizeof(vui)); 620*25d39513SDavid van Moolenbroek (void)vm_info_usage(kp->p_endpoint, &vui); 621*25d39513SDavid van Moolenbroek 622*25d39513SDavid van Moolenbroek ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time); 623*25d39513SDavid van Moolenbroek p->p_rtime_sec = l.l_rtime_sec; 624*25d39513SDavid van Moolenbroek p->p_rtime_usec = l.l_rtime_usec; 625*25d39513SDavid van Moolenbroek p->p_cpticks = l.l_cpticks; 626*25d39513SDavid van Moolenbroek p->p_pctcpu = l.l_pctcpu; 627*25d39513SDavid van Moolenbroek p->p_swtime = l.l_swtime; 628*25d39513SDavid van Moolenbroek p->p_slptime = l.l_slptime; 629*25d39513SDavid van Moolenbroek p->p_uticks = kp->p_user_time; 630*25d39513SDavid van Moolenbroek p->p_sticks = kp->p_sys_time; 631*25d39513SDavid van Moolenbroek /* TODO: p->p_iticks */ 632*25d39513SDavid van Moolenbroek ticks_to_timeval(&tv, kp->p_user_time); 633*25d39513SDavid van Moolenbroek p->p_uutime_sec = tv.tv_sec; 634*25d39513SDavid van Moolenbroek p->p_uutime_usec = tv.tv_usec; 635*25d39513SDavid van Moolenbroek ticks_to_timeval(&tv, kp->p_sys_time); 636*25d39513SDavid van Moolenbroek p->p_ustime_sec = tv.tv_sec; 637*25d39513SDavid van Moolenbroek p->p_ustime_usec = tv.tv_usec; 638*25d39513SDavid van Moolenbroek 639*25d39513SDavid van Moolenbroek p->p_priority = l.l_priority; 640*25d39513SDavid van Moolenbroek p->p_usrpri = l.l_usrpri; 641*25d39513SDavid van Moolenbroek 642*25d39513SDavid van Moolenbroek p->p_vm_rssize = howmany(vui.vui_total, PAGE_SIZE); 643*25d39513SDavid van Moolenbroek p->p_vm_vsize = howmany(vui.vui_virtual, PAGE_SIZE); 644*25d39513SDavid van Moolenbroek p->p_vm_msize = howmany(vui.vui_mvirtual, PAGE_SIZE); 645*25d39513SDavid van Moolenbroek 646*25d39513SDavid van Moolenbroek p->p_uru_maxrss = vui.vui_maxrss; 647*25d39513SDavid van Moolenbroek p->p_uru_minflt = vui.vui_minflt; 648*25d39513SDavid van Moolenbroek p->p_uru_majflt = vui.vui_majflt; 649*25d39513SDavid van Moolenbroek 
650*25d39513SDavid van Moolenbroek p->p_cpuid = l.l_cpuid; 651*25d39513SDavid van Moolenbroek } 652*25d39513SDavid van Moolenbroek 653*25d39513SDavid van Moolenbroek /* 654*25d39513SDavid van Moolenbroek * Fill a process structure for the kernel pseudo-process (with PID 0). 655*25d39513SDavid van Moolenbroek */ 656*25d39513SDavid van Moolenbroek static void 657*25d39513SDavid van Moolenbroek fill_proc2_kern(struct kinfo_proc2 * p) 658*25d39513SDavid van Moolenbroek { 659*25d39513SDavid van Moolenbroek 660*25d39513SDavid van Moolenbroek memset(p, 0, sizeof(*p)); 661*25d39513SDavid van Moolenbroek 662*25d39513SDavid van Moolenbroek p->p_flag = L_INMEM | L_SYSTEM | L_SINTR; 663*25d39513SDavid van Moolenbroek p->p_pid = 0; 664*25d39513SDavid van Moolenbroek p->p_stat = LSSLEEP; 665*25d39513SDavid van Moolenbroek p->p_nice = NZERO; 666*25d39513SDavid van Moolenbroek 667*25d39513SDavid van Moolenbroek /* Use the KERNEL task wchan, for consistency between ps and top. */ 668*25d39513SDavid van Moolenbroek p->p_wchan = ((uint64_t)KERNEL << 8) | 0x00; 669*25d39513SDavid van Moolenbroek strlcpy(p->p_wmesg, "kernel", sizeof(p->p_wmesg)); 670*25d39513SDavid van Moolenbroek 671*25d39513SDavid van Moolenbroek strlcpy(p->p_comm, "kernel", sizeof(p->p_comm)); 672*25d39513SDavid van Moolenbroek p->p_realflag = P_INMEM | P_SYSTEM | P_SINTR; 673*25d39513SDavid van Moolenbroek p->p_realstat = SACTIVE; 674*25d39513SDavid van Moolenbroek p->p_nlwps = NR_TASKS; 675*25d39513SDavid van Moolenbroek 676*25d39513SDavid van Moolenbroek /* 677*25d39513SDavid van Moolenbroek * By using the KERNEL slot here, the kernel process will get a proper 678*25d39513SDavid van Moolenbroek * CPU usage average. 
679*25d39513SDavid van Moolenbroek */ 680*25d39513SDavid van Moolenbroek fill_proc2_common(p, KERNEL + NR_TASKS); 681*25d39513SDavid van Moolenbroek } 682*25d39513SDavid van Moolenbroek 683*25d39513SDavid van Moolenbroek /* 684*25d39513SDavid van Moolenbroek * Fill a process structure for a user process. 685*25d39513SDavid van Moolenbroek */ 686*25d39513SDavid van Moolenbroek static void 687*25d39513SDavid van Moolenbroek fill_proc2_user(struct kinfo_proc2 * p, int mslot) 688*25d39513SDavid van Moolenbroek { 689*25d39513SDavid van Moolenbroek struct mproc *mp; 690*25d39513SDavid van Moolenbroek struct fproc *fp; 691*25d39513SDavid van Moolenbroek time_t boottime; 692*25d39513SDavid van Moolenbroek dev_t tty; 693*25d39513SDavid van Moolenbroek struct timeval tv; 694*25d39513SDavid van Moolenbroek int i, r, kslot, zombie; 695*25d39513SDavid van Moolenbroek 696*25d39513SDavid van Moolenbroek memset(p, 0, sizeof(*p)); 697*25d39513SDavid van Moolenbroek 698*25d39513SDavid van Moolenbroek if ((r = getuptime(NULL, NULL, &boottime)) != OK) 699*25d39513SDavid van Moolenbroek panic("getuptime failed: %d", r); 700*25d39513SDavid van Moolenbroek 701*25d39513SDavid van Moolenbroek kslot = NR_TASKS + mslot; 702*25d39513SDavid van Moolenbroek mp = &mproc_tab[mslot]; 703*25d39513SDavid van Moolenbroek fp = &fproc_tab[mslot]; 704*25d39513SDavid van Moolenbroek 705*25d39513SDavid van Moolenbroek zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE)); 706*25d39513SDavid van Moolenbroek tty = (!zombie) ? 
fp->fp_tty : NO_DEV; 707*25d39513SDavid van Moolenbroek 708*25d39513SDavid van Moolenbroek p->p_eflag = 0; 709*25d39513SDavid van Moolenbroek if (tty != NO_DEV) 710*25d39513SDavid van Moolenbroek p->p_eflag |= EPROC_CTTY; 711*25d39513SDavid van Moolenbroek if (mp->mp_pid == mp->mp_procgrp) /* TODO: job control support */ 712*25d39513SDavid van Moolenbroek p->p_eflag |= EPROC_SLEADER; 713*25d39513SDavid van Moolenbroek 714*25d39513SDavid van Moolenbroek p->p_exitsig = SIGCHLD; /* TODO */ 715*25d39513SDavid van Moolenbroek 716*25d39513SDavid van Moolenbroek p->p_flag = P_INMEM; 717*25d39513SDavid van Moolenbroek if (mp->mp_flags & TAINTED) 718*25d39513SDavid van Moolenbroek p->p_flag |= P_SUGID; 719*25d39513SDavid van Moolenbroek if (mp->mp_tracer != NO_TRACER) 720*25d39513SDavid van Moolenbroek p->p_flag |= P_TRACED; 721*25d39513SDavid van Moolenbroek if (tty != NO_DEV) 722*25d39513SDavid van Moolenbroek p->p_flag |= P_CONTROLT; 723*25d39513SDavid van Moolenbroek p->p_pid = mp->mp_pid; 724*25d39513SDavid van Moolenbroek if (mp->mp_parent >= 0 && mp->mp_parent < NR_PROCS) 725*25d39513SDavid van Moolenbroek p->p_ppid = mproc_tab[mp->mp_parent].mp_pid; 726*25d39513SDavid van Moolenbroek p->p_sid = mp->mp_procgrp; /* TODO: job control supported */ 727*25d39513SDavid van Moolenbroek p->p__pgid = mp->mp_procgrp; 728*25d39513SDavid van Moolenbroek p->p_tpgid = (tty != NO_DEV) ? 
mp->mp_procgrp : 0; 729*25d39513SDavid van Moolenbroek p->p_uid = mp->mp_effuid; 730*25d39513SDavid van Moolenbroek p->p_ruid = mp->mp_realuid; 731*25d39513SDavid van Moolenbroek p->p_gid = mp->mp_effgid; 732*25d39513SDavid van Moolenbroek p->p_rgid = mp->mp_realgid; 733*25d39513SDavid van Moolenbroek p->p_ngroups = MIN(mp->mp_ngroups, KI_NGROUPS); 734*25d39513SDavid van Moolenbroek for (i = 0; i < p->p_ngroups; i++) 735*25d39513SDavid van Moolenbroek p->p_groups[i] = mp->mp_sgroups[i]; 736*25d39513SDavid van Moolenbroek p->p_tdev = tty; 737*25d39513SDavid van Moolenbroek memcpy(&p->p_siglist, &mp->mp_sigpending, sizeof(p->p_siglist)); 738*25d39513SDavid van Moolenbroek memcpy(&p->p_sigmask, &mp->mp_sigmask, sizeof(p->p_sigmask)); 739*25d39513SDavid van Moolenbroek memcpy(&p->p_sigcatch, &mp->mp_catch, sizeof(p->p_sigcatch)); 740*25d39513SDavid van Moolenbroek memcpy(&p->p_sigignore, &mp->mp_ignore, sizeof(p->p_sigignore)); 741*25d39513SDavid van Moolenbroek p->p_nice = mp->mp_nice + NZERO; 742*25d39513SDavid van Moolenbroek strlcpy(p->p_comm, mp->mp_name, sizeof(p->p_comm)); 743*25d39513SDavid van Moolenbroek p->p_uvalid = 1; 744*25d39513SDavid van Moolenbroek ticks_to_timeval(&tv, mp->mp_started); 745*25d39513SDavid van Moolenbroek p->p_ustart_sec = boottime + tv.tv_sec; 746*25d39513SDavid van Moolenbroek p->p_ustart_usec = tv.tv_usec; 747*25d39513SDavid van Moolenbroek /* TODO: other rusage fields */ 748*25d39513SDavid van Moolenbroek ticks_to_timeval(&tv, mp->mp_child_utime + mp->mp_child_stime); 749*25d39513SDavid van Moolenbroek p->p_uctime_sec = tv.tv_sec; 750*25d39513SDavid van Moolenbroek p->p_uctime_usec = tv.tv_usec; 751*25d39513SDavid van Moolenbroek p->p_realflag = p->p_flag; 752*25d39513SDavid van Moolenbroek p->p_nlwps = (zombie) ? 
0 : 1; 753*25d39513SDavid van Moolenbroek 754*25d39513SDavid van Moolenbroek p->p_stat = get_lwp_stat(mslot, &p->p_wchan, p->p_wmesg, 755*25d39513SDavid van Moolenbroek sizeof(p->p_wmesg), &p->p_flag); 756*25d39513SDavid van Moolenbroek 757*25d39513SDavid van Moolenbroek switch (p->p_stat) { 758*25d39513SDavid van Moolenbroek case LSRUN: 759*25d39513SDavid van Moolenbroek p->p_realstat = SACTIVE; 760*25d39513SDavid van Moolenbroek p->p_nrlwps = 1; 761*25d39513SDavid van Moolenbroek break; 762*25d39513SDavid van Moolenbroek case LSSLEEP: 763*25d39513SDavid van Moolenbroek p->p_realstat = SACTIVE; 764*25d39513SDavid van Moolenbroek if (p->p_flag & L_SINTR) 765*25d39513SDavid van Moolenbroek p->p_realflag |= P_SINTR; 766*25d39513SDavid van Moolenbroek break; 767*25d39513SDavid van Moolenbroek case LSSTOP: 768*25d39513SDavid van Moolenbroek p->p_realstat = SSTOP; 769*25d39513SDavid van Moolenbroek break; 770*25d39513SDavid van Moolenbroek case LSZOMB: 771*25d39513SDavid van Moolenbroek p->p_realstat = SZOMB; 772*25d39513SDavid van Moolenbroek break; 773*25d39513SDavid van Moolenbroek case LSDEAD: 774*25d39513SDavid van Moolenbroek p->p_stat = LSZOMB; /* ps(1) STAT does not know LSDEAD */ 775*25d39513SDavid van Moolenbroek p->p_realstat = SDEAD; 776*25d39513SDavid van Moolenbroek break; 777*25d39513SDavid van Moolenbroek default: 778*25d39513SDavid van Moolenbroek assert(0); 779*25d39513SDavid van Moolenbroek } 780*25d39513SDavid van Moolenbroek 781*25d39513SDavid van Moolenbroek if (!zombie) 782*25d39513SDavid van Moolenbroek fill_proc2_common(p, kslot); 783*25d39513SDavid van Moolenbroek } 784*25d39513SDavid van Moolenbroek 785*25d39513SDavid van Moolenbroek /* 786*25d39513SDavid van Moolenbroek * Implementation of CTL_KERN KERN_PROC2. 
787*25d39513SDavid van Moolenbroek */ 788*25d39513SDavid van Moolenbroek ssize_t 789*25d39513SDavid van Moolenbroek mib_kern_proc2(struct mib_call * call, struct mib_node * node __unused, 790*25d39513SDavid van Moolenbroek struct mib_oldp * oldp, struct mib_newp * newp __unused) 791*25d39513SDavid van Moolenbroek { 792*25d39513SDavid van Moolenbroek struct kinfo_proc2 proc2; 793*25d39513SDavid van Moolenbroek struct mproc *mp; 794*25d39513SDavid van Moolenbroek size_t copysz; 795*25d39513SDavid van Moolenbroek ssize_t off; 796*25d39513SDavid van Moolenbroek dev_t tty; 797*25d39513SDavid van Moolenbroek int r, req, arg, elsz, elmax, kmatch, zombie, mslot; 798*25d39513SDavid van Moolenbroek 799*25d39513SDavid van Moolenbroek if (call->call_namelen != 4) 800*25d39513SDavid van Moolenbroek return EINVAL; 801*25d39513SDavid van Moolenbroek 802*25d39513SDavid van Moolenbroek req = call->call_name[0]; 803*25d39513SDavid van Moolenbroek arg = call->call_name[1]; 804*25d39513SDavid van Moolenbroek elsz = call->call_name[2]; 805*25d39513SDavid van Moolenbroek elmax = call->call_name[3]; /* redundant with the given oldlen.. */ 806*25d39513SDavid van Moolenbroek 807*25d39513SDavid van Moolenbroek /* 808*25d39513SDavid van Moolenbroek * The kernel is special, in that it does not have a slot in the PM or 809*25d39513SDavid van Moolenbroek * VFS tables. As such, it is dealt with separately. While checking 810*25d39513SDavid van Moolenbroek * arguments, we might as well check whether the kernel is matched. 
811*25d39513SDavid van Moolenbroek */ 812*25d39513SDavid van Moolenbroek switch (req) { 813*25d39513SDavid van Moolenbroek case KERN_PROC_ALL: 814*25d39513SDavid van Moolenbroek kmatch = TRUE; 815*25d39513SDavid van Moolenbroek break; 816*25d39513SDavid van Moolenbroek case KERN_PROC_PID: 817*25d39513SDavid van Moolenbroek case KERN_PROC_SESSION: 818*25d39513SDavid van Moolenbroek case KERN_PROC_PGRP: 819*25d39513SDavid van Moolenbroek case KERN_PROC_UID: 820*25d39513SDavid van Moolenbroek case KERN_PROC_RUID: 821*25d39513SDavid van Moolenbroek case KERN_PROC_GID: 822*25d39513SDavid van Moolenbroek case KERN_PROC_RGID: 823*25d39513SDavid van Moolenbroek kmatch = (arg == 0); 824*25d39513SDavid van Moolenbroek break; 825*25d39513SDavid van Moolenbroek case KERN_PROC_TTY: 826*25d39513SDavid van Moolenbroek kmatch = ((dev_t)arg == KERN_PROC_TTY_NODEV); 827*25d39513SDavid van Moolenbroek break; 828*25d39513SDavid van Moolenbroek default: 829*25d39513SDavid van Moolenbroek return EINVAL; 830*25d39513SDavid van Moolenbroek } 831*25d39513SDavid van Moolenbroek 832*25d39513SDavid van Moolenbroek if (elsz <= 0 || elmax < 0) 833*25d39513SDavid van Moolenbroek return EINVAL; 834*25d39513SDavid van Moolenbroek 835*25d39513SDavid van Moolenbroek if (!update_tables()) 836*25d39513SDavid van Moolenbroek return EINVAL; 837*25d39513SDavid van Moolenbroek 838*25d39513SDavid van Moolenbroek off = 0; 839*25d39513SDavid van Moolenbroek copysz = MIN((size_t)elsz, sizeof(proc2)); 840*25d39513SDavid van Moolenbroek 841*25d39513SDavid van Moolenbroek if (kmatch) { 842*25d39513SDavid van Moolenbroek if (mib_inrange(oldp, off) && elmax > 0) { 843*25d39513SDavid van Moolenbroek fill_proc2_kern(&proc2); 844*25d39513SDavid van Moolenbroek if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0) 845*25d39513SDavid van Moolenbroek return r; 846*25d39513SDavid van Moolenbroek elmax--; 847*25d39513SDavid van Moolenbroek } 848*25d39513SDavid van Moolenbroek off += elsz; 849*25d39513SDavid van 
Moolenbroek } 850*25d39513SDavid van Moolenbroek 851*25d39513SDavid van Moolenbroek for (mslot = 0; mslot < NR_PROCS; mslot++) { 852*25d39513SDavid van Moolenbroek mp = &mproc_tab[mslot]; 853*25d39513SDavid van Moolenbroek 854*25d39513SDavid van Moolenbroek if (!(mp->mp_flags & IN_USE)) 855*25d39513SDavid van Moolenbroek continue; 856*25d39513SDavid van Moolenbroek 857*25d39513SDavid van Moolenbroek switch (req) { 858*25d39513SDavid van Moolenbroek case KERN_PROC_PID: 859*25d39513SDavid van Moolenbroek if ((pid_t)arg != mp->mp_pid) 860*25d39513SDavid van Moolenbroek continue; 861*25d39513SDavid van Moolenbroek break; 862*25d39513SDavid van Moolenbroek case KERN_PROC_SESSION: /* TODO: job control support */ 863*25d39513SDavid van Moolenbroek case KERN_PROC_PGRP: 864*25d39513SDavid van Moolenbroek if ((pid_t)arg != mp->mp_procgrp) 865*25d39513SDavid van Moolenbroek continue; 866*25d39513SDavid van Moolenbroek break; 867*25d39513SDavid van Moolenbroek case KERN_PROC_TTY: 868*25d39513SDavid van Moolenbroek if ((dev_t)arg == KERN_PROC_TTY_REVOKE) 869*25d39513SDavid van Moolenbroek continue; /* TODO: revoke(2) support */ 870*25d39513SDavid van Moolenbroek /* Do not access the fproc_tab slot of zombies. */ 871*25d39513SDavid van Moolenbroek zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE)); 872*25d39513SDavid van Moolenbroek tty = (zombie) ? 
fproc_tab[mslot].fp_tty : NO_DEV; 873*25d39513SDavid van Moolenbroek if ((dev_t)arg == KERN_PROC_TTY_NODEV) { 874*25d39513SDavid van Moolenbroek if (tty != NO_DEV) 875*25d39513SDavid van Moolenbroek continue; 876*25d39513SDavid van Moolenbroek } else if ((dev_t)arg == NO_DEV || (dev_t)arg != tty) 877*25d39513SDavid van Moolenbroek continue; 878*25d39513SDavid van Moolenbroek break; 879*25d39513SDavid van Moolenbroek case KERN_PROC_UID: 880*25d39513SDavid van Moolenbroek if ((uid_t)arg != mp->mp_effuid) 881*25d39513SDavid van Moolenbroek continue; 882*25d39513SDavid van Moolenbroek break; 883*25d39513SDavid van Moolenbroek case KERN_PROC_RUID: 884*25d39513SDavid van Moolenbroek if ((uid_t)arg != mp->mp_realuid) 885*25d39513SDavid van Moolenbroek continue; 886*25d39513SDavid van Moolenbroek break; 887*25d39513SDavid van Moolenbroek case KERN_PROC_GID: 888*25d39513SDavid van Moolenbroek if ((gid_t)arg != mp->mp_effgid) 889*25d39513SDavid van Moolenbroek continue; 890*25d39513SDavid van Moolenbroek break; 891*25d39513SDavid van Moolenbroek case KERN_PROC_RGID: 892*25d39513SDavid van Moolenbroek if ((gid_t)arg != mp->mp_realgid) 893*25d39513SDavid van Moolenbroek continue; 894*25d39513SDavid van Moolenbroek break; 895*25d39513SDavid van Moolenbroek } 896*25d39513SDavid van Moolenbroek 897*25d39513SDavid van Moolenbroek if (mib_inrange(oldp, off) && elmax > 0) { 898*25d39513SDavid van Moolenbroek fill_proc2_user(&proc2, mslot); 899*25d39513SDavid van Moolenbroek if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0) 900*25d39513SDavid van Moolenbroek return r; 901*25d39513SDavid van Moolenbroek elmax--; 902*25d39513SDavid van Moolenbroek } 903*25d39513SDavid van Moolenbroek off += elsz; 904*25d39513SDavid van Moolenbroek } 905*25d39513SDavid van Moolenbroek 906*25d39513SDavid van Moolenbroek if (oldp == NULL && req != KERN_PROC_PID) 907*25d39513SDavid van Moolenbroek off += EXTRA_PROCS * elsz; 908*25d39513SDavid van Moolenbroek 909*25d39513SDavid van Moolenbroek return 
off; 910*25d39513SDavid van Moolenbroek } 911*25d39513SDavid van Moolenbroek 912*25d39513SDavid van Moolenbroek /* 913*25d39513SDavid van Moolenbroek * Implementation of CTL_KERN KERN_PROC_ARGS. 914*25d39513SDavid van Moolenbroek */ 915*25d39513SDavid van Moolenbroek ssize_t 916*25d39513SDavid van Moolenbroek mib_kern_proc_args(struct mib_call * call, struct mib_node * node __unused, 917*25d39513SDavid van Moolenbroek struct mib_oldp * oldp, struct mib_newp * newp __unused) 918*25d39513SDavid van Moolenbroek { 919*25d39513SDavid van Moolenbroek char vbuf[PAGE_SIZE], sbuf[PAGE_SIZE], obuf[PAGE_SIZE]; 920*25d39513SDavid van Moolenbroek struct ps_strings pss; 921*25d39513SDavid van Moolenbroek struct mproc *mp; 922*25d39513SDavid van Moolenbroek char *buf, *p, *q, *pptr; 923*25d39513SDavid van Moolenbroek vir_bytes vaddr, vpage, spage, paddr, ppage; 924*25d39513SDavid van Moolenbroek size_t max, off, olen, oleft, oldlen, bytes, pleft; 925*25d39513SDavid van Moolenbroek unsigned int copybudget; 926*25d39513SDavid van Moolenbroek pid_t pid; 927*25d39513SDavid van Moolenbroek int req, mslot, count, aborted, ended; 928*25d39513SDavid van Moolenbroek ssize_t r; 929*25d39513SDavid van Moolenbroek 930*25d39513SDavid van Moolenbroek if (call->call_namelen != 2) 931*25d39513SDavid van Moolenbroek return EINVAL; 932*25d39513SDavid van Moolenbroek 933*25d39513SDavid van Moolenbroek pid = call->call_name[0]; 934*25d39513SDavid van Moolenbroek req = call->call_name[1]; 935*25d39513SDavid van Moolenbroek 936*25d39513SDavid van Moolenbroek switch (req) { 937*25d39513SDavid van Moolenbroek case KERN_PROC_ARGV: 938*25d39513SDavid van Moolenbroek case KERN_PROC_ENV: 939*25d39513SDavid van Moolenbroek case KERN_PROC_NARGV: 940*25d39513SDavid van Moolenbroek case KERN_PROC_NENV: 941*25d39513SDavid van Moolenbroek break; 942*25d39513SDavid van Moolenbroek default: 943*25d39513SDavid van Moolenbroek return EOPNOTSUPP; 944*25d39513SDavid van Moolenbroek } 945*25d39513SDavid van Moolenbroek 
946*25d39513SDavid van Moolenbroek if (!update_tables()) 947*25d39513SDavid van Moolenbroek return EINVAL; 948*25d39513SDavid van Moolenbroek 949*25d39513SDavid van Moolenbroek if ((mslot = get_mslot(pid)) == NO_SLOT) 950*25d39513SDavid van Moolenbroek return ESRCH; 951*25d39513SDavid van Moolenbroek mp = &mproc_tab[mslot]; 952*25d39513SDavid van Moolenbroek if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE)) 953*25d39513SDavid van Moolenbroek return ESRCH; 954*25d39513SDavid van Moolenbroek 955*25d39513SDavid van Moolenbroek /* We can return the count field size without copying in any data. */ 956*25d39513SDavid van Moolenbroek if (oldp == NULL && (req == KERN_PROC_NARGV || req == KERN_PROC_NENV)) 957*25d39513SDavid van Moolenbroek return sizeof(count); 958*25d39513SDavid van Moolenbroek 959*25d39513SDavid van Moolenbroek if (sys_datacopy(mp->mp_endpoint, 960*25d39513SDavid van Moolenbroek mp->mp_frame_addr + mp->mp_frame_len - sizeof(pss), 961*25d39513SDavid van Moolenbroek SELF, (vir_bytes)&pss, sizeof(pss)) != OK) 962*25d39513SDavid van Moolenbroek return EINVAL; 963*25d39513SDavid van Moolenbroek 964*25d39513SDavid van Moolenbroek /* 965*25d39513SDavid van Moolenbroek * Determine the upper size limit of the requested data. Not only may 966*25d39513SDavid van Moolenbroek * the size never exceed ARG_MAX, it may also not exceed the frame 967*25d39513SDavid van Moolenbroek * length as given in its original exec call. In fact, the frame 968*25d39513SDavid van Moolenbroek * length should be substantially larger: all strings for both the 969*25d39513SDavid van Moolenbroek * arguments and the environment are in there, along with other stuff, 970*25d39513SDavid van Moolenbroek * and there must be no overlap between strings. It is possible that 971*25d39513SDavid van Moolenbroek * the application called setproctitle(3), in which case the ps_strings 972*25d39513SDavid van Moolenbroek * pointers refer to data outside the frame altogether. 
However, this 973*25d39513SDavid van Moolenbroek * data should not exceed 2048 bytes, and we cover this by rounding up 974*25d39513SDavid van Moolenbroek * the frame length to a multiple of the page size. Anyhow, NetBSD 975*25d39513SDavid van Moolenbroek * blindly returns ARG_MAX when asked for a size estimate, so with this 976*25d39513SDavid van Moolenbroek * maximum we are already quite a bit more accurate. 977*25d39513SDavid van Moolenbroek */ 978*25d39513SDavid van Moolenbroek max = roundup(MIN(mp->mp_frame_len, ARG_MAX), PAGE_SIZE); 979*25d39513SDavid van Moolenbroek 980*25d39513SDavid van Moolenbroek switch (req) { 981*25d39513SDavid van Moolenbroek case KERN_PROC_NARGV: 982*25d39513SDavid van Moolenbroek count = pss.ps_nargvstr; 983*25d39513SDavid van Moolenbroek return mib_copyout(oldp, 0, &count, sizeof(count)); 984*25d39513SDavid van Moolenbroek case KERN_PROC_NENV: 985*25d39513SDavid van Moolenbroek count = pss.ps_nenvstr; 986*25d39513SDavid van Moolenbroek return mib_copyout(oldp, 0, &count, sizeof(count)); 987*25d39513SDavid van Moolenbroek case KERN_PROC_ARGV: 988*25d39513SDavid van Moolenbroek if (oldp == NULL) 989*25d39513SDavid van Moolenbroek return max; 990*25d39513SDavid van Moolenbroek vaddr = (vir_bytes)pss.ps_argvstr; 991*25d39513SDavid van Moolenbroek count = pss.ps_nargvstr; 992*25d39513SDavid van Moolenbroek break; 993*25d39513SDavid van Moolenbroek case KERN_PROC_ENV: 994*25d39513SDavid van Moolenbroek if (oldp == NULL) 995*25d39513SDavid van Moolenbroek return max; 996*25d39513SDavid van Moolenbroek vaddr = (vir_bytes)pss.ps_envstr; 997*25d39513SDavid van Moolenbroek count = pss.ps_nenvstr; 998*25d39513SDavid van Moolenbroek break; 999*25d39513SDavid van Moolenbroek } 1000*25d39513SDavid van Moolenbroek 1001*25d39513SDavid van Moolenbroek /* 1002*25d39513SDavid van Moolenbroek * Go through the strings. 
Copy in entire, machine-aligned pages at 1003*25d39513SDavid van Moolenbroek * once, in the hope that all data is stored consecutively, which it 1004*25d39513SDavid van Moolenbroek * should be: we expect that the vector is followed by the strings, and 1005*25d39513SDavid van Moolenbroek * that the strings are stored in order of vector reference. We keep 1006*25d39513SDavid van Moolenbroek * up to two pages with copied-in data: one for the vector, and 1007*25d39513SDavid van Moolenbroek * optionally one for string data. In addition, we keep one page with 1008*25d39513SDavid van Moolenbroek * data to be copied out, so that we do not cause a lot of copy 1009*25d39513SDavid van Moolenbroek * overhead for short strings. 1010*25d39513SDavid van Moolenbroek * 1011*25d39513SDavid van Moolenbroek * We stop whenever any of the following conditions are met: 1012*25d39513SDavid van Moolenbroek * - copying in data from the target process fails for any reason; 1013*25d39513SDavid van Moolenbroek * - we have processed the last index ('count') into the vector; 1014*25d39513SDavid van Moolenbroek * - the current vector element is a NULL pointer; 1015*25d39513SDavid van Moolenbroek * - the requested number of output bytes ('oldlen') has been reached; 1016*25d39513SDavid van Moolenbroek * - the maximum number of output bytes ('max') has been reached; 1017*25d39513SDavid van Moolenbroek * - the number of page copy-ins exceeds an estimated threshold; 1018*25d39513SDavid van Moolenbroek * - copying out data fails for any reason (we then return the error). 1019*25d39513SDavid van Moolenbroek * 1020*25d39513SDavid van Moolenbroek * We limit the number of page copy-ins because otherwise a rogue 1021*25d39513SDavid van Moolenbroek * process could create an argument vector consisting of only two-byte 1022*25d39513SDavid van Moolenbroek * strings that all span two pages, causing us to copy up to 1GB of 1023*25d39513SDavid van Moolenbroek * data with the current ARG_MAX value of 256K. 
No reasonable vector 1024*25d39513SDavid van Moolenbroek * should cause more than (ARG_MAX / PAGE_SIZE) page copies for 1025*25d39513SDavid van Moolenbroek * strings; we are nice enough to allow twice that. Vector copies do 1026*25d39513SDavid van Moolenbroek * not count, as they are linear anyway. 1027*25d39513SDavid van Moolenbroek * 1028*25d39513SDavid van Moolenbroek * Unlike every other sysctl(2) call, we are supposed to truncate the 1029*25d39513SDavid van Moolenbroek * resulting size (the returned 'oldlen') to the requested size (the 1030*25d39513SDavid van Moolenbroek * given 'oldlen') *and* return the resulting size, rather than ENOMEM 1031*25d39513SDavid van Moolenbroek * and the real size. Unfortunately, libkvm actually relies on this. 1032*25d39513SDavid van Moolenbroek * 1033*25d39513SDavid van Moolenbroek * Generally speaking, upon failure we just return a truncated result. 1034*25d39513SDavid van Moolenbroek * In case of truncation, the data we copy out need not be null 1035*25d39513SDavid van Moolenbroek * terminated. It is up to userland to process the data correctly. 
1036*25d39513SDavid van Moolenbroek */ 1037*25d39513SDavid van Moolenbroek if (trunc_page(vaddr) == 0 || vaddr % sizeof(char *) != 0) 1038*25d39513SDavid van Moolenbroek return 0; 1039*25d39513SDavid van Moolenbroek 1040*25d39513SDavid van Moolenbroek off = 0; 1041*25d39513SDavid van Moolenbroek olen = 0; 1042*25d39513SDavid van Moolenbroek aborted = FALSE; 1043*25d39513SDavid van Moolenbroek 1044*25d39513SDavid van Moolenbroek oldlen = mib_getoldlen(oldp); 1045*25d39513SDavid van Moolenbroek if (oldlen > max) 1046*25d39513SDavid van Moolenbroek oldlen = max; 1047*25d39513SDavid van Moolenbroek 1048*25d39513SDavid van Moolenbroek copybudget = (ARG_MAX / PAGE_SIZE) * 2; 1049*25d39513SDavid van Moolenbroek 1050*25d39513SDavid van Moolenbroek vpage = 0; 1051*25d39513SDavid van Moolenbroek spage = 0; 1052*25d39513SDavid van Moolenbroek 1053*25d39513SDavid van Moolenbroek while (count > 0 && off + olen < oldlen && !aborted) { 1054*25d39513SDavid van Moolenbroek /* 1055*25d39513SDavid van Moolenbroek * Start by fetching the page containing the current vector 1056*25d39513SDavid van Moolenbroek * element, if needed. We could limit the fetch to the vector 1057*25d39513SDavid van Moolenbroek * size, but our hope is that for the simple cases, the strings 1058*25d39513SDavid van Moolenbroek * are on the remainder of the same page, so we save a copy 1059*25d39513SDavid van Moolenbroek * call. TODO: since the strings should follow the vector, we 1060*25d39513SDavid van Moolenbroek * could start the copy at the base of the vector. 
1061*25d39513SDavid van Moolenbroek */ 1062*25d39513SDavid van Moolenbroek if (trunc_page(vaddr) != vpage) { 1063*25d39513SDavid van Moolenbroek vpage = trunc_page(vaddr); 1064*25d39513SDavid van Moolenbroek if (sys_datacopy(mp->mp_endpoint, vpage, SELF, 1065*25d39513SDavid van Moolenbroek (vir_bytes)vbuf, PAGE_SIZE) != OK) 1066*25d39513SDavid van Moolenbroek break; 1067*25d39513SDavid van Moolenbroek } 1068*25d39513SDavid van Moolenbroek 1069*25d39513SDavid van Moolenbroek /* Get the current vector element, pointing to a string. */ 1070*25d39513SDavid van Moolenbroek memcpy(&pptr, &vbuf[vaddr - vpage], sizeof(pptr)); 1071*25d39513SDavid van Moolenbroek paddr = (vir_bytes)pptr; 1072*25d39513SDavid van Moolenbroek ppage = trunc_page(paddr); 1073*25d39513SDavid van Moolenbroek if (ppage == 0) 1074*25d39513SDavid van Moolenbroek break; 1075*25d39513SDavid van Moolenbroek 1076*25d39513SDavid van Moolenbroek /* Fetch the string itself, one page at a time at most. */ 1077*25d39513SDavid van Moolenbroek do { 1078*25d39513SDavid van Moolenbroek /* 1079*25d39513SDavid van Moolenbroek * See if the string pointer falls inside either the 1080*25d39513SDavid van Moolenbroek * vector page or the previously fetched string page 1081*25d39513SDavid van Moolenbroek * (if any). If not, fetch a string page. 
1082*25d39513SDavid van Moolenbroek */ 1083*25d39513SDavid van Moolenbroek if (ppage == vpage) { 1084*25d39513SDavid van Moolenbroek buf = vbuf; 1085*25d39513SDavid van Moolenbroek } else if (ppage == spage) { 1086*25d39513SDavid van Moolenbroek buf = sbuf; 1087*25d39513SDavid van Moolenbroek } else { 1088*25d39513SDavid van Moolenbroek if (--copybudget == 0) { 1089*25d39513SDavid van Moolenbroek aborted = TRUE; 1090*25d39513SDavid van Moolenbroek break; 1091*25d39513SDavid van Moolenbroek } 1092*25d39513SDavid van Moolenbroek spage = ppage; 1093*25d39513SDavid van Moolenbroek if (sys_datacopy(mp->mp_endpoint, spage, SELF, 1094*25d39513SDavid van Moolenbroek (vir_bytes)sbuf, PAGE_SIZE) != OK) { 1095*25d39513SDavid van Moolenbroek aborted = TRUE; 1096*25d39513SDavid van Moolenbroek break; 1097*25d39513SDavid van Moolenbroek } 1098*25d39513SDavid van Moolenbroek buf = sbuf; 1099*25d39513SDavid van Moolenbroek } 1100*25d39513SDavid van Moolenbroek 1101*25d39513SDavid van Moolenbroek /* 1102*25d39513SDavid van Moolenbroek * We now have a string fragment in a buffer. See if 1103*25d39513SDavid van Moolenbroek * the string is null terminated. If not, all the data 1104*25d39513SDavid van Moolenbroek * up to the buffer end is part of the string, and the 1105*25d39513SDavid van Moolenbroek * string continues on the next page. 
1106*25d39513SDavid van Moolenbroek */ 1107*25d39513SDavid van Moolenbroek p = &buf[paddr - ppage]; 1108*25d39513SDavid van Moolenbroek pleft = PAGE_SIZE - (paddr - ppage); 1109*25d39513SDavid van Moolenbroek assert(pleft > 0); 1110*25d39513SDavid van Moolenbroek 1111*25d39513SDavid van Moolenbroek if ((q = memchr(p, '\0', pleft)) != NULL) { 1112*25d39513SDavid van Moolenbroek bytes = (size_t)(q - p + 1); 1113*25d39513SDavid van Moolenbroek assert(bytes <= pleft); 1114*25d39513SDavid van Moolenbroek ended = TRUE; 1115*25d39513SDavid van Moolenbroek } else { 1116*25d39513SDavid van Moolenbroek bytes = pleft; 1117*25d39513SDavid van Moolenbroek ended = FALSE; 1118*25d39513SDavid van Moolenbroek } 1119*25d39513SDavid van Moolenbroek 1120*25d39513SDavid van Moolenbroek /* Limit the result to the requested length. */ 1121*25d39513SDavid van Moolenbroek if (off + olen + bytes > oldlen) 1122*25d39513SDavid van Moolenbroek bytes = oldlen - off - olen; 1123*25d39513SDavid van Moolenbroek 1124*25d39513SDavid van Moolenbroek /* 1125*25d39513SDavid van Moolenbroek * Add 'bytes' bytes from string pointer 'p' to the 1126*25d39513SDavid van Moolenbroek * output buffer, copying out its contents to userland 1127*25d39513SDavid van Moolenbroek * if it has filled up. 
1128*25d39513SDavid van Moolenbroek */ 1129*25d39513SDavid van Moolenbroek if (olen + bytes > sizeof(obuf)) { 1130*25d39513SDavid van Moolenbroek oleft = sizeof(obuf) - olen; 1131*25d39513SDavid van Moolenbroek memcpy(&obuf[olen], p, oleft); 1132*25d39513SDavid van Moolenbroek 1133*25d39513SDavid van Moolenbroek if ((r = mib_copyout(oldp, off, obuf, 1134*25d39513SDavid van Moolenbroek sizeof(obuf))) < 0) 1135*25d39513SDavid van Moolenbroek return r; 1136*25d39513SDavid van Moolenbroek off += sizeof(obuf); 1137*25d39513SDavid van Moolenbroek olen = 0; 1138*25d39513SDavid van Moolenbroek 1139*25d39513SDavid van Moolenbroek p += oleft; 1140*25d39513SDavid van Moolenbroek bytes -= oleft; 1141*25d39513SDavid van Moolenbroek } 1142*25d39513SDavid van Moolenbroek if (bytes > 0) { 1143*25d39513SDavid van Moolenbroek memcpy(&obuf[olen], p, bytes); 1144*25d39513SDavid van Moolenbroek olen += bytes; 1145*25d39513SDavid van Moolenbroek } 1146*25d39513SDavid van Moolenbroek 1147*25d39513SDavid van Moolenbroek /* 1148*25d39513SDavid van Moolenbroek * Continue as long as we have not yet found the string 1149*25d39513SDavid van Moolenbroek * end, and we have not yet filled the output buffer. 1150*25d39513SDavid van Moolenbroek */ 1151*25d39513SDavid van Moolenbroek paddr += pleft; 1152*25d39513SDavid van Moolenbroek assert(trunc_page(paddr) == paddr); 1153*25d39513SDavid van Moolenbroek ppage = paddr; 1154*25d39513SDavid van Moolenbroek } while (!ended && off + olen < oldlen); 1155*25d39513SDavid van Moolenbroek 1156*25d39513SDavid van Moolenbroek vaddr += sizeof(char *); 1157*25d39513SDavid van Moolenbroek count--; 1158*25d39513SDavid van Moolenbroek } 1159*25d39513SDavid van Moolenbroek 1160*25d39513SDavid van Moolenbroek /* Copy out any remainder of the output buffer. 
*/ 1161*25d39513SDavid van Moolenbroek if (olen > 0) { 1162*25d39513SDavid van Moolenbroek if ((r = mib_copyout(oldp, off, obuf, olen)) < 0) 1163*25d39513SDavid van Moolenbroek return r; 1164*25d39513SDavid van Moolenbroek off += olen; 1165*25d39513SDavid van Moolenbroek } 1166*25d39513SDavid van Moolenbroek 1167*25d39513SDavid van Moolenbroek assert(off <= oldlen); 1168*25d39513SDavid van Moolenbroek return off; 1169*25d39513SDavid van Moolenbroek } 1170