/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.22 (Berkeley) 07/08/92
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gmon.h"
extern u_short *kcount;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
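
/*
 * For instance, with hz = 100 (tick = 10000), bumping { 99, 995000 }
 * by one tick overflows the microsecond field and yields { 100, 5000 }.
 * The usec argument must stay well under one second, since the macro
 * performs at most one carry into tv_sec.
 */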

int	stathz;
int	profhz;
int	profprocs;
static int psratio, psdiv, pscnt;	/* prof => stat divider */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.
	 */
	if (timedelta == 0) {
		BUMPTIME(&time, tick);
		BUMPTIME(&mono_time, tick);
	} else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
		BUMPTIME(&mono_time, delta);
	}

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}
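
/*
 * For example, three timeouts due 3, 5, and 10 ticks from now sit in
 * the queue with c_time values 3, 2, and 5, each a delta from its
 * predecessor.  Only the head entry is decremented per tick; an entry
 * that has gone negative has been due for that many ticks, waiting
 * for softclock() to get around to it.
 */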

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
void
timeout(func, arg, t)
	void (*func) __P((void *));
	void *arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s;

	s = splhigh();
	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
	void (*func) __P((void *));
	void *arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
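
/*
 * Usage sketch (hypothetical caller): arrange for wakeup() to be
 * called on channel ``chan'' about two seconds from now, cancelling
 * the request if the event happens first:
 *
 *	timeout(wakeup, (void *)chan, 2 * hz);
 *	...
 *	untimeout(wakeup, (void *)chan);
 *
 * The third argument is a tick count, so callers scale by hz.
 */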

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & SPROFIL) == 0) {
		p->p_flag |= SPROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & SPROFIL) {
		p->p_flag &= ~SPROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}
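
/*
 * Example of the rate switch above (values hypothetical): with
 * stathz = 128 and profhz = 1024, psratio is 8.  While any process is
 * being profiled the statistics clock runs at 1024 Hz; statclock()
 * takes a profile sample on every tick but lets pscnt count down from
 * psdiv = 8 before doing statistics, so those still accumulate at the
 * usual 128 Hz rate.
 */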

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize)
				kcount[i / (HISTFRACTION * sizeof(*kcount))]++;
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}
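
/*
 * A sketch of the estimator's dynamics (kern_synch.c holds the
 * authoritative formula): p_cpu rises by one per statistics tick
 * while the process runs, and the scheduler decays it roughly once
 * per second by a factor of about 2*loadav / (2*loadav + 1).  Raised
 * to the 5*loadav power, that factor is approximately e**-2.5, or
 * about 0.1, which is where the ``90% forgotten in 5*loadav seconds''
 * figure above comes from.
 */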

/*
 * Return information about system clocks.
 */
/* ARGSUSED */
kinfo_clockrate(op, where, acopysize, arg, aneeded)
	int op;
	register char *where;
	int *acopysize, arg, *aneeded;
{
	int buflen, error;
	struct clockinfo clockinfo;

	*aneeded = sizeof(clockinfo);
	if (where == NULL)
		return (0);
	/*
	 * Check for enough buffering.
	 */
	buflen = *acopysize;
	if (buflen < sizeof(clockinfo)) {
		*acopysize = 0;
		return (0);
	}
	/*
	 * Copyout clockinfo structure.
	 */
	clockinfo.hz = hz;
	clockinfo.tick = tick;
	clockinfo.profhz = profhz;
	clockinfo.stathz = stathz ? stathz : hz;
	if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo)))
		return (error);
	*acopysize = sizeof(clockinfo);
	return (0);
}
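
/*
 * Calling convention, as implemented above: ``where'' is a user-space
 * buffer address, since the structure is returned with copyout().  A
 * caller may pass where == NULL to learn the required size through
 * *aneeded, then repeat the request with a buffer of at least
 * sizeof(struct clockinfo).  On success *acopysize holds the number
 * of bytes copied out; if the buffer is too small, *acopysize is set
 * to zero and nothing is copied.
 */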