1 /* $OpenBSD: kern_clock.c,v 1.100 2019/11/02 16:56:17 cheloha Exp $ */ 2 /* $NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $ */ 3 4 /*- 5 * Copyright (c) 1982, 1986, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/timeout.h> 43 #include <sys/kernel.h> 44 #include <sys/limits.h> 45 #include <sys/proc.h> 46 #include <sys/user.h> 47 #include <sys/resourcevar.h> 48 #include <sys/signalvar.h> 49 #include <sys/sysctl.h> 50 #include <sys/sched.h> 51 #include <sys/timetc.h> 52 53 54 #if defined(GPROF) || defined(DDBPROF) 55 #include <sys/gmon.h> 56 #endif 57 58 /* 59 * Clock handling routines. 60 * 61 * This code is written to operate with two timers that run independently of 62 * each other. The main clock, running hz times per second, is used to keep 63 * track of real time. The second timer handles kernel and user profiling, 64 * and does resource use estimation. If the second timer is programmable, 65 * it is randomized to avoid aliasing between the two clocks. For example, 66 * the randomization prevents an adversary from always giving up the cpu 67 * just before its quantum expires. Otherwise, it would never accumulate 68 * cpu ticks. The mean frequency of the second timer is stathz. 69 * 70 * If no second timer exists, stathz will be zero; in this case we drive 71 * profiling and statistics off the main clock. This WILL NOT be accurate; 72 * do not do it unless absolutely necessary. 73 * 74 * The statistics clock may (or may not) be run at a higher rate while 75 * profiling. This profile clock runs at profhz. 
 * We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

int	stathz;			/* statistics clock rate, 0 if none */
int	schedhz;		/* scheduler clock rate, 0 if driven from statclock */
int	profhz;			/* profiling clock rate */
int	profprocs;		/* number of processes currently being profiled */
int	ticks;			/* hz-driven tick counter, advanced by hardclock() */
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

volatile unsigned long jiffies;		/* XXX Linux API for drm(4) */

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;

	/*
	 * Start the tick counters a short interval (15 and 10 minutes)
	 * before they wrap, so that counter-rollover bugs surface soon
	 * after boot instead of months later.
	 */
	ticks = INT_MAX - (15 * 60 * hz);
	jiffies = ULONG_MAX - (10 * 60 * hz);

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 * If there is no separate statistics clock, both profiling and
	 * statistics are driven from the hz clock.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;

	/* For very large HZ, ensure that division by 0 does not occur later */
	if (tickadj == 0)
		tickadj = 1;

	inittimecounter();
}

/*
 * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
 * We don't want to send signals with psignal from hardclock because it makes
 * MULTIPROCESSOR locking very complicated. Instead, to use an idea from
 * FreeBSD, we set a flag on the thread and when it goes to return to
 * userspace it signals itself.
 */

/*
 * The real-time timer, interrupting hz times per second.
 * Runs on every CPU; "frame" describes the interrupted context and is
 * used to tell user mode from kernel mode (CLKF_USERMODE).
 */
void
hardclock(struct clockframe *frame)
{
	struct proc *p;
	struct cpu_info *ci = curcpu();

	/* Skip itimer accounting for system processes and exiting threads. */
	p = curproc;
	if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
		struct process *pr = p->p_p;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL only counts down while in user mode;
		 * ITIMER_PROF counts down in both modes.  When a timer
		 * expires (itimerdecr() == 0), set the pending flag and
		 * let the thread signal itself on return to userspace.
		 */
		if (CLKF_USERMODE(frame) &&
		    timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick_nsec) == 0) {
			atomic_setbits_int(&p->p_flag, P_ALRMPEND);
			need_proftick(p);
		}
		if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pr->ps_timer[ITIMER_PROF], tick_nsec) == 0) {
			atomic_setbits_int(&p->p_flag, P_PROFPEND);
			need_proftick(p);
		}
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/* Trigger a round-robin reschedule once this CPU's quantum is used. */
	if (--ci->ci_schedstate.spc_rrticks <= 0)
		roundrobin(ci);

	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.  Only the primary CPU advances global time
	 * (timecounter, ticks, jiffies) and the timeout wheel.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;

	tc_ticktock();
	ticks++;
	jiffies++;

	/*
	 * Update the timeout wheel.
	 */
	timeout_hardclock_update();
}

/*
 * Compute number of hz in the specified amount of time.
 */
int
tvtohz(const struct timeval *tv)
{
	unsigned long nticks;
	time_t sec;
	long usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (sec < 0 || (sec == 0 && usec <= 0))
		nticks = 0;
	else if (sec <= LONG_MAX / 1000000)
		nticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		nticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		nticks = LONG_MAX;
	/* Clamp to INT_MAX: callers receive an int tick count. */
	if (nticks > INT_MAX)
		nticks = INT_MAX;
	return ((int)nticks);
}

/*
 * Compute number of hz in the specified timespec, rounding the
 * sub-microsecond remainder up so short timeouts never round to zero.
 */
int
tstohz(const struct timespec *ts)
{
	struct timeval tv;
	TIMESPEC_TO_TIMEVAL(&tv, ts);

	/* Round up. */
	if ((ts->tv_nsec % 1000) != 0) {
		tv.tv_usec += 1;
		if (tv.tv_usec >= 1000000) {
			tv.tv_usec -= 1000000;
			tv.tv_sec += 1;
		}
	}

	return (tvtohz(&tv));
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct process *pr)
{
	int s;

	if ((pr->ps_flags & PS_PROFIL) == 0) {
		atomic_setbits_int(&pr->ps_flags, PS_PROFIL);
		/*
		 * First profiled process: speed the statistics clock up
		 * to profhz.  splstatclock() blocks statclock() while the
		 * divider and rate are changed together.
		 */
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct process *pr)
{
	int s;

	if (pr->ps_flags & PS_PROFIL) {
		atomic_clearbits_int(&pr->ps_flags, PS_PROFIL);
		/*
		 * Last profiled process gone: drop the statistics clock
		 * back to its normal rate (stathz).
		 */
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#if defined(GPROF) || defined(DDBPROF)
	struct gmonparam *g;
	u_long i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct proc *p = curproc;
	struct process *pr;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.  psdiv is switched between 1 and
	 * psratio by startprofclock()/stopprofclock(); each CPU picks
	 * up the change here and reprograms its own clock.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}

	if (CLKF_USERMODE(frame)) {
		pr = p->p_p;
		/* Profiling sample: record the user PC on every tick. */
		if (pr->ps_flags & PS_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
		/* Statistics only every psdiv-th tick when running fast. */
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (pr->ps_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#if defined(GPROF) || defined(DDBPROF)
		/*
		 * Kernel statistics are just like addupc_intr, only easier:
		 * bump the histogram bucket covering the interrupted PC.
		 */
		g = ci->ci_gmon;
		if (g != NULL && g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(PROC_PC)
		/* User profiling sample taken while in kernel mode. */
		if (p != NULL && p->p_p->ps_flags & PS_PROFIL)
			addupc_intr(p, PROC_PC(p));
#endif
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - spinning on a lock
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			spc->spc_cp_time[spc->spc_spinning ?
			    CP_SPIN : CP_INTR]++;
		} else if (p != NULL && p != spc->spc_idleproc) {
			p->p_sticks++;
			spc->spc_cp_time[spc->spc_spinning ?
			    CP_SPIN : CP_SYS]++;
		} else
			spc->spc_cp_time[spc->spc_spinning ?
			    CP_SPIN : CP_IDLE]++;
	}
	/* Reload the statistics divider for the next round. */
	spc->spc_pscnt = psdiv;

	if (p != NULL) {
		p->p_cpticks++;
		/*
		 * If no schedclock is provided, call it here at ~~12-25 Hz;
		 * ~~16 Hz is best.  The "& 3" calls schedclock() on every
		 * fourth statclock tick.
		 */
		if (schedhz == 0) {
			if ((++spc->spc_schedticks & 3) == 0)
				schedclock(p);
		}
	}
}

/*
 * Return information about system clocks.
 * Fills a struct clockinfo and hands it to the caller as a read-only
 * sysctl record (newp is rejected by sysctl_rdstruct).
 */
int
sysctl_clockrate(char *where, size_t *sizep, void *newp)
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	memset(&clkinfo, 0, sizeof clkinfo);
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	/* Report hz as stathz when no separate statistics clock exists. */
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, newp, &clkinfo, sizeof(clkinfo)));
}