/*	$OpenBSD: kern_clock.c,v 1.107 2023/03/03 20:16:44 cheloha Exp $	*/
/*	$NetBSD: kern_clock.c,v 1.34 1996/06/09 04:51:03 briggs Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/timetc.h>

#if defined(GPROF) || defined(DDBPROF)
#include <sys/gmon.h>
#endif

#include "dt.h"
#if NDT > 0
#include <dev/dt/dtvar.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other. The main clock, running hz times per second, is used to keep
 * track of real time. The second timer handles kernel and user profiling,
 * and does resource use estimation. If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks. For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires. Otherwise, it would never accumulate
 * cpu ticks. The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock. This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling. This profile clock runs at profhz. We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics. (For profiling, every tick counts.)
 */
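/*
 * Illustrative numbers (an assumed example, not a requirement): a machine
 * might run the statistics clock at stathz = 128 and the profile clock at
 * profhz = 1024. Then psratio = profhz / stathz = 8, and while any process
 * is being profiled the statistics code counts only every 8th tick, so
 * statistics still accumulate at the stathz rate:
 *
 *	stathz  = 128;
 *	profhz  = 1024;
 *	psratio = profhz / stathz;	// 8: sample 8x, count 1-in-8
 */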
int stathz;
int schedhz;
int profhz;
int profprocs;
int ticks = INT_MAX - (15 * 60 * HZ);
static int psdiv, pscnt;		/* prof => stat divider */
int psratio;				/* ratio: prof / stat */

volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ);

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz.
	 */
	psratio = profhz / stathz;

	inittimecounter();
}

/*
 * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
 * We don't want to send signals with psignal from hardclock because it makes
 * MULTIPROCESSOR locking very complicated. Instead, to use an idea from
 * FreeBSD, we set a flag on the thread and when it goes to return to
 * userspace it signals itself.
 */

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct proc *p;
	struct cpu_info *ci = curcpu();

	p = curproc;
	if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
		struct process *pr = p->p_p;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		if (CLKF_USERMODE(frame) &&
		    timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick_nsec) == 0) {
			atomic_setbits_int(&p->p_flag, P_ALRMPEND);
			need_proftick(p);
		}
		if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pr->ps_timer[ITIMER_PROF], tick_nsec) == 0) {
			atomic_setbits_int(&p->p_flag, P_PROFPEND);
			need_proftick(p);
		}
	}

	if (--ci->ci_schedstate.spc_rrticks <= 0)
		roundrobin(ci);

#if NDT > 0
	DT_ENTER(profile, NULL);
	if (CPU_IS_PRIMARY(ci))
		DT_ENTER(interval, NULL);
#endif

	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;

	tc_ticktock();
	ticks++;
	jiffies++;

	/*
	 * Update the timeout wheel.
	 */
	timeout_hardclock_update();
}
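/*
 * A sketch of the consuming side of the P_ALRMPEND/P_PROFPEND flags set
 * above. The real code lives in the machine-independent return-to-user
 * path, not in this file; this is only to illustrate the deferral
 * described in the comment before hardclock():
 *
 *	if (p->p_flag & P_PROFPEND) {
 *		atomic_clearbits_int(&p->p_flag, P_PROFPEND);
 *		// ITIMER_PROF expired: post SIGPROF from process
 *		// context, where signal locking is uncomplicated.
 *		psignal(p, SIGPROF);
 *	}
 *	if (p->p_flag & P_ALRMPEND) {
 *		atomic_clearbits_int(&p->p_flag, P_ALRMPEND);
 *		// ITIMER_VIRTUAL expired: post SIGVTALRM.
 *		psignal(p, SIGVTALRM);
 *	}
 */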
/*
 * Compute number of hz in the specified amount of time.
 */
int
tvtohz(const struct timeval *tv)
{
	unsigned long nticks;
	time_t sec;
	long usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * fits in a long, then the total number of usecs will
	 * fit in an unsigned long. Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire. Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance. This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (sec < 0 || (sec == 0 && usec <= 0))
		nticks = 0;
	else if (sec <= LONG_MAX / 1000000)
		nticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
		    / tick + 1;
	else if (sec <= LONG_MAX / hz)
		nticks = sec * hz
		    + ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		nticks = LONG_MAX;
	if (nticks > INT_MAX)
		nticks = INT_MAX;
	return ((int)nticks);
}

int
tstohz(const struct timespec *ts)
{
	struct timeval tv;
	TIMESPEC_TO_TIMEVAL(&tv, ts);

	/* Round up. */
	if ((ts->tv_nsec % 1000) != 0) {
		tv.tv_usec += 1;
		if (tv.tv_usec >= 1000000) {
			tv.tv_usec -= 1000000;
			tv.tv_sec += 1;
		}
	}

	return (tvtohz(&tv));
}
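/*
 * A worked example of the tvtohz() rounding, assuming hz = 100 and hence
 * tick = 10000 usec (illustrative values; hz is machine-dependent):
 *
 *	tv = { .tv_sec = 0, .tv_usec = 15000 };
 *	nticks = (0 * 1000000 + 15000 + 9999) / 10000 + 1
 *	       = 24999 / 10000 + 1
 *	       = 2 + 1
 *	       = 3
 *
 * The trailing "+ 1" pads for the partially elapsed current tick, so a
 * 15ms timeout can never fire before 15ms of real time have passed.
 */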
/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct process *pr)
{
	int s;

	if ((pr->ps_flags & PS_PROFIL) == 0) {
		atomic_setbits_int(&pr->ps_flags, PS_PROFIL);
		if (++profprocs == 1) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct process *pr)
{
	int s;

	if (pr->ps_flags & PS_PROFIL) {
		atomic_clearbits_int(&pr->ps_flags, PS_PROFIL);
		if (--profprocs == 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock. Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#if defined(GPROF) || defined(DDBPROF)
	struct gmonparam *g;
	u_long i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct proc *p = curproc;
	struct process *pr;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}

	if (CLKF_USERMODE(frame)) {
		pr = p->p_p;
		if (pr->ps_flags & PS_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (pr->ps_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#if defined(GPROF) || defined(DDBPROF)
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = ci->ci_gmon;
		if (g != NULL && g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (p != NULL && p->p_p->ps_flags & PS_PROFIL)
			addupc_intr(p, PROC_PC(p));
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - spinning on a lock
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			spc->spc_cp_time[spc->spc_spinning ?
			    CP_SPIN : CP_INTR]++;
		} else if (p != NULL && p != spc->spc_idleproc) {
			p->p_sticks++;
			spc->spc_cp_time[spc->spc_spinning ?
			    CP_SPIN : CP_SYS]++;
		} else
			spc->spc_cp_time[spc->spc_spinning ?
			    CP_SPIN : CP_IDLE]++;
	}
	spc->spc_pscnt = psdiv;

	if (p != NULL) {
		p->p_cpticks++;
		/*
		 * If no schedclock is provided, call it here at
		 * ~12-25 Hz; ~16 Hz is best.
		 */
		if (schedhz == 0) {
			if ((++spc->spc_schedticks & 3) == 0)
				schedclock(p);
		}
	}
}

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(char *where, size_t *sizep, void *newp)
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	memset(&clkinfo, 0, sizeof clkinfo);
	clkinfo.tick = tick;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz;
	return (sysctl_rdstruct(where, sizep, newp, &clkinfo, sizeof(clkinfo)));
}
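/*
 * For reference, a minimal userland sketch (not kernel code) of reading
 * the rates exported by sysctl_clockrate() above, via the kern.clockrate
 * node of sysctl(2):
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/time.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *		struct clockinfo ci;
 *		size_t len = sizeof(ci);
 *
 *		// Fetch the read-only struct clockinfo from the kernel.
 *		if (sysctl(mib, 2, &ci, &len, NULL, 0) == -1)
 *			return 1;
 *		printf("hz=%d tick=%d stathz=%d profhz=%d\n",
 *		    ci.hz, ci.tick, ci.stathz, ci.profhz);
 *		return 0;
 *	}
 */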