/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/sysproto.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/sysent.h>
#include <sys/sysunion.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>
#include <sys/kern_syscall.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>

#include <sys/msgport2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>

struct timezone tz;

/*
 * Time of day and interval timer support.
 *
 * These routines provide the kernel entry points to get and set
 * the time-of-day and per-process interval timers.  Subroutines
 * here provide support for adding and subtracting timeval structures
 * and decrementing interval timers, optionally reloading the interval
 * timers when they expire.
 */

static int	settime(struct timeval *);
static void	timevalfix(struct timeval *);

/*
 * Nanosleep tries very hard to sleep for a precisely requested time
 * interval, down to 1uS.  The administrator can impose a minimum delay
 * and a delay below which we hard-loop instead of initiating a timer
 * interrupt and sleeping.
 *
 * For machines under high load it might be beneficial to increase min_us
 * to e.g. 1000uS (1ms) so spinning processes sleep meaningfully.
 */
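
/*
 * For example, an administrator could raise both thresholds on a
 * heavily loaded machine with something like the following (an
 * illustrative sketch; the values are only a suggestion):
 *
 *	sysctl kern.nanosleep_min_us=1000
 *	sysctl kern.nanosleep_hard_us=1000
 */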

static int nanosleep_min_us = 10;
static int nanosleep_hard_us = 100;
SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW,
	   &nanosleep_min_us, 0, "");
SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW,
	   &nanosleep_hard_us, 0, "");

static int
settime(struct timeval *tv)
{
	struct timeval delta, tv1, tv2;
	static struct timeval maxtime, laststep;
	struct timespec ts;
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	microtime(&tv1);
	delta = *tv;
	timevalsub(&delta, &tv1);

	/*
	 * If the system is secure, we do not allow the time to be
	 * set to a value earlier than 1 second less than the highest
	 * time we have yet seen.  The worst a miscreant can do in
	 * this circumstance is "freeze" time; he cannot go back
	 * to the past.
	 *
	 * We similarly do not allow the clock to be stepped more
	 * than one second, nor more than once per second.  This allows
	 * a miscreant to make the clock march double-time, but no worse.
	 */
	if (securelevel > 1) {
		if (delta.tv_sec < 0 || delta.tv_usec < 0) {
			/*
			 * Update maxtime to latest time we've seen.
			 */
			if (tv1.tv_sec > maxtime.tv_sec)
				maxtime = tv1;
			tv2 = *tv;
			timevalsub(&tv2, &maxtime);
			if (tv2.tv_sec < -1) {
				tv->tv_sec = maxtime.tv_sec - 1;
				kprintf("Time adjustment clamped to -1 second\n");
			}
		} else {
			if (tv1.tv_sec == laststep.tv_sec) {
				crit_exit();
				return (EPERM);
			}
			if (delta.tv_sec > 1) {
				tv->tv_sec = tv1.tv_sec + 1;
				kprintf("Time adjustment clamped to +1 second\n");
			}
			laststep = *tv;
		}
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = tv->tv_usec * 1000;
	set_timeofday(&ts);
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));

	resettodr();
	return (0);
}

/*
 * MPSAFE
 */
int
kern_clock_gettime(clockid_t clock_id, struct timespec *ats)
{
	int error = 0;
	struct proc *p;

	switch(clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
		nanotime(ats);
		break;
	case CLOCK_REALTIME_FAST:
		getnanotime(ats);
		break;
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
		nanouptime(ats);
		break;
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME_FAST:
		getnanouptime(ats);
		break;
	case CLOCK_VIRTUAL:
		p = curproc;
		ats->tv_sec = p->p_timer[ITIMER_VIRTUAL].it_value.tv_sec;
		ats->tv_nsec = p->p_timer[ITIMER_VIRTUAL].it_value.tv_usec *
			       1000;
		break;
	case CLOCK_PROF:
		p = curproc;
		ats->tv_sec = p->p_timer[ITIMER_PROF].it_value.tv_sec;
		ats->tv_nsec = p->p_timer[ITIMER_PROF].it_value.tv_usec *
			       1000;
		break;
	case CLOCK_SECOND:
		ats->tv_sec = time_second;
		ats->tv_nsec = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * MPSAFE
 */
int
sys_clock_gettime(struct clock_gettime_args *uap)
{
	struct timespec ats;
	int error;

	error = kern_clock_gettime(uap->clock_id, &ats);
	if (error == 0)
		error = copyout(&ats, uap->tp, sizeof(ats));

	return (error);
}
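
/*
 * Illustrative userland use of the syscall above (a sketch, not part
 * of the kernel proper): CLOCK_MONOTONIC is backed by nanouptime()
 * and is the usual choice for measuring elapsed time, since it is
 * unaffected by settime() steps.
 *
 *	#include <time.h>
 *
 *	struct timespec t0, t1;
 *
 *	clock_gettime(CLOCK_MONOTONIC, &t0);
 *	// ... work to be timed ...
 *	clock_gettime(CLOCK_MONOTONIC, &t1);
 */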

int
kern_clock_settime(clockid_t clock_id, struct timespec *ats)
{
	struct thread *td = curthread;
	struct timeval atv;
	int error;

	if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
		return (error);
	if (clock_id != CLOCK_REALTIME)
		return (EINVAL);
	if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
		return (EINVAL);

	TIMESPEC_TO_TIMEVAL(&atv, ats);
	error = settime(&atv);
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_clock_settime(struct clock_settime_args *uap)
{
	struct timespec ats;
	int error;

	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
		return (error);

	get_mplock();
	error = kern_clock_settime(uap->clock_id, &ats);
	rel_mplock();
	return (error);
}

/*
 * MPSAFE
 */
int
kern_clock_getres(clockid_t clock_id, struct timespec *ts)
{
	int error;

	switch(clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_FAST:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_FAST:
	case CLOCK_UPTIME_PRECISE:
		/*
		 * Round up the result of the division cheaply
		 * by adding 1.  Rounding up is especially important
		 * if rounding down would give 0.  Perfect rounding
		 * is unimportant.
		 */
		ts->tv_sec = 0;
		ts->tv_nsec = 1000000000 / sys_cputimer->freq + 1;
		error = 0;
		break;
	case CLOCK_VIRTUAL:
	case CLOCK_PROF:
		/* Accurately round up here because we can do so cheaply. */
		ts->tv_sec = 0;
		ts->tv_nsec = (1000000000 + hz - 1) / hz;
		error = 0;
		break;
	case CLOCK_SECOND:
		ts->tv_sec = 1;
		ts->tv_nsec = 0;
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	return(error);
}

/*
 * MPSAFE
 */
int
sys_clock_getres(struct clock_getres_args *uap)
{
	int error;
	struct timespec ts;

	error = kern_clock_getres(uap->clock_id, &ts);
	if (error == 0)
		error = copyout(&ts, uap->tp, sizeof(ts));

	return (error);
}
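
/*
 * Worked example of the two rounding styles above (the numbers are
 * illustrative only): with hz = 100, the CLOCK_VIRTUAL resolution is
 * (1000000000 + 99) / 100 = 10000000 ns, i.e. exactly one tick rounded
 * up.  With a hypothetical 3579545 Hz cputimer, the CLOCK_REALTIME
 * resolution is 1000000000 / 3579545 + 1 = 280 ns; the "+ 1" may
 * overshoot by a fraction of a nanosecond but can never report 0.
 */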

/*
 * nanosleep1()
 *
 *	This is a general helper function for nanosleep() (aka sleep() aka
 *	usleep()).
 *
 *	If there is less than one tick's worth of time left and
 *	we haven't done a yield, or the remaining microseconds is
 *	ridiculously low, do a yield.  This avoids having
 *	to deal with systimer overheads when the system is under
 *	heavy loads.  If we have done a yield already then use
 *	a systimer and an uninterruptible thread wait.
 *
 *	If there is more than a tick's worth of time left,
 *	calculate the baseline ticks and use an interruptible
 *	tsleep, then handle the fine-grained delay on the next
 *	loop.  This usually results in two sleeps occurring, a long one
 *	and a short one.
 *
 * MPSAFE
 */
static void
ns1_systimer(systimer_t info, int in_ipi __unused,
    struct intrframe *frame __unused)
{
	lwkt_schedule(info->data);
}

int
nanosleep1(struct timespec *rqt, struct timespec *rmt)
{
	static int nanowait;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
		return (EINVAL);
	/* XXX: imho this should return EINVAL at least for tv_sec < 0 */
	if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
		return (0);
	nanouptime(&ts);
	timespecadd(&ts, rqt);		/* ts = target timestamp compare */
	TIMESPEC_TO_TIMEVAL(&tv, rqt);	/* tv = sleep interval */

	for (;;) {
		int ticks;
		struct systimer info;

		ticks = tv.tv_usec / ustick;	/* approximate */

		if (tv.tv_sec == 0 && ticks == 0) {
			thread_t td = curthread;
			if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us)
				tv.tv_usec = nanosleep_min_us;
			if (tv.tv_usec < nanosleep_hard_us) {
				lwkt_user_yield();
				cpu_pause();
			} else {
				crit_enter_quick(td);
				systimer_init_oneshot(&info, ns1_systimer,
				    td, tv.tv_usec);
				lwkt_deschedule_self(td);
				crit_exit_quick(td);
				lwkt_switch();
				systimer_del(&info); /* make sure it's gone */
			}
			error = iscaught(td->td_lwp);
		} else if (tv.tv_sec == 0) {
			error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
		} else {
			ticks = tvtohz_low(&tv); /* also handles overflow */
			error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
		}
		nanouptime(&ts2);
		if (error && error != EWOULDBLOCK) {
			if (error == ERESTART)
				error = EINTR;
			if (rmt != NULL) {
				timespecsub(&ts, &ts2);
				if (ts.tv_sec < 0)
					timespecclear(&ts);
				*rmt = ts;
			}
			return (error);
		}
		if (timespeccmp(&ts2, &ts, >=))
			return (0);
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
}

/*
 * MPSAFE
 */
int
sys_nanosleep(struct nanosleep_args *uap)
{
	int error;
	struct timespec rqt;
	struct timespec rmt;

	error = copyin(uap->rqtp, &rqt, sizeof(rqt));
	if (error)
		return (error);

	error = nanosleep1(&rqt, &rmt);

	/*
	 * copyout the residual if nanosleep was interrupted.
	 */
	if (error && uap->rmtp) {
		int error2;

		error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
		if (error2)
			error = error2;
	}
	return (error);
}
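
/*
 * Illustrative userland pattern for the rmtp residual copied out above
 * (a sketch, not part of the kernel proper): restart the sleep with
 * the remainder whenever a signal interrupts it.
 *
 *	#include <errno.h>
 *	#include <time.h>
 *
 *	struct timespec req = { 2, 0 };	// sleep 2 seconds total
 *	struct timespec rem;
 *
 *	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *		req = rem;		// resume with the residual
 */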

/*
 * MPSAFE
 */
int
sys_gettimeofday(struct gettimeofday_args *uap)
{
	struct timeval atv;
	int error = 0;

	if (uap->tp) {
		microtime(&atv);
		if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
		    sizeof (atv))))
			return (error);
	}
	if (uap->tzp)
		error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
		    sizeof (tz));
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_settimeofday(struct settimeofday_args *uap)
{
	struct thread *td = curthread;
	struct timeval atv;
	struct timezone atz;
	int error;

	if ((error = priv_check(td, PRIV_SETTIMEOFDAY)))
		return (error);
	/*
	 * Verify all parameters before changing time.
	 *
	 * NOTE: We do not allow the time to be set to 0.0, which also by
	 *	 happy coincidence works around a pkgsrc bulk build bug.
	 */
	if (uap->tv) {
		if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
		    sizeof(atv))))
			return (error);
		if (atv.tv_usec < 0 || atv.tv_usec >= 1000000)
			return (EINVAL);
		if (atv.tv_sec == 0 && atv.tv_usec == 0)
			return (EINVAL);
	}
	if (uap->tzp &&
	    (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
		return (error);

	get_mplock();
	if (uap->tv && (error = settime(&atv))) {
		rel_mplock();
		return (error);
	}
	rel_mplock();
	if (uap->tzp)
		tz = atz;
	return (0);
}

static void
kern_adjtime_common(void)
{
	if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) ||
	    (ntp_delta < 0 && ntp_delta > -ntp_default_tick_delta))
		ntp_tick_delta = ntp_delta;
	else if (ntp_delta > ntp_big_delta)
		ntp_tick_delta = 10 * ntp_default_tick_delta;
	else if (ntp_delta < -ntp_big_delta)
		ntp_tick_delta = -10 * ntp_default_tick_delta;
	else if (ntp_delta > 0)
		ntp_tick_delta = ntp_default_tick_delta;
	else
		ntp_tick_delta = -ntp_default_tick_delta;
}

void
kern_adjtime(int64_t delta, int64_t *odelta)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	*odelta = ntp_delta;
	ntp_delta = delta;
	kern_adjtime_common();
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}

static void
kern_get_ntp_delta(int64_t *delta)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	*delta = ntp_delta;
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}

void
kern_reladjtime(int64_t delta)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	ntp_delta += delta;
	kern_adjtime_common();
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}

static void
kern_adjfreq(int64_t rate)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	ntp_tick_permanent = rate;
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}
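
/*
 * Illustrative summary of the slew selection in kern_adjtime_common()
 * above, which has three regimes: a residual ntp_delta smaller than
 * one default per-tick step is applied in full on the next tick;
 * larger deltas are slewed at one default step per tick; and once
 * |ntp_delta| exceeds ntp_big_delta the slew rate is boosted to ten
 * default steps per tick.
 */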

/*
 * MPALMOSTSAFE
 */
int
sys_adjtime(struct adjtime_args *uap)
{
	struct thread *td = curthread;
	struct timeval atv;
	int64_t ndelta, odelta;
	int error;

	if ((error = priv_check(td, PRIV_ADJTIME)))
		return (error);
	error = copyin(uap->delta, &atv, sizeof(struct timeval));
	if (error)
		return (error);

	/*
	 * Compute the total correction and the rate at which to apply it.
	 * Round the adjustment down to a whole multiple of the per-tick
	 * delta, so that after some number of incremental changes in
	 * hardclock(), tickdelta will become zero, lest the correction
	 * overshoot and start taking us away from the desired final time.
	 */
	ndelta = (int64_t)atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
	get_mplock();
	kern_adjtime(ndelta, &odelta);
	rel_mplock();

	if (uap->olddelta) {
		atv.tv_sec = odelta / 1000000000;
		atv.tv_usec = odelta % 1000000000 / 1000;
		copyout(&atv, uap->olddelta, sizeof(struct timeval));
	}
	return (0);
}

static int
sysctl_adjtime(SYSCTL_HANDLER_ARGS)
{
	int64_t delta;
	int error;

	if (req->newptr != NULL) {
		if (priv_check(curthread, PRIV_ROOT))
			return (EPERM);
		error = SYSCTL_IN(req, &delta, sizeof(delta));
		if (error)
			return (error);
		kern_reladjtime(delta);
	}

	if (req->oldptr)
		kern_get_ntp_delta(&delta);
	error = SYSCTL_OUT(req, &delta, sizeof(delta));
	return (error);
}

/*
 * delta is in nanoseconds.
 */
static int
sysctl_delta(SYSCTL_HANDLER_ARGS)
{
	int64_t delta, old_delta;
	int error;

	if (req->newptr != NULL) {
		if (priv_check(curthread, PRIV_ROOT))
			return (EPERM);
		error = SYSCTL_IN(req, &delta, sizeof(delta));
		if (error)
			return (error);
		kern_adjtime(delta, &old_delta);
	}

	if (req->oldptr != NULL)
		kern_get_ntp_delta(&old_delta);
	error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta));
	return (error);
}

/*
 * frequency is in nanoseconds per second shifted left 32.
 * kern_adjfreq() needs it in nanoseconds per tick shifted left 32.
 */
static int
sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
{
	int64_t freqdelta;
	int error;

	if (req->newptr != NULL) {
		if (priv_check(curthread, PRIV_ROOT))
			return (EPERM);
		error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
		if (error)
			return (error);

		freqdelta /= hz;
		kern_adjfreq(freqdelta);
	}

	if (req->oldptr != NULL)
		freqdelta = ntp_tick_permanent * hz;
	error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
	if (error)
		return (error);

	return (0);
}

SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_adjfreq, "Q", "permanent correction per second");
SYSCTL_PROC(_kern_ntp, OID_AUTO, delta,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_delta, "Q", "one-time delta");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
    &ntp_big_delta, sizeof(ntp_big_delta), "Q",
    "threshold for fast adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
    &ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
    "per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
    &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
    "default per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
    &ntp_leap_second, sizeof(ntp_leap_second), "LU",
    "next leap second");
SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
    &ntp_leap_insert, 0, "insert or remove leap second");
SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_adjtime, "Q", "relative adjust for delta");
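
/*
 * Illustrative userland access to the nodes above (a sketch, not part
 * of the kernel proper): the quad-sized values are written as 64-bit
 * integers via sysctlbyname(3).
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *
 *	int64_t delta = 500000000;	// slew forward 0.5s, in nanoseconds
 *
 *	sysctlbyname("kern.ntp.delta", NULL, NULL, &delta, sizeof(delta));
 */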

/*
 * Get value of an interval timer.  The process virtual and
 * profiling virtual time timers are kept in the p_stats area, since
 * they can be swapped out.  These are kept internally in the
 * way they are specified externally: in time until they expire.
 *
 * The real time interval timer is kept in the process table slot
 * for the process, and its value (it_value) is kept as an
 * absolute time rather than as a delta, so that it is easy to keep
 * periodic real-time signals from drifting.
 *
 * Virtual time timers are processed in the hardclock() routine of
 * kern_clock.c.  The real time timer is processed by a timeout
 * routine, called from the softclock() routine.  Since a callout
 * may be delayed in real time due to interrupt processing in the system,
 * it is possible for the real time timeout routine (realitexpire, given
 * below) to be delayed in real time past when it is supposed to occur.
 * It does not suffice, therefore, to reload the real timer .it_value
 * from the real time timer's .it_interval.  Rather, we compute the next
 * time in absolute time the timer should go off.
 *
 * MPALMOSTSAFE
 */
int
sys_getitimer(struct getitimer_args *uap)
{
	struct proc *p = curproc;
	struct timeval ctv;
	struct itimerval aitv;

	if (uap->which > ITIMER_PROF)
		return (EINVAL);
	lwkt_gettoken(&p->p_token);
	if (uap->which == ITIMER_REAL) {
		/*
		 * Convert from absolute to relative time in .it_value
		 * part of real time timer.  If time for real time timer
		 * has passed return 0, else return difference between
		 * current time and time for the timer to go off.
		 */
		aitv = p->p_realtimer;
		if (timevalisset(&aitv.it_value)) {
			getmicrouptime(&ctv);
			if (timevalcmp(&aitv.it_value, &ctv, <))
				timevalclear(&aitv.it_value);
			else
				timevalsub(&aitv.it_value, &ctv);
		}
	} else {
		aitv = p->p_timer[uap->which];
	}
	lwkt_reltoken(&p->p_token);
	return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
}

/*
 * MPALMOSTSAFE
 */
int
sys_setitimer(struct setitimer_args *uap)
{
	struct itimerval aitv;
	struct timeval ctv;
	struct itimerval *itvp;
	struct proc *p = curproc;
	int error;

	if (uap->which > ITIMER_PROF)
		return (EINVAL);
	itvp = uap->itv;
	if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
	    sizeof(struct itimerval))))
		return (error);
	if ((uap->itv = uap->oitv) &&
	    (error = sys_getitimer((struct getitimer_args *)uap)))
		return (error);
	if (itvp == NULL)
		return (0);
	if (itimerfix(&aitv.it_value))
		return (EINVAL);
	if (!timevalisset(&aitv.it_value))
		timevalclear(&aitv.it_interval);
	else if (itimerfix(&aitv.it_interval))
		return (EINVAL);
	lwkt_gettoken(&p->p_token);
	if (uap->which == ITIMER_REAL) {
		if (timevalisset(&p->p_realtimer.it_value))
			callout_stop_sync(&p->p_ithandle);
		if (timevalisset(&aitv.it_value))
			callout_reset(&p->p_ithandle,
			    tvtohz_high(&aitv.it_value), realitexpire, p);
		getmicrouptime(&ctv);
		timevaladd(&aitv.it_value, &ctv);
		p->p_realtimer = aitv;
	} else {
		p->p_timer[uap->which] = aitv;
		switch(uap->which) {
		case ITIMER_VIRTUAL:
			p->p_flags &= ~P_SIGVTALRM;
			break;
		case ITIMER_PROF:
			p->p_flags &= ~P_SIGPROF;
			break;
		}
	}
	lwkt_reltoken(&p->p_token);
	return (0);
}
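
/*
 * Illustrative userland use of the interval timers (a sketch, not part
 * of the kernel proper): arm a repeating 100ms ITIMER_REAL, which
 * delivers SIGALRM on each expiry.  The signal handler is assumed to
 * exist elsewhere.
 *
 *	#include <sys/time.h>
 *	#include <signal.h>
 *
 *	struct itimerval itv;
 *
 *	itv.it_value.tv_sec = 0;
 *	itv.it_value.tv_usec = 100000;	// first expiry in 100ms
 *	itv.it_interval = itv.it_value;	// then every 100ms
 *	signal(SIGALRM, handler);	// 'handler' is hypothetical
 *	setitimer(ITIMER_REAL, &itv, NULL);
 */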

/*
 * Real interval timer expired:
 * send process whose timer expired an alarm signal.
 * If time is not set up to reload, then just return.
 * Else compute next time timer should go off which is > current time.
 * This is where delay in processing this timeout causes multiple
 * SIGALRM calls to be compressed into one.
 * tvtohz_high() always adds 1 to allow for the time until the next clock
 * interrupt being strictly less than 1 clock tick, but we don't want
 * that here since we want to appear to be in sync with the clock
 * interrupt even when we're delayed.
 */
void
realitexpire(void *arg)
{
	struct proc *p;
	struct timeval ctv, ntv;

	p = (struct proc *)arg;
	PHOLD(p);
	lwkt_gettoken(&p->p_token);
	ksignal(p, SIGALRM);
	if (!timevalisset(&p->p_realtimer.it_interval)) {
		timevalclear(&p->p_realtimer.it_value);
		goto done;
	}
	for (;;) {
		timevaladd(&p->p_realtimer.it_value,
		    &p->p_realtimer.it_interval);
		getmicrouptime(&ctv);
		if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
			ntv = p->p_realtimer.it_value;
			timevalsub(&ntv, &ctv);
			callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
			    realitexpire, p);
			goto done;
		}
	}
done:
	lwkt_reltoken(&p->p_token);
	PRELE(p);
}

/*
 * Check that a proposed value to load into the .it_value or
 * .it_interval part of an interval timer is acceptable, and
 * fix it to have at least minimal value (i.e. if it is less
 * than the resolution of the clock, round it up.)
 *
 * MPSAFE
 */
int
itimerfix(struct timeval *tv)
{
	if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
	    tv->tv_usec < 0 || tv->tv_usec >= 1000000)
		return (EINVAL);
	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < ustick)
		tv->tv_usec = ustick;
	return (0);
}

/*
 * Decrement an interval timer by a specified number
 * of microseconds, which must be less than a second,
 * i.e. < 1000000.  If the timer expires, then reload
 * it.  In this case, carry over (usec - old value) to
 * reduce the value reloaded into the timer so that
 * the timer does not drift.  This routine assumes
 * that it is called in a context where the timers
 * on which it is operating cannot change in value.
 */
int
itimerdecr(struct itimerval *itp, int usec)
{
	if (itp->it_value.tv_usec < usec) {
		if (itp->it_value.tv_sec == 0) {
			/* expired, and already in next interval */
			usec -= itp->it_value.tv_usec;
			goto expire;
		}
		itp->it_value.tv_usec += 1000000;
		itp->it_value.tv_sec--;
	}
	itp->it_value.tv_usec -= usec;
	usec = 0;
	if (timevalisset(&itp->it_value))
		return (1);
	/* expired, exactly at end of interval */
expire:
	if (timevalisset(&itp->it_interval)) {
		itp->it_value = itp->it_interval;
		itp->it_value.tv_usec -= usec;
		if (itp->it_value.tv_usec < 0) {
			itp->it_value.tv_usec += 1000000;
			itp->it_value.tv_sec--;
		}
	} else
		itp->it_value.tv_usec = 0;	/* sec is already 0 */
	return (0);
}
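
/*
 * Worked example of the anti-drift carry in itimerdecr() (the numbers
 * are illustrative only): with it_value = 0.000300 and it_interval =
 * 0.010000, a decrement of 1000us overshoots expiry by 700us.  The
 * timer therefore reloads to 0.010000 - 0.000700 = 0.009300, so the
 * long-run expiry cadence stays on the 10ms grid instead of drifting
 * later by the overshoot on every reload.
 */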

/*
 * Add and subtract routines for timevals.
 * N.B.: subtract routine doesn't deal with
 * results which are before the beginning,
 * it just gets very confused in this case.
 * Caveat emptor.
 */
void
timevaladd(struct timeval *t1, const struct timeval *t2)
{
	t1->tv_sec += t2->tv_sec;
	t1->tv_usec += t2->tv_usec;
	timevalfix(t1);
}

void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
	t1->tv_sec -= t2->tv_sec;
	t1->tv_usec -= t2->tv_usec;
	timevalfix(t1);
}

static void
timevalfix(struct timeval *t1)
{
	if (t1->tv_usec < 0) {
		t1->tv_sec--;
		t1->tv_usec += 1000000;
	}
	if (t1->tv_usec >= 1000000) {
		t1->tv_sec++;
		t1->tv_usec -= 1000000;
	}
}

/*
 * ratecheck(): simple time-based rate-limit checking.
 */
int
ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
{
	struct timeval tv, delta;
	int rv = 0;

	getmicrouptime(&tv);		/* NB: 10ms precision */
	delta = tv;
	timevalsub(&delta, lasttime);

	/*
	 * check for 0,0 is so that the message will be seen at least once,
	 * even if interval is huge.
	 */
	if (timevalcmp(&delta, mininterval, >=) ||
	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
		*lasttime = tv;
		rv = 1;
	}

	return (rv);
}

/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Return 0 if the limit is to be enforced (e.g. the caller
 * should drop a packet because of the rate limitation).
 *
 * maxpps of 0 always causes zero to be returned.  maxpps of -1
 * always causes 1 to be returned; this effectively defeats rate
 * limiting.
 *
 * Note that we maintain the struct timeval for compatibility
 * with other bsd systems.  We reuse the storage and just monitor
 * clock ticks for minimal overhead.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
	int now;

	/*
	 * Reset the last time and counter if this is the first call
	 * or more than a second has passed since the last update of
	 * lasttime.
	 */
	now = ticks;
	if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
		lasttime->tv_sec = now;
		*curpps = 1;
		return (maxpps != 0);
	} else {
		(*curpps)++;	/* NB: ignore potential overflow */
		return (maxpps < 0 || *curpps < maxpps);
	}
}
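
/*
 * Illustrative in-kernel usage of ppsratecheck() (a sketch; the static
 * state and the 100 events/sec limit are hypothetical):
 *
 *	static struct timeval lasttv;
 *	static int curpps;
 *
 *	if (ppsratecheck(&lasttv, &curpps, 100))
 *		kprintf("event accepted\n");
 *	// otherwise over the per-second budget: drop silently
 */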