15107Seota /*
25107Seota * CDDL HEADER START
35107Seota *
45107Seota * The contents of this file are subject to the terms of the
55107Seota * Common Development and Distribution License (the "License").
65107Seota * You may not use this file except in compliance with the License.
75107Seota *
85107Seota * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95107Seota * or http://www.opensolaris.org/os/licensing.
105107Seota * See the License for the specific language governing permissions
115107Seota * and limitations under the License.
125107Seota *
135107Seota * When distributing Covered Code, include this CDDL HEADER in each
145107Seota * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155107Seota * If applicable, add the following below this CDDL HEADER, with the
165107Seota * fields enclosed by brackets "[]" replaced with your own identifying
175107Seota * information: Portions Copyright [yyyy] [name of copyright owner]
185107Seota *
195107Seota * CDDL HEADER END
205107Seota */
215107Seota
225107Seota /*
23*9314SEiji.Ota@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
245107Seota * Use is subject to license terms.
255107Seota */
265107Seota
275107Seota #include <sys/atomic.h>
285107Seota #include <sys/callb.h>
295107Seota #include <sys/conf.h>
305107Seota #include <sys/cmn_err.h>
315107Seota #include <sys/taskq.h>
325107Seota #include <sys/dditypes.h>
335107Seota #include <sys/ddi_timer.h>
345107Seota #include <sys/disp.h>
355107Seota #include <sys/kobj.h>
365107Seota #include <sys/note.h>
375107Seota #include <sys/param.h>
385107Seota #include <sys/sysmacros.h>
395107Seota #include <sys/systm.h>
405107Seota #include <sys/time.h>
415107Seota #include <sys/types.h>
425107Seota
/*
 * Global variables for timeout requests.
 */
static kmem_cache_t *req_cache;	/* kmem cache for timeout requests */

/*
 * taskq parameters for cyclic_timer
 *
 * timer_taskq_num:
 * timer_taskq_num represents the number of taskq threads.
 * Currently 4 threads are pooled to handle periodic timeout requests.
 * This number is chosen based on the fact that the callout (one-time
 * timeout framework) uses 8 threads with TQ_NOSLEEP; the periodic timeout
 * calls taskq_dispatch() with TQ_SLEEP instead, and in this case, 4 threads
 * should be sufficient to handle periodic timeout requests. (see also
 * timer_taskq_max_num below)
 *
 * timer_taskq_min_num:
 * timer_taskq_min_num represents the number of pre-populated taskq_ent
 * structures, and this variable holds the same value as timer_taskq_num.
 *
 * timer_taskq_max_num:
 * Since TQ_SLEEP is set when taskq_dispatch() is called, the framework waits
 * for one second if more taskq_ent structures than timer_taskq_max_num are
 * required. From the timeout point of view, one second is much longer than
 * expected, so to prevent this, timer_taskq_max_num should hold a
 * sufficiently large value, which is 128 here. Note that since the size
 * of taskq_ent_t is relatively small, this does not consume many resources.
 * (Currently the size is less than 8k at most)
 *
 * For a detailed explanation of the taskq function arguments, please see
 * usr/src/uts/common/os/taskq.c.
 */
int timer_taskq_num = 4;		/* taskq thread number */
int timer_taskq_min_num = 4;		/* min. number of taskq_ent structs */
int timer_taskq_max_num = 128;		/* max. number of taskq_ent structs */
static taskq_t *tm_taskq;		/* taskq thread pool */
static kthread_t *tm_work_thread;	/* work thread invoking taskq */

/*
 * Timer variables.
 */
static cyc_timer_t *ddi_timer;	/* ddi timer based on the cyclic */
static volatile hrtime_t timer_hrtime;	/* current tick time on the timer */

/*
 * Variable used for suspend/resume. Written by timer_cpr_callb() and
 * checked at the top of cyclic_timer().
 */
static volatile boolean_t timer_suspended;

/*
 * Kernel taskq queue for the ddi timer.
 */
static list_t kern_queue;	/* kernel thread request queue */
static kcondvar_t kern_cv;	/* condition variable for taskq queue */

/*
 * Software interrupt queue dedicated to the ddi timer.
 */
static list_t intr_queue;	/* software interrupt request queue */
static uint_t intr_state;	/* software interrupt state */

/*
 * This lock is used to protect the intr_queue and kern_queue.
 * It is also used to protect intr_state, which represents the software
 * interrupt state for the timer.
 */
static kmutex_t disp_req_lock;

/*
 * The periodic timer interrupt priority levels.
 * TM_IPL_0 selects kernel (taskq) context; TM_IPL_1 through TM_IPL_10
 * select software interrupt delivery at the corresponding level.
 */
enum {
	TM_IPL_0 = 0,			/* kernel context */
	TM_IPL_1, TM_IPL_2, TM_IPL_3,	/* level 1-3 */
	TM_IPL_4, TM_IPL_5, TM_IPL_6,	/* level 4-6 */
	TM_IPL_7, TM_IPL_8, TM_IPL_9,	/* level 7-9 */
	TM_IPL_10			/* level 10 */
};
1225107Seota
1235107Seota /*
1245107Seota * A callback handler used by CPR to stop and resume callouts.
1255107Seota * Since the taskq uses TASKQ_CPR_SAFE, the function just set the boolean
1265107Seota * flag to timer_suspended here.
1275107Seota */
1285107Seota /*ARGSUSED*/
1295107Seota static boolean_t
timer_cpr_callb(void * arg,int code)1305107Seota timer_cpr_callb(void *arg, int code)
1315107Seota {
1325107Seota timer_suspended = (code == CB_CODE_CPR_CHKPT);
1335107Seota return (B_TRUE);
1345107Seota }
1355107Seota
1365107Seota /*
1375107Seota * Return a proposed timeout request id. add_req() determines whether
1385107Seota * or not the proposed one is used. If it's not suitable, add_req()
1395107Seota * recalls get_req_cnt(). To reduce the lock contention between the
1405107Seota * timer and i_untimeout(), the atomic instruction should be used here.
1415107Seota */
1425107Seota static timeout_t
get_req_cnt(void)1435107Seota get_req_cnt(void)
1445107Seota {
1455107Seota static volatile ulong_t timeout_cnt = 0;
1465107Seota return ((timeout_t)atomic_inc_ulong_nv(&timeout_cnt));
1475107Seota }
1485107Seota
1495107Seota /*
1505107Seota * Get the system resolution.
1515107Seota * Note. currently there is a restriction about the system resolution, and
1525107Seota * the 10ms tick (the default clock resolution) is only supported now.
1535107Seota */
1545107Seota static hrtime_t
i_get_res(void)1555107Seota i_get_res(void)
1565107Seota {
1575107Seota return ((hrtime_t)10000000); /* 10ms tick only */
1585107Seota }
1595107Seota
1605107Seota /*
1615107Seota * Return the value for the cog of the timing wheel.
1625107Seota * TICK_FACTOR is used to gain a finer cog on the clock resolution.
1635107Seota */
1645107Seota static hrtime_t
tw_tick(hrtime_t time)1655107Seota tw_tick(hrtime_t time)
1665107Seota {
1675107Seota return ((time << TICK_FACTOR) / ddi_timer->res);
1685107Seota }
1695107Seota
1705107Seota /*
1715107Seota * Calculate the expiration time for the timeout request.
1725107Seota */
1735107Seota static hrtime_t
expire_tick(tm_req_t * req)1745107Seota expire_tick(tm_req_t *req)
1755107Seota {
1765107Seota return (tw_tick(req->exp_time));
1775107Seota }
1785107Seota
1795107Seota /*
1805107Seota * Register a timeout request to the timer. This function is used
1815107Seota * in i_timeout().
1825107Seota */
1835107Seota static timeout_t
add_req(tm_req_t * req)1845107Seota add_req(tm_req_t *req)
1855107Seota {
1865107Seota timer_tw_t *tid, *tw;
1875107Seota tm_req_t *next;
1885107Seota timeout_t id;
1895107Seota
1905107Seota retry:
1915107Seota /*
1925107Seota * Retrieve a timeout request id. Since i_timeout() needs to return
1935107Seota * a non-zero value, re-try if the zero is gotten.
1945107Seota */
1955107Seota if ((id = get_req_cnt()) == 0)
1965107Seota id = get_req_cnt();
1975107Seota
1985107Seota /*
1995107Seota * Check if the id is not used yet. Since the framework now deals
2005107Seota * with the periodic timeout requests, we cannot assume the id
2015107Seota * allocated (long) before doesn't exist any more when it will
2025107Seota * be re-assigned again (especially on 32bit) but need to handle
2035107Seota * this case to solve the conflicts. If it's used already, retry
2045107Seota * another.
2055107Seota */
2065107Seota tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
2075107Seota mutex_enter(&tid->lock);
2085107Seota for (next = list_head(&tid->req); next != NULL;
2095107Seota next = list_next(&tid->req, next)) {
2105107Seota if (next->id == id) {
2115107Seota mutex_exit(&tid->lock);
2125107Seota goto retry;
2135107Seota }
2145107Seota }
2155107Seota /* Nobody uses this id yet */
2165107Seota req->id = id;
2175107Seota
2185107Seota /*
2195107Seota * Register this request to the timer.
2205107Seota * The list operation must be list_insert_head().
2215107Seota * Other operations can degrade performance.
2225107Seota */
2235107Seota list_insert_head(&tid->req, req);
2245107Seota mutex_exit(&tid->lock);
2255107Seota
2265107Seota tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
2275107Seota mutex_enter(&tw->lock);
2285107Seota /*
2295107Seota * Other operations than list_insert_head() can
2305107Seota * degrade performance here.
2315107Seota */
2325107Seota list_insert_head(&tw->req, req);
2335107Seota mutex_exit(&tw->lock);
2345107Seota
2355107Seota return (id);
2365107Seota }
2375107Seota
2385107Seota /*
2395107Seota * Periodic timeout requests cannot be removed until they are canceled
2405107Seota * explicitly. Until then, they need to be re-registerd after they are
2415107Seota * fired. transfer_req() re-registers the requests for the next fires.
2425107Seota * Note. transfer_req() sends the cv_signal to timeout_execute(), which
2435107Seota * runs in interrupt context. Make sure this function will not be blocked,
2445107Seota * otherwise the deadlock situation can occur.
2455107Seota */
2465107Seota static void
transfer_req(tm_req_t * req,timer_tw_t * tw)2475107Seota transfer_req(tm_req_t *req, timer_tw_t *tw)
2485107Seota {
2495107Seota timer_tw_t *new_tw;
2505107Seota hrtime_t curr_time;
2515107Seota ASSERT(tw && MUTEX_HELD(&tw->lock));
2525107Seota
2535107Seota /* Calculate the next expiration time by interval */
2545107Seota req->exp_time += req->interval;
2555107Seota curr_time = gethrtime();
2565107Seota
2575107Seota /*
2585107Seota * If a long time (more than 1 clock resolution) has already
2595107Seota * passed for some reason (e.g. debugger or high interrupt),
2605107Seota * round up the next expiration to the appropriate one
2615107Seota * since this request is periodic and never catches with it.
2625107Seota */
2635107Seota if (curr_time - req->exp_time >= ddi_timer->res) {
2645107Seota req->exp_time = roundup(curr_time + req->interval,
2655107Seota ddi_timer->res);
2665107Seota }
2675107Seota
2685107Seota /*
2695107Seota * Re-register this request.
2705107Seota * Note. since it is guaranteed that the timer is invoked on only
2715107Seota * one CPU at any time (by the cyclic subsystem), a deadlock
2725107Seota * cannot occur regardless of the lock order here.
2735107Seota */
2745107Seota new_tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
2755107Seota
2765107Seota /*
2775107Seota * If it's on the timer cog already, there is nothing
2785107Seota * to do. Just return.
2795107Seota */
2805107Seota if (new_tw == tw)
2815107Seota return;
2825107Seota
2835107Seota /* Remove this request from the timer */
2845107Seota list_remove(&tw->req, req);
2855107Seota
2865107Seota /* Re-register this request to the timer */
2875107Seota mutex_enter(&new_tw->lock);
2885107Seota
2895107Seota /*
2905107Seota * Other operations than list_insert_head() can
2915107Seota * degrade performance here.
2925107Seota */
2935107Seota list_insert_head(&new_tw->req, req);
2945107Seota mutex_exit(&new_tw->lock);
2955107Seota
2965107Seota /*
2975107Seota * Set the TM_TRANSFER flag and notify the request is transfered
2985107Seota * completely. This prevents a race in the case that this request
2995107Seota * is serviced on another CPU already.
3005107Seota */
3015107Seota mutex_enter(&req->lock);
3025107Seota req->flags |= TM_TRANSFER;
3035107Seota cv_signal(&req->cv);
3045107Seota mutex_exit(&req->lock);
3055107Seota }
3065107Seota
/*
 * Execute a timeout request.
 *
 * arg is the tm_req_t to run. The handler is invoked once per pending
 * fire (req->cnt); the function returns when the count drops to zero or
 * when the request is found to be canceled, in which case the request is
 * unlinked from the expiration hash and freed here.
 *
 * Note. since timeout_execute() can run in interrupt context and block
 * on condition variables, there are restrictions on the timer code that
 * signals these condition variables (see i_untimeout(), transfer_req(),
 * and condvar(9F)). Functions that signal these cvs must ensure that
 * they will not be blocked (for memory allocations or any other reason)
 * since condition variables don't support priority inheritance.
 */
static void
timeout_execute(void *arg)
{
	tm_req_t *req = (tm_req_t *)arg;
	/* The dispatcher marks the request TM_INVOKING before handing it over */
	ASSERT(req->flags & TM_INVOKING && !(req->flags & TM_EXECUTING));

	for (;;) {
		/*
		 * Check if this request is canceled. If it's canceled, do not
		 * execute this request.
		 */
		mutex_enter(&req->lock);
		if (!(req->flags & TM_CANCEL)) {
			/*
			 * Record the current thread in h_thread; the intent
			 * is to prevent a deadlock in case this timeout
			 * request is canceled from within the handler being
			 * invoked now (this doesn't violate the spec).
			 * Set TM_EXECUTING to show the handler is about to
			 * be invoked.
			 */
			req->h_thread = curthread;
			req->flags |= TM_EXECUTING;
			mutex_exit(&req->lock);

			/* The handler is invoked without holding any locks */
			(*req->handler)(req->arg);

			/* Re-take the lock to examine the flags again below */
			mutex_enter(&req->lock);
		}

		/*
		 * Check if this request is canceled or not. If not, prepare
		 * for the next fire. (The request may have been canceled
		 * while the handler was running without the lock.)
		 */
		if (req->flags & TM_CANCEL) {
			timer_tw_t *tw;
			/*
			 * Wait until the timer finishes all things for
			 * this request; transfer_req() sets TM_TRANSFER and
			 * signals req->cv.
			 */
			while (!(req->flags & TM_TRANSFER))
				cv_wait(&req->cv, &req->lock);
			mutex_exit(&req->lock);
			ASSERT(req->flags & TM_TRANSFER);

			/* Remove this request from the timer */
			tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
			mutex_enter(&tw->lock);
			list_remove(&tw->req, req);
			mutex_exit(&tw->lock);

			/*
			 * Free this request.
			 * NOTE(review): req->lock and req->cv are set up with
			 * mutex_init()/cv_init() in i_timeout() but are not
			 * destroyed before the free -- confirm whether
			 * mutex_destroy()/cv_destroy() are wanted here.
			 */
			kmem_cache_free(req_cache, req);
			return;
		}
		ASSERT(req->flags & TM_EXECUTING);

		/*
		 * TM_EXECUTING must be set at this point.
		 * Unset the flag, together with TM_TRANSFER, so the next
		 * round starts from a clean state.
		 */
		req->flags &= ~(TM_EXECUTING | TM_TRANSFER);

		/*
		 * Decrease the request cnt. The request cnt shows
		 * how many times this request still has to be executed.
		 * If this counter becomes zero, drop TM_INVOKING
		 * to show there is nothing left to do now.
		 */
		req->cnt--;
		if (req->cnt == 0) {
			req->flags &= ~TM_INVOKING;
			mutex_exit(&req->lock);
			return;
		}
		/* More fires are pending: loop and run the handler again */
		mutex_exit(&req->lock);
	}
}
3945107Seota
3955107Seota /*
3965107Seota * Timeout worker thread for processing task queue.
3975107Seota */
3985107Seota static void
timeout_taskq_thread(void * arg)3995107Seota timeout_taskq_thread(void *arg)
4005107Seota {
4015107Seota _NOTE(ARGUNUSED(arg));
4025107Seota tm_req_t *kern_req;
4035107Seota callb_cpr_t cprinfo;
4045107Seota
4055107Seota CALLB_CPR_INIT(&cprinfo, &disp_req_lock, callb_generic_cpr,
4065107Seota "timeout_taskq_thread");
4075107Seota
4085107Seota /*
4095107Seota * This thread is wakened up when a new request is added to
4105107Seota * the queue. Then pick up all requests and dispatch them
4115107Seota * via taskq_dispatch().
4125107Seota */
4135107Seota for (;;) {
4145107Seota /*
4155107Seota * Check the queue and pick up a request if the queue
4165107Seota * is not NULL.
4175107Seota */
4185107Seota mutex_enter(&disp_req_lock);
4195107Seota while ((kern_req = list_head(&kern_queue)) == NULL) {
4205107Seota CALLB_CPR_SAFE_BEGIN(&cprinfo);
4215107Seota cv_wait(&kern_cv, &disp_req_lock);
4225107Seota CALLB_CPR_SAFE_END(&cprinfo, &disp_req_lock);
4235107Seota }
4245107Seota list_remove(&kern_queue, kern_req);
4255107Seota mutex_exit(&disp_req_lock);
4265107Seota
4275107Seota /* Execute the timeout request via the taskq thread */
4285107Seota (void) taskq_dispatch(tm_taskq, timeout_execute,
4295107Seota (void *)kern_req, TQ_SLEEP);
4305107Seota }
4315107Seota }
4325107Seota
4335107Seota /*
4345107Seota * Dispatch the timeout request based on the level specified.
4355107Seota * If the level is equal to zero, notify the worker thread to
4365107Seota * call taskq_dispatch() in kernel context. If the level is bigger
4375107Seota * than zero, add a software interrupt request to the queue and raise
4385107Seota * the interrupt level to the specified one.
4395107Seota */
4405107Seota static void
timeout_dispatch(tm_req_t * req)4415107Seota timeout_dispatch(tm_req_t *req)
4425107Seota {
4435107Seota int level = req->level;
4445107Seota extern void sir_on(int);
4455107Seota
4465107Seota if (level == TM_IPL_0) {
4475107Seota /* Add a new request to the tail */
4485107Seota mutex_enter(&disp_req_lock);
4495107Seota list_insert_tail(&kern_queue, req);
4505107Seota mutex_exit(&disp_req_lock);
4515107Seota
4525107Seota /*
4535107Seota * notify the worker thread that this request
4545107Seota * is newly added to the queue.
4555107Seota * Note. this cv_signal() can be called after the
4565107Seota * mutex_lock.
4575107Seota */
4585107Seota cv_signal(&kern_cv);
4595107Seota } else {
4605107Seota /* Add a new request to the tail */
4615107Seota mutex_enter(&disp_req_lock);
4625107Seota list_insert_tail(&intr_queue, req);
4635107Seota
4645107Seota /* Issue the software interrupt */
4655107Seota if (intr_state & TM_INTR_START(level)) {
4665107Seota /*
4675107Seota * timer_softintr() is already running; no need to
4685107Seota * raise a siron. Due to lock protection of
4695107Seota * the intr_queue and intr_state, we know that
4705107Seota * timer_softintr() will see the new addition to
4715107Seota * the intr_queue.
4725107Seota */
4735107Seota mutex_exit(&disp_req_lock);
4745107Seota } else {
4755107Seota intr_state |= TM_INTR_SET(level);
4765107Seota mutex_exit(&disp_req_lock);
4775107Seota
4785107Seota /* Raise an interrupt to execute timeout requests */
4795107Seota sir_on(level);
4805107Seota }
4815107Seota }
4825107Seota }
4835107Seota
4845107Seota /*
4855107Seota * Check the software interrupt queue and invoke requests at the specified
4865107Seota * interrupt level.
4875107Seota * Note that the queue may change during call so that the disp_req_lock
4885107Seota * and the intr_state are used to protect it.
4895107Seota * The software interrupts supported here are up to the level 10. Higher
4905107Seota * than 10 interrupts cannot be supported.
4915107Seota */
4925107Seota void
timer_softintr(int level)4935107Seota timer_softintr(int level)
4945107Seota {
4955107Seota tm_req_t *intr_req;
4965107Seota ASSERT(level >= TM_IPL_1 && level <= TM_IPL_10);
4975107Seota
4985107Seota /* Check if we are asked to process the softcall list */
4995107Seota mutex_enter(&disp_req_lock);
5005107Seota if (!(intr_state & TM_INTR_SET(level))) {
5015107Seota mutex_exit(&disp_req_lock);
5025107Seota return;
5035107Seota }
5045107Seota
5055107Seota /* Notify this software interrupt request will be executed soon */
5065107Seota intr_state |= TM_INTR_START(level);
5075107Seota intr_state &= ~TM_INTR_SET(level);
5085107Seota
5095107Seota /* loop the link until there is no requests */
5105107Seota for (intr_req = list_head(&intr_queue); intr_req != NULL;
5115107Seota /* Nothing */) {
5125107Seota
5135107Seota /* Check the interrupt level */
5145107Seota if (intr_req->level != level) {
5155107Seota intr_req = list_next(&intr_queue, intr_req);
5165107Seota continue;
5175107Seota }
5185107Seota list_remove(&intr_queue, intr_req);
5195107Seota mutex_exit(&disp_req_lock);
5205107Seota
5215107Seota /* Execute the software interrupt request */
5225107Seota timeout_execute(intr_req);
5235107Seota
5245107Seota mutex_enter(&disp_req_lock);
5255107Seota /* Restart the loop since new requests might be added */
5265107Seota intr_req = list_head(&intr_queue);
5275107Seota }
5285107Seota
5295107Seota /* reset the interrupt state */
5305107Seota intr_state &= ~TM_INTR_START(level);
5315107Seota mutex_exit(&disp_req_lock);
5325107Seota }
5335107Seota
/*
 * void
 * cyclic_timer(void)
 *
 * Overview
 * cyclic_timer() is a function invoked periodically by the cyclic
 * subsystem.
 *
 * The function walks the timing wheel cogs from ddi_timer->tick up to
 * the current tick and, for each request whose expiration time has been
 * reached, calls timeout_dispatch() (unless the request is already being
 * invoked) and then transfer_req() to schedule the next fire. Requests
 * marked TM_UTMCOMP by i_untimeout() are unlinked and freed here.
 *
 * Arguments
 * Nothing
 *
 * Return value
 * Nothing
 */
void
cyclic_timer(void)
{
	tm_req_t *req;
	timer_tw_t *tw;
	hrtime_t curr_tick, curr;

	/* If the system is suspended, just return */
	if (timer_suspended)
		return;

	/* Get the current time and the cog it corresponds to */
	timer_hrtime = ddi_timer->tick_time = curr = gethrtime();
	curr_tick = tw_tick(ddi_timer->tick_time);

restart:
	/*
	 * Check the timer cogs to see if there are timeout requests
	 * which have reached their expiration time, and dispatch them.
	 */
	while (curr_tick >= ddi_timer->tick) {
		tm_req_t *next;
		tw = &ddi_timer->exhash[TM_HASH(ddi_timer->tick)];
		mutex_enter(&tw->lock);
		for (req = list_head(&tw->req); req != NULL; req = next) {
			/*
			 * Fetch the successor first: req may be unlinked
			 * from this cog (freed or transferred) below.
			 */
			next = list_next(&tw->req, req);
			/*
			 * If this request is already obsolete, free
			 * it here.
			 */
			if (req->flags & TM_UTMCOMP) {
				/*
				 * Remove this request from the timer,
				 * then free it.
				 * NOTE(review): req->lock and req->cv are
				 * not destroyed before the free -- confirm
				 * whether mutex_destroy()/cv_destroy() are
				 * wanted here.
				 */
				list_remove(&tw->req, req);
				kmem_cache_free(req_cache, req);
			} else if (curr >= req->exp_time) {
				mutex_enter(&req->lock);
				/*
				 * Check if this request is canceled, but not
				 * being executed now. Such a request is
				 * skipped.
				 */
				if (req->flags & TM_CANCEL &&
				    !(req->flags & TM_INVOKING)) {
					mutex_exit(&req->lock);
					continue;
				}
				/*
				 * Record how many times timeout_execute()
				 * must be invoked.
				 */
				req->cnt++;
				/*
				 * Invoke timeout_execute() via taskq or
				 * software interrupt.
				 */
				if (req->flags & TM_INVOKING) {
					/*
					 * If it's already invoked, there is
					 * nothing to do; the bumped cnt
					 * makes timeout_execute() loop once
					 * more.
					 */
					mutex_exit(&req->lock);
				} else {
					req->flags |= TM_INVOKING;
					mutex_exit(&req->lock);
					/*
					 * Dispatch this timeout request.
					 * timeout_dispatch() chooses either
					 * a software interrupt or taskq thread
					 * based on the level.
					 */
					timeout_dispatch(req);
				}
				/*
				 * Periodic timeout requests must prepare for
				 * the next fire.
				 */
				transfer_req(req, tw);
			}
		}
		mutex_exit(&tw->lock);
		ddi_timer->tick++;
	}

	/*
	 * Check the current time. If some amount of time was spent above,
	 * double-check whether further requests reached their expiration
	 * time during the work. The tick is stepped back by one before
	 * restarting, so the last processed cog is scanned again.
	 */
	curr = gethrtime();
	curr_tick = tw_tick(curr);
	if (curr_tick >= ddi_timer->tick) {
		ddi_timer->tick -= 1;
		goto restart;
	}
	/*
	 * Adjustment for the next rolling: the next invocation starts
	 * from the last cog processed in this one.
	 */
	ddi_timer->tick -= 1;
}
6515107Seota
6525107Seota /*
6535107Seota * void
6545107Seota * timer_init(void)
6555107Seota *
6565107Seota * Overview
6575107Seota * timer_init() allocates the internal data structures used by
6585107Seota * i_timeout(), i_untimeout() and the timer.
6595107Seota *
6605107Seota * Arguments
6615107Seota * Nothing
6625107Seota *
6635107Seota * Return value
6645107Seota * Nothing
6655107Seota *
6665107Seota * Caller's context
6675107Seota * timer_init() can be called in kernel context only.
6685107Seota */
6695107Seota void
timer_init(void)6705107Seota timer_init(void)
6715107Seota {
6725107Seota int i;
6735107Seota
6745107Seota /* Create kmem_cache for timeout requests */
6755107Seota req_cache = kmem_cache_create("timeout_request", sizeof (tm_req_t),
6765107Seota 0, NULL, NULL, NULL, NULL, NULL, 0);
6775107Seota
6785107Seota /* Initialize the timer which is invoked by the cyclic subsystem */
6795107Seota ddi_timer = kmem_alloc(sizeof (cyc_timer_t), KM_SLEEP);
6805107Seota ddi_timer->res = nsec_per_tick;
6815107Seota ddi_timer->tick = tw_tick(gethrtime());
6825107Seota ddi_timer->tick_time = 0;
6835107Seota
6845107Seota /* Initialize the timing wheel */
6855107Seota bzero((char *)&ddi_timer->idhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
6865107Seota bzero((char *)&ddi_timer->exhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
6875107Seota
6885107Seota for (i = 0; i < TM_HASH_SZ; i++) {
6895107Seota list_create(&ddi_timer->idhash[i].req, sizeof (tm_req_t),
6905107Seota offsetof(tm_req_t, id_req));
6915107Seota mutex_init(&ddi_timer->idhash[i].lock, NULL, MUTEX_ADAPTIVE,
6925107Seota NULL);
6935107Seota
6945107Seota list_create(&ddi_timer->exhash[i].req, sizeof (tm_req_t),
6955107Seota offsetof(tm_req_t, ex_req));
6965107Seota mutex_init(&ddi_timer->exhash[i].lock, NULL, MUTEX_ADAPTIVE,
6975107Seota NULL);
6985107Seota }
6995107Seota
7005107Seota /* Create a taskq thread pool */
7015107Seota tm_taskq = taskq_create_instance("timeout_taskq", 0,
7025107Seota timer_taskq_num, MAXCLSYSPRI,
7035265Seota timer_taskq_min_num, timer_taskq_max_num,
7045107Seota TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
7055107Seota
7065107Seota /*
7075107Seota * Initialize the taskq queue which is dedicated to this timeout
7085107Seota * interface/timer.
7095107Seota */
7105107Seota list_create(&kern_queue, sizeof (tm_req_t),
7115107Seota offsetof(tm_req_t, disp_req));
7125107Seota
7135107Seota /* Create a worker thread to dispatch the taskq thread */
7145107Seota tm_work_thread = thread_create(NULL, 0, timeout_taskq_thread, NULL,
7155107Seota 0, &p0, TS_RUN, MAXCLSYSPRI);
7165107Seota
7175107Seota /*
7185107Seota * Initialize the software interrupt queue which is dedicated to
7195107Seota * this timeout interface/timer.
7205107Seota */
7215107Seota list_create(&intr_queue, sizeof (tm_req_t),
7225107Seota offsetof(tm_req_t, disp_req));
7235107Seota
7245107Seota /*
7255107Seota * Initialize the mutex lock used for both of kern_queue and
7265107Seota * intr_queue.
7275107Seota */
7285107Seota mutex_init(&disp_req_lock, NULL, MUTEX_ADAPTIVE, NULL);
7295107Seota cv_init(&kern_cv, NULL, CV_DEFAULT, NULL);
7305107Seota
7315107Seota /* Register the callback handler for the system suspend/resume */
7325107Seota (void) callb_add(timer_cpr_callb, 0, CB_CL_CPR_CALLOUT, "cyclicTimer");
7335107Seota }
7345107Seota
7355107Seota /*
7365107Seota * timeout_t
7375107Seota * i_timeout(void (*func)(void *), void *arg, hrtime_t interval,
7385107Seota * int level, int flags)
7395107Seota *
7405107Seota * Overview
7415107Seota * i_timeout() is an internal function scheduling the passed function
7425107Seota * to be invoked in the interval in nanoseconds. The callback function
7435107Seota * keeps invoked until the request is explicitly canceled by i_untimeout().
7445107Seota * This function is used for ddi_periodic_add(9F).
7455107Seota *
7465107Seota * Arguments
7475107Seota *
7485107Seota * func: the callback function
7495107Seota * the callback function will be invoked in kernel context if
7505107Seota * the level passed is the zero. Otherwise be invoked in interrupt
7515107Seota * context at the specified level by the argument "level".
7525107Seota *
7535107Seota * Note that It's guaranteed by the cyclic subsystem that the
7545107Seota * function is invoked on the only one CPU and is never executed
7555107Seota * simultaneously even on MP system.
7565107Seota *
7575107Seota * arg: the argument passed to the callback function
7585107Seota *
7595107Seota * interval: interval time in nanoseconds
7605107Seota * if the interval is the zero, the timer resolution is used.
7615107Seota *
7625107Seota * level : callback interrupt level
7635107Seota * If the value is 0 (the zero), the callback function is invoked
7645107Seota * in kernel context. If the value is more than 0 (the zero), but
7655107Seota * less than or equal to 10, the callback function is invoked in
7665107Seota * interrupt context at the specified interrupt level.
7675107Seota * This value must be in range of 0-10.
7685107Seota *
7695107Seota * Return value
7705107Seota * returns a non-zero opaque value (timeout_t) on success.
7715107Seota *
7725107Seota * Caller's context
773*9314SEiji.Ota@Sun.COM * i_timeout() can be called in user or kernel context.
7745107Seota */
7755107Seota timeout_t
i_timeout(void (* func)(void *),void * arg,hrtime_t interval,int level)7765107Seota i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level)
7775107Seota {
7785107Seota hrtime_t start_time = gethrtime(), res;
7795107Seota tm_req_t *req = NULL;
7805107Seota
7815107Seota /* Allocate and initialize the timeout request */
7825107Seota req = kmem_cache_alloc(req_cache, KM_SLEEP);
7835107Seota req->handler = func;
7845107Seota req->arg = arg;
7855107Seota req->h_thread = NULL;
7865107Seota req->level = level;
7875107Seota req->flags = 0;
7885107Seota req->cnt = 0;
7895107Seota mutex_init(&req->lock, NULL, MUTEX_ADAPTIVE, NULL);
7905107Seota cv_init(&req->cv, NULL, CV_DEFAULT, NULL);
7915107Seota
7925107Seota /*
7935107Seota * The resolution must be finer than or equal to
7945107Seota * the requested interval. If it's not, set the resolution
7955107Seota * to the interval.
7965107Seota * Note. There is a restriction currently. Regardless of the
7975107Seota * clock resolution used here, 10ms is set as the timer resolution.
7985107Seota * Even on the 1ms resolution timer, the minimum interval is 10ms.
7995107Seota */
8005107Seota if ((res = i_get_res()) > interval) {
8015107Seota uintptr_t pc = (uintptr_t)req->handler;
8025107Seota ulong_t off;
8035107Seota cmn_err(CE_WARN,
8045107Seota "The periodic timeout (handler=%s, interval=%lld) "
8055107Seota "requests a finer interval than the supported resolution. "
8065107Seota "It rounds up to %lld\n", kobj_getsymname(pc, &off),
8075107Seota interval, res);
8085107Seota interval = res;
8095107Seota }
8105107Seota
8115107Seota /*
8125107Seota * If the specified interval is already multiples of
8135107Seota * the resolution, use it as is. Otherwise, it rounds
8145107Seota * up to multiples of the timer resolution.
8155107Seota */
8165107Seota req->interval = roundup(interval, i_get_res());
8175107Seota
8185107Seota /*
8195107Seota * For the periodic timeout requests, the first expiration time will
8205107Seota * be adjusted to the timer tick edge to take advantage of the cyclic
8215107Seota * subsystem. In that case, the first fire is likely not an expected
8225107Seota * one, but the fires later can be more accurate due to this.
8235107Seota */
8245107Seota req->exp_time = roundup(start_time + req->interval, i_get_res());
8255107Seota
8265107Seota /* Add the request to the timer */
8275107Seota return (add_req(req));
8285107Seota }
8295107Seota
8305107Seota /*
8315107Seota * void
8325107Seota * i_untimeout(timeout_t req)
8335107Seota *
8345107Seota * Overview
8355107Seota * i_untimeout() is an internal function canceling the i_timeout()
8365107Seota * request previously issued.
8375107Seota * This function is used for ddi_periodic_delete(9F).
8385107Seota *
8395107Seota * Argument
8405107Seota * req: timeout_t opaque value i_timeout() returned previously.
8415107Seota *
8425107Seota * Return value
8435107Seota * Nothing.
8445107Seota *
8455107Seota * Caller's context
8465107Seota * i_untimeout() can be called in user, kernel or interrupt context.
8475107Seota * It cannot be called in high interrupt context.
8485107Seota *
8495107Seota * Note. This function is used by ddi_periodic_delete(), which cannot
8505107Seota * be called in interrupt context. As a result, this function is called
851*9314SEiji.Ota@Sun.COM * in user or kernel context only in practice.
8525107Seota */
8535107Seota void
i_untimeout(timeout_t timeout_req)8545107Seota i_untimeout(timeout_t timeout_req)
8555107Seota {
8565107Seota timer_tw_t *tid;
8575107Seota tm_req_t *req;
8585107Seota timeout_t id;
8595107Seota
8605107Seota /* Retrieve the id for this timeout request */
8615107Seota id = (timeout_t)timeout_req;
8625107Seota tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
8635107Seota
8645107Seota mutex_enter(&tid->lock);
8655107Seota for (req = list_head(&tid->req); req != NULL;
8665107Seota req = list_next(&tid->req, req)) {
8675107Seota if (req->id == id)
8685107Seota break;
8695107Seota }
8705107Seota if (req == NULL) {
8715107Seota /* There is no requests with this id after all */
8725107Seota mutex_exit(&tid->lock);
8735107Seota return;
8745107Seota }
8755107Seota mutex_enter(&req->lock);
8765107Seota
8775107Seota /* Unregister this request first */
8785107Seota list_remove(&tid->req, req);
8795107Seota
8805107Seota /* Notify that this request is canceled */
8815107Seota req->flags |= TM_CANCEL;
8825107Seota
8835107Seota /* Check if the handler is invoked */
8845107Seota if (req->flags & TM_INVOKING) {
8855107Seota /*
886*9314SEiji.Ota@Sun.COM * This request will be removed by timeout_execute() later,
887*9314SEiji.Ota@Sun.COM * so that there is no extra thing to do any more.
8885107Seota */
889*9314SEiji.Ota@Sun.COM mutex_exit(&req->lock);
8905107Seota mutex_exit(&tid->lock);
8915107Seota return;
8925107Seota }
8935107Seota mutex_exit(&req->lock);
8945107Seota mutex_exit(&tid->lock);
8955107Seota
8965107Seota /*
8975107Seota * Notify untimeout() is about to be finished, and this request
8985107Seota * can be freed.
8995107Seota */
9005107Seota atomic_or_uint(&req->flags, TM_UTMCOMP);
9015107Seota }
902