15107Seota /* 25107Seota * CDDL HEADER START 35107Seota * 45107Seota * The contents of this file are subject to the terms of the 55107Seota * Common Development and Distribution License (the "License"). 65107Seota * You may not use this file except in compliance with the License. 75107Seota * 85107Seota * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 95107Seota * or http://www.opensolaris.org/os/licensing. 105107Seota * See the License for the specific language governing permissions 115107Seota * and limitations under the License. 125107Seota * 135107Seota * When distributing Covered Code, include this CDDL HEADER in each 145107Seota * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 155107Seota * If applicable, add the following below this CDDL HEADER, with the 165107Seota * fields enclosed by brackets "[]" replaced with your own identifying 175107Seota * information: Portions Copyright [yyyy] [name of copyright owner] 185107Seota * 195107Seota * CDDL HEADER END 205107Seota */ 215107Seota 225107Seota /* 235107Seota * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 245107Seota * Use is subject to license terms. 255107Seota */ 265107Seota 275107Seota #pragma ident "%Z%%M% %I% %E% SMI" 285107Seota 295107Seota #include <sys/atomic.h> 305107Seota #include <sys/callb.h> 315107Seota #include <sys/conf.h> 325107Seota #include <sys/cmn_err.h> 335107Seota #include <sys/taskq.h> 345107Seota #include <sys/dditypes.h> 355107Seota #include <sys/ddi_timer.h> 365107Seota #include <sys/disp.h> 375107Seota #include <sys/kobj.h> 385107Seota #include <sys/note.h> 395107Seota #include <sys/param.h> 405107Seota #include <sys/sysmacros.h> 415107Seota #include <sys/systm.h> 425107Seota #include <sys/time.h> 435107Seota #include <sys/types.h> 445107Seota 455107Seota /* 465107Seota * global variables for timeout request 475107Seota */ 485107Seota static kmem_cache_t *req_cache; /* kmem cache for timeout request */ 495107Seota 505107Seota /* 51*5343Seota * taskq parameters for cyclic_timer 52*5343Seota * 53*5343Seota * timer_taskq_num: 54*5343Seota * timer_taskq_num represents the number of taskq threads. 55*5343Seota * Currently 4 threads are pooled to handle periodic timeout requests. 56*5343Seota * This number is chosen based on the fact that the callout (one-time 57*5343Seota * timeout framework) uses 8 threads with TQ_NOSLEEP; the periodic timeout 58*5343Seota * calls taskq_dispatch() with TQ_SLEEP instead, and in this case, 4 threads 59*5343Seota * should be sufficient to handle periodic timeout requests. (see also 60*5343Seota * timer_taskq_max_num below) 61*5343Seota * 62*5343Seota * timer_taskq_min_num: 63*5343Seota * timer_taskq_min_num represents the number of pre-populated taskq_ent 64*5343Seota * structures, and this variable holds the same value as timer_taskq_num does. 65*5343Seota * 66*5343Seota * timer_taskq_max_num: 67*5343Seota * Since TQ_SLEEP is set when taskq_dispatch() is called, the framework waits 68*5343Seota * for one second if more taskq_ent structures than timer_taskq_max_num are 69*5343Seota * required. However, from the timeout point of view, one second is much longer 70*5343Seota * than expected, and to prevent this occurrence, timer_taskq_max_num should 71*5343Seota * hold a sufficiently-large value, which is 128 here. Note that since the size 72*5343Seota * of taskq_ent_t is relatively small, this doesn't use up the resource so much. 73*5343Seota * (Currently the size is less than 8k at most) 74*5343Seota * 75*5343Seota * About the detailed explanation of the taskq function arguments, please see 76*5343Seota * usr/src/uts/common/os/taskq.c. 775107Seota */ 78*5343Seota int timer_taskq_num = 4; /* taskq thread number */ 79*5343Seota int timer_taskq_min_num = 4; /* min. number of taskq_ent structs */ 80*5343Seota int timer_taskq_max_num = 128; /* max. number of taskq_ent structs */ 815107Seota static taskq_t *tm_taskq; /* taskq thread pool */ 825107Seota static kthread_t *tm_work_thread; /* work thread invoking taskq */ 835107Seota 845107Seota /* 855107Seota * timer variables 865107Seota */ 875107Seota static cyc_timer_t *ddi_timer; /* ddi timer based on the cyclic */ 885107Seota static volatile hrtime_t timer_hrtime; /* current tick time on the timer */ 895107Seota 905107Seota /* 915107Seota * Variable used for the suspend/resume. 925107Seota */ 935107Seota static volatile boolean_t timer_suspended; 945107Seota 955107Seota /* 965107Seota * Kernel taskq queue to ddi timer 975107Seota */ 985107Seota static list_t kern_queue; /* kernel thread request queue */ 995107Seota static kcondvar_t kern_cv; /* condition variable for taskq queue */ 1005107Seota 1015107Seota /* 1025107Seota * Software interrupt queue dedicated to ddi timer 1035107Seota */ 1045107Seota static list_t intr_queue; /* software interrupt request queue */ 1055107Seota static uint_t intr_state; /* software interrupt state */ 1065107Seota 1075107Seota /* 1085107Seota * This lock is used to protect the intr_queue and kern_queue. 1095107Seota * It's also used to protect the intr_state which represents the software 1105107Seota * interrupt state for the timer. 1115107Seota */ 1125107Seota static kmutex_t disp_req_lock; 1135107Seota 1145107Seota /* 1155107Seota * the periodic timer interrupt priority level 1165107Seota */ 1175107Seota enum { 1185107Seota TM_IPL_0 = 0, /* kernel context */ 1195107Seota TM_IPL_1, TM_IPL_2, TM_IPL_3, /* level 1-3 */ 1205107Seota TM_IPL_4, TM_IPL_5, TM_IPL_6, /* level 4-6 */ 1215107Seota TM_IPL_7, TM_IPL_8, TM_IPL_9, /* level 7-9 */ 1225107Seota TM_IPL_10 /* level 10 */ 1235107Seota }; 1245107Seota 1255107Seota /* 1265107Seota * A callback handler used by CPR to stop and resume callouts. 1275107Seota * Since the taskq uses TASKQ_CPR_SAFE, the function just set the boolean 1285107Seota * flag to timer_suspended here. 1295107Seota */ 1305107Seota /*ARGSUSED*/ 1315107Seota static boolean_t 1325107Seota timer_cpr_callb(void *arg, int code) 1335107Seota { 1345107Seota timer_suspended = (code == CB_CODE_CPR_CHKPT); 1355107Seota return (B_TRUE); 1365107Seota } 1375107Seota 1385107Seota /* 1395107Seota * Return a proposed timeout request id. add_req() determines whether 1405107Seota * or not the proposed one is used. If it's not suitable, add_req() 1415107Seota * recalls get_req_cnt(). To reduce the lock contention between the 1425107Seota * timer and i_untimeout(), the atomic instruction should be used here. 1435107Seota */ 1445107Seota static timeout_t 1455107Seota get_req_cnt(void) 1465107Seota { 1475107Seota static volatile ulong_t timeout_cnt = 0; 1485107Seota return ((timeout_t)atomic_inc_ulong_nv(&timeout_cnt)); 1495107Seota } 1505107Seota 1515107Seota /* 1525107Seota * Get the system resolution. 1535107Seota * Note. currently there is a restriction about the system resolution, and 1545107Seota * the 10ms tick (the default clock resolution) is only supported now. 1555107Seota */ 1565107Seota static hrtime_t 1575107Seota i_get_res(void) 1585107Seota { 1595107Seota return ((hrtime_t)10000000); /* 10ms tick only */ 1605107Seota } 1615107Seota 1625107Seota /* 1635107Seota * Return the value for the cog of the timing wheel. 1645107Seota * TICK_FACTOR is used to gain a finer cog on the clock resolution. 1655107Seota */ 1665107Seota static hrtime_t 1675107Seota tw_tick(hrtime_t time) 1685107Seota { 1695107Seota return ((time << TICK_FACTOR) / ddi_timer->res); 1705107Seota } 1715107Seota 1725107Seota /* 1735107Seota * Calculate the expiration time for the timeout request. 1745107Seota */ 1755107Seota static hrtime_t 1765107Seota expire_tick(tm_req_t *req) 1775107Seota { 1785107Seota return (tw_tick(req->exp_time)); 1795107Seota } 1805107Seota 1815107Seota /* 1825107Seota * Register a timeout request to the timer. This function is used 1835107Seota * in i_timeout(). 1845107Seota */ 1855107Seota static timeout_t 1865107Seota add_req(tm_req_t *req) 1875107Seota { 1885107Seota timer_tw_t *tid, *tw; 1895107Seota tm_req_t *next; 1905107Seota timeout_t id; 1915107Seota 1925107Seota retry: 1935107Seota /* 1945107Seota * Retrieve a timeout request id. Since i_timeout() needs to return 1955107Seota * a non-zero value, re-try if the zero is gotten. 1965107Seota */ 1975107Seota if ((id = get_req_cnt()) == 0) 1985107Seota id = get_req_cnt(); 1995107Seota 2005107Seota /* 2015107Seota * Check if the id is not used yet. Since the framework now deals 2025107Seota * with the periodic timeout requests, we cannot assume the id 2035107Seota * allocated (long) before doesn't exist any more when it will 2045107Seota * be re-assigned again (especially on 32bit) but need to handle 2055107Seota * this case to solve the conflicts. If it's used already, retry 2065107Seota * another. 2075107Seota */ 2085107Seota tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)]; 2095107Seota mutex_enter(&tid->lock); 2105107Seota for (next = list_head(&tid->req); next != NULL; 2115107Seota next = list_next(&tid->req, next)) { 2125107Seota if (next->id == id) { 2135107Seota mutex_exit(&tid->lock); 2145107Seota goto retry; 2155107Seota } 2165107Seota } 2175107Seota /* Nobody uses this id yet */ 2185107Seota req->id = id; 2195107Seota 2205107Seota /* 2215107Seota * Register this request to the timer. 2225107Seota * The list operation must be list_insert_head(). 2235107Seota * Other operations can degrade performance. 2245107Seota */ 2255107Seota list_insert_head(&tid->req, req); 2265107Seota mutex_exit(&tid->lock); 2275107Seota 2285107Seota tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))]; 2295107Seota mutex_enter(&tw->lock); 2305107Seota /* 2315107Seota * Other operations than list_insert_head() can 2325107Seota * degrade performance here. 2335107Seota */ 2345107Seota list_insert_head(&tw->req, req); 2355107Seota mutex_exit(&tw->lock); 2365107Seota 2375107Seota return (id); 2385107Seota } 2395107Seota 2405107Seota /* 2415107Seota * Periodic timeout requests cannot be removed until they are canceled 2425107Seota * explicitly. Until then, they need to be re-registerd after they are 2435107Seota * fired. transfer_req() re-registers the requests for the next fires. 2445107Seota * Note. transfer_req() sends the cv_signal to timeout_execute(), which 2455107Seota * runs in interrupt context. Make sure this function will not be blocked, 2465107Seota * otherwise the deadlock situation can occur. 2475107Seota */ 2485107Seota static void 2495107Seota transfer_req(tm_req_t *req, timer_tw_t *tw) 2505107Seota { 2515107Seota timer_tw_t *new_tw; 2525107Seota hrtime_t curr_time; 2535107Seota ASSERT(tw && MUTEX_HELD(&tw->lock)); 2545107Seota 2555107Seota /* Calculate the next expiration time by interval */ 2565107Seota req->exp_time += req->interval; 2575107Seota curr_time = gethrtime(); 2585107Seota 2595107Seota /* 2605107Seota * If a long time (more than 1 clock resolution) has already 2615107Seota * passed for some reason (e.g. debugger or high interrupt), 2625107Seota * round up the next expiration to the appropriate one 2635107Seota * since this request is periodic and never catches with it. 2645107Seota */ 2655107Seota if (curr_time - req->exp_time >= ddi_timer->res) { 2665107Seota req->exp_time = roundup(curr_time + req->interval, 2675107Seota ddi_timer->res); 2685107Seota } 2695107Seota 2705107Seota /* 2715107Seota * Re-register this request. 2725107Seota * Note. since it is guaranteed that the timer is invoked on only 2735107Seota * one CPU at any time (by the cyclic subsystem), a deadlock 2745107Seota * cannot occur regardless of the lock order here. 2755107Seota */ 2765107Seota new_tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))]; 2775107Seota 2785107Seota /* 2795107Seota * If it's on the timer cog already, there is nothing 2805107Seota * to do. Just return. 2815107Seota */ 2825107Seota if (new_tw == tw) 2835107Seota return; 2845107Seota 2855107Seota /* Remove this request from the timer */ 2865107Seota list_remove(&tw->req, req); 2875107Seota 2885107Seota /* Re-register this request to the timer */ 2895107Seota mutex_enter(&new_tw->lock); 2905107Seota 2915107Seota /* 2925107Seota * Other operations than list_insert_head() can 2935107Seota * degrade performance here. 2945107Seota */ 2955107Seota list_insert_head(&new_tw->req, req); 2965107Seota mutex_exit(&new_tw->lock); 2975107Seota 2985107Seota /* 2995107Seota * Set the TM_TRANSFER flag and notify the request is transfered 3005107Seota * completely. This prevents a race in the case that this request 3015107Seota * is serviced on another CPU already. 3025107Seota */ 3035107Seota mutex_enter(&req->lock); 3045107Seota req->flags |= TM_TRANSFER; 3055107Seota cv_signal(&req->cv); 3065107Seota mutex_exit(&req->lock); 3075107Seota } 3085107Seota 3095107Seota /* 3105107Seota * Execute timeout requests. 3115107Seota * Note. since timeout_execute() can run in interrupt context and block 3125107Seota * on condition variables, there are restrictions on the timer code that 3135107Seota * signals these condition variables (see i_untimeout(), transfer_req(), 3145107Seota * and condvar(9F)). Functions that signal these cvs must ensure that 3155107Seota * they will not be blocked (for memory allocations or any other reason) 3165107Seota * since condition variables don't support priority inheritance. 3175107Seota */ 3185107Seota static void 3195107Seota timeout_execute(void *arg) 3205107Seota { 3215107Seota tm_req_t *req = (tm_req_t *)arg; 3225107Seota ASSERT(req->flags & TM_INVOKING && !(req->flags & TM_EXECUTING)); 3235107Seota 3245107Seota for (;;) { 3255107Seota /* 3265107Seota * Check if this request is canceled. If it's canceled, do not 3275107Seota * execute this request. 3285107Seota */ 3295107Seota mutex_enter(&req->lock); 3305107Seota if (!(req->flags & TM_CANCEL)) { 3315107Seota /* 3325107Seota * Set the current thread to prevent a dead lock 3335107Seota * situation in case that this timeout request is 3345107Seota * canceled in the handler being invoked now. 3355107Seota * (this doesn't violate the spec) Set TM_EXECUTING 3365107Seota * to show this handler is invoked soon. 3375107Seota */ 3385107Seota req->h_thread = curthread; 3395107Seota req->flags |= TM_EXECUTING; 3405107Seota mutex_exit(&req->lock); 3415107Seota 3425107Seota /* The handler is invoked without holding any locks */ 3435107Seota (*req->handler)(req->arg); 3445107Seota 3455107Seota /* 3465107Seota * Set TM_COMPLETE and notify the request is complete 3475107Seota * now. 3485107Seota */ 3495107Seota mutex_enter(&req->lock); 3505107Seota req->flags |= TM_COMPLETE; 3515107Seota if (req->flags & TM_COMPWAIT) 3525107Seota cv_signal(&req->cv); 3535107Seota } 3545107Seota 3555107Seota /* 3565107Seota * The handler is invoked at this point. If this request 3575107Seota * is not canceled, prepare for the next fire. 3585107Seota */ 3595107Seota if (req->flags & TM_CANCEL) { 3605107Seota timer_tw_t *tw; 3615107Seota /* 3625107Seota * Wait until the timer finishes all things for 3635107Seota * this request. 3645107Seota */ 3655107Seota while (!(req->flags & TM_TRANSFER)) 3665107Seota cv_wait(&req->cv, &req->lock); 3675107Seota mutex_exit(&req->lock); 3685107Seota ASSERT(req->flags & TM_TRANSFER); 3695107Seota 3705107Seota /* Remove this request from the timer */ 3715107Seota tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))]; 3725107Seota mutex_enter(&tw->lock); 3735107Seota list_remove(&tw->req, req); 3745107Seota mutex_exit(&tw->lock); 3755107Seota 3765107Seota /* 3775107Seota * Wait until i_untimeout() can go ahead. 3785107Seota * This prevents the request from being freed before 3795107Seota * i_untimeout() is complete. 3805107Seota */ 3815107Seota mutex_enter(&req->lock); 3825107Seota while (req->flags & TM_COMPWAIT) 3835107Seota cv_wait(&req->cv, &req->lock); 3845107Seota mutex_exit(&req->lock); 3855107Seota ASSERT(!(req->flags & TM_COMPWAIT)); 3865107Seota 3875107Seota /* Free this request */ 3885107Seota kmem_cache_free(req_cache, req); 3895107Seota return; 3905107Seota } 3915107Seota ASSERT(req->flags & TM_EXECUTING); 3925107Seota 3935107Seota /* 3945107Seota * TM_EXECUTING must be set at this point. 3955107Seota * Unset the flag. 3965107Seota */ 3975107Seota req->flags &= ~(TM_EXECUTING | TM_TRANSFER); 3985107Seota 3995107Seota /* 4005107Seota * Decrease the request cnt. The reqest cnt shows 4015107Seota * how many times this request is executed now. 4025107Seota * If this counter becomes the zero, drop TM_INVOKING 4035107Seota * to show there is no requests to do now. 4045107Seota */ 4055107Seota req->cnt--; 4065107Seota if (req->cnt == 0) { 4075107Seota req->flags &= ~TM_INVOKING; 4085107Seota mutex_exit(&req->lock); 4095107Seota return; 4105107Seota } 4115107Seota mutex_exit(&req->lock); 4125107Seota } 4135107Seota } 4145107Seota 4155107Seota /* 4165107Seota * Timeout worker thread for processing task queue. 4175107Seota */ 4185107Seota static void 4195107Seota timeout_taskq_thread(void *arg) 4205107Seota { 4215107Seota _NOTE(ARGUNUSED(arg)); 4225107Seota tm_req_t *kern_req; 4235107Seota callb_cpr_t cprinfo; 4245107Seota 4255107Seota CALLB_CPR_INIT(&cprinfo, &disp_req_lock, callb_generic_cpr, 4265107Seota "timeout_taskq_thread"); 4275107Seota 4285107Seota /* 4295107Seota * This thread is wakened up when a new request is added to 4305107Seota * the queue. Then pick up all requests and dispatch them 4315107Seota * via taskq_dispatch(). 4325107Seota */ 4335107Seota for (;;) { 4345107Seota /* 4355107Seota * Check the queue and pick up a request if the queue 4365107Seota * is not NULL. 4375107Seota */ 4385107Seota mutex_enter(&disp_req_lock); 4395107Seota while ((kern_req = list_head(&kern_queue)) == NULL) { 4405107Seota CALLB_CPR_SAFE_BEGIN(&cprinfo); 4415107Seota cv_wait(&kern_cv, &disp_req_lock); 4425107Seota CALLB_CPR_SAFE_END(&cprinfo, &disp_req_lock); 4435107Seota } 4445107Seota list_remove(&kern_queue, kern_req); 4455107Seota mutex_exit(&disp_req_lock); 4465107Seota 4475107Seota /* Execute the timeout request via the taskq thread */ 4485107Seota (void) taskq_dispatch(tm_taskq, timeout_execute, 4495107Seota (void *)kern_req, TQ_SLEEP); 4505107Seota } 4515107Seota } 4525107Seota 4535107Seota /* 4545107Seota * Dispatch the timeout request based on the level specified. 4555107Seota * If the level is equal to zero, notify the worker thread to 4565107Seota * call taskq_dispatch() in kernel context. If the level is bigger 4575107Seota * than zero, add a software interrupt request to the queue and raise 4585107Seota * the interrupt level to the specified one. 4595107Seota */ 4605107Seota static void 4615107Seota timeout_dispatch(tm_req_t *req) 4625107Seota { 4635107Seota int level = req->level; 4645107Seota extern void sir_on(int); 4655107Seota 4665107Seota if (level == TM_IPL_0) { 4675107Seota /* Add a new request to the tail */ 4685107Seota mutex_enter(&disp_req_lock); 4695107Seota list_insert_tail(&kern_queue, req); 4705107Seota mutex_exit(&disp_req_lock); 4715107Seota 4725107Seota /* 4735107Seota * notify the worker thread that this request 4745107Seota * is newly added to the queue. 4755107Seota * Note. this cv_signal() can be called after the 4765107Seota * mutex_lock. 4775107Seota */ 4785107Seota cv_signal(&kern_cv); 4795107Seota } else { 4805107Seota /* Add a new request to the tail */ 4815107Seota mutex_enter(&disp_req_lock); 4825107Seota list_insert_tail(&intr_queue, req); 4835107Seota 4845107Seota /* Issue the software interrupt */ 4855107Seota if (intr_state & TM_INTR_START(level)) { 4865107Seota /* 4875107Seota * timer_softintr() is already running; no need to 4885107Seota * raise a siron. Due to lock protection of 4895107Seota * the intr_queue and intr_state, we know that 4905107Seota * timer_softintr() will see the new addition to 4915107Seota * the intr_queue. 4925107Seota */ 4935107Seota mutex_exit(&disp_req_lock); 4945107Seota } else { 4955107Seota intr_state |= TM_INTR_SET(level); 4965107Seota mutex_exit(&disp_req_lock); 4975107Seota 4985107Seota /* Raise an interrupt to execute timeout requests */ 4995107Seota sir_on(level); 5005107Seota } 5015107Seota } 5025107Seota } 5035107Seota 5045107Seota /* 5055107Seota * Check the software interrupt queue and invoke requests at the specified 5065107Seota * interrupt level. 5075107Seota * Note that the queue may change during call so that the disp_req_lock 5085107Seota * and the intr_state are used to protect it. 5095107Seota * The software interrupts supported here are up to the level 10. Higher 5105107Seota * than 10 interrupts cannot be supported. 5115107Seota */ 5125107Seota void 5135107Seota timer_softintr(int level) 5145107Seota { 5155107Seota tm_req_t *intr_req; 5165107Seota ASSERT(level >= TM_IPL_1 && level <= TM_IPL_10); 5175107Seota 5185107Seota /* Check if we are asked to process the softcall list */ 5195107Seota mutex_enter(&disp_req_lock); 5205107Seota if (!(intr_state & TM_INTR_SET(level))) { 5215107Seota mutex_exit(&disp_req_lock); 5225107Seota return; 5235107Seota } 5245107Seota 5255107Seota /* Notify this software interrupt request will be executed soon */ 5265107Seota intr_state |= TM_INTR_START(level); 5275107Seota intr_state &= ~TM_INTR_SET(level); 5285107Seota 5295107Seota /* loop the link until there is no requests */ 5305107Seota for (intr_req = list_head(&intr_queue); intr_req != NULL; 5315107Seota /* Nothing */) { 5325107Seota 5335107Seota /* Check the interrupt level */ 5345107Seota if (intr_req->level != level) { 5355107Seota intr_req = list_next(&intr_queue, intr_req); 5365107Seota continue; 5375107Seota } 5385107Seota list_remove(&intr_queue, intr_req); 5395107Seota mutex_exit(&disp_req_lock); 5405107Seota 5415107Seota /* Execute the software interrupt request */ 5425107Seota timeout_execute(intr_req); 5435107Seota 5445107Seota mutex_enter(&disp_req_lock); 5455107Seota /* Restart the loop since new requests might be added */ 5465107Seota intr_req = list_head(&intr_queue); 5475107Seota } 5485107Seota 5495107Seota /* reset the interrupt state */ 5505107Seota intr_state &= ~TM_INTR_START(level); 5515107Seota mutex_exit(&disp_req_lock); 5525107Seota } 5535107Seota 5545107Seota /* 5555107Seota * void 5565107Seota * cyclic_timer(void) 5575107Seota * 5585107Seota * Overview 5595107Seota * cyclic_timer() is a function invoked periodically by the cyclic 5605107Seota * subsystem. 5615107Seota * 5625107Seota * The function calls timeout_invoke() with timeout requests whose 5635107Seota * expiration time is already reached. 5645107Seota * 5655107Seota * Arguments 5665107Seota * Nothing 5675107Seota * 5685107Seota * Return value 5695107Seota * Nothing 5705107Seota */ 5715107Seota void 5725107Seota cyclic_timer(void) 5735107Seota { 5745107Seota tm_req_t *req; 5755107Seota timer_tw_t *tw; 5765107Seota hrtime_t curr_tick, curr; 5775107Seota 5785107Seota /* If the system is suspended, just return */ 5795107Seota if (timer_suspended) 5805107Seota return; 5815107Seota 5825107Seota /* Get the current time */ 5835107Seota timer_hrtime = ddi_timer->tick_time = curr = gethrtime(); 5845107Seota curr_tick = tw_tick(ddi_timer->tick_time); 5855107Seota 5865107Seota restart: 5875107Seota /* 5885107Seota * Check the timer cogs to see if there are timeout requests 5895107Seota * who reach the expiration time. Call timeout_invoke() to execute 5905107Seota * the requests, then. 5915107Seota */ 5925107Seota while (curr_tick >= ddi_timer->tick) { 5935107Seota tm_req_t *next; 5945107Seota tw = &ddi_timer->exhash[TM_HASH(ddi_timer->tick)]; 5955107Seota mutex_enter(&tw->lock); 5965107Seota for (req = list_head(&tw->req); req != NULL; req = next) { 5975107Seota next = list_next(&tw->req, req); 5985107Seota /* 5995107Seota * If this request is already obsolete, free 6005107Seota * it here. 6015107Seota */ 6025107Seota if (req->flags & TM_UTMCOMP) { 6035107Seota /* 6045107Seota * Remove this request from the timer, 6055107Seota * then free it. 6065107Seota */ 6075107Seota list_remove(&tw->req, req); 6085107Seota kmem_cache_free(req_cache, req); 6095107Seota } else if (curr >= req->exp_time) { 6105107Seota mutex_enter(&req->lock); 6115107Seota /* 6125107Seota * Check if this request is canceled, but not 6135107Seota * being executed now. 6145107Seota */ 6155107Seota if (req->flags & TM_CANCEL && 6165107Seota !(req->flags & TM_INVOKING)) { 6175107Seota mutex_exit(&req->lock); 6185107Seota continue; 6195107Seota } 6205107Seota /* 6215107Seota * Record how many times timeout_execute() 6225107Seota * must be invoked. 6235107Seota */ 6245107Seota req->cnt++; 6255107Seota /* 6265107Seota * Invoke timeout_execute() via taskq or 6275107Seota * software interrupt. 6285107Seota */ 6295107Seota if (req->flags & TM_INVOKING) { 6305107Seota /* 6315107Seota * If it's already invoked, 6325107Seota * There is nothing to do. 6335107Seota */ 6345107Seota mutex_exit(&req->lock); 6355107Seota } else { 6365107Seota req->flags |= TM_INVOKING; 6375107Seota mutex_exit(&req->lock); 6385107Seota /* 6395107Seota * Dispatch this timeout request. 6405107Seota * timeout_dispatch() chooses either 6415107Seota * a software interrupt or taskq thread 6425107Seota * based on the level. 6435107Seota */ 6445107Seota timeout_dispatch(req); 6455107Seota } 6465107Seota /* 6475107Seota * Periodic timeout requests must prepare for 6485107Seota * the next fire. 6495107Seota */ 6505107Seota transfer_req(req, tw); 6515107Seota } 6525107Seota } 6535107Seota mutex_exit(&tw->lock); 6545107Seota ddi_timer->tick++; 6555107Seota } 6565107Seota 6575107Seota /* 6585107Seota * Check the current time. If we spend some amount of time, 6595107Seota * double-check if some of the requests reaches the expiration 6605107Seota * time during the work. 6615107Seota */ 6625107Seota curr = gethrtime(); 6635107Seota curr_tick = tw_tick(curr); 6645107Seota if (curr_tick >= ddi_timer->tick) { 6655107Seota ddi_timer->tick -= 1; 6665107Seota goto restart; 6675107Seota } 6685107Seota /* Adjustment for the next rolling */ 6695107Seota ddi_timer->tick -= 1; 6705107Seota } 6715107Seota 6725107Seota /* 6735107Seota * void 6745107Seota * timer_init(void) 6755107Seota * 6765107Seota * Overview 6775107Seota * timer_init() allocates the internal data structures used by 6785107Seota * i_timeout(), i_untimeout() and the timer. 6795107Seota * 6805107Seota * Arguments 6815107Seota * Nothing 6825107Seota * 6835107Seota * Return value 6845107Seota * Nothing 6855107Seota * 6865107Seota * Caller's context 6875107Seota * timer_init() can be called in kernel context only. 6885107Seota */ 6895107Seota void 6905107Seota timer_init(void) 6915107Seota { 6925107Seota int i; 6935107Seota 6945107Seota /* Create kmem_cache for timeout requests */ 6955107Seota req_cache = kmem_cache_create("timeout_request", sizeof (tm_req_t), 6965107Seota 0, NULL, NULL, NULL, NULL, NULL, 0); 6975107Seota 6985107Seota /* Initialize the timer which is invoked by the cyclic subsystem */ 6995107Seota ddi_timer = kmem_alloc(sizeof (cyc_timer_t), KM_SLEEP); 7005107Seota ddi_timer->res = nsec_per_tick; 7015107Seota ddi_timer->tick = tw_tick(gethrtime()); 7025107Seota ddi_timer->tick_time = 0; 7035107Seota 7045107Seota /* Initialize the timing wheel */ 7055107Seota bzero((char *)&ddi_timer->idhash[0], TM_HASH_SZ * sizeof (timer_tw_t)); 7065107Seota bzero((char *)&ddi_timer->exhash[0], TM_HASH_SZ * sizeof (timer_tw_t)); 7075107Seota 7085107Seota for (i = 0; i < TM_HASH_SZ; i++) { 7095107Seota list_create(&ddi_timer->idhash[i].req, sizeof (tm_req_t), 7105107Seota offsetof(tm_req_t, id_req)); 7115107Seota mutex_init(&ddi_timer->idhash[i].lock, NULL, MUTEX_ADAPTIVE, 7125107Seota NULL); 7135107Seota 7145107Seota list_create(&ddi_timer->exhash[i].req, sizeof (tm_req_t), 7155107Seota offsetof(tm_req_t, ex_req)); 7165107Seota mutex_init(&ddi_timer->exhash[i].lock, NULL, MUTEX_ADAPTIVE, 7175107Seota NULL); 7185107Seota } 7195107Seota 7205107Seota /* Create a taskq thread pool */ 7215107Seota tm_taskq = taskq_create_instance("timeout_taskq", 0, 7225107Seota timer_taskq_num, MAXCLSYSPRI, 7235265Seota timer_taskq_min_num, timer_taskq_max_num, 7245107Seota TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 7255107Seota 7265107Seota /* 7275107Seota * Initialize the taskq queue which is dedicated to this timeout 7285107Seota * interface/timer. 7295107Seota */ 7305107Seota list_create(&kern_queue, sizeof (tm_req_t), 7315107Seota offsetof(tm_req_t, disp_req)); 7325107Seota 7335107Seota /* Create a worker thread to dispatch the taskq thread */ 7345107Seota tm_work_thread = thread_create(NULL, 0, timeout_taskq_thread, NULL, 7355107Seota 0, &p0, TS_RUN, MAXCLSYSPRI); 7365107Seota 7375107Seota /* 7385107Seota * Initialize the software interrupt queue which is dedicated to 7395107Seota * this timeout interface/timer. 7405107Seota */ 7415107Seota list_create(&intr_queue, sizeof (tm_req_t), 7425107Seota offsetof(tm_req_t, disp_req)); 7435107Seota 7445107Seota /* 7455107Seota * Initialize the mutex lock used for both of kern_queue and 7465107Seota * intr_queue. 7475107Seota */ 7485107Seota mutex_init(&disp_req_lock, NULL, MUTEX_ADAPTIVE, NULL); 7495107Seota cv_init(&kern_cv, NULL, CV_DEFAULT, NULL); 7505107Seota 7515107Seota /* Register the callback handler for the system suspend/resume */ 7525107Seota (void) callb_add(timer_cpr_callb, 0, CB_CL_CPR_CALLOUT, "cyclicTimer"); 7535107Seota } 7545107Seota 7555107Seota /* 7565107Seota * timeout_t 7575107Seota * i_timeout(void (*func)(void *), void *arg, hrtime_t interval, 7585107Seota * int level, int flags) 7595107Seota * 7605107Seota * Overview 7615107Seota * i_timeout() is an internal function scheduling the passed function 7625107Seota * to be invoked in the interval in nanoseconds. The callback function 7635107Seota * keeps invoked until the request is explicitly canceled by i_untimeout(). 7645107Seota * This function is used for ddi_periodic_add(9F). 7655107Seota * 7665107Seota * Arguments 7675107Seota * 7685107Seota * func: the callback function 7695107Seota * the callback function will be invoked in kernel context if 7705107Seota * the level passed is the zero. Otherwise be invoked in interrupt 7715107Seota * context at the specified level by the argument "level". 7725107Seota * 7735107Seota * Note that It's guaranteed by the cyclic subsystem that the 7745107Seota * function is invoked on the only one CPU and is never executed 7755107Seota * simultaneously even on MP system. 7765107Seota * 7775107Seota * arg: the argument passed to the callback function 7785107Seota * 7795107Seota * interval: interval time in nanoseconds 7805107Seota * if the interval is the zero, the timer resolution is used. 7815107Seota * 7825107Seota * level : callback interrupt level 7835107Seota * If the value is 0 (the zero), the callback function is invoked 7845107Seota * in kernel context. If the value is more than 0 (the zero), but 7855107Seota * less than or equal to 10, the callback function is invoked in 7865107Seota * interrupt context at the specified interrupt level. 7875107Seota * This value must be in range of 0-10. 7885107Seota * 7895107Seota * Return value 7905107Seota * returns a non-zero opaque value (timeout_t) on success. 7915107Seota * 7925107Seota * Caller's context 7935107Seota * i_timeout() can be called in user, kernel or interrupt context. 7945107Seota * It cannot be called in high interrupt context. 7955107Seota * 7965107Seota * Note. This function is used by ddi_periodic_add(), which cannot 7975107Seota * be called in interrupt context. As a result, this function is called 7985107Seota * in user or kernel context only in practice. 7995107Seota * 8005107Seota */ 8015107Seota timeout_t 8025107Seota i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level) 8035107Seota { 8045107Seota hrtime_t start_time = gethrtime(), res; 8055107Seota tm_req_t *req = NULL; 8065107Seota 8075107Seota /* Allocate and initialize the timeout request */ 8085107Seota req = kmem_cache_alloc(req_cache, KM_SLEEP); 8095107Seota req->handler = func; 8105107Seota req->arg = arg; 8115107Seota req->h_thread = NULL; 8125107Seota req->level = level; 8135107Seota req->flags = 0; 8145107Seota req->cnt = 0; 8155107Seota mutex_init(&req->lock, NULL, MUTEX_ADAPTIVE, NULL); 8165107Seota cv_init(&req->cv, NULL, CV_DEFAULT, NULL); 8175107Seota 8185107Seota /* 8195107Seota * The resolution must be finer than or equal to 8205107Seota * the requested interval. If it's not, set the resolution 8215107Seota * to the interval. 8225107Seota * Note. There is a restriction currently. Regardless of the 8235107Seota * clock resolution used here, 10ms is set as the timer resolution. 8245107Seota * Even on the 1ms resolution timer, the minimum interval is 10ms. 8255107Seota */ 8265107Seota if ((res = i_get_res()) > interval) { 8275107Seota uintptr_t pc = (uintptr_t)req->handler; 8285107Seota ulong_t off; 8295107Seota cmn_err(CE_WARN, 8305107Seota "The periodic timeout (handler=%s, interval=%lld) " 8315107Seota "requests a finer interval than the supported resolution. " 8325107Seota "It rounds up to %lld\n", kobj_getsymname(pc, &off), 8335107Seota interval, res); 8345107Seota interval = res; 8355107Seota } 8365107Seota 8375107Seota /* 8385107Seota * If the specified interval is already multiples of 8395107Seota * the resolution, use it as is. Otherwise, it rounds 8405107Seota * up to multiples of the timer resolution. 8415107Seota */ 8425107Seota req->interval = roundup(interval, i_get_res()); 8435107Seota 8445107Seota /* 8455107Seota * For the periodic timeout requests, the first expiration time will 8465107Seota * be adjusted to the timer tick edge to take advantage of the cyclic 8475107Seota * subsystem. In that case, the first fire is likely not an expected 8485107Seota * one, but the fires later can be more accurate due to this. 8495107Seota */ 8505107Seota req->exp_time = roundup(start_time + req->interval, i_get_res()); 8515107Seota 8525107Seota /* Add the request to the timer */ 8535107Seota return (add_req(req)); 8545107Seota } 8555107Seota 8565107Seota /* 8575107Seota * void 8585107Seota * i_untimeout(timeout_t req) 8595107Seota * 8605107Seota * Overview 8615107Seota * i_untimeout() is an internal function canceling the i_timeout() 8625107Seota * request previously issued. 8635107Seota * This function is used for ddi_periodic_delete(9F). 8645107Seota * 8655107Seota * Argument 8665107Seota * req: timeout_t opaque value i_timeout() returned previously. 8675107Seota * 8685107Seota * Return value 8695107Seota * Nothing. 8705107Seota * 8715107Seota * Caller's context 8725107Seota * i_untimeout() can be called in user, kernel or interrupt context. 8735107Seota * It cannot be called in high interrupt context. 8745107Seota * 8755107Seota * Note. This function is used by ddi_periodic_delete(), which cannot 8765107Seota * be called in interrupt context. As a result, this function is called 8775107Seota * in user or kernel context only in practice. Also i_untimeout() sends 8785107Seota * the cv_signal to timeout_execute(), which runs in interrupt context. 8795107Seota * Make sure this function will not be blocked, otherwise the deadlock 8805107Seota * situation can occur. See timeout_execute(). 8815107Seota */ 8825107Seota void 8835107Seota i_untimeout(timeout_t timeout_req) 8845107Seota { 8855107Seota timer_tw_t *tid; 8865107Seota tm_req_t *req; 8875107Seota timeout_t id; 8885107Seota 8895107Seota /* Retrieve the id for this timeout request */ 8905107Seota id = (timeout_t)timeout_req; 8915107Seota tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)]; 8925107Seota 8935107Seota mutex_enter(&tid->lock); 8945107Seota for (req = list_head(&tid->req); req != NULL; 8955107Seota req = list_next(&tid->req, req)) { 8965107Seota if (req->id == id) 8975107Seota break; 8985107Seota } 8995107Seota if (req == NULL) { 9005107Seota /* There is no requests with this id after all */ 9015107Seota mutex_exit(&tid->lock); 9025107Seota return; 9035107Seota } 9045107Seota mutex_enter(&req->lock); 9055107Seota 9065107Seota /* Unregister this request first */ 9075107Seota list_remove(&tid->req, req); 9085107Seota 9095107Seota /* Notify that this request is canceled */ 9105107Seota req->flags |= TM_CANCEL; 9115107Seota 9125107Seota /* Check if the handler is invoked */ 9135107Seota if (req->flags & TM_INVOKING) { 9145107Seota /* 9155107Seota * If this request is not yet executed or is already finished 9165107Seota * then there is nothing to do but just return. Otherwise 9175107Seota * we'll have to wait for the callback execution being complete. 9185107Seota */ 9195107Seota if (!(req->flags & TM_EXECUTING) || req->flags & TM_COMPLETE) { 9205107Seota /* There is nothing to do any more */ 9215107Seota mutex_exit(&req->lock); 9225107Seota mutex_exit(&tid->lock); 9235107Seota return; 9245107Seota } 9255107Seota 9265107Seota /* 9275107Seota * If this is the recursive call, there is nothing 9285107Seota * to do any more. This is the case that i_untimeout() 9295107Seota * is called in the handler. 9305107Seota */ 9315107Seota if (req->h_thread == curthread) { 9325107Seota mutex_exit(&req->lock); 9335107Seota mutex_exit(&tid->lock); 9345107Seota return; 9355107Seota } 9365107Seota 9375107Seota /* 9385107Seota * Notify that i_untimeout() is waiting until this request 9395107Seota * is complete. 9405107Seota */ 9415107Seota req->flags |= TM_COMPWAIT; 9425107Seota mutex_exit(&tid->lock); 9435107Seota 9445107Seota /* 9455107Seota * Wait for this timeout request being complete before 9465107Seota * the return. 9475107Seota */ 9485107Seota while (!(req->flags & TM_COMPLETE)) 9495107Seota cv_wait(&req->cv, &req->lock); 9505107Seota req->flags &= ~TM_COMPWAIT; 9515107Seota cv_signal(&req->cv); 9525107Seota mutex_exit(&req->lock); 9535107Seota return; 9545107Seota } 9555107Seota mutex_exit(&req->lock); 9565107Seota mutex_exit(&tid->lock); 9575107Seota 9585107Seota /* 9595107Seota * Notify untimeout() is about to be finished, and this request 9605107Seota * can be freed. 9615107Seota */ 9625107Seota atomic_or_uint(&req->flags, TM_UTMCOMP); 9635107Seota } 964