15107Seota /* 25107Seota * CDDL HEADER START 35107Seota * 45107Seota * The contents of this file are subject to the terms of the 55107Seota * Common Development and Distribution License (the "License"). 65107Seota * You may not use this file except in compliance with the License. 75107Seota * 85107Seota * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 95107Seota * or http://www.opensolaris.org/os/licensing. 105107Seota * See the License for the specific language governing permissions 115107Seota * and limitations under the License. 125107Seota * 135107Seota * When distributing Covered Code, include this CDDL HEADER in each 145107Seota * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 155107Seota * If applicable, add the following below this CDDL HEADER, with the 165107Seota * fields enclosed by brackets "[]" replaced with your own identifying 175107Seota * information: Portions Copyright [yyyy] [name of copyright owner] 185107Seota * 195107Seota * CDDL HEADER END 205107Seota */ 215107Seota 225107Seota /* 235107Seota * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 245107Seota * Use is subject to license terms. 255107Seota */ 265107Seota 275107Seota #pragma ident "%Z%%M% %I% %E% SMI" 285107Seota 295107Seota #include <sys/atomic.h> 305107Seota #include <sys/callb.h> 315107Seota #include <sys/conf.h> 325107Seota #include <sys/cmn_err.h> 335107Seota #include <sys/taskq.h> 345107Seota #include <sys/dditypes.h> 355107Seota #include <sys/ddi_timer.h> 365107Seota #include <sys/disp.h> 375107Seota #include <sys/kobj.h> 385107Seota #include <sys/note.h> 395107Seota #include <sys/param.h> 405107Seota #include <sys/sysmacros.h> 415107Seota #include <sys/systm.h> 425107Seota #include <sys/time.h> 435107Seota #include <sys/types.h> 445107Seota 455107Seota /* 465107Seota * global variables for timeout request 475107Seota */ 485107Seota static kmem_cache_t *req_cache; /* kmem cache for timeout request */ 495107Seota 505107Seota /* 515107Seota * taskq for timer 525107Seota */ 53*5265Seota int timer_taskq_num = 1; /* initial thread number */ 54*5265Seota int timer_taskq_min_num = 4; /* minimum taskq thread pool */ 55*5265Seota int timer_taskq_max_num = 16; /* maximum taskq thread pool */ 565107Seota static taskq_t *tm_taskq; /* taskq thread pool */ 575107Seota static kthread_t *tm_work_thread; /* work thread invoking taskq */ 585107Seota 595107Seota /* 605107Seota * timer variables 615107Seota */ 625107Seota static cyc_timer_t *ddi_timer; /* ddi timer based on the cyclic */ 635107Seota static volatile hrtime_t timer_hrtime; /* current tick time on the timer */ 645107Seota 655107Seota /* 665107Seota * Variable used for the suspend/resume. 675107Seota */ 685107Seota static volatile boolean_t timer_suspended; 695107Seota 705107Seota /* 715107Seota * Kernel taskq queue to ddi timer 725107Seota */ 735107Seota static list_t kern_queue; /* kernel thread request queue */ 745107Seota static kcondvar_t kern_cv; /* condition variable for taskq queue */ 755107Seota 765107Seota /* 775107Seota * Software interrupt queue dedicated to ddi timer 785107Seota */ 795107Seota static list_t intr_queue; /* software interrupt request queue */ 805107Seota static uint_t intr_state; /* software interrupt state */ 815107Seota 825107Seota /* 835107Seota * This lock is used to protect the intr_queue and kern_queue. 845107Seota * It's also used to protect the intr_state which represents the software 855107Seota * interrupt state for the timer. 865107Seota */ 875107Seota static kmutex_t disp_req_lock; 885107Seota 895107Seota /* 905107Seota * the periodic timer interrupt priority level 915107Seota */ 925107Seota enum { 935107Seota TM_IPL_0 = 0, /* kernel context */ 945107Seota TM_IPL_1, TM_IPL_2, TM_IPL_3, /* level 1-3 */ 955107Seota TM_IPL_4, TM_IPL_5, TM_IPL_6, /* level 4-6 */ 965107Seota TM_IPL_7, TM_IPL_8, TM_IPL_9, /* level 7-9 */ 975107Seota TM_IPL_10 /* level 10 */ 985107Seota }; 995107Seota 1005107Seota /* 1015107Seota * A callback handler used by CPR to stop and resume callouts. 1025107Seota * Since the taskq uses TASKQ_CPR_SAFE, the function just set the boolean 1035107Seota * flag to timer_suspended here. 1045107Seota */ 1055107Seota /*ARGSUSED*/ 1065107Seota static boolean_t 1075107Seota timer_cpr_callb(void *arg, int code) 1085107Seota { 1095107Seota timer_suspended = (code == CB_CODE_CPR_CHKPT); 1105107Seota return (B_TRUE); 1115107Seota } 1125107Seota 1135107Seota /* 1145107Seota * Return a proposed timeout request id. add_req() determines whether 1155107Seota * or not the proposed one is used. If it's not suitable, add_req() 1165107Seota * recalls get_req_cnt(). To reduce the lock contention between the 1175107Seota * timer and i_untimeout(), the atomic instruction should be used here. 1185107Seota */ 1195107Seota static timeout_t 1205107Seota get_req_cnt(void) 1215107Seota { 1225107Seota static volatile ulong_t timeout_cnt = 0; 1235107Seota return ((timeout_t)atomic_inc_ulong_nv(&timeout_cnt)); 1245107Seota } 1255107Seota 1265107Seota /* 1275107Seota * Get the system resolution. 1285107Seota * Note. currently there is a restriction about the system resolution, and 1295107Seota * the 10ms tick (the default clock resolution) is only supported now. 1305107Seota */ 1315107Seota static hrtime_t 1325107Seota i_get_res(void) 1335107Seota { 1345107Seota return ((hrtime_t)10000000); /* 10ms tick only */ 1355107Seota } 1365107Seota 1375107Seota /* 1385107Seota * Return the value for the cog of the timing wheel. 1395107Seota * TICK_FACTOR is used to gain a finer cog on the clock resolution. 1405107Seota */ 1415107Seota static hrtime_t 1425107Seota tw_tick(hrtime_t time) 1435107Seota { 1445107Seota return ((time << TICK_FACTOR) / ddi_timer->res); 1455107Seota } 1465107Seota 1475107Seota /* 1485107Seota * Calculate the expiration time for the timeout request. 1495107Seota */ 1505107Seota static hrtime_t 1515107Seota expire_tick(tm_req_t *req) 1525107Seota { 1535107Seota return (tw_tick(req->exp_time)); 1545107Seota } 1555107Seota 1565107Seota /* 1575107Seota * Register a timeout request to the timer. This function is used 1585107Seota * in i_timeout(). 1595107Seota */ 1605107Seota static timeout_t 1615107Seota add_req(tm_req_t *req) 1625107Seota { 1635107Seota timer_tw_t *tid, *tw; 1645107Seota tm_req_t *next; 1655107Seota timeout_t id; 1665107Seota 1675107Seota retry: 1685107Seota /* 1695107Seota * Retrieve a timeout request id. Since i_timeout() needs to return 1705107Seota * a non-zero value, re-try if the zero is gotten. 1715107Seota */ 1725107Seota if ((id = get_req_cnt()) == 0) 1735107Seota id = get_req_cnt(); 1745107Seota 1755107Seota /* 1765107Seota * Check if the id is not used yet. Since the framework now deals 1775107Seota * with the periodic timeout requests, we cannot assume the id 1785107Seota * allocated (long) before doesn't exist any more when it will 1795107Seota * be re-assigned again (especially on 32bit) but need to handle 1805107Seota * this case to solve the conflicts. If it's used already, retry 1815107Seota * another. 1825107Seota */ 1835107Seota tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)]; 1845107Seota mutex_enter(&tid->lock); 1855107Seota for (next = list_head(&tid->req); next != NULL; 1865107Seota next = list_next(&tid->req, next)) { 1875107Seota if (next->id == id) { 1885107Seota mutex_exit(&tid->lock); 1895107Seota goto retry; 1905107Seota } 1915107Seota } 1925107Seota /* Nobody uses this id yet */ 1935107Seota req->id = id; 1945107Seota 1955107Seota /* 1965107Seota * Register this request to the timer. 1975107Seota * The list operation must be list_insert_head(). 1985107Seota * Other operations can degrade performance. 1995107Seota */ 2005107Seota list_insert_head(&tid->req, req); 2015107Seota mutex_exit(&tid->lock); 2025107Seota 2035107Seota tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))]; 2045107Seota mutex_enter(&tw->lock); 2055107Seota /* 2065107Seota * Other operations than list_insert_head() can 2075107Seota * degrade performance here. 2085107Seota */ 2095107Seota list_insert_head(&tw->req, req); 2105107Seota mutex_exit(&tw->lock); 2115107Seota 2125107Seota return (id); 2135107Seota } 2145107Seota 2155107Seota /* 2165107Seota * Periodic timeout requests cannot be removed until they are canceled 2175107Seota * explicitly. Until then, they need to be re-registerd after they are 2185107Seota * fired. transfer_req() re-registers the requests for the next fires. 2195107Seota * Note. transfer_req() sends the cv_signal to timeout_execute(), which 2205107Seota * runs in interrupt context. Make sure this function will not be blocked, 2215107Seota * otherwise the deadlock situation can occur. 2225107Seota */ 2235107Seota static void 2245107Seota transfer_req(tm_req_t *req, timer_tw_t *tw) 2255107Seota { 2265107Seota timer_tw_t *new_tw; 2275107Seota hrtime_t curr_time; 2285107Seota ASSERT(tw && MUTEX_HELD(&tw->lock)); 2295107Seota 2305107Seota /* Calculate the next expiration time by interval */ 2315107Seota req->exp_time += req->interval; 2325107Seota curr_time = gethrtime(); 2335107Seota 2345107Seota /* 2355107Seota * If a long time (more than 1 clock resolution) has already 2365107Seota * passed for some reason (e.g. debugger or high interrupt), 2375107Seota * round up the next expiration to the appropriate one 2385107Seota * since this request is periodic and never catches with it. 2395107Seota */ 2405107Seota if (curr_time - req->exp_time >= ddi_timer->res) { 2415107Seota req->exp_time = roundup(curr_time + req->interval, 2425107Seota ddi_timer->res); 2435107Seota } 2445107Seota 2455107Seota /* 2465107Seota * Re-register this request. 2475107Seota * Note. since it is guaranteed that the timer is invoked on only 2485107Seota * one CPU at any time (by the cyclic subsystem), a deadlock 2495107Seota * cannot occur regardless of the lock order here. 2505107Seota */ 2515107Seota new_tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))]; 2525107Seota 2535107Seota /* 2545107Seota * If it's on the timer cog already, there is nothing 2555107Seota * to do. Just return. 2565107Seota */ 2575107Seota if (new_tw == tw) 2585107Seota return; 2595107Seota 2605107Seota /* Remove this request from the timer */ 2615107Seota list_remove(&tw->req, req); 2625107Seota 2635107Seota /* Re-register this request to the timer */ 2645107Seota mutex_enter(&new_tw->lock); 2655107Seota 2665107Seota /* 2675107Seota * Other operations than list_insert_head() can 2685107Seota * degrade performance here. 2695107Seota */ 2705107Seota list_insert_head(&new_tw->req, req); 2715107Seota mutex_exit(&new_tw->lock); 2725107Seota 2735107Seota /* 2745107Seota * Set the TM_TRANSFER flag and notify the request is transfered 2755107Seota * completely. This prevents a race in the case that this request 2765107Seota * is serviced on another CPU already. 2775107Seota */ 2785107Seota mutex_enter(&req->lock); 2795107Seota req->flags |= TM_TRANSFER; 2805107Seota cv_signal(&req->cv); 2815107Seota mutex_exit(&req->lock); 2825107Seota } 2835107Seota 2845107Seota /* 2855107Seota * Execute timeout requests. 2865107Seota * Note. since timeout_execute() can run in interrupt context and block 2875107Seota * on condition variables, there are restrictions on the timer code that 2885107Seota * signals these condition variables (see i_untimeout(), transfer_req(), 2895107Seota * and condvar(9F)). Functions that signal these cvs must ensure that 2905107Seota * they will not be blocked (for memory allocations or any other reason) 2915107Seota * since condition variables don't support priority inheritance. 2925107Seota */ 2935107Seota static void 2945107Seota timeout_execute(void *arg) 2955107Seota { 2965107Seota tm_req_t *req = (tm_req_t *)arg; 2975107Seota ASSERT(req->flags & TM_INVOKING && !(req->flags & TM_EXECUTING)); 2985107Seota 2995107Seota for (;;) { 3005107Seota /* 3015107Seota * Check if this request is canceled. If it's canceled, do not 3025107Seota * execute this request. 3035107Seota */ 3045107Seota mutex_enter(&req->lock); 3055107Seota if (!(req->flags & TM_CANCEL)) { 3065107Seota /* 3075107Seota * Set the current thread to prevent a dead lock 3085107Seota * situation in case that this timeout request is 3095107Seota * canceled in the handler being invoked now. 3105107Seota * (this doesn't violate the spec) Set TM_EXECUTING 3115107Seota * to show this handler is invoked soon. 3125107Seota */ 3135107Seota req->h_thread = curthread; 3145107Seota req->flags |= TM_EXECUTING; 3155107Seota mutex_exit(&req->lock); 3165107Seota 3175107Seota /* The handler is invoked without holding any locks */ 3185107Seota (*req->handler)(req->arg); 3195107Seota 3205107Seota /* 3215107Seota * Set TM_COMPLETE and notify the request is complete 3225107Seota * now. 3235107Seota */ 3245107Seota mutex_enter(&req->lock); 3255107Seota req->flags |= TM_COMPLETE; 3265107Seota if (req->flags & TM_COMPWAIT) 3275107Seota cv_signal(&req->cv); 3285107Seota } 3295107Seota 3305107Seota /* 3315107Seota * The handler is invoked at this point. If this request 3325107Seota * is not canceled, prepare for the next fire. 3335107Seota */ 3345107Seota if (req->flags & TM_CANCEL) { 3355107Seota timer_tw_t *tw; 3365107Seota /* 3375107Seota * Wait until the timer finishes all things for 3385107Seota * this request. 3395107Seota */ 3405107Seota while (!(req->flags & TM_TRANSFER)) 3415107Seota cv_wait(&req->cv, &req->lock); 3425107Seota mutex_exit(&req->lock); 3435107Seota ASSERT(req->flags & TM_TRANSFER); 3445107Seota 3455107Seota /* Remove this request from the timer */ 3465107Seota tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))]; 3475107Seota mutex_enter(&tw->lock); 3485107Seota list_remove(&tw->req, req); 3495107Seota mutex_exit(&tw->lock); 3505107Seota 3515107Seota /* 3525107Seota * Wait until i_untimeout() can go ahead. 3535107Seota * This prevents the request from being freed before 3545107Seota * i_untimeout() is complete. 3555107Seota */ 3565107Seota mutex_enter(&req->lock); 3575107Seota while (req->flags & TM_COMPWAIT) 3585107Seota cv_wait(&req->cv, &req->lock); 3595107Seota mutex_exit(&req->lock); 3605107Seota ASSERT(!(req->flags & TM_COMPWAIT)); 3615107Seota 3625107Seota /* Free this request */ 3635107Seota kmem_cache_free(req_cache, req); 3645107Seota return; 3655107Seota } 3665107Seota ASSERT(req->flags & TM_EXECUTING); 3675107Seota 3685107Seota /* 3695107Seota * TM_EXECUTING must be set at this point. 3705107Seota * Unset the flag. 3715107Seota */ 3725107Seota req->flags &= ~(TM_EXECUTING | TM_TRANSFER); 3735107Seota 3745107Seota /* 3755107Seota * Decrease the request cnt. The reqest cnt shows 3765107Seota * how many times this request is executed now. 3775107Seota * If this counter becomes the zero, drop TM_INVOKING 3785107Seota * to show there is no requests to do now. 3795107Seota */ 3805107Seota req->cnt--; 3815107Seota if (req->cnt == 0) { 3825107Seota req->flags &= ~TM_INVOKING; 3835107Seota mutex_exit(&req->lock); 3845107Seota return; 3855107Seota } 3865107Seota mutex_exit(&req->lock); 3875107Seota } 3885107Seota } 3895107Seota 3905107Seota /* 3915107Seota * Timeout worker thread for processing task queue. 3925107Seota */ 3935107Seota static void 3945107Seota timeout_taskq_thread(void *arg) 3955107Seota { 3965107Seota _NOTE(ARGUNUSED(arg)); 3975107Seota tm_req_t *kern_req; 3985107Seota callb_cpr_t cprinfo; 3995107Seota 4005107Seota CALLB_CPR_INIT(&cprinfo, &disp_req_lock, callb_generic_cpr, 4015107Seota "timeout_taskq_thread"); 4025107Seota 4035107Seota /* 4045107Seota * This thread is wakened up when a new request is added to 4055107Seota * the queue. Then pick up all requests and dispatch them 4065107Seota * via taskq_dispatch(). 4075107Seota */ 4085107Seota for (;;) { 4095107Seota /* 4105107Seota * Check the queue and pick up a request if the queue 4115107Seota * is not NULL. 4125107Seota */ 4135107Seota mutex_enter(&disp_req_lock); 4145107Seota while ((kern_req = list_head(&kern_queue)) == NULL) { 4155107Seota CALLB_CPR_SAFE_BEGIN(&cprinfo); 4165107Seota cv_wait(&kern_cv, &disp_req_lock); 4175107Seota CALLB_CPR_SAFE_END(&cprinfo, &disp_req_lock); 4185107Seota } 4195107Seota list_remove(&kern_queue, kern_req); 4205107Seota mutex_exit(&disp_req_lock); 4215107Seota 4225107Seota /* Execute the timeout request via the taskq thread */ 4235107Seota (void) taskq_dispatch(tm_taskq, timeout_execute, 4245107Seota (void *)kern_req, TQ_SLEEP); 4255107Seota } 4265107Seota } 4275107Seota 4285107Seota /* 4295107Seota * Dispatch the timeout request based on the level specified. 4305107Seota * If the level is equal to zero, notify the worker thread to 4315107Seota * call taskq_dispatch() in kernel context. If the level is bigger 4325107Seota * than zero, add a software interrupt request to the queue and raise 4335107Seota * the interrupt level to the specified one. 4345107Seota */ 4355107Seota static void 4365107Seota timeout_dispatch(tm_req_t *req) 4375107Seota { 4385107Seota int level = req->level; 4395107Seota extern void sir_on(int); 4405107Seota 4415107Seota if (level == TM_IPL_0) { 4425107Seota /* Add a new request to the tail */ 4435107Seota mutex_enter(&disp_req_lock); 4445107Seota list_insert_tail(&kern_queue, req); 4455107Seota mutex_exit(&disp_req_lock); 4465107Seota 4475107Seota /* 4485107Seota * notify the worker thread that this request 4495107Seota * is newly added to the queue. 4505107Seota * Note. this cv_signal() can be called after the 4515107Seota * mutex_lock. 4525107Seota */ 4535107Seota cv_signal(&kern_cv); 4545107Seota } else { 4555107Seota /* Add a new request to the tail */ 4565107Seota mutex_enter(&disp_req_lock); 4575107Seota list_insert_tail(&intr_queue, req); 4585107Seota 4595107Seota /* Issue the software interrupt */ 4605107Seota if (intr_state & TM_INTR_START(level)) { 4615107Seota /* 4625107Seota * timer_softintr() is already running; no need to 4635107Seota * raise a siron. Due to lock protection of 4645107Seota * the intr_queue and intr_state, we know that 4655107Seota * timer_softintr() will see the new addition to 4665107Seota * the intr_queue. 4675107Seota */ 4685107Seota mutex_exit(&disp_req_lock); 4695107Seota } else { 4705107Seota intr_state |= TM_INTR_SET(level); 4715107Seota mutex_exit(&disp_req_lock); 4725107Seota 4735107Seota /* Raise an interrupt to execute timeout requests */ 4745107Seota sir_on(level); 4755107Seota } 4765107Seota } 4775107Seota } 4785107Seota 4795107Seota /* 4805107Seota * Check the software interrupt queue and invoke requests at the specified 4815107Seota * interrupt level. 4825107Seota * Note that the queue may change during call so that the disp_req_lock 4835107Seota * and the intr_state are used to protect it. 4845107Seota * The software interrupts supported here are up to the level 10. Higher 4855107Seota * than 10 interrupts cannot be supported. 4865107Seota */ 4875107Seota void 4885107Seota timer_softintr(int level) 4895107Seota { 4905107Seota tm_req_t *intr_req; 4915107Seota ASSERT(level >= TM_IPL_1 && level <= TM_IPL_10); 4925107Seota 4935107Seota /* Check if we are asked to process the softcall list */ 4945107Seota mutex_enter(&disp_req_lock); 4955107Seota if (!(intr_state & TM_INTR_SET(level))) { 4965107Seota mutex_exit(&disp_req_lock); 4975107Seota return; 4985107Seota } 4995107Seota 5005107Seota /* Notify this software interrupt request will be executed soon */ 5015107Seota intr_state |= TM_INTR_START(level); 5025107Seota intr_state &= ~TM_INTR_SET(level); 5035107Seota 5045107Seota /* loop the link until there is no requests */ 5055107Seota for (intr_req = list_head(&intr_queue); intr_req != NULL; 5065107Seota /* Nothing */) { 5075107Seota 5085107Seota /* Check the interrupt level */ 5095107Seota if (intr_req->level != level) { 5105107Seota intr_req = list_next(&intr_queue, intr_req); 5115107Seota continue; 5125107Seota } 5135107Seota list_remove(&intr_queue, intr_req); 5145107Seota mutex_exit(&disp_req_lock); 5155107Seota 5165107Seota /* Execute the software interrupt request */ 5175107Seota timeout_execute(intr_req); 5185107Seota 5195107Seota mutex_enter(&disp_req_lock); 5205107Seota /* Restart the loop since new requests might be added */ 5215107Seota intr_req = list_head(&intr_queue); 5225107Seota } 5235107Seota 5245107Seota /* reset the interrupt state */ 5255107Seota intr_state &= ~TM_INTR_START(level); 5265107Seota mutex_exit(&disp_req_lock); 5275107Seota } 5285107Seota 5295107Seota /* 5305107Seota * void 5315107Seota * cyclic_timer(void) 5325107Seota * 5335107Seota * Overview 5345107Seota * cyclic_timer() is a function invoked periodically by the cyclic 5355107Seota * subsystem. 5365107Seota * 5375107Seota * The function calls timeout_invoke() with timeout requests whose 5385107Seota * expiration time is already reached. 5395107Seota * 5405107Seota * Arguments 5415107Seota * Nothing 5425107Seota * 5435107Seota * Return value 5445107Seota * Nothing 5455107Seota */ 5465107Seota void 5475107Seota cyclic_timer(void) 5485107Seota { 5495107Seota tm_req_t *req; 5505107Seota timer_tw_t *tw; 5515107Seota hrtime_t curr_tick, curr; 5525107Seota 5535107Seota /* If the system is suspended, just return */ 5545107Seota if (timer_suspended) 5555107Seota return; 5565107Seota 5575107Seota /* Get the current time */ 5585107Seota timer_hrtime = ddi_timer->tick_time = curr = gethrtime(); 5595107Seota curr_tick = tw_tick(ddi_timer->tick_time); 5605107Seota 5615107Seota restart: 5625107Seota /* 5635107Seota * Check the timer cogs to see if there are timeout requests 5645107Seota * who reach the expiration time. Call timeout_invoke() to execute 5655107Seota * the requests, then. 5665107Seota */ 5675107Seota while (curr_tick >= ddi_timer->tick) { 5685107Seota tm_req_t *next; 5695107Seota tw = &ddi_timer->exhash[TM_HASH(ddi_timer->tick)]; 5705107Seota mutex_enter(&tw->lock); 5715107Seota for (req = list_head(&tw->req); req != NULL; req = next) { 5725107Seota next = list_next(&tw->req, req); 5735107Seota /* 5745107Seota * If this request is already obsolete, free 5755107Seota * it here. 5765107Seota */ 5775107Seota if (req->flags & TM_UTMCOMP) { 5785107Seota /* 5795107Seota * Remove this request from the timer, 5805107Seota * then free it. 5815107Seota */ 5825107Seota list_remove(&tw->req, req); 5835107Seota kmem_cache_free(req_cache, req); 5845107Seota } else if (curr >= req->exp_time) { 5855107Seota mutex_enter(&req->lock); 5865107Seota /* 5875107Seota * Check if this request is canceled, but not 5885107Seota * being executed now. 5895107Seota */ 5905107Seota if (req->flags & TM_CANCEL && 5915107Seota !(req->flags & TM_INVOKING)) { 5925107Seota mutex_exit(&req->lock); 5935107Seota continue; 5945107Seota } 5955107Seota /* 5965107Seota * Record how many times timeout_execute() 5975107Seota * must be invoked. 5985107Seota */ 5995107Seota req->cnt++; 6005107Seota /* 6015107Seota * Invoke timeout_execute() via taskq or 6025107Seota * software interrupt. 6035107Seota */ 6045107Seota if (req->flags & TM_INVOKING) { 6055107Seota /* 6065107Seota * If it's already invoked, 6075107Seota * There is nothing to do. 6085107Seota */ 6095107Seota mutex_exit(&req->lock); 6105107Seota } else { 6115107Seota req->flags |= TM_INVOKING; 6125107Seota mutex_exit(&req->lock); 6135107Seota /* 6145107Seota * Dispatch this timeout request. 6155107Seota * timeout_dispatch() chooses either 6165107Seota * a software interrupt or taskq thread 6175107Seota * based on the level. 6185107Seota */ 6195107Seota timeout_dispatch(req); 6205107Seota } 6215107Seota /* 6225107Seota * Periodic timeout requests must prepare for 6235107Seota * the next fire. 6245107Seota */ 6255107Seota transfer_req(req, tw); 6265107Seota } 6275107Seota } 6285107Seota mutex_exit(&tw->lock); 6295107Seota ddi_timer->tick++; 6305107Seota } 6315107Seota 6325107Seota /* 6335107Seota * Check the current time. If we spend some amount of time, 6345107Seota * double-check if some of the requests reaches the expiration 6355107Seota * time during the work. 6365107Seota */ 6375107Seota curr = gethrtime(); 6385107Seota curr_tick = tw_tick(curr); 6395107Seota if (curr_tick >= ddi_timer->tick) { 6405107Seota ddi_timer->tick -= 1; 6415107Seota goto restart; 6425107Seota } 6435107Seota /* Adjustment for the next rolling */ 6445107Seota ddi_timer->tick -= 1; 6455107Seota } 6465107Seota 6475107Seota /* 6485107Seota * void 6495107Seota * timer_init(void) 6505107Seota * 6515107Seota * Overview 6525107Seota * timer_init() allocates the internal data structures used by 6535107Seota * i_timeout(), i_untimeout() and the timer. 6545107Seota * 6555107Seota * Arguments 6565107Seota * Nothing 6575107Seota * 6585107Seota * Return value 6595107Seota * Nothing 6605107Seota * 6615107Seota * Caller's context 6625107Seota * timer_init() can be called in kernel context only. 6635107Seota */ 6645107Seota void 6655107Seota timer_init(void) 6665107Seota { 6675107Seota int i; 6685107Seota 6695107Seota /* Create kmem_cache for timeout requests */ 6705107Seota req_cache = kmem_cache_create("timeout_request", sizeof (tm_req_t), 6715107Seota 0, NULL, NULL, NULL, NULL, NULL, 0); 6725107Seota 6735107Seota /* Initialize the timer which is invoked by the cyclic subsystem */ 6745107Seota ddi_timer = kmem_alloc(sizeof (cyc_timer_t), KM_SLEEP); 6755107Seota ddi_timer->res = nsec_per_tick; 6765107Seota ddi_timer->tick = tw_tick(gethrtime()); 6775107Seota ddi_timer->tick_time = 0; 6785107Seota 6795107Seota /* Initialize the timing wheel */ 6805107Seota bzero((char *)&ddi_timer->idhash[0], TM_HASH_SZ * sizeof (timer_tw_t)); 6815107Seota bzero((char *)&ddi_timer->exhash[0], TM_HASH_SZ * sizeof (timer_tw_t)); 6825107Seota 6835107Seota for (i = 0; i < TM_HASH_SZ; i++) { 6845107Seota list_create(&ddi_timer->idhash[i].req, sizeof (tm_req_t), 6855107Seota offsetof(tm_req_t, id_req)); 6865107Seota mutex_init(&ddi_timer->idhash[i].lock, NULL, MUTEX_ADAPTIVE, 6875107Seota NULL); 6885107Seota 6895107Seota list_create(&ddi_timer->exhash[i].req, sizeof (tm_req_t), 6905107Seota offsetof(tm_req_t, ex_req)); 6915107Seota mutex_init(&ddi_timer->exhash[i].lock, NULL, MUTEX_ADAPTIVE, 6925107Seota NULL); 6935107Seota } 6945107Seota 6955107Seota /* Create a taskq thread pool */ 6965107Seota tm_taskq = taskq_create_instance("timeout_taskq", 0, 6975107Seota timer_taskq_num, MAXCLSYSPRI, 698*5265Seota timer_taskq_min_num, timer_taskq_max_num, 6995107Seota TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 7005107Seota 7015107Seota /* 7025107Seota * Initialize the taskq queue which is dedicated to this timeout 7035107Seota * interface/timer. 7045107Seota */ 7055107Seota list_create(&kern_queue, sizeof (tm_req_t), 7065107Seota offsetof(tm_req_t, disp_req)); 7075107Seota 7085107Seota /* Create a worker thread to dispatch the taskq thread */ 7095107Seota tm_work_thread = thread_create(NULL, 0, timeout_taskq_thread, NULL, 7105107Seota 0, &p0, TS_RUN, MAXCLSYSPRI); 7115107Seota 7125107Seota /* 7135107Seota * Initialize the software interrupt queue which is dedicated to 7145107Seota * this timeout interface/timer. 7155107Seota */ 7165107Seota list_create(&intr_queue, sizeof (tm_req_t), 7175107Seota offsetof(tm_req_t, disp_req)); 7185107Seota 7195107Seota /* 7205107Seota * Initialize the mutex lock used for both of kern_queue and 7215107Seota * intr_queue. 7225107Seota */ 7235107Seota mutex_init(&disp_req_lock, NULL, MUTEX_ADAPTIVE, NULL); 7245107Seota cv_init(&kern_cv, NULL, CV_DEFAULT, NULL); 7255107Seota 7265107Seota /* Register the callback handler for the system suspend/resume */ 7275107Seota (void) callb_add(timer_cpr_callb, 0, CB_CL_CPR_CALLOUT, "cyclicTimer"); 7285107Seota } 7295107Seota 7305107Seota /* 7315107Seota * timeout_t 7325107Seota * i_timeout(void (*func)(void *), void *arg, hrtime_t interval, 7335107Seota * int level, int flags) 7345107Seota * 7355107Seota * Overview 7365107Seota * i_timeout() is an internal function scheduling the passed function 7375107Seota * to be invoked in the interval in nanoseconds. The callback function 7385107Seota * keeps invoked until the request is explicitly canceled by i_untimeout(). 7395107Seota * This function is used for ddi_periodic_add(9F). 7405107Seota * 7415107Seota * Arguments 7425107Seota * 7435107Seota * func: the callback function 7445107Seota * the callback function will be invoked in kernel context if 7455107Seota * the level passed is the zero. Otherwise be invoked in interrupt 7465107Seota * context at the specified level by the argument "level". 7475107Seota * 7485107Seota * Note that It's guaranteed by the cyclic subsystem that the 7495107Seota * function is invoked on the only one CPU and is never executed 7505107Seota * simultaneously even on MP system. 7515107Seota * 7525107Seota * arg: the argument passed to the callback function 7535107Seota * 7545107Seota * interval: interval time in nanoseconds 7555107Seota * if the interval is the zero, the timer resolution is used. 7565107Seota * 7575107Seota * level : callback interrupt level 7585107Seota * If the value is 0 (the zero), the callback function is invoked 7595107Seota * in kernel context. If the value is more than 0 (the zero), but 7605107Seota * less than or equal to 10, the callback function is invoked in 7615107Seota * interrupt context at the specified interrupt level. 7625107Seota * This value must be in range of 0-10. 7635107Seota * 7645107Seota * Return value 7655107Seota * returns a non-zero opaque value (timeout_t) on success. 7665107Seota * 7675107Seota * Caller's context 7685107Seota * i_timeout() can be called in user, kernel or interrupt context. 7695107Seota * It cannot be called in high interrupt context. 7705107Seota * 7715107Seota * Note. This function is used by ddi_periodic_add(), which cannot 7725107Seota * be called in interrupt context. As a result, this function is called 7735107Seota * in user or kernel context only in practice. 7745107Seota * 7755107Seota */ 7765107Seota timeout_t 7775107Seota i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level) 7785107Seota { 7795107Seota hrtime_t start_time = gethrtime(), res; 7805107Seota tm_req_t *req = NULL; 7815107Seota 7825107Seota /* Allocate and initialize the timeout request */ 7835107Seota req = kmem_cache_alloc(req_cache, KM_SLEEP); 7845107Seota req->handler = func; 7855107Seota req->arg = arg; 7865107Seota req->h_thread = NULL; 7875107Seota req->level = level; 7885107Seota req->flags = 0; 7895107Seota req->cnt = 0; 7905107Seota mutex_init(&req->lock, NULL, MUTEX_ADAPTIVE, NULL); 7915107Seota cv_init(&req->cv, NULL, CV_DEFAULT, NULL); 7925107Seota 7935107Seota /* 7945107Seota * The resolution must be finer than or equal to 7955107Seota * the requested interval. If it's not, set the resolution 7965107Seota * to the interval. 7975107Seota * Note. There is a restriction currently. Regardless of the 7985107Seota * clock resolution used here, 10ms is set as the timer resolution. 7995107Seota * Even on the 1ms resolution timer, the minimum interval is 10ms. 8005107Seota */ 8015107Seota if ((res = i_get_res()) > interval) { 8025107Seota uintptr_t pc = (uintptr_t)req->handler; 8035107Seota ulong_t off; 8045107Seota cmn_err(CE_WARN, 8055107Seota "The periodic timeout (handler=%s, interval=%lld) " 8065107Seota "requests a finer interval than the supported resolution. " 8075107Seota "It rounds up to %lld\n", kobj_getsymname(pc, &off), 8085107Seota interval, res); 8095107Seota interval = res; 8105107Seota } 8115107Seota 8125107Seota /* 8135107Seota * If the specified interval is already multiples of 8145107Seota * the resolution, use it as is. Otherwise, it rounds 8155107Seota * up to multiples of the timer resolution. 8165107Seota */ 8175107Seota req->interval = roundup(interval, i_get_res()); 8185107Seota 8195107Seota /* 8205107Seota * For the periodic timeout requests, the first expiration time will 8215107Seota * be adjusted to the timer tick edge to take advantage of the cyclic 8225107Seota * subsystem. In that case, the first fire is likely not an expected 8235107Seota * one, but the fires later can be more accurate due to this. 8245107Seota */ 8255107Seota req->exp_time = roundup(start_time + req->interval, i_get_res()); 8265107Seota 8275107Seota /* Add the request to the timer */ 8285107Seota return (add_req(req)); 8295107Seota } 8305107Seota 8315107Seota /* 8325107Seota * void 8335107Seota * i_untimeout(timeout_t req) 8345107Seota * 8355107Seota * Overview 8365107Seota * i_untimeout() is an internal function canceling the i_timeout() 8375107Seota * request previously issued. 8385107Seota * This function is used for ddi_periodic_delete(9F). 8395107Seota * 8405107Seota * Argument 8415107Seota * req: timeout_t opaque value i_timeout() returned previously. 8425107Seota * 8435107Seota * Return value 8445107Seota * Nothing. 8455107Seota * 8465107Seota * Caller's context 8475107Seota * i_untimeout() can be called in user, kernel or interrupt context. 8485107Seota * It cannot be called in high interrupt context. 8495107Seota * 8505107Seota * Note. This function is used by ddi_periodic_delete(), which cannot 8515107Seota * be called in interrupt context. As a result, this function is called 8525107Seota * in user or kernel context only in practice. Also i_untimeout() sends 8535107Seota * the cv_signal to timeout_execute(), which runs in interrupt context. 8545107Seota * Make sure this function will not be blocked, otherwise the deadlock 8555107Seota * situation can occur. See timeout_execute(). 8565107Seota */ 8575107Seota void 8585107Seota i_untimeout(timeout_t timeout_req) 8595107Seota { 8605107Seota timer_tw_t *tid; 8615107Seota tm_req_t *req; 8625107Seota timeout_t id; 8635107Seota 8645107Seota /* Retrieve the id for this timeout request */ 8655107Seota id = (timeout_t)timeout_req; 8665107Seota tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)]; 8675107Seota 8685107Seota mutex_enter(&tid->lock); 8695107Seota for (req = list_head(&tid->req); req != NULL; 8705107Seota req = list_next(&tid->req, req)) { 8715107Seota if (req->id == id) 8725107Seota break; 8735107Seota } 8745107Seota if (req == NULL) { 8755107Seota /* There is no requests with this id after all */ 8765107Seota mutex_exit(&tid->lock); 8775107Seota return; 8785107Seota } 8795107Seota mutex_enter(&req->lock); 8805107Seota 8815107Seota /* Unregister this request first */ 8825107Seota list_remove(&tid->req, req); 8835107Seota 8845107Seota /* Notify that this request is canceled */ 8855107Seota req->flags |= TM_CANCEL; 8865107Seota 8875107Seota /* Check if the handler is invoked */ 8885107Seota if (req->flags & TM_INVOKING) { 8895107Seota /* 8905107Seota * If this request is not yet executed or is already finished 8915107Seota * then there is nothing to do but just return. Otherwise 8925107Seota * we'll have to wait for the callback execution being complete. 8935107Seota */ 8945107Seota if (!(req->flags & TM_EXECUTING) || req->flags & TM_COMPLETE) { 8955107Seota /* There is nothing to do any more */ 8965107Seota mutex_exit(&req->lock); 8975107Seota mutex_exit(&tid->lock); 8985107Seota return; 8995107Seota } 9005107Seota 9015107Seota /* 9025107Seota * If this is the recursive call, there is nothing 9035107Seota * to do any more. This is the case that i_untimeout() 9045107Seota * is called in the handler. 9055107Seota */ 9065107Seota if (req->h_thread == curthread) { 9075107Seota mutex_exit(&req->lock); 9085107Seota mutex_exit(&tid->lock); 9095107Seota return; 9105107Seota } 9115107Seota 9125107Seota /* 9135107Seota * Notify that i_untimeout() is waiting until this request 9145107Seota * is complete. 9155107Seota */ 9165107Seota req->flags |= TM_COMPWAIT; 9175107Seota mutex_exit(&tid->lock); 9185107Seota 9195107Seota /* 9205107Seota * Wait for this timeout request being complete before 9215107Seota * the return. 9225107Seota */ 9235107Seota while (!(req->flags & TM_COMPLETE)) 9245107Seota cv_wait(&req->cv, &req->lock); 9255107Seota req->flags &= ~TM_COMPWAIT; 9265107Seota cv_signal(&req->cv); 9275107Seota mutex_exit(&req->lock); 9285107Seota return; 9295107Seota } 9305107Seota mutex_exit(&req->lock); 9315107Seota mutex_exit(&tid->lock); 9325107Seota 9335107Seota /* 9345107Seota * Notify untimeout() is about to be finished, and this request 9355107Seota * can be freed. 9365107Seota */ 9375107Seota atomic_or_uint(&req->flags, TM_UTMCOMP); 9385107Seota } 939