/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/taskq.h>
#include <sys/dditypes.h>
#include <sys/ddi_timer.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/note.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/types.h>

/*
 * Global variables for timeout requests
 */
static kmem_cache_t *req_cache;		/* kmem cache for timeout requests */

/*
 * taskq parameters for cyclic_timer
 *
 * timer_taskq_num:
 * timer_taskq_num is the number of taskq threads. Currently 4 threads
 * are pooled to handle periodic timeout requests. This number is chosen
 * based on the fact that the callout (one-time timeout framework) uses
 * 8 threads with TQ_NOSLEEP; the periodic timeout framework calls
 * taskq_dispatch() with TQ_SLEEP instead, so 4 threads should be
 * sufficient to handle periodic timeout requests. (See also
 * timer_taskq_max_num below.)
 *
 * timer_taskq_min_num:
 * timer_taskq_min_num is the number of pre-populated taskq_ent
 * structures; it holds the same value as timer_taskq_num.
 *
 * timer_taskq_max_num:
 * Since TQ_SLEEP is passed to taskq_dispatch(), the framework waits for
 * one second if more taskq_ent structures than timer_taskq_max_num are
 * required. From the timeout point of view, however, one second is much
 * longer than expected, so timer_taskq_max_num should hold a sufficiently
 * large value, which is 128 here. Since taskq_ent_t is relatively small,
 * this does not consume much memory (currently less than 8KB at most).
 *
 * For a detailed explanation of the taskq function arguments, see
 * usr/src/uts/common/os/taskq.c.
 */
int timer_taskq_num = 4;		/* taskq thread number */
int timer_taskq_min_num = 4;		/* min. number of taskq_ent structs */
int timer_taskq_max_num = 128;		/* max. number of taskq_ent structs */
static taskq_t *tm_taskq;		/* taskq thread pool */
static kthread_t *tm_work_thread;	/* work thread invoking taskq */

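/*
 * Tuning note (an added illustration, not part of the original source):
 * since the three variables above are global kernel variables rather than
 * static, they could in principle be tuned with /etc/system entries such
 * as the hypothetical lines below, which would take effect at the next
 * boot.
 *
 *	set timer_taskq_num = 8
 *	set timer_taskq_max_num = 256
 */
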
/*
 * timer variables
 */
static cyc_timer_t *ddi_timer;		/* ddi timer based on the cyclic */
static volatile hrtime_t timer_hrtime;	/* current tick time on the timer */

/*
 * Variable used for the suspend/resume.
 */
static volatile boolean_t timer_suspended;

/*
 * Kernel taskq queue for the ddi timer
 */
static list_t kern_queue;	/* kernel thread request queue */
static kcondvar_t kern_cv;	/* condition variable for taskq queue */

/*
 * Software interrupt queue dedicated to the ddi timer
 */
static list_t intr_queue;	/* software interrupt request queue */
static uint_t intr_state;	/* software interrupt state */

/*
 * This lock protects the intr_queue and the kern_queue. It also protects
 * intr_state, which represents the software interrupt state for the timer.
 */
static kmutex_t	disp_req_lock;

/*
 * The periodic timer interrupt priority levels
 */
enum {
	TM_IPL_0 = 0,			/* kernel context */
	TM_IPL_1, TM_IPL_2, TM_IPL_3,	/* level 1-3 */
	TM_IPL_4, TM_IPL_5, TM_IPL_6,	/* level 4-6 */
	TM_IPL_7, TM_IPL_8, TM_IPL_9,	/* level 7-9 */
	TM_IPL_10			/* level 10 */
};

/*
 * A callback handler used by CPR to stop and resume callouts.
 * Since the taskq uses TASKQ_CPR_SAFE, the function just sets the
 * timer_suspended boolean flag here.
 */
/*ARGSUSED*/
static boolean_t
timer_cpr_callb(void *arg, int code)
{
	timer_suspended = (code == CB_CODE_CPR_CHKPT);
	return (B_TRUE);
}

/*
 * Return a proposed timeout request id. add_req() determines whether
 * or not the proposed one is used. If it's not suitable, add_req()
 * calls get_req_cnt() again. To reduce lock contention between the
 * timer and i_untimeout(), an atomic instruction is used here.
 */
static timeout_t
get_req_cnt(void)
{
	static volatile ulong_t timeout_cnt = 0;
	return ((timeout_t)atomic_inc_ulong_nv(&timeout_cnt));
}

/*
 * Get the system resolution.
 * Note: there is currently a restriction on the system resolution;
 * only the 10ms tick (the default clock resolution) is supported now.
 */
static hrtime_t
i_get_res(void)
{
	return ((hrtime_t)10000000); /* 10ms tick only */
}

/*
 * Return the value for the cog of the timing wheel.
 * TICK_FACTOR is used to gain a finer cog on the clock resolution.
 */
static hrtime_t
tw_tick(hrtime_t time)
{
	return ((time << TICK_FACTOR) / ddi_timer->res);
}

/*
 * Calculate the expiration time for the timeout request.
 */
static hrtime_t
expire_tick(tm_req_t *req)
{
	return (tw_tick(req->exp_time));
}

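/*
 * Illustrative arithmetic (an added sketch; TICK_FACTOR is defined in
 * sys/ddi_timer.h and its value is merely assumed here): with the 10ms
 * resolution (res = 10,000,000 ns) and an assumed TICK_FACTOR of 3, an
 * expiration time of 25,000,000 ns maps to the cog
 *
 *	tw_tick(25000000) = (25000000 << 3) / 10000000 = 20
 *
 * so requests expiring within the same (res >> TICK_FACTOR) window land
 * on the same cog, and TM_HASH() then folds that cog into the exhash[]
 * timing wheel.
 */
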
/*
 * Register a timeout request with the timer. This function is used
 * in i_timeout().
 */
static timeout_t
add_req(tm_req_t *req)
{
	timer_tw_t *tid, *tw;
	tm_req_t *next;
	timeout_t id;

retry:
	/*
	 * Retrieve a timeout request id. Since i_timeout() needs to return
	 * a non-zero value, retry if zero is returned.
	 */
	if ((id = get_req_cnt()) == 0)
		id = get_req_cnt();

	/*
	 * Check that the id is not in use yet. Since the framework now deals
	 * with periodic timeout requests, we cannot assume that an id
	 * allocated (long) before no longer exists when it gets re-assigned
	 * (especially on 32bit), so this case must be handled to resolve
	 * the conflict. If the id is already in use, retry with another one.
	 */
	tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];
	mutex_enter(&tid->lock);
	for (next = list_head(&tid->req); next != NULL;
	    next = list_next(&tid->req, next)) {
		if (next->id == id) {
			mutex_exit(&tid->lock);
			goto retry;
		}
	}
	/* Nobody uses this id yet */
	req->id = id;

	/*
	 * Register this request with the timer.
	 * The list operation must be list_insert_head().
	 * Other operations can degrade performance.
	 */
	list_insert_head(&tid->req, req);
	mutex_exit(&tid->lock);

	tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
	mutex_enter(&tw->lock);
	/*
	 * Operations other than list_insert_head() can
	 * degrade performance here.
	 */
	list_insert_head(&tw->req, req);
	mutex_exit(&tw->lock);

	return (id);
}

/*
 * Periodic timeout requests cannot be removed until they are canceled
 * explicitly. Until then, they need to be re-registered after they are
 * fired. transfer_req() re-registers the requests for the next fires.
 * Note: transfer_req() sends a cv_signal to timeout_execute(), which
 * runs in interrupt context. Make sure this function will not be blocked,
 * otherwise a deadlock can occur.
 */
static void
transfer_req(tm_req_t *req, timer_tw_t *tw)
{
	timer_tw_t *new_tw;
	hrtime_t curr_time;
	ASSERT(tw && MUTEX_HELD(&tw->lock));

	/* Calculate the next expiration time by interval */
	req->exp_time += req->interval;
	curr_time = gethrtime();

	/*
	 * If a long time (more than 1 clock resolution) has already
	 * passed for some reason (e.g. debugger or high interrupt),
	 * round up the next expiration to the appropriate one
	 * since this request is periodic and would never catch up.
	 */
	if (curr_time - req->exp_time >= ddi_timer->res) {
		req->exp_time = roundup(curr_time + req->interval,
		    ddi_timer->res);
	}

	/*
	 * Re-register this request.
	 * Note: since it is guaranteed that the timer is invoked on only
	 * one CPU at any time (by the cyclic subsystem), a deadlock
	 * cannot occur regardless of the lock order here.
	 */
	new_tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];

	/*
	 * If it's on the timer cog already, there is nothing
	 * to do. Just return.
	 */
	if (new_tw == tw)
		return;

	/* Remove this request from the timer */
	list_remove(&tw->req, req);

	/* Re-register this request with the timer */
	mutex_enter(&new_tw->lock);

	/*
	 * Operations other than list_insert_head() can
	 * degrade performance here.
	 */
	list_insert_head(&new_tw->req, req);
	mutex_exit(&new_tw->lock);

	/*
	 * Set the TM_TRANSFER flag and notify that the request has been
	 * transferred completely. This prevents a race in the case that
	 * this request is already being serviced on another CPU.
	 */
	mutex_enter(&req->lock);
	req->flags |= TM_TRANSFER;
	cv_signal(&req->cv);
	mutex_exit(&req->lock);
}

/*
 * Execute timeout requests.
 * Note: since timeout_execute() can run in interrupt context and block
 * on condition variables, there are restrictions on the timer code that
 * signals these condition variables (see i_untimeout(), transfer_req(),
 * and condvar(9F)). Functions that signal these cvs must ensure that
 * they will not be blocked (for memory allocations or any other reason)
 * since condition variables don't support priority inheritance.
 */
static void
timeout_execute(void *arg)
{
	tm_req_t *req = (tm_req_t *)arg;
	ASSERT(req->flags & TM_INVOKING && !(req->flags & TM_EXECUTING));

	for (;;) {
		/*
		 * Check if this request is canceled. If it's canceled, do not
		 * execute this request.
		 */
		mutex_enter(&req->lock);
		if (!(req->flags & TM_CANCEL)) {
			/*
			 * Record the current thread to prevent a deadlock
			 * in case this timeout request is canceled from
			 * within the handler being invoked now.
			 * (This doesn't violate the spec.) Set TM_EXECUTING
			 * to show that this handler is about to be invoked.
			 */
			req->h_thread = curthread;
			req->flags |= TM_EXECUTING;
			mutex_exit(&req->lock);

			/* The handler is invoked without holding any locks */
			(*req->handler)(req->arg);

			mutex_enter(&req->lock);
		}

		/*
		 * Check if this request is canceled or not. If not, prepare
		 * for the next fire.
		 */
		if (req->flags & TM_CANCEL) {
			timer_tw_t *tw;
			/*
			 * Wait until the timer finishes all work for
			 * this request.
			 */
			while (!(req->flags & TM_TRANSFER))
				cv_wait(&req->cv, &req->lock);
			mutex_exit(&req->lock);
			ASSERT(req->flags & TM_TRANSFER);

			/* Remove this request from the timer */
			tw = &ddi_timer->exhash[TM_HASH(expire_tick(req))];
			mutex_enter(&tw->lock);
			list_remove(&tw->req, req);
			mutex_exit(&tw->lock);

			/* Free this request */
			kmem_cache_free(req_cache, req);
			return;
		}
		ASSERT(req->flags & TM_EXECUTING);

		/*
		 * TM_EXECUTING must be set at this point.
		 * Unset the flag.
		 */
		req->flags &= ~(TM_EXECUTING | TM_TRANSFER);

		/*
		 * Decrease the request cnt. The request cnt shows
		 * how many times this request must be executed now.
		 * If this counter becomes zero, drop TM_INVOKING
		 * to show there are no requests to do now.
		 */
		req->cnt--;
		if (req->cnt == 0) {
			req->flags &= ~TM_INVOKING;
			mutex_exit(&req->lock);
			return;
		}
		mutex_exit(&req->lock);
	}
}

/*
 * Timeout worker thread for processing the task queue.
 */
static void
timeout_taskq_thread(void *arg)
{
	_NOTE(ARGUNUSED(arg));
	tm_req_t *kern_req;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &disp_req_lock, callb_generic_cpr,
	    "timeout_taskq_thread");

	/*
	 * This thread is woken up when a new request is added to
	 * the queue. It then picks up all requests and dispatches them
	 * via taskq_dispatch().
	 */
	for (;;) {
		/*
		 * Check the queue and pick up a request if the queue
		 * is not empty.
		 */
		mutex_enter(&disp_req_lock);
		while ((kern_req = list_head(&kern_queue)) == NULL) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&kern_cv, &disp_req_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &disp_req_lock);
		}
		list_remove(&kern_queue, kern_req);
		mutex_exit(&disp_req_lock);

		/* Execute the timeout request via the taskq thread */
		(void) taskq_dispatch(tm_taskq, timeout_execute,
		    (void *)kern_req, TQ_SLEEP);
	}
}

/*
 * Dispatch the timeout request based on the level specified.
 * If the level is zero, notify the worker thread to call
 * taskq_dispatch() in kernel context. If the level is greater
 * than zero, add a software interrupt request to the queue and raise
 * the interrupt level to the specified one.
 */
static void
timeout_dispatch(tm_req_t *req)
{
	int level = req->level;
	extern void sir_on(int);

	if (level == TM_IPL_0) {
		/* Add a new request to the tail */
		mutex_enter(&disp_req_lock);
		list_insert_tail(&kern_queue, req);
		mutex_exit(&disp_req_lock);

		/*
		 * Notify the worker thread that this request
		 * was newly added to the queue.
		 * Note: this cv_signal() can be called after the
		 * mutex is released.
		 */
		cv_signal(&kern_cv);
	} else {
		/* Add a new request to the tail */
		mutex_enter(&disp_req_lock);
		list_insert_tail(&intr_queue, req);

		/* Issue the software interrupt */
		if (intr_state & TM_INTR_START(level)) {
			/*
			 * timer_softintr() is already running; no need to
			 * raise a siron. Due to lock protection of
			 * the intr_queue and intr_state, we know that
			 * timer_softintr() will see the new addition to
			 * the intr_queue.
			 */
			mutex_exit(&disp_req_lock);
		} else {
			intr_state |= TM_INTR_SET(level);
			mutex_exit(&disp_req_lock);

			/* Raise an interrupt to execute timeout requests */
			sir_on(level);
		}
	}
}

/*
 * Check the software interrupt queue and invoke requests at the specified
 * interrupt level.
 * Note that the queue may change during the call, so the disp_req_lock
 * and the intr_state are used to protect it.
 * The software interrupts supported here go up to level 10; levels higher
 * than 10 are not supported.
 */
void
timer_softintr(int level)
{
	tm_req_t *intr_req;
	ASSERT(level >= TM_IPL_1 && level <= TM_IPL_10);

	/* Check if we are asked to process the softcall list */
	mutex_enter(&disp_req_lock);
	if (!(intr_state & TM_INTR_SET(level))) {
		mutex_exit(&disp_req_lock);
		return;
	}

	/* Note that this software interrupt request will be executed soon */
	intr_state |= TM_INTR_START(level);
	intr_state &= ~TM_INTR_SET(level);

	/* Loop over the list until there are no more requests */
	for (intr_req = list_head(&intr_queue); intr_req != NULL;
	    /* Nothing */) {

		/* Check the interrupt level */
		if (intr_req->level != level) {
			intr_req = list_next(&intr_queue, intr_req);
			continue;
		}
		list_remove(&intr_queue, intr_req);
		mutex_exit(&disp_req_lock);

		/* Execute the software interrupt request */
		timeout_execute(intr_req);

		mutex_enter(&disp_req_lock);
		/* Restart the loop since new requests might have been added */
		intr_req = list_head(&intr_queue);
	}

	/* Reset the interrupt state */
	intr_state &= ~TM_INTR_START(level);
	mutex_exit(&disp_req_lock);
}

/*
 *  void
 *  cyclic_timer(void)
 *
 *  Overview
 *   cyclic_timer() is a function invoked periodically by the cyclic
 *   subsystem.
 *
 *   The function dispatches, via timeout_dispatch(), those timeout
 *   requests whose expiration time has already been reached.
 *
 *  Arguments
 *   Nothing
 *
 *  Return value
 *   Nothing
 */
void
cyclic_timer(void)
{
	tm_req_t *req;
	timer_tw_t *tw;
	hrtime_t curr_tick, curr;

	/* If the system is suspended, just return */
	if (timer_suspended)
		return;

	/* Get the current time */
	timer_hrtime = ddi_timer->tick_time = curr = gethrtime();
	curr_tick = tw_tick(ddi_timer->tick_time);

restart:
	/*
	 * Check the timer cogs for timeout requests that have reached
	 * their expiration time, and dispatch them for execution.
	 */
	while (curr_tick >= ddi_timer->tick) {
		tm_req_t *next;
		tw = &ddi_timer->exhash[TM_HASH(ddi_timer->tick)];
		mutex_enter(&tw->lock);
		for (req = list_head(&tw->req); req != NULL; req = next) {
			next = list_next(&tw->req, req);
			/*
			 * If this request is already obsolete, free
			 * it here.
			 */
			if (req->flags & TM_UTMCOMP) {
				/*
				 * Remove this request from the timer,
				 * then free it.
				 */
				list_remove(&tw->req, req);
				kmem_cache_free(req_cache, req);
			} else if (curr >= req->exp_time) {
				mutex_enter(&req->lock);
				/*
				 * Check if this request is canceled, but not
				 * being executed now.
				 */
				if (req->flags & TM_CANCEL &&
				    !(req->flags & TM_INVOKING)) {
					mutex_exit(&req->lock);
					continue;
				}
				/*
				 * Record how many times timeout_execute()
				 * must be invoked.
				 */
				req->cnt++;
				/*
				 * Invoke timeout_execute() via taskq or
				 * software interrupt.
				 */
				if (req->flags & TM_INVOKING) {
					/*
					 * If it's already invoked,
					 * there is nothing to do.
					 */
					mutex_exit(&req->lock);
				} else {
					req->flags |= TM_INVOKING;
					mutex_exit(&req->lock);
					/*
					 * Dispatch this timeout request.
					 * timeout_dispatch() chooses either
					 * a software interrupt or taskq thread
					 * based on the level.
					 */
					timeout_dispatch(req);
				}
				/*
				 * Periodic timeout requests must prepare for
				 * the next fire.
				 */
				transfer_req(req, tw);
			}
		}
		mutex_exit(&tw->lock);
		ddi_timer->tick++;
	}

	/*
	 * Check the current time. If some time has been spent in the work
	 * above, double-check whether any of the requests reached their
	 * expiration time in the meantime.
	 */
	curr = gethrtime();
	curr_tick = tw_tick(curr);
	if (curr_tick >= ddi_timer->tick) {
		ddi_timer->tick -= 1;
		goto restart;
	}
	/* Adjustment for the next rolling */
	ddi_timer->tick -= 1;
}

/*
 *  void
 *  timer_init(void)
 *
 *  Overview
 *    timer_init() allocates the internal data structures used by
 *    i_timeout(), i_untimeout() and the timer.
 *
 *  Arguments
 *    Nothing
 *
 *  Return value
 *    Nothing
 *
 *  Caller's context
 *    timer_init() can be called in kernel context only.
 */
void
timer_init(void)
{
	int i;

	/* Create the kmem_cache for timeout requests */
	req_cache = kmem_cache_create("timeout_request", sizeof (tm_req_t),
	    0, NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize the timer which is invoked by the cyclic subsystem */
	ddi_timer = kmem_alloc(sizeof (cyc_timer_t), KM_SLEEP);
	ddi_timer->res = nsec_per_tick;
	ddi_timer->tick = tw_tick(gethrtime());
	ddi_timer->tick_time = 0;

	/* Initialize the timing wheel */
	bzero((char *)&ddi_timer->idhash[0], TM_HASH_SZ * sizeof (timer_tw_t));
	bzero((char *)&ddi_timer->exhash[0], TM_HASH_SZ * sizeof (timer_tw_t));

	for (i = 0; i < TM_HASH_SZ; i++) {
		list_create(&ddi_timer->idhash[i].req, sizeof (tm_req_t),
		    offsetof(tm_req_t, id_req));
		mutex_init(&ddi_timer->idhash[i].lock, NULL, MUTEX_ADAPTIVE,
		    NULL);

		list_create(&ddi_timer->exhash[i].req, sizeof (tm_req_t),
		    offsetof(tm_req_t, ex_req));
		mutex_init(&ddi_timer->exhash[i].lock, NULL, MUTEX_ADAPTIVE,
		    NULL);
	}

	/* Create a taskq thread pool */
	tm_taskq = taskq_create_instance("timeout_taskq", 0,
	    timer_taskq_num, MAXCLSYSPRI,
	    timer_taskq_min_num, timer_taskq_max_num,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);

	/*
	 * Initialize the taskq queue which is dedicated to this timeout
	 * interface/timer.
	 */
	list_create(&kern_queue, sizeof (tm_req_t),
	    offsetof(tm_req_t, disp_req));

	/* Create a worker thread to dispatch the taskq thread */
	tm_work_thread = thread_create(NULL, 0, timeout_taskq_thread, NULL,
	    0, &p0, TS_RUN, MAXCLSYSPRI);

	/*
	 * Initialize the software interrupt queue which is dedicated to
	 * this timeout interface/timer.
	 */
	list_create(&intr_queue, sizeof (tm_req_t),
	    offsetof(tm_req_t, disp_req));

	/*
	 * Initialize the mutex lock used for both the kern_queue and
	 * the intr_queue.
	 */
	mutex_init(&disp_req_lock, NULL, MUTEX_ADAPTIVE, NULL);
	cv_init(&kern_cv, NULL, CV_DEFAULT, NULL);

	/* Register the callback handler for system suspend/resume */
	(void) callb_add(timer_cpr_callb, 0, CB_CL_CPR_CALLOUT, "cyclicTimer");
}

/*
 *  timeout_t
 *  i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level)
 *
 *  Overview
 *    i_timeout() is an internal function scheduling the passed function
 *    to be invoked at the given interval in nanoseconds. The callback
 *    function keeps being invoked until the request is explicitly canceled
 *    by i_untimeout(). This function is used to implement
 *    ddi_periodic_add(9F).
 *
 *  Arguments
 *
 *    func: the callback function
 *          The callback function is invoked in kernel context if the
 *          level passed is zero. Otherwise it is invoked in interrupt
 *          context at the level specified by the argument "level".
 *
 *          Note that it is guaranteed by the cyclic subsystem that the
 *          function is invoked on only one CPU and is never executed
 *          simultaneously, even on an MP system.
 *
 *     arg: the argument passed to the callback function
 *
 * interval: interval time in nanoseconds
 *          If the interval is zero, the timer resolution is used.
 *
 *  level : callback interrupt level
 *          If the value is 0 (zero), the callback function is invoked
 *          in kernel context. If the value is greater than 0 but
 *          less than or equal to 10, the callback function is invoked in
 *          interrupt context at the specified interrupt level.
 *          This value must be in the range 0-10.
 *
 *  Return value
 *    Returns a non-zero opaque value (timeout_t) on success.
 *
 *  Caller's context
 *    i_timeout() can be called in user or kernel context.
 */
timeout_t
i_timeout(void (*func)(void *), void *arg, hrtime_t interval, int level)
{
	hrtime_t start_time = gethrtime(), res;
	tm_req_t *req = NULL;

	/* Allocate and initialize the timeout request */
	req = kmem_cache_alloc(req_cache, KM_SLEEP);
	req->handler = func;
	req->arg = arg;
	req->h_thread = NULL;
	req->level = level;
	req->flags = 0;
	req->cnt = 0;
	mutex_init(&req->lock, NULL, MUTEX_ADAPTIVE, NULL);
	cv_init(&req->cv, NULL, CV_DEFAULT, NULL);

	/*
	 * The resolution must be finer than or equal to
	 * the requested interval. If it's not, set the interval
	 * to the resolution.
	 * Note: there is a restriction currently. Regardless of the
	 * clock resolution used here, 10ms is set as the timer resolution.
	 * Even on a 1ms resolution clock, the minimum interval is 10ms.
	 */
	if ((res = i_get_res()) > interval) {
		uintptr_t pc = (uintptr_t)req->handler;
		ulong_t off;
		cmn_err(CE_WARN,
		    "The periodic timeout (handler=%s, interval=%lld) "
		    "requests a finer interval than the supported resolution. "
		    "It rounds up to %lld\n", kobj_getsymname(pc, &off),
		    interval, res);
		interval = res;
	}

	/*
	 * If the specified interval is already a multiple of
	 * the resolution, use it as is. Otherwise, round it
	 * up to a multiple of the timer resolution.
	 */
	req->interval = roundup(interval, i_get_res());

	/*
	 * For periodic timeout requests, the first expiration time is
	 * adjusted to the timer tick edge to take advantage of the cyclic
	 * subsystem. In that case, the first fire is likely not the expected
	 * one, but the later fires can be more accurate because of this.
	 */
	req->exp_time = roundup(start_time + req->interval, i_get_res());

	/* Add the request to the timer */
	return (add_req(req));
}

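/*
 * Illustrative usage (an added sketch, not part of the original file):
 * drivers do not call i_timeout() directly; they use ddi_periodic_add(9F),
 * which is implemented on top of it. A hypothetical driver callback that
 * fires every 100ms (100000000 ns) in kernel context, registered in
 * attach(9E) and canceled in detach(9E), might look like the following
 * (my_state_t, my_tick and the ddi_periodic_t field periodic_id are
 * made-up names):
 *
 *	static void
 *	my_tick(void *arg)
 *	{
 *		my_state_t *sp = arg;
 *
 *		... poll the hardware, kick a watchdog, and so on ...
 *	}
 *
 *	In attach(9E):
 *		sp->periodic_id = ddi_periodic_add(my_tick, sp,
 *		    100000000, DDI_IPL_0);
 *
 *	In detach(9E):
 *		ddi_periodic_delete(sp->periodic_id);
 */
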
/*
 *  void
 *  i_untimeout(timeout_t req)
 *
 *  Overview
 *    i_untimeout() is an internal function canceling the i_timeout()
 *    request previously issued.
 *    This function is used to implement ddi_periodic_delete(9F).
 *
 *  Argument
 *      req: timeout_t opaque value i_timeout() returned previously.
 *
 *  Return value
 *      Nothing.
 *
 *  Caller's context
 *    i_untimeout() can be called in user, kernel or interrupt context.
 *    It cannot be called in high interrupt context.
 *
 *  Note: this function is used by ddi_periodic_delete(), which cannot
 *  be called in interrupt context. As a result, this function is called
 *  in user or kernel context only in practice.
 */
void
i_untimeout(timeout_t timeout_req)
{
	timer_tw_t *tid;
	tm_req_t *req;
	timeout_t id;

	/* Retrieve the id for this timeout request */
	id = (timeout_t)timeout_req;
	tid = &ddi_timer->idhash[TM_HASH((uintptr_t)id)];

	mutex_enter(&tid->lock);
	for (req = list_head(&tid->req); req != NULL;
	    req = list_next(&tid->req, req)) {
		if (req->id == id)
			break;
	}
	if (req == NULL) {
		/* There is no request with this id after all */
		mutex_exit(&tid->lock);
		return;
	}
	mutex_enter(&req->lock);

	/* Unregister this request first */
	list_remove(&tid->req, req);

	/* Notify that this request is canceled */
	req->flags |= TM_CANCEL;

	/* Check if the handler is being invoked */
	if (req->flags & TM_INVOKING) {
		/*
		 * This request will be removed by timeout_execute() later,
		 * so there is nothing more to do here.
		 */
		mutex_exit(&req->lock);
		mutex_exit(&tid->lock);
		return;
	}
	mutex_exit(&req->lock);
	mutex_exit(&tid->lock);

	/*
	 * Notify that i_untimeout() is about to finish and this request
	 * can be freed.
	 */
	atomic_or_uint(&req->flags, TM_UTMCOMP);
}