xref: /netbsd-src/sys/external/bsd/common/linux/linux_work.c (revision ccfee5f7189c492b35bde8da3072d0a3624ce219)
1 /*	$NetBSD: linux_work.c,v 1.41 2018/08/27 15:06:37 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.41 2018/08/27 15:06:37 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #include <sys/queue.h>
45 #include <sys/sdt.h>
46 
47 #include <linux/workqueue.h>
48 
49 TAILQ_HEAD(work_head, work_struct);
50 TAILQ_HEAD(dwork_head, delayed_work);
51 
52 struct workqueue_struct {
53 	kmutex_t		wq_lock;
54 	kcondvar_t		wq_cv;
55 	struct dwork_head	wq_delayed; /* delayed work scheduled */
56 	struct work_head	wq_queue;   /* work to run */
57 	struct work_head	wq_dqueue;  /* delayed work to run now */
58 	struct work_struct	*wq_current_work;
59 	int			wq_flags;
60 	bool			wq_dying;
61 	uint64_t		wq_gen;
62 	struct lwp		*wq_lwp;
63 };
64 
65 static void __dead	linux_workqueue_thread(void *);
66 static void		linux_workqueue_timeout(void *);
67 static bool		work_claimed(struct work_struct *,
68 			    struct workqueue_struct *);
69 static struct workqueue_struct *
70 			work_queue(struct work_struct *);
71 static bool		acquire_work(struct work_struct *,
72 			    struct workqueue_struct *);
73 static void		release_work(struct work_struct *,
74 			    struct workqueue_struct *);
75 static void		wait_for_current_work(struct work_struct *,
76 			    struct workqueue_struct *);
77 static void		dw_callout_init(struct workqueue_struct *,
78 			    struct delayed_work *);
79 static void		dw_callout_destroy(struct workqueue_struct *,
80 			    struct delayed_work *);
81 static void		cancel_delayed_work_done(struct workqueue_struct *,
82 			    struct delayed_work *);
83 
84 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
85     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
86 SDT_PROBE_DEFINE2(sdt, linux, work, release,
87     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
88 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
89     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
90 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
91     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
92 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
93     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
94     "unsigned long"/*ticks*/);
95 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
96     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
97 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
98     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
99 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
101 SDT_PROBE_DEFINE2(sdt, linux, work, run,
102     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
103 SDT_PROBE_DEFINE2(sdt, linux, work, done,
104     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
105 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
106     "struct workqueue_struct *"/*wq*/);
107 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
108     "struct workqueue_struct *"/*wq*/);
109 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
110     "struct workqueue_struct *"/*wq*/);
111 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
112     "struct workqueue_struct *"/*wq*/);
113 
114 static specificdata_key_t workqueue_key __read_mostly;
115 
116 struct workqueue_struct	*system_wq __read_mostly;
117 struct workqueue_struct	*system_long_wq __read_mostly;
118 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
119 
120 static inline uintptr_t
121 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
122 {
123 
124 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
125 }
126 
127 /*
128  * linux_workqueue_init()
129  *
130  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
131  *	NetBSD error on failure.
132  */
133 int
134 linux_workqueue_init(void)
135 {
136 	int error;
137 
138 	error = lwp_specific_key_create(&workqueue_key, NULL);
139 	if (error)
140 		goto fail0;
141 
142 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
143 	if (system_wq == NULL) {
144 		error = ENOMEM;
145 		goto fail1;
146 	}
147 
148 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
149 	if (system_long_wq == NULL) {
150 		error = ENOMEM;
151 		goto fail2;
152 	}
153 
154 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
155 	if (system_power_efficient_wq == NULL) {
156 		error = ENOMEM;
157 		goto fail3;
158 	}
159 
160 	return 0;
161 
162 fail4: __unused
163 	destroy_workqueue(system_power_efficient_wq);
164 fail3:	destroy_workqueue(system_long_wq);
165 fail2:	destroy_workqueue(system_wq);
166 fail1:	lwp_specific_key_delete(workqueue_key);
167 fail0:	KASSERT(error);
168 	return error;
169 }
170 
171 /*
172  * linux_workqueue_fini()
173  *
174  *	Destroy the Linux workqueue subsystem.  Never fails.
175  */
176 void
177 linux_workqueue_fini(void)
178 {
179 
180 	destroy_workqueue(system_power_efficient_wq);
181 	destroy_workqueue(system_long_wq);
182 	destroy_workqueue(system_wq);
183 	lwp_specific_key_delete(workqueue_key);
184 }
185 
186 /*
187  * Workqueues
188  */
189 
190 /*
191  * alloc_ordered_workqueue(name, flags)
192  *
193  *	Create a workqueue of the given name.  No flags are currently
194  *	defined.  Return NULL on failure, pointer to struct
195  *	workqueue_struct object on success.
196  */
197 struct workqueue_struct *
198 alloc_ordered_workqueue(const char *name, int flags)
199 {
200 	struct workqueue_struct *wq;
201 	int error;
202 
203 	KASSERT(flags == 0);
204 
205 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
206 
207 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_NONE);
208 	cv_init(&wq->wq_cv, name);
209 	TAILQ_INIT(&wq->wq_delayed);
210 	TAILQ_INIT(&wq->wq_queue);
211 	TAILQ_INIT(&wq->wq_dqueue);
212 	wq->wq_current_work = NULL;
213 	wq->wq_flags = 0;
214 	wq->wq_dying = false;
215 	wq->wq_gen = 0;
216 	wq->wq_lwp = NULL;
217 
218 	error = kthread_create(PRI_NONE,
219 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
220 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
221 	if (error)
222 		goto fail0;
223 
224 	return wq;
225 
226 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
227 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
228 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
229 	cv_destroy(&wq->wq_cv);
230 	mutex_destroy(&wq->wq_lock);
231 	kmem_free(wq, sizeof(*wq));
232 	return NULL;
233 }
234 
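/*
 * Example usage (illustrative sketch only, not part of this file):
 * a hypothetical driver might create a private ordered workqueue at
 * attach time and destroy it at detach time.  The foo_ names below
 * are made up for illustration.
 *
 *	struct foo_softc {
 *		struct workqueue_struct	*sc_wq;
 *	};
 *
 *	static int
 *	foo_attach(struct foo_softc *sc)
 *	{
 *
 *		sc->sc_wq = alloc_ordered_workqueue("foowq", 0);
 *		if (sc->sc_wq == NULL)
 *			return ENOMEM;
 *		return 0;
 *	}
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *
 *		destroy_workqueue(sc->sc_wq);
 *		sc->sc_wq = NULL;
 *	}
 *
 * destroy_workqueue cancels pending delayed work and waits for any
 * queued work, so the detach path need not drain the queue by hand.
 */
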
235 /*
236  * destroy_workqueue(wq)
237  *
238  *	Destroy the workqueue wq.  Cancel any pending
239  *	delayed work.  Wait for all queued work to complete.
240  *
241  *	May sleep.
242  */
243 void
244 destroy_workqueue(struct workqueue_struct *wq)
245 {
246 
247 	/*
248 	 * Cancel all delayed work.  We do this first because any
249 	 * delayed work that has already timed out, which we can't
250 	 * cancel, may have queued new work.
251 	 */
252 	mutex_enter(&wq->wq_lock);
253 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
254 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
255 
256 		KASSERT(work_queue(&dw->work) == wq);
257 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
258 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
259 			dw->dw_state == DELAYED_WORK_CANCELLED),
260 		    "delayed work %p in bad state: %d",
261 		    dw, dw->dw_state);
262 
263 		/*
264 		 * Mark it cancelled and try to stop the callout before
265 		 * it starts.
266 		 *
267 		 * If it's too late and the callout has already begun
268 		 * to execute, then it will notice that we asked to
269 		 * cancel it and remove itself from the queue before
270 		 * returning.
271 		 *
272 		 * If we stopped the callout before it started,
273 		 * however, then we can safely destroy the callout and
274 		 * dissociate it from the workqueue ourselves.
275 		 */
276 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
277 		dw->dw_state = DELAYED_WORK_CANCELLED;
278 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
279 			cancel_delayed_work_done(wq, dw);
280 	}
281 	mutex_exit(&wq->wq_lock);
282 
283 	/*
284 	 * At this point, no new work can be put on the queue.
285 	 */
286 
287 	/* Tell the thread to exit.  */
288 	mutex_enter(&wq->wq_lock);
289 	wq->wq_dying = true;
290 	cv_broadcast(&wq->wq_cv);
291 	mutex_exit(&wq->wq_lock);
292 
293 	/* Wait for it to exit.  */
294 	(void)kthread_join(wq->wq_lwp);
295 
296 	KASSERT(wq->wq_dying);
297 	KASSERT(wq->wq_flags == 0);
298 	KASSERT(wq->wq_current_work == NULL);
299 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
300 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
301 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
302 	cv_destroy(&wq->wq_cv);
303 	mutex_destroy(&wq->wq_lock);
304 
305 	kmem_free(wq, sizeof(*wq));
306 }
307 
308 /*
309  * Work thread and callout
310  */
311 
312 /*
313  * linux_workqueue_thread(cookie)
314  *
315  *	Main function for a workqueue's worker thread.  Waits until
316  *	there is work queued, grabs a batch of work off the queue,
317  *	executes it all, bumps the generation number, and repeats,
318  *	until dying.
319  */
320 static void __dead
321 linux_workqueue_thread(void *cookie)
322 {
323 	struct workqueue_struct *const wq = cookie;
324 	struct work_head queue, dqueue;
325 	struct work_head *const q[2] = { &queue, &dqueue };
326 	unsigned i;
327 
328 	lwp_setspecific(workqueue_key, wq);
329 
330 	mutex_enter(&wq->wq_lock);
331 	for (;;) {
332 		/*
333 		 * Wait until there's activity.  If there's no work and
334 		 * we're dying, stop here.
335 		 */
336 		while (TAILQ_EMPTY(&wq->wq_queue) &&
337 		    TAILQ_EMPTY(&wq->wq_dqueue) &&
338 		    !wq->wq_dying)
339 			cv_wait(&wq->wq_cv, &wq->wq_lock);
340 		if (wq->wq_dying) {
341 			KASSERT(TAILQ_EMPTY(&wq->wq_queue));
342 			KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
343 			break;
344 		}
345 
346 		/* Grab a batch of work off the queue.  */
347 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
348 		TAILQ_INIT(&queue);
349 		TAILQ_INIT(&dqueue);
350 		TAILQ_CONCAT(&queue, &wq->wq_queue, work_entry);
351 		TAILQ_CONCAT(&dqueue, &wq->wq_dqueue, work_entry);
352 
353 		/* Process each work item in the batch.  */
354 		for (i = 0; i < 2; i++) {
355 			while (!TAILQ_EMPTY(q[i])) {
356 				struct work_struct *work = TAILQ_FIRST(q[i]);
357 				void (*func)(struct work_struct *);
358 
359 				KASSERT(work_queue(work) == wq);
360 				KASSERT(work_claimed(work, wq));
361 				KASSERTMSG((q[i] != &dqueue ||
362 					container_of(work, struct delayed_work,
363 					    work)->dw_state ==
364 					DELAYED_WORK_IDLE),
365 				    "delayed work %p queued and scheduled",
366 				    work);
367 
368 				TAILQ_REMOVE(q[i], work, work_entry);
369 				KASSERT(wq->wq_current_work == NULL);
370 				wq->wq_current_work = work;
371 				func = work->func;
372 				release_work(work, wq);
373 				/* Can't dereference work after this point.  */
374 
375 				mutex_exit(&wq->wq_lock);
376 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
377 				(*func)(work);
378 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
379 				mutex_enter(&wq->wq_lock);
380 
381 				KASSERT(wq->wq_current_work == work);
382 				wq->wq_current_work = NULL;
383 				cv_broadcast(&wq->wq_cv);
384 			}
385 		}
386 
387 		/* Notify flush that we've completed a batch of work.  */
388 		wq->wq_gen++;
389 		cv_broadcast(&wq->wq_cv);
390 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
391 	}
392 	mutex_exit(&wq->wq_lock);
393 
394 	kthread_exit(0);
395 }
396 
397 /*
398  * linux_workqueue_timeout(cookie)
399  *
400  *	Delayed work timeout callback.
401  *
402  *	- If scheduled, queue it.
403  *	- If rescheduled, callout_schedule ourselves again.
404  *	- If cancelled, destroy the callout and release the work from
405  *	  the workqueue.
406  */
407 static void
408 linux_workqueue_timeout(void *cookie)
409 {
410 	struct delayed_work *const dw = cookie;
411 	struct workqueue_struct *const wq = work_queue(&dw->work);
412 
413 	KASSERTMSG(wq != NULL,
414 	    "delayed work %p state %d resched %d",
415 	    dw, dw->dw_state, dw->dw_resched);
416 
417 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
418 
419 	mutex_enter(&wq->wq_lock);
420 	KASSERT(work_queue(&dw->work) == wq);
421 	switch (dw->dw_state) {
422 	case DELAYED_WORK_IDLE:
423 		panic("delayed work callout uninitialized: %p", dw);
424 	case DELAYED_WORK_SCHEDULED:
425 		dw_callout_destroy(wq, dw);
426 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
427 		cv_broadcast(&wq->wq_cv);
428 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
429 		break;
430 	case DELAYED_WORK_RESCHEDULED:
431 		KASSERT(dw->dw_resched >= 0);
432 		callout_schedule(&dw->dw_callout, dw->dw_resched);
433 		dw->dw_state = DELAYED_WORK_SCHEDULED;
434 		dw->dw_resched = -1;
435 		break;
436 	case DELAYED_WORK_CANCELLED:
437 		cancel_delayed_work_done(wq, dw);
438 		/* Can't dereference dw after this point.  */
439 		goto out;
440 	default:
441 		panic("delayed work callout in bad state: %p", dw);
442 	}
443 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
444 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
445 out:	mutex_exit(&wq->wq_lock);
446 }
447 
448 /*
449  * current_work()
450  *
451  *	If in a workqueue worker thread, return the work it is
452  *	currently executing.  Otherwise return NULL.
453  */
454 struct work_struct *
455 current_work(void)
456 {
457 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
458 
459 	/* If we're not a workqueue thread, then there's no work.  */
460 	if (wq == NULL)
461 		return NULL;
462 
463 	/*
464 	 * Otherwise, this should be possible only while work is in
465 	 * progress.  Return the current work item.
466 	 */
467 	KASSERT(wq->wq_current_work != NULL);
468 	return wq->wq_current_work;
469 }
470 
471 /*
472  * Work
473  */
474 
475 /*
476  * INIT_WORK(work, fn)
477  *
478  *	Initialize work for use with a workqueue to call fn in a worker
479  *	thread.  There is no corresponding destruction operation.
480  */
481 void
482 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
483 {
484 
485 	work->work_owner = 0;
486 	work->func = fn;
487 }
488 
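/*
 * Example usage (illustrative sketch only): work is typically
 * embedded in a larger driver structure, initialized once with
 * INIT_WORK, and queued with schedule_work or queue_work; the
 * handler recovers the enclosing structure with container_of.  The
 * foo_ names are hypothetical.
 *
 *	struct foo_softc {
 *		struct work_struct	sc_reset_work;
 *	};
 *
 *	static void
 *	foo_reset_work(struct work_struct *work)
 *	{
 *		struct foo_softc *sc = container_of(work,
 *		    struct foo_softc, sc_reset_work);
 *
 *		foo_reset(sc);
 *	}
 *
 *	static void
 *	foo_init(struct foo_softc *sc)
 *	{
 *
 *		INIT_WORK(&sc->sc_reset_work, foo_reset_work);
 *	}
 *
 *	static void
 *	foo_intr(struct foo_softc *sc)
 *	{
 *
 *		(void)schedule_work(&sc->sc_reset_work);
 *	}
 */
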
489 /*
490  * work_claimed(work, wq)
491  *
492  *	True if work is currently claimed by a workqueue, meaning it is
493  *	either on the queue or scheduled in a callout.  The workqueue
494  *	must be wq, and caller must hold wq's lock.
495  */
496 static bool
497 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
498 {
499 
500 	KASSERT(work_queue(work) == wq);
501 	KASSERT(mutex_owned(&wq->wq_lock));
502 
503 	return work->work_owner & 1;
504 }
505 
506 /*
507  * work_queue(work)
508  *
509  *	Return the last queue that work was queued on, or NULL if it
510  *	was never queued.
511  */
512 static struct workqueue_struct *
513 work_queue(struct work_struct *work)
514 {
515 
516 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
517 }
518 
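/*
 * Note on the work_owner encoding (a summary of the code above and
 * below, for the reader's convenience): work_owner is a uintptr_t
 * holding the address of the last workqueue the work was queued on,
 * with the low bit doubling as the "claimed" flag; acquire_work
 * asserts that workqueue addresses have the low bit clear, so the
 * bit is free for tagging.
 *
 *	work_owner == 0				never queued
 *	work_owner == (uintptr_t)wq		last queued on wq, not
 *						currently claimed
 *	work_owner == ((uintptr_t)wq | 1)	claimed by wq: on wq's
 *						queue or scheduled in
 *						a callout
 *
 * acquire_work sets the claimed bit with an atomic CAS; release_work
 * clears only that bit, so work_queue() keeps reporting the last
 * workqueue even after the work has run.
 */
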
519 /*
520  * acquire_work(work, wq)
521  *
522  *	Try to claim work for wq.  If work is already claimed, it must
523  *	be claimed by wq; return false.  If work is not already
524  *	claimed, claim it, issue a memory barrier to match any prior
525  *	release_work, and return true.
526  *
527  *	Caller must hold wq's lock.
528  */
529 static bool
530 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
531 {
532 	uintptr_t owner0, owner;
533 
534 	KASSERT(mutex_owned(&wq->wq_lock));
535 	KASSERT(((uintptr_t)wq & 1) == 0);
536 
537 	owner = (uintptr_t)wq | 1;
538 	do {
539 		owner0 = work->work_owner;
540 		if (owner0 & 1) {
541 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
542 			return false;
543 		}
544 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
545 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
546 	    owner0);
547 
548 	KASSERT(work_queue(work) == wq);
549 	membar_enter();
550 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
551 	return true;
552 }
553 
554 /*
555  * release_work(work, wq)
556  *
557  *	Issue a memory barrier to match any subsequent acquire_work and
558  *	dissociate work from wq.
559  *
560  *	Caller must hold wq's lock and work must be associated with wq.
561  */
562 static void
563 release_work(struct work_struct *work, struct workqueue_struct *wq)
564 {
565 
566 	KASSERT(work_queue(work) == wq);
567 	KASSERT(mutex_owned(&wq->wq_lock));
568 
569 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
570 	membar_exit();
571 
572 	/*
573 	 * Non-interlocked r/m/w is safe here because nobody else can
574 	 * write to this while the claimed bit is set and the workqueue
575 	 * lock is held.
576 	 */
577 	work->work_owner &= ~(uintptr_t)1;
578 }
579 
580 /*
581  * schedule_work(work)
582  *
583  *	If work is not already queued on system_wq, queue it to be run
584  *	by system_wq's worker thread when it next can.  True if it was
585  *	newly queued, false if it was already queued.  If the work was
586  *	already running, queue it to run again.
587  *
588  *	Caller must ensure work is not queued to run on a different
589  *	workqueue.
590  */
591 bool
592 schedule_work(struct work_struct *work)
593 {
594 
595 	return queue_work(system_wq, work);
596 }
597 
598 /*
599  * queue_work(wq, work)
600  *
601  *	If work is not already queued on wq, queue it to be run by wq's
602  *	worker thread when it next can.  True if it was newly queued,
603  *	false if it was already queued.  If the work was already
604  *	running, queue it to run again.
605  *
606  *	Caller must ensure work is not queued to run on a different
607  *	workqueue.
608  */
609 bool
610 queue_work(struct workqueue_struct *wq, struct work_struct *work)
611 {
612 	bool newly_queued;
613 
614 	KASSERT(wq != NULL);
615 
616 	mutex_enter(&wq->wq_lock);
617 	if (__predict_true(acquire_work(work, wq))) {
618 		/*
619 		 * It wasn't on any workqueue at all.  Put it on this
620 		 * one, and signal the worker thread that there is work
621 		 * to do.
622 		 */
623 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
624 		cv_broadcast(&wq->wq_cv);
625 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
626 		newly_queued = true;
627 	} else {
628 		/*
629 		 * It was already on this workqueue.  Nothing to do
630 		 * since it is already queued.
631 		 */
632 		newly_queued = false;
633 	}
634 	mutex_exit(&wq->wq_lock);
635 
636 	return newly_queued;
637 }
638 
639 /*
640  * cancel_work(work)
641  *
642  *	If work was queued, remove it from the queue and return true.
643  *	If work was not queued, return false.  Work may still be
644  *	running when this returns.
645  */
646 bool
647 cancel_work(struct work_struct *work)
648 {
649 	struct workqueue_struct *wq;
650 	bool cancelled_p = false;
651 
652 	/* If there's no workqueue, nothing to cancel.   */
653 	if ((wq = work_queue(work)) == NULL)
654 		goto out;
655 
656 	mutex_enter(&wq->wq_lock);
657 	if (__predict_false(work_queue(work) != wq)) {
658 		/*
659 		 * It has finished execution or been cancelled by
660 		 * another thread, and has been moved off the
661 		 * workqueue, so it's too late to cancel.
662 		 */
663 		cancelled_p = false;
664 	} else {
665 		/* Check whether it's on the queue.  */
666 		if (work_claimed(work, wq)) {
667 			/*
668 			 * It is still on the queue.  Take it off the
669 			 * queue and report successful cancellation.
670 			 */
671 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
672 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
673 			release_work(work, wq);
674 			/* Can't dereference work after this point.  */
675 			cancelled_p = true;
676 		} else {
677 			/* Not on the queue.  Couldn't cancel it.  */
678 			cancelled_p = false;
679 		}
680 	}
681 	mutex_exit(&wq->wq_lock);
682 
683 out:	return cancelled_p;
684 }
685 
686 /*
687  * cancel_work_sync(work)
688  *
689  *	If work was queued, remove it from the queue and return true.
690  *	If work was not queued, return false.  Either way, if work is
691  *	currently running, wait for it to complete.
692  *
693  *	May sleep.
694  */
695 bool
696 cancel_work_sync(struct work_struct *work)
697 {
698 	struct workqueue_struct *wq;
699 	bool cancelled_p = false;
700 
701 	/* If there's no workqueue, nothing to cancel.   */
702 	if ((wq = work_queue(work)) == NULL)
703 		goto out;
704 
705 	mutex_enter(&wq->wq_lock);
706 	if (__predict_false(work_queue(work) != wq)) {
707 		/*
708 		 * It has finished execution or been cancelled by
709 		 * another thread, and has been moved off the
710 		 * workqueue, so it's too late to cancel.
711 		 */
712 		cancelled_p = false;
713 	} else {
714 		/* Check whether it's on the queue.  */
715 		if (work_claimed(work, wq)) {
716 			/*
717 			 * It is still on the queue.  Take it off the
718 			 * queue and report successful cancellation.
719 			 */
720 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
721 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
722 			release_work(work, wq);
723 			/* Can't dereference work after this point.  */
724 			cancelled_p = true;
725 		} else {
726 			/* Not on the queue.  Couldn't cancel it.  */
727 			cancelled_p = false;
728 		}
729 		/* If it's still running, wait for it to complete.  */
730 		if (wq->wq_current_work == work)
731 			wait_for_current_work(work, wq);
732 	}
733 	mutex_exit(&wq->wq_lock);
734 
735 out:	return cancelled_p;
736 }
737 
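/*
 * Example usage (illustrative sketch only): unlike cancel_work,
 * cancel_work_sync also waits for a handler that is already running,
 * so it is the safe choice before freeing the structure the work is
 * embedded in.  The foo_ names are hypothetical.
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *
 *		(void)cancel_work_sync(&sc->sc_reset_work);
 *		kmem_free(sc, sizeof(*sc));
 *	}
 */
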
738 /*
739  * wait_for_current_work(work, wq)
740  *
741  *	wq must be currently executing work.  Wait for it to finish.
742  *
743  *	Does not dereference work.
744  */
745 static void
746 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
747 {
748 	uint64_t gen;
749 
750 	KASSERT(mutex_owned(&wq->wq_lock));
751 	KASSERT(wq->wq_current_work == work);
752 
753 	/* Wait only one generation in case it gets requeued quickly.  */
754 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
755 	gen = wq->wq_gen;
756 	do {
757 		cv_wait(&wq->wq_cv, &wq->wq_lock);
758 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
759 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
760 }
761 
762 /*
763  * Delayed work
764  */
765 
766 /*
767  * INIT_DELAYED_WORK(dw, fn)
768  *
769  *	Initialize dw for use with a workqueue to call fn in a worker
770  *	thread after a delay.  There is no corresponding destruction
771  *	operation.
772  */
773 void
774 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
775 {
776 
777 	INIT_WORK(&dw->work, fn);
778 	dw->dw_state = DELAYED_WORK_IDLE;
779 	dw->dw_resched = -1;
780 
781 	/*
782 	 * Defer callout_init until we are going to schedule the
783 	 * callout, which can then callout_destroy it, because
784 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
785 	 * we have no opportunity to call callout_destroy.
786 	 */
787 }
788 
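/*
 * Example usage (illustrative sketch only): delayed work is embedded
 * and initialized like plain work, but scheduled with a delay in
 * callout ticks (mstohz converts milliseconds to ticks).  The
 * handler receives the embedded work_struct and recovers the
 * delayed_work with container_of.  The foo_ names are hypothetical.
 *
 *	struct foo_softc {
 *		struct delayed_work	sc_tick_work;
 *	};
 *
 *	static void
 *	foo_tick_work(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct foo_softc *sc = container_of(dw,
 *		    struct foo_softc, sc_tick_work);
 *
 *		foo_tick(sc);
 *	}
 *
 *	static void
 *	foo_start(struct foo_softc *sc)
 *	{
 *
 *		INIT_DELAYED_WORK(&sc->sc_tick_work, foo_tick_work);
 *		(void)schedule_delayed_work(&sc->sc_tick_work,
 *		    mstohz(1000));
 *	}
 */
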
789 /*
790  * schedule_delayed_work(dw, ticks)
791  *
792  *	If it is not currently scheduled, schedule dw to run after
793  *	ticks on system_wq.  If currently executing and not already
794  *	rescheduled, reschedule it.  True if it was newly scheduled,
795  *	false if it was already scheduled.
796  *
797  *	If ticks == 0, queue it to run as soon as the worker can,
798  *	without waiting for the next callout tick to run.
799  */
800 bool
801 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
802 {
803 
804 	return queue_delayed_work(system_wq, dw, ticks);
805 }
806 
807 /*
808  * dw_callout_init(wq, dw)
809  *
810  *	Initialize the callout of dw and transition to
811  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
812  */
813 static void
814 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
815 {
816 
817 	KASSERT(mutex_owned(&wq->wq_lock));
818 	KASSERT(work_queue(&dw->work) == wq);
819 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
820 
821 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
822 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
823 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
824 	dw->dw_state = DELAYED_WORK_SCHEDULED;
825 }
826 
827 /*
828  * dw_callout_destroy(wq, dw)
829  *
830  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
831  */
832 static void
833 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
834 {
835 
836 	KASSERT(mutex_owned(&wq->wq_lock));
837 	KASSERT(work_queue(&dw->work) == wq);
838 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
839 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
840 	    dw->dw_state == DELAYED_WORK_CANCELLED);
841 
842 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
843 	callout_destroy(&dw->dw_callout);
844 	dw->dw_resched = -1;
845 	dw->dw_state = DELAYED_WORK_IDLE;
846 }
847 
848 /*
849  * cancel_delayed_work_done(wq, dw)
850  *
851  *	Complete cancellation of a delayed work: transition from
852  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
853  *	workqueue.  Caller must not dereference dw after this returns.
854  */
855 static void
856 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
857 {
858 
859 	KASSERT(mutex_owned(&wq->wq_lock));
860 	KASSERT(work_queue(&dw->work) == wq);
861 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
862 
863 	dw_callout_destroy(wq, dw);
864 	release_work(&dw->work, wq);
865 	/* Can't dereference dw after this point.  */
866 }
867 
868 /*
869  * queue_delayed_work(wq, dw, ticks)
870  *
871  *	If it is not currently scheduled, schedule dw to run after
872  *	ticks on wq.  If it is already scheduled or queued, leave it
873  *	alone.  True if it was newly scheduled, false otherwise.
874  *
875  *	If ticks == 0, queue it to run as soon as the worker can,
876  *	without waiting for the next callout tick to run.
877  */
878 bool
879 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
880     unsigned long ticks)
881 {
882 	bool newly_queued;
883 
884 	mutex_enter(&wq->wq_lock);
885 	if (__predict_true(acquire_work(&dw->work, wq))) {
886 		/*
887 		 * It wasn't on any workqueue at all.  Schedule it to
888 		 * run on this one.
889 		 */
890 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
891 		if (ticks == 0) {
892 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
893 			    work_entry);
894 			cv_broadcast(&wq->wq_cv);
895 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
896 		} else {
897 			/*
898 			 * Initialize a callout and schedule to run
899 			 * after a delay.
900 			 */
901 			dw_callout_init(wq, dw);
902 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
903 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
904 		}
905 		newly_queued = true;
906 	} else {
907 		/* It was already on this workqueue.  */
908 		switch (dw->dw_state) {
909 		case DELAYED_WORK_IDLE:
910 		case DELAYED_WORK_SCHEDULED:
911 		case DELAYED_WORK_RESCHEDULED:
912 			/* On the queue or already scheduled.  Leave it.  */
913 			newly_queued = false;
914 			break;
915 		case DELAYED_WORK_CANCELLED:
916 			/*
917 			 * Scheduled and the callout began, but it was
918 			 * cancelled.  Reschedule it.
919 			 */
920 			if (ticks == 0) {
921 				dw->dw_state = DELAYED_WORK_SCHEDULED;
922 				SDT_PROBE2(sdt, linux, work, queue,
923 				    &dw->work, wq);
924 			} else {
925 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
926 				dw->dw_resched = MIN(INT_MAX, ticks);
927 				SDT_PROBE3(sdt, linux, work, schedule,
928 				    dw, wq, ticks);
929 			}
930 			newly_queued = true;
931 			break;
932 		default:
933 			panic("invalid delayed work state: %d",
934 			    dw->dw_state);
935 		}
936 	}
937 	mutex_exit(&wq->wq_lock);
938 
939 	return newly_queued;
940 }
941 
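/*
 * Example usage (illustrative sketch only): a common pattern is a
 * polling handler that requeues itself.  The worker thread releases
 * the work before calling the handler, so queue_delayed_work from
 * inside the handler starts a fresh schedule.  The foo_ and sc_
 * names are hypothetical.
 *
 *	static void
 *	foo_poll_work(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct foo_softc *sc = container_of(dw,
 *		    struct foo_softc, sc_poll_work);
 *
 *		foo_poll_once(sc);
 *		if (!sc->sc_stopping)
 *			(void)queue_delayed_work(sc->sc_wq,
 *			    &sc->sc_poll_work, mstohz(100));
 *	}
 */
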
942 /*
943  * mod_delayed_work(wq, dw, ticks)
944  *
945  *	Schedule dw to run after ticks.  If scheduled or queued,
946  *	reschedule.  If ticks == 0, run without delay.
947  *
948  *	True if it modified the timer of an already scheduled work,
949  *	false if it newly scheduled the work.
950  */
951 bool
952 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
953     unsigned long ticks)
954 {
955 	bool timer_modified;
956 
957 	mutex_enter(&wq->wq_lock);
958 	if (acquire_work(&dw->work, wq)) {
959 		/*
960 		 * It wasn't on any workqueue at all.  Schedule it to
961 		 * run on this one.
962 		 */
963 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
964 		if (ticks == 0) {
965 			/*
966 			 * Run immediately: put it on the queue and
967 			 * signal the worker thread.
968 			 */
969 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
970 			    work_entry);
971 			cv_broadcast(&wq->wq_cv);
972 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
973 		} else {
974 			/*
975 			 * Initialize a callout and schedule to run
976 			 * after a delay.
977 			 */
978 			dw_callout_init(wq, dw);
979 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
980 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
981 		}
982 		timer_modified = false;
983 	} else {
984 		/* It was already on this workqueue.  */
985 		switch (dw->dw_state) {
986 		case DELAYED_WORK_IDLE:
987 			/* On the queue.  */
988 			if (ticks == 0) {
989 				/* Leave it be.  */
990 				SDT_PROBE2(sdt, linux, work, cancel,
991 				    &dw->work, wq);
992 				SDT_PROBE2(sdt, linux, work, queue,
993 				    &dw->work, wq);
994 			} else {
995 				/* Remove from the queue and schedule.  */
996 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
997 				    work_entry);
998 				dw_callout_init(wq, dw);
999 				callout_schedule(&dw->dw_callout,
1000 				    MIN(INT_MAX, ticks));
1001 				SDT_PROBE2(sdt, linux, work, cancel,
1002 				    &dw->work, wq);
1003 				SDT_PROBE3(sdt, linux, work, schedule,
1004 				    dw, wq, ticks);
1005 			}
1006 			timer_modified = true;
1007 			break;
1008 		case DELAYED_WORK_SCHEDULED:
1009 			/*
1010 			 * It is scheduled to run after a delay.  Try
1011 			 * to stop it and reschedule it; if we can't,
1012 			 * either reschedule it or cancel it to put it
1013 			 * on the queue, and inform the callout.
1014 			 */
1015 			if (callout_stop(&dw->dw_callout)) {
1016 				/* Can't stop, callout has begun.  */
1017 				if (ticks == 0) {
1018 					/*
1019 					 * We don't actually need to do
1020 					 * anything.  The callout will
1021 					 * queue it as soon as it gets
1022 					 * the lock.
1023 					 */
1024 					SDT_PROBE2(sdt, linux, work, cancel,
1025 					    &dw->work, wq);
1026 					SDT_PROBE2(sdt, linux, work, queue,
1027 					    &dw->work, wq);
1028 				} else {
1029 					/* Ask the callout to reschedule.  */
1030 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1031 					dw->dw_resched = MIN(INT_MAX, ticks);
1032 					SDT_PROBE2(sdt, linux, work, cancel,
1033 					    &dw->work, wq);
1034 					SDT_PROBE3(sdt, linux, work, schedule,
1035 					    dw, wq, ticks);
1036 				}
1037 			} else {
1038 				/* We stopped the callout before it began.  */
1039 				if (ticks == 0) {
1040 					/*
1041 					 * Run immediately: destroy the
1042 					 * callout, put it on the
1043 					 * queue, and signal the worker
1044 					 * thread.
1045 					 */
1046 					dw_callout_destroy(wq, dw);
1047 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1048 					    &dw->work, work_entry);
1049 					cv_broadcast(&wq->wq_cv);
1050 					SDT_PROBE2(sdt, linux, work, cancel,
1051 					    &dw->work, wq);
1052 					SDT_PROBE2(sdt, linux, work, queue,
1053 					    &dw->work, wq);
1054 				} else {
1055 					/*
1056 					 * Reschedule the callout.  No
1057 					 * state change.
1058 					 */
1059 					callout_schedule(&dw->dw_callout,
1060 					    MIN(INT_MAX, ticks));
1061 					SDT_PROBE2(sdt, linux, work, cancel,
1062 					    &dw->work, wq);
1063 					SDT_PROBE3(sdt, linux, work, schedule,
1064 					    dw, wq, ticks);
1065 				}
1066 			}
1067 			timer_modified = true;
1068 			break;
1069 		case DELAYED_WORK_RESCHEDULED:
1070 			/*
1071 			 * Someone rescheduled it after the callout
1072 			 * started but before the poor thing even had a
1073 			 * chance to acquire the lock.
1074 			 */
1075 			if (ticks == 0) {
1076 				/*
1077 				 * We can just switch back to
1078 				 * DELAYED_WORK_SCHEDULED so that the
1079 				 * callout will queue the work as soon
1080 				 * as it gets the lock.
1081 				 */
1082 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1083 				dw->dw_resched = -1;
1084 				SDT_PROBE2(sdt, linux, work, cancel,
1085 				    &dw->work, wq);
1086 				SDT_PROBE2(sdt, linux, work, queue,
1087 				    &dw->work, wq);
1088 			} else {
1089 				/* Change the rescheduled time.  */
1090 				dw->dw_resched = MIN(INT_MAX, ticks);
1091 				SDT_PROBE2(sdt, linux, work, cancel,
1092 				    &dw->work, wq);
1093 				SDT_PROBE3(sdt, linux, work, schedule,
1094 				    dw, wq, ticks);
1095 			}
1096 			timer_modified = true;
1097 			break;
1098 		case DELAYED_WORK_CANCELLED:
1099 			/*
1100 			 * Someone cancelled it after the callout
1101 			 * started but before the poor thing even had a
1102 			 * chance to acquire the lock.
1103 			 */
1104 			if (ticks == 0) {
1105 				/*
1106 				 * We can just switch back to
1107 				 * DELAYED_WORK_SCHEDULED so that the
1108 				 * callout will queue the work as soon
1109 				 * as it gets the lock.
1110 				 */
1111 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1112 				SDT_PROBE2(sdt, linux, work, queue,
1113 				    &dw->work, wq);
1114 			} else {
1115 				/* Ask it to reschedule.  */
1116 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1117 				dw->dw_resched = MIN(INT_MAX, ticks);
1118 				SDT_PROBE3(sdt, linux, work, schedule,
1119 				    dw, wq, ticks);
1120 			}
1121 			timer_modified = false;
1122 			break;
1123 		default:
1124 			panic("invalid delayed work state: %d", dw->dw_state);
1125 		}
1126 	}
1127 	mutex_exit(&wq->wq_lock);
1128 
1129 	return timer_modified;
1130 }
1131 
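/*
 * Example usage (illustrative sketch only): mod_delayed_work suits a
 * watchdog or debounce timer, since it pushes the deadline back
 * whether or not the work is already scheduled or queued.  The foo_
 * and sc_ names are hypothetical.
 *
 *	static void
 *	foo_kick_watchdog(struct foo_softc *sc)
 *	{
 *
 *		(void)mod_delayed_work(sc->sc_wq, &sc->sc_wdog_work,
 *		    mstohz(5000));
 *	}
 */
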
1132 /*
1133  * cancel_delayed_work(dw)
1134  *
1135  *	If work was scheduled or queued, remove it from the schedule or
1136  *	queue and return true.  If work was not scheduled or queued,
1137  *	return false.  Note that work may already be running; if it
1138  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1139  *	will return false, and either way, cancel_delayed_work will NOT
1140  *	wait for the work to complete.
1141  */
1142 bool
1143 cancel_delayed_work(struct delayed_work *dw)
1144 {
1145 	struct workqueue_struct *wq;
1146 	bool cancelled_p;
1147 
1148 	/* If there's no workqueue, nothing to cancel.   */
1149 	if ((wq = work_queue(&dw->work)) == NULL)
1150 		return false;
1151 
1152 	mutex_enter(&wq->wq_lock);
1153 	if (__predict_false(work_queue(&dw->work) != wq)) {
1154 		cancelled_p = false;
1155 	} else {
1156 		switch (dw->dw_state) {
1157 		case DELAYED_WORK_IDLE:
1158 			/*
1159 			 * It is either on the queue or already running
1160 			 * or both.
1161 			 */
1162 			if (work_claimed(&dw->work, wq)) {
1163 				/* On the queue.  Remove and release.  */
1164 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1165 				    work_entry);
1166 				SDT_PROBE2(sdt, linux, work, cancel,
1167 				    &dw->work, wq);
1168 				release_work(&dw->work, wq);
1169 				/* Can't dereference dw after this point.  */
1170 				cancelled_p = true;
1171 			} else {
1172 				/* Not on the queue, so didn't cancel.  */
1173 				cancelled_p = false;
1174 			}
1175 			break;
1176 		case DELAYED_WORK_SCHEDULED:
1177 			/*
1178 			 * If it is scheduled, mark it cancelled and
1179 			 * try to stop the callout before it starts.
1180 			 *
1181 			 * If it's too late and the callout has already
1182 			 * begun to execute, tough.
1183 			 *
1184 			 * If we stopped the callout before it started,
1185 			 * however, then destroy the callout and
1186 			 * dissociate it from the workqueue ourselves.
1187 			 */
1188 			dw->dw_state = DELAYED_WORK_CANCELLED;
1189 			cancelled_p = true;
1190 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1191 			if (!callout_stop(&dw->dw_callout))
1192 				cancel_delayed_work_done(wq, dw);
1193 			break;
1194 		case DELAYED_WORK_RESCHEDULED:
1195 			/*
1196 			 * If it is being rescheduled, the callout has
1197 			 * already fired.  We must ask it to cancel.
1198 			 */
1199 			dw->dw_state = DELAYED_WORK_CANCELLED;
1200 			dw->dw_resched = -1;
1201 			cancelled_p = true;
1202 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1203 			break;
1204 		case DELAYED_WORK_CANCELLED:
1205 			/*
1206 			 * If it is being cancelled, the callout has
1207 			 * already fired.  There is nothing more for us
1208 			 * to do.  Someone else claims credit for
1209 			 * cancelling it.
1210 			 */
1211 			cancelled_p = false;
1212 			break;
1213 		default:
1214 			panic("invalid delayed work state: %d",
1215 			    dw->dw_state);
1216 		}
1217 	}
1218 	mutex_exit(&wq->wq_lock);
1219 
1220 	return cancelled_p;
1221 }
1222 
1223 /*
1224  * cancel_delayed_work_sync(dw)
1225  *
1226  *	If work was scheduled or queued, remove it from the schedule or
1227  *	queue and return true.  If work was not scheduled or queued,
1228  *	return false.  Note that work may already be running; if it
1229  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1230  *	will return false; either way, wait for it to complete.
1231  */
1232 bool
1233 cancel_delayed_work_sync(struct delayed_work *dw)
1234 {
1235 	struct workqueue_struct *wq;
1236 	bool cancelled_p;
1237 
1238 	/* If there's no workqueue, nothing to cancel.  */
1239 	if ((wq = work_queue(&dw->work)) == NULL)
1240 		return false;
1241 
1242 	mutex_enter(&wq->wq_lock);
1243 	if (__predict_false(work_queue(&dw->work) != wq)) {
1244 		cancelled_p = false;
1245 	} else {
1246 		switch (dw->dw_state) {
1247 		case DELAYED_WORK_IDLE:
1248 			/*
1249 			 * It is either on the queue or already running
1250 			 * or both.
1251 			 */
1252 			if (work_claimed(&dw->work, wq)) {
1253 				/* On the queue.  Remove and release.  */
1254 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1255 				    work_entry);
1256 				SDT_PROBE2(sdt, linux, work, cancel,
1257 				    &dw->work, wq);
1258 				release_work(&dw->work, wq);
1259 				/* Can't dereference dw after this point.  */
1260 				cancelled_p = true;
1261 			} else {
1262 				/* Not on the queue, so didn't cancel. */
1263 				cancelled_p = false;
1264 			}
1265 			/* If it's still running, wait for it to complete.  */
1266 			if (wq->wq_current_work == &dw->work)
1267 				wait_for_current_work(&dw->work, wq);
1268 			break;
1269 		case DELAYED_WORK_SCHEDULED:
1270 			/*
1271 			 * If it is scheduled, mark it cancelled and
1272 			 * try to stop the callout before it starts.
1273 			 *
1274 			 * If it's too late and the callout has already
1275 			 * begun to execute, we must wait for it to
1276 			 * complete.  But we got in soon enough to ask
1277 			 * the callout not to run, so we successfully
1278 			 * cancelled it in that case.
1279 			 *
1280 			 * If we stopped the callout before it started,
1281 			 * then we must destroy the callout and
1282 			 * dissociate it from the workqueue ourselves.
1283 			 */
1284 			dw->dw_state = DELAYED_WORK_CANCELLED;
1285 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1286 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1287 				cancel_delayed_work_done(wq, dw);
1288 			cancelled_p = true;
1289 			break;
1290 		case DELAYED_WORK_RESCHEDULED:
1291 			/*
1292 			 * If it is being rescheduled, the callout has
1293 			 * already fired.  We must ask it to cancel and
1294 			 * wait for it to complete.
1295 			 */
1296 			dw->dw_state = DELAYED_WORK_CANCELLED;
1297 			dw->dw_resched = -1;
1298 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1299 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1300 			cancelled_p = true;
1301 			break;
1302 		case DELAYED_WORK_CANCELLED:
1303 			/*
1304 			 * If it is being cancelled, the callout has
1305 			 * already fired.  We need only wait for it to
1306 			 * complete.  Someone else, however, claims
1307 			 * credit for cancelling it.
1308 			 */
1309 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1310 			cancelled_p = false;
1311 			break;
1312 		default:
1313 			panic("invalid delayed work state: %d",
1314 			    dw->dw_state);
1315 		}
1316 	}
1317 	mutex_exit(&wq->wq_lock);
1318 
1319 	return cancelled_p;
1320 }
1321 
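/*
 * Example usage (illustrative sketch only): on a stop or detach
 * path, cancel_delayed_work_sync both stops a pending timer and
 * waits for a handler that is already running, so it is safe to
 * free the enclosing structure afterwards.  The foo_ and sc_ names
 * are hypothetical.
 *
 *	static void
 *	foo_stop(struct foo_softc *sc)
 *	{
 *
 *		sc->sc_stopping = true;
 *		(void)cancel_delayed_work_sync(&sc->sc_poll_work);
 *	}
 */
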
1322 /*
1323  * Flush
1324  */
1325 
1326 /*
1327  * flush_scheduled_work()
1328  *
1329  *	Wait for all work queued on system_wq to complete.  This does
1330  *	not include delayed work.
1331  */
1332 void
1333 flush_scheduled_work(void)
1334 {
1335 
1336 	flush_workqueue(system_wq);
1337 }
1338 
1339 /*
1340  * flush_workqueue_locked(wq)
1341  *
1342  *	Wait for all work queued on wq to complete.  This does not
1343  *	include delayed work.
1344  *
1345  *	Caller must hold wq's lock.
1346  */
1347 static void
1348 flush_workqueue_locked(struct workqueue_struct *wq)
1349 {
1350 	uint64_t gen;
1351 
1352 	KASSERT(mutex_owned(&wq->wq_lock));
1353 
1354 	/* Get the current generation number.  */
1355 	gen = wq->wq_gen;
1356 
1357 	/*
1358 	 * If there's a batch of work in progress, we must wait for the
1359 	 * worker thread to finish that batch.
1360 	 */
1361 	if (wq->wq_current_work != NULL)
1362 		gen++;
1363 
1364 	/*
1365 	 * If there's any work yet to be claimed from the queue by the
1366 	 * worker thread, we must wait for it to finish one more batch
1367 	 * too.
1368 	 */
1369 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue))
1370 		gen++;
1371 
1372 	/* Wait until the generation number has caught up.  */
1373 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1374 	while (wq->wq_gen < gen)
1375 		cv_wait(&wq->wq_cv, &wq->wq_lock);
1376 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1377 }
1378 
1379 /*
1380  * flush_workqueue(wq)
1381  *
1382  *	Wait for all work queued on wq to complete.  This does not
1383  *	include delayed work.
1384  */
1385 void
1386 flush_workqueue(struct workqueue_struct *wq)
1387 {
1388 
1389 	mutex_enter(&wq->wq_lock);
1390 	flush_workqueue_locked(wq);
1391 	mutex_exit(&wq->wq_lock);
1392 }
1393 
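/*
 * Example usage (illustrative sketch only): flush_workqueue is
 * useful when previously queued work must be seen to complete
 * without being cancelled, e.g. before powering down a device.  As
 * noted above, it does not wait for delayed work still sitting in a
 * callout.  The foo_ and sc_ names are hypothetical.
 *
 *	static void
 *	foo_suspend(struct foo_softc *sc)
 *	{
 *
 *		flush_workqueue(sc->sc_wq);
 *		foo_power_down(sc);
 *	}
 */
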
1394 /*
1395  * flush_work(work)
1396  *
1397  *	If work is queued or currently executing, wait for it to
1398  *	complete.
1399  */
1400 void
1401 flush_work(struct work_struct *work)
1402 {
1403 	struct workqueue_struct *wq;
1404 
1405 	/* If there's no workqueue, nothing to flush.  */
1406 	if ((wq = work_queue(work)) == NULL)
1407 		return;
1408 
1409 	flush_workqueue(wq);
1410 }
1411 
1412 /*
1413  * flush_delayed_work(dw)
1414  *
1415  *	If dw is scheduled to run after a delay, queue it immediately
1416  *	instead.  Then, if dw is queued or currently executing, wait
1417  *	for it to complete.
1418  */
1419 void
1420 flush_delayed_work(struct delayed_work *dw)
1421 {
1422 	struct workqueue_struct *wq;
1423 
1424 	/* If there's no workqueue, nothing to flush.  */
1425 	if ((wq = work_queue(&dw->work)) == NULL)
1426 		return;
1427 
1428 	mutex_enter(&wq->wq_lock);
1429 	if (__predict_false(work_queue(&dw->work) != wq)) {
1430 		/*
1431 		 * Moved off the queue already (and possibly to another
1432 		 * queue, though that would be ill-advised), so it must
1433 		 * have completed, and we have nothing more to do.
1434 		 */
1435 	} else {
1436 		switch (dw->dw_state) {
1437 		case DELAYED_WORK_IDLE:
1438 			/*
1439 			 * It has a workqueue assigned and the callout
1440 			 * is idle, so it must be in progress or on the
1441 			 * queue.  In that case, we'll wait for it to
1442 			 * complete.
1443 			 */
1444 			break;
1445 		case DELAYED_WORK_SCHEDULED:
1446 		case DELAYED_WORK_RESCHEDULED:
1447 		case DELAYED_WORK_CANCELLED:
1448 			/*
1449 			 * The callout is scheduled, and may have even
1450 			 * started.  Mark it as scheduled so that if
1451 			 * the callout has fired it will queue the work
1452 			 * itself.  Try to stop the callout -- if we
1453 			 * can, queue the work now; if we can't, wait
1454 			 * for the callout to complete, which entails
1455 			 * queueing it.
1456 			 */
1457 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1458 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1459 				/*
1460 				 * We stopped it before it ran.  No
1461 				 * state change in the interim is
1462 				 * possible.  Destroy the callout and
1463 				 * queue it ourselves.
1464 				 */
1465 				KASSERT(dw->dw_state ==
1466 				    DELAYED_WORK_SCHEDULED);
1467 				dw_callout_destroy(wq, dw);
1468 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1469 				    work_entry);
1470 				cv_broadcast(&wq->wq_cv);
1471 				SDT_PROBE2(sdt, linux, work, queue,
1472 				    &dw->work, wq);
1473 			}
1474 			break;
1475 		default:
1476 			panic("invalid delayed work state: %d", dw->dw_state);
1477 		}
1478 		/*
1479 		 * Waiting for the whole queue to flush is overkill,
1480 		 * but doesn't hurt.
1481 		 */
1482 		flush_workqueue_locked(wq);
1483 	}
1484 	mutex_exit(&wq->wq_lock);
1485 }
1486