1 /*	$NetBSD: linux_work.c,v 1.49 2021/12/19 01:04:05 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.49 2021/12/19 01:04:05 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49 
50 #include <linux/workqueue.h>
51 
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54 
55 struct workqueue_struct {
56 	kmutex_t		wq_lock;
57 	kcondvar_t		wq_cv;
58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
59 	struct work_head	wq_queue;   /* work to run */
60 	struct work_head	wq_dqueue;  /* delayed work to run now */
61 	struct work_struct	*wq_current_work;
62 	int			wq_flags;
63 	bool			wq_dying;
64 	uint64_t		wq_gen;
65 	struct lwp		*wq_lwp;
66 };
67 
68 static void __dead	linux_workqueue_thread(void *);
69 static void		linux_workqueue_timeout(void *);
70 static bool		work_claimed(struct work_struct *,
71 			    struct workqueue_struct *);
72 static struct workqueue_struct *
73 			work_queue(struct work_struct *);
74 static bool		acquire_work(struct work_struct *,
75 			    struct workqueue_struct *);
76 static void		release_work(struct work_struct *,
77 			    struct workqueue_struct *);
78 static void		wait_for_current_work(struct work_struct *,
79 			    struct workqueue_struct *);
80 static void		dw_callout_init(struct workqueue_struct *,
81 			    struct delayed_work *);
82 static void		dw_callout_destroy(struct workqueue_struct *,
83 			    struct delayed_work *);
84 static void		cancel_delayed_work_done(struct workqueue_struct *,
85 			    struct delayed_work *);
86 
87 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
88     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
89 SDT_PROBE_DEFINE2(sdt, linux, work, release,
90     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
91 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
92     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
93 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
94     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
95 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
96     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
97     "unsigned long"/*ticks*/);
98 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
99     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
100 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
101     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
102 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
103     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
104 SDT_PROBE_DEFINE2(sdt, linux, work, run,
105     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
106 SDT_PROBE_DEFINE2(sdt, linux, work, done,
107     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
108 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
109     "struct workqueue_struct *"/*wq*/);
110 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
111     "struct workqueue_struct *"/*wq*/);
112 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
113     "struct workqueue_struct *"/*wq*/);
114 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
115     "struct workqueue_struct *"/*wq*/);
116 
117 static specificdata_key_t workqueue_key __read_mostly;
118 
119 struct workqueue_struct	*system_wq __read_mostly;
120 struct workqueue_struct	*system_long_wq __read_mostly;
121 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
122 struct workqueue_struct	*system_unbound_wq __read_mostly;
123 
124 static inline uintptr_t
125 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
126 {
127 
128 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
129 }
130 
131 /*
132  * linux_workqueue_init()
133  *
134  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
135  *	NetBSD error on failure.
136  */
137 static int
138 linux_workqueue_init0(void)
139 {
140 	int error;
141 
142 	error = lwp_specific_key_create(&workqueue_key, NULL);
143 	if (error)
144 		goto fail0;
145 
146 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
147 	if (system_wq == NULL) {
148 		error = ENOMEM;
149 		goto fail1;
150 	}
151 
152 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
153 	if (system_long_wq == NULL) {
154 		error = ENOMEM;
155 		goto fail2;
156 	}
157 
158 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
159 	if (system_power_efficient_wq == NULL) {
160 		error = ENOMEM;
161 		goto fail3;
162 	}
163 
164 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
165 	if (system_unbound_wq == NULL) {
166 		error = ENOMEM;
167 		goto fail4;
168 	}
169 
170 	return 0;
171 
172 fail5: __unused
173 	destroy_workqueue(system_unbound_wq);
174 fail4:	destroy_workqueue(system_power_efficient_wq);
175 fail3:	destroy_workqueue(system_long_wq);
176 fail2:	destroy_workqueue(system_wq);
177 fail1:	lwp_specific_key_delete(workqueue_key);
178 fail0:	KASSERT(error);
179 	return error;
180 }
181 
182 /*
183  * linux_workqueue_fini()
184  *
185  *	Destroy the Linux workqueue subsystem.  Never fails.
186  */
187 static void
188 linux_workqueue_fini0(void)
189 {
190 
191 	destroy_workqueue(system_unbound_wq);
192 	destroy_workqueue(system_power_efficient_wq);
193 	destroy_workqueue(system_long_wq);
194 	destroy_workqueue(system_wq);
195 	lwp_specific_key_delete(workqueue_key);
196 }
196 
197 #ifndef _MODULE
198 static ONCE_DECL(linux_workqueue_init_once);
199 #endif
200 
201 int
202 linux_workqueue_init(void)
203 {
204 #ifdef _MODULE
205 	return linux_workqueue_init0();
206 #else
207 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
208 #endif
209 }
210 
211 void
212 linux_workqueue_fini(void)
213 {
214 #ifdef _MODULE
215 	return linux_workqueue_fini0();
216 #else
217 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
218 #endif
219 }
220 
221 /*
222  * Workqueues
223  */
224 
225 /*
226  * alloc_ordered_workqueue(name, flags)
227  *
228  *	Create a workqueue of the given name.  No flags are currently
229  *	defined.  Return NULL on failure, pointer to struct
230  *	workqueue_struct object on success.
231  */
232 struct workqueue_struct *
233 alloc_ordered_workqueue(const char *name, int flags)
234 {
235 	struct workqueue_struct *wq;
236 	int error;
237 
238 	KASSERT(flags == 0);
239 
240 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
241 
242 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
243 	cv_init(&wq->wq_cv, name);
244 	TAILQ_INIT(&wq->wq_delayed);
245 	TAILQ_INIT(&wq->wq_queue);
246 	TAILQ_INIT(&wq->wq_dqueue);
247 	wq->wq_current_work = NULL;
248 	wq->wq_flags = 0;
249 	wq->wq_dying = false;
250 	wq->wq_gen = 0;
251 	wq->wq_lwp = NULL;
252 
253 	error = kthread_create(PRI_NONE,
254 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
255 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
256 	if (error)
257 		goto fail0;
258 
259 	return wq;
260 
261 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
262 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
263 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
264 	cv_destroy(&wq->wq_cv);
265 	mutex_destroy(&wq->wq_lock);
266 	kmem_free(wq, sizeof(*wq));
267 	return NULL;
268 }
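
/*
 * Example (illustrative sketch only, not part of this API): a driver
 * that wants its own serialized worker instead of system_wq might use
 * the hypothetical names below.
 *
 *	struct workqueue_struct *mydrv_wq;
 *
 *	mydrv_wq = alloc_ordered_workqueue("mydrvwq", 0);
 *	if (mydrv_wq == NULL)
 *		return ENOMEM;
 *	...
 *	destroy_workqueue(mydrv_wq);
 */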
269 
270 /*
271  * destroy_workqueue(wq)
272  *
273  *	Destroy a workqueue created with alloc_ordered_workqueue.  Cancel
274  *	any pending delayed work.  Wait for all queued work to complete.
275  *
276  *	May sleep.
277  */
278 void
279 destroy_workqueue(struct workqueue_struct *wq)
280 {
281 
282 	/*
283 	 * Cancel all delayed work.  We do this first because any
284 	 * delayed work that has already timed out, which we can't
285 	 * cancel, may have queued new work.
286 	 */
287 	mutex_enter(&wq->wq_lock);
288 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
289 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
290 
291 		KASSERT(work_queue(&dw->work) == wq);
292 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
293 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
294 			dw->dw_state == DELAYED_WORK_CANCELLED),
295 		    "delayed work %p in bad state: %d",
296 		    dw, dw->dw_state);
297 
298 		/*
299 		 * Mark it cancelled and try to stop the callout before
300 		 * it starts.
301 		 *
302 		 * If it's too late and the callout has already begun
303 		 * to execute, then it will notice that we asked to
304 		 * cancel it and remove itself from the queue before
305 		 * returning.
306 		 *
307 		 * If we stopped the callout before it started,
308 		 * however, then we can safely destroy the callout and
309 		 * dissociate it from the workqueue ourselves.
310 		 */
311 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
312 		dw->dw_state = DELAYED_WORK_CANCELLED;
313 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
314 			cancel_delayed_work_done(wq, dw);
315 	}
316 	mutex_exit(&wq->wq_lock);
317 
318 	/*
319 	 * At this point, no new work can be put on the queue.
320 	 */
321 
322 	/* Tell the thread to exit.  */
323 	mutex_enter(&wq->wq_lock);
324 	wq->wq_dying = true;
325 	cv_broadcast(&wq->wq_cv);
326 	mutex_exit(&wq->wq_lock);
327 
328 	/* Wait for it to exit.  */
329 	(void)kthread_join(wq->wq_lwp);
330 
331 	KASSERT(wq->wq_dying);
332 	KASSERT(wq->wq_flags == 0);
333 	KASSERT(wq->wq_current_work == NULL);
334 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
335 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
336 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
337 	cv_destroy(&wq->wq_cv);
338 	mutex_destroy(&wq->wq_lock);
339 
340 	kmem_free(wq, sizeof(*wq));
341 }
342 
343 /*
344  * Work thread and callout
345  */
346 
347 /*
348  * linux_workqueue_thread(cookie)
349  *
350  *	Main function for a workqueue's worker thread.  Waits until
351  *	there is work queued, grabs a batch of work off the queue,
352  *	executes it all, bumps the generation number, and repeats,
353  *	until dying.
354  */
355 static void __dead
356 linux_workqueue_thread(void *cookie)
357 {
358 	struct workqueue_struct *const wq = cookie;
359 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
360 	struct work_struct marker, *work;
361 	unsigned i;
362 
363 	lwp_setspecific(workqueue_key, wq);
364 
365 	mutex_enter(&wq->wq_lock);
366 	for (;;) {
367 		/*
368 		 * Wait until there's activity.  If there's no work and
369 		 * we're dying, stop here.
370 		 */
371 		if (TAILQ_EMPTY(&wq->wq_queue) &&
372 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
373 			if (wq->wq_dying)
374 				break;
375 			cv_wait(&wq->wq_cv, &wq->wq_lock);
376 			continue;
377 		}
378 
379 		/*
380 		 * Start a batch of work.  Use a marker to delimit when
381 		 * the batch ends so we can advance the generation
382 		 * after the batch.
383 		 */
384 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
385 		for (i = 0; i < 2; i++) {
386 			if (TAILQ_EMPTY(q[i]))
387 				continue;
388 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
389 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
390 				void (*func)(struct work_struct *);
391 
392 				KASSERT(work_queue(work) == wq);
393 				KASSERT(work_claimed(work, wq));
394 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
395 					container_of(work, struct delayed_work,
396 					    work)->dw_state ==
397 					DELAYED_WORK_IDLE),
398 				    "delayed work %p queued and scheduled",
399 				    work);
400 
401 				TAILQ_REMOVE(q[i], work, work_entry);
402 				KASSERT(wq->wq_current_work == NULL);
403 				wq->wq_current_work = work;
404 				func = work->func;
405 				release_work(work, wq);
406 				/* Can't dereference work after this point.  */
407 
408 				mutex_exit(&wq->wq_lock);
409 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
410 				(*func)(work);
411 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
412 				mutex_enter(&wq->wq_lock);
413 
414 				KASSERT(wq->wq_current_work == work);
415 				wq->wq_current_work = NULL;
416 				cv_broadcast(&wq->wq_cv);
417 			}
418 			TAILQ_REMOVE(q[i], &marker, work_entry);
419 		}
420 
421 		/* Notify flush that we've completed a batch of work.  */
422 		wq->wq_gen++;
423 		cv_broadcast(&wq->wq_cv);
424 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
425 	}
426 	mutex_exit(&wq->wq_lock);
427 
428 	kthread_exit(0);
429 }
430 
431 /*
432  * linux_workqueue_timeout(cookie)
433  *
434  *	Delayed work timeout callback.
435  *
436  *	- If scheduled, queue it.
437  *	- If rescheduled, callout_schedule ourselves again.
438  *	- If cancelled, destroy the callout and release the work from
439  *	  the workqueue.
440  */
441 static void
442 linux_workqueue_timeout(void *cookie)
443 {
444 	struct delayed_work *const dw = cookie;
445 	struct workqueue_struct *const wq = work_queue(&dw->work);
446 
447 	KASSERTMSG(wq != NULL,
448 	    "delayed work %p state %d resched %d",
449 	    dw, dw->dw_state, dw->dw_resched);
450 
451 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
452 
453 	mutex_enter(&wq->wq_lock);
454 	KASSERT(work_queue(&dw->work) == wq);
455 	switch (dw->dw_state) {
456 	case DELAYED_WORK_IDLE:
457 		panic("delayed work callout uninitialized: %p", dw);
458 	case DELAYED_WORK_SCHEDULED:
459 		dw_callout_destroy(wq, dw);
460 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
461 		cv_broadcast(&wq->wq_cv);
462 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
463 		break;
464 	case DELAYED_WORK_RESCHEDULED:
465 		KASSERT(dw->dw_resched >= 0);
466 		callout_schedule(&dw->dw_callout, dw->dw_resched);
467 		dw->dw_state = DELAYED_WORK_SCHEDULED;
468 		dw->dw_resched = -1;
469 		break;
470 	case DELAYED_WORK_CANCELLED:
471 		cancel_delayed_work_done(wq, dw);
472 		/* Can't dereference dw after this point.  */
473 		goto out;
474 	default:
475 		panic("delayed work callout in bad state: %p", dw);
476 	}
477 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
478 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
479 out:	mutex_exit(&wq->wq_lock);
480 }
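
/*
 * Informal summary of the delayed work states as they are used above
 * and below (the switch statements in this file are authoritative):
 *
 *	IDLE		no callout armed; the work may be on wq_dqueue,
 *			running, or not claimed at all
 *	SCHEDULED	callout armed, dw on wq_delayed; when it fires,
 *			the timeout above moves the work onto wq_dqueue
 *	RESCHEDULED	the callout has begun and could not be stopped;
 *			when it gets the lock it re-arms itself for
 *			dw_resched ticks and returns to SCHEDULED
 *	CANCELLED	cancellation requested; whoever gets there first
 *			(the canceller, if it stopped the callout in
 *			time, or the timeout otherwise) releases the
 *			work without running it
 */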
481 
482 /*
483  * current_work()
484  *
485  *	If in a workqueue worker thread, return the work it is
486  *	currently executing.  Otherwise return NULL.
487  */
488 struct work_struct *
489 current_work(void)
490 {
491 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
492 
493 	/* If we're not a workqueue thread, then there's no work.  */
494 	if (wq == NULL)
495 		return NULL;
496 
497 	/*
498 	 * Otherwise, this should be possible only while work is in
499 	 * progress.  Return the current work item.
500 	 */
501 	KASSERT(wq->wq_current_work != NULL);
502 	return wq->wq_current_work;
503 }
504 
505 /*
506  * Work
507  */
508 
509 /*
510  * INIT_WORK(work, fn)
511  *
512  *	Initialize work for use with a workqueue to call fn in a worker
513  *	thread.  There is no corresponding destruction operation.
514  */
515 void
516 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
517 {
518 
519 	work->work_owner = 0;
520 	work->func = fn;
521 }
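
/*
 * Example usage (hypothetical driver code, for illustration only;
 * struct mydrv_softc and mydrv_task are not part of this file):
 *
 *	struct mydrv_softc {
 *		struct work_struct	sc_work;
 *		...
 *	};
 *
 *	static void
 *	mydrv_task(struct work_struct *work)
 *	{
 *		struct mydrv_softc *sc =
 *		    container_of(work, struct mydrv_softc, sc_work);
 *		...
 *	}
 *
 *	INIT_WORK(&sc->sc_work, mydrv_task);
 *	queue_work(system_wq, &sc->sc_work);
 */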
522 
523 /*
524  * work_claimed(work, wq)
525  *
526  *	True if work is currently claimed by a workqueue, meaning it is
527  *	either on the queue or scheduled in a callout.  The workqueue
528  *	must be wq, and caller must hold wq's lock.
529  */
530 static bool
531 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
532 {
533 
534 	KASSERT(work_queue(work) == wq);
535 	KASSERT(mutex_owned(&wq->wq_lock));
536 
537 	return work->work_owner & 1;
538 }
539 
540 /*
541  * work_pending(work)
542  *
543  *	True if work is currently claimed by any workqueue, scheduled
544  *	to run on that workqueue.
545  */
546 bool
547 work_pending(struct work_struct *work)
548 {
549 
550 	return work->work_owner & 1;
551 }
552 
553 /*
554  * work_queue(work)
555  *
556  *	Return the last queue that work was queued on, or NULL if it
557  *	was never queued.
558  */
559 static struct workqueue_struct *
560 work_queue(struct work_struct *work)
561 {
562 
563 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
564 }
565 
566 /*
567  * acquire_work(work, wq)
568  *
569  *	Try to claim work for wq.  If work is already claimed, it must
570  *	be claimed by wq; return false.  If work is not already
571  *	claimed, claim it, issue a memory barrier to match any prior
572  *	release_work, and return true.
573  *
574  *	Caller must hold wq's lock.
575  */
576 static bool
577 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
578 {
579 	uintptr_t owner0, owner;
580 
581 	KASSERT(mutex_owned(&wq->wq_lock));
582 	KASSERT(((uintptr_t)wq & 1) == 0);
583 
584 	owner = (uintptr_t)wq | 1;
585 	do {
586 		owner0 = work->work_owner;
587 		if (owner0 & 1) {
588 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
589 			return false;
590 		}
591 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
592 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
593 	    owner0);
594 
595 	KASSERT(work_queue(work) == wq);
596 	membar_enter();
597 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
598 	return true;
599 }
600 
601 /*
602  * release_work(work, wq)
603  *
604  *	Issue a memory barrier to match any subsequent acquire_work and
605  *	dissociate work from wq.
606  *
607  *	Caller must hold wq's lock and work must be associated with wq.
608  */
609 static void
610 release_work(struct work_struct *work, struct workqueue_struct *wq)
611 {
612 
613 	KASSERT(work_queue(work) == wq);
614 	KASSERT(mutex_owned(&wq->wq_lock));
615 
616 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
617 	membar_exit();
618 
619 	/*
620 	 * Non-interlocked r/m/w is safe here because nobody else can
621 	 * write to this while the claimed bit is setand the workqueue
622 	 * lock is held.
623 	 */
624 	work->work_owner &= ~(uintptr_t)1;
625 }
626 
627 /*
628  * schedule_work(work)
629  *
630  *	If work is not already queued on system_wq, queue it to be run
631  *	by system_wq's worker thread when it next can.  True if it was
632  *	newly queued, false if it was already queued.  If the work was
633  *	already running, queue it to run again.
634  *
635  *	Caller must ensure work is not queued to run on a different
636  *	workqueue.
637  */
638 bool
639 schedule_work(struct work_struct *work)
640 {
641 
642 	return queue_work(system_wq, work);
643 }
644 
645 /*
646  * queue_work(wq, work)
647  *
648  *	If work is not already queued on wq, queue it to be run by wq's
649  *	worker thread when it next can.  True if it was newly queued,
650  *	false if it was already queued.  If the work was already
651  *	running, queue it to run again.
652  *
653  *	Caller must ensure work is not queued to run on a different
654  *	workqueue.
655  */
656 bool
657 queue_work(struct workqueue_struct *wq, struct work_struct *work)
658 {
659 	bool newly_queued;
660 
661 	KASSERT(wq != NULL);
662 
663 	mutex_enter(&wq->wq_lock);
664 	if (__predict_true(acquire_work(work, wq))) {
665 		/*
666 		 * It wasn't on any workqueue at all.  Put it on this
667 		 * one, and signal the worker thread that there is work
668 		 * to do.
669 		 */
670 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
671 		cv_broadcast(&wq->wq_cv);
672 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
673 		newly_queued = true;
674 	} else {
675 		/*
676 		 * It was already on this workqueue.  Nothing to do
677 		 * since it is already queued.
678 		 */
679 		newly_queued = false;
680 	}
681 	mutex_exit(&wq->wq_lock);
682 
683 	return newly_queued;
684 }
685 
686 /*
687  * cancel_work(work)
688  *
689  *	If work was queued, remove it from the queue and return true.
690  *	If work was not queued, return false.  Work may still be
691  *	running when this returns.
692  */
693 bool
694 cancel_work(struct work_struct *work)
695 {
696 	struct workqueue_struct *wq;
697 	bool cancelled_p = false;
698 
699 	/* If there's no workqueue, nothing to cancel.   */
700 	if ((wq = work_queue(work)) == NULL)
701 		goto out;
702 
703 	mutex_enter(&wq->wq_lock);
704 	if (__predict_false(work_queue(work) != wq)) {
705 		/*
706 		 * It has finished execution or been cancelled by
707 		 * another thread, and has been moved off the
708 		 * workqueue, so it's too late to cancel.
709 		 */
710 		cancelled_p = false;
711 	} else {
712 		/* Check whether it's on the queue.  */
713 		if (work_claimed(work, wq)) {
714 			/*
715 			 * It is still on the queue.  Take it off the
716 			 * queue and report successful cancellation.
717 			 */
718 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
719 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
720 			release_work(work, wq);
721 			/* Can't dereference work after this point.  */
722 			cancelled_p = true;
723 		} else {
724 			/* Not on the queue.  Couldn't cancel it.  */
725 			cancelled_p = false;
726 		}
727 	}
728 	mutex_exit(&wq->wq_lock);
729 
730 out:	return cancelled_p;
731 }
732 
733 /*
734  * cancel_work_sync(work)
735  *
736  *	If work was queued, remove it from the queue and return true.
737  *	If work was not queued, return false.  Either way, if work is
738  *	currently running, wait for it to complete.
739  *
740  *	May sleep.
741  */
742 bool
743 cancel_work_sync(struct work_struct *work)
744 {
745 	struct workqueue_struct *wq;
746 	bool cancelled_p = false;
747 
748 	/* If there's no workqueue, nothing to cancel.   */
749 	if ((wq = work_queue(work)) == NULL)
750 		goto out;
751 
752 	mutex_enter(&wq->wq_lock);
753 	if (__predict_false(work_queue(work) != wq)) {
754 		/*
755 		 * It has finished execution or been cancelled by
756 		 * another thread, and has been moved off the
757 		 * workqueue, so it's too late to cancel.
758 		 */
759 		cancelled_p = false;
760 	} else {
761 		/* Check whether it's on the queue.  */
762 		if (work_claimed(work, wq)) {
763 			/*
764 			 * It is still on the queue.  Take it off the
765 			 * queue and report successful cancellation.
766 			 */
767 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
768 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
769 			release_work(work, wq);
770 			/* Can't dereference work after this point.  */
771 			cancelled_p = true;
772 		} else {
773 			/* Not on the queue.  Couldn't cancel it.  */
774 			cancelled_p = false;
775 		}
776 		/* If it's still running, wait for it to complete.  */
777 		if (wq->wq_current_work == work)
778 			wait_for_current_work(work, wq);
779 	}
780 	mutex_exit(&wq->wq_lock);
781 
782 out:	return cancelled_p;
783 }
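
/*
 * Typical teardown pattern (illustrative only): cancel the work and
 * wait for any in-flight invocation before freeing the memory that
 * embeds it, so the handler can no longer touch it:
 *
 *	(void)cancel_work_sync(&sc->sc_work);
 *	kmem_free(sc, sizeof(*sc));
 *
 * The return value only says whether a queued instance was removed;
 * the wait for a running instance happens either way.
 */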
784 
785 /*
786  * wait_for_current_work(work, wq)
787  *
788  *	wq must be currently executing work.  Wait for it to finish.
789  *
790  *	Does not dereference work.
791  */
792 static void
793 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
794 {
795 	uint64_t gen;
796 
797 	KASSERT(mutex_owned(&wq->wq_lock));
798 	KASSERT(wq->wq_current_work == work);
799 
800 	/* Wait only one generation in case it gets requeued quickly.  */
801 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
802 	gen = wq->wq_gen;
803 	do {
804 		cv_wait(&wq->wq_cv, &wq->wq_lock);
805 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
806 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
807 }
808 
809 /*
810  * Delayed work
811  */
812 
813 /*
814  * INIT_DELAYED_WORK(dw, fn)
815  *
816  *	Initialize dw for use with a workqueue to call fn in a worker
817  *	thread after a delay.  There is no corresponding destruction
818  *	operation.
819  */
820 void
821 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
822 {
823 
824 	INIT_WORK(&dw->work, fn);
825 	dw->dw_state = DELAYED_WORK_IDLE;
826 	dw->dw_resched = -1;
827 
828 	/*
829 	 * Defer callout_init until we are going to schedule the
830 	 * callout, which can then callout_destroy it, because
831 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
832 	 * we have no opportunity to call callout_destroy.
833 	 */
834 }
835 
836 /*
837  * schedule_delayed_work(dw, ticks)
838  *
839  *	If it is not currently scheduled, schedule dw to run after
840  *	ticks on system_wq.  If currently executing and not already
841  *	rescheduled, reschedule it.  True if it was newly scheduled,
842  *	false if it was already scheduled.
843  *
844  *	If ticks == 0, queue it to run as soon as the worker can,
845  *	without waiting for the next callout tick to run.
846  */
847 bool
848 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
849 {
850 
851 	return queue_delayed_work(system_wq, dw, ticks);
852 }
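
/*
 * A common use is a poller that reschedules itself from its own
 * handler (sketch only; the mydrv_* and sc_* names are hypothetical):
 *
 *	static void
 *	mydrv_tick(struct work_struct *work)
 *	{
 *		struct delayed_work *dw =
 *		    container_of(work, struct delayed_work, work);
 *		struct mydrv_softc *sc =
 *		    container_of(dw, struct mydrv_softc, sc_dwork);
 *
 *		mydrv_poll(sc);
 *		if (!sc->sc_stopping)
 *			schedule_delayed_work(&sc->sc_dwork, hz);
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->sc_dwork, mydrv_tick);
 *	schedule_delayed_work(&sc->sc_dwork, hz);
 */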
853 
854 /*
855  * dw_callout_init(wq, dw)
856  *
857  *	Initialize the callout of dw and transition to
858  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
859  */
860 static void
861 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
862 {
863 
864 	KASSERT(mutex_owned(&wq->wq_lock));
865 	KASSERT(work_queue(&dw->work) == wq);
866 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
867 
868 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
869 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
870 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
871 	dw->dw_state = DELAYED_WORK_SCHEDULED;
872 }
873 
874 /*
875  * dw_callout_destroy(wq, dw)
876  *
877  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
878  */
879 static void
880 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
881 {
882 
883 	KASSERT(mutex_owned(&wq->wq_lock));
884 	KASSERT(work_queue(&dw->work) == wq);
885 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
886 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
887 	    dw->dw_state == DELAYED_WORK_CANCELLED);
888 
889 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
890 	callout_destroy(&dw->dw_callout);
891 	dw->dw_resched = -1;
892 	dw->dw_state = DELAYED_WORK_IDLE;
893 }
894 
895 /*
896  * cancel_delayed_work_done(wq, dw)
897  *
898  *	Complete cancellation of a delayed work: transition from
899  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
900  *	workqueue.  Caller must not dereference dw after this returns.
901  */
902 static void
903 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
904 {
905 
906 	KASSERT(mutex_owned(&wq->wq_lock));
907 	KASSERT(work_queue(&dw->work) == wq);
908 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
909 
910 	dw_callout_destroy(wq, dw);
911 	release_work(&dw->work, wq);
912 	/* Can't dereference dw after this point.  */
913 }
914 
915 /*
916  * queue_delayed_work(wq, dw, ticks)
917  *
918  *	If dw is not currently scheduled or queued on wq, schedule it to
919  *	run after ticks.  If it is already pending, leave it as is and
920  *	return false; otherwise return true.
921  *
922  *	If ticks == 0, queue it to run as soon as the worker can,
923  *	without waiting for the next callout tick to run.
924  */
925 bool
926 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
927     unsigned long ticks)
928 {
929 	bool newly_queued;
930 
931 	mutex_enter(&wq->wq_lock);
932 	if (__predict_true(acquire_work(&dw->work, wq))) {
933 		/*
934 		 * It wasn't on any workqueue at all.  Schedule it to
935 		 * run on this one.
936 		 */
937 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
938 		if (ticks == 0) {
939 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
940 			    work_entry);
941 			cv_broadcast(&wq->wq_cv);
942 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
943 		} else {
944 			/*
945 			 * Initialize a callout and schedule to run
946 			 * after a delay.
947 			 */
948 			dw_callout_init(wq, dw);
949 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
950 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
951 		}
952 		newly_queued = true;
953 	} else {
954 		/* It was already on this workqueue.  */
955 		switch (dw->dw_state) {
956 		case DELAYED_WORK_IDLE:
957 		case DELAYED_WORK_SCHEDULED:
958 		case DELAYED_WORK_RESCHEDULED:
959 			/* On the queue or already scheduled.  Leave it.  */
960 			newly_queued = false;
961 			break;
962 		case DELAYED_WORK_CANCELLED:
963 			/*
964 			 * Scheduled and the callout began, but it was
965 			 * cancelled.  Reschedule it.
966 			 */
967 			if (ticks == 0) {
968 				dw->dw_state = DELAYED_WORK_SCHEDULED;
969 				SDT_PROBE2(sdt, linux, work, queue,
970 				    &dw->work, wq);
971 			} else {
972 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
973 				dw->dw_resched = MIN(INT_MAX, ticks);
974 				SDT_PROBE3(sdt, linux, work, schedule,
975 				    dw, wq, ticks);
976 			}
977 			newly_queued = true;
978 			break;
979 		default:
980 			panic("invalid delayed work state: %d",
981 			    dw->dw_state);
982 		}
983 	}
984 	mutex_exit(&wq->wq_lock);
985 
986 	return newly_queued;
987 }
988 
989 /*
990  * mod_delayed_work(wq, dw, ticks)
991  *
992  *	Schedule dw to run after ticks.  If scheduled or queued,
993  *	reschedule.  If ticks == 0, run without delay.
994  *
995  *	True if it modified the timer of an already scheduled work,
996  *	false if it newly scheduled the work.
997  */
998 bool
999 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1000     unsigned long ticks)
1001 {
1002 	bool timer_modified;
1003 
1004 	mutex_enter(&wq->wq_lock);
1005 	if (acquire_work(&dw->work, wq)) {
1006 		/*
1007 		 * It wasn't on any workqueue at all.  Schedule it to
1008 		 * run on this one.
1009 		 */
1010 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1011 		if (ticks == 0) {
1012 			/*
1013 			 * Run immediately: put it on the queue and
1014 			 * signal the worker thread.
1015 			 */
1016 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1017 			    work_entry);
1018 			cv_broadcast(&wq->wq_cv);
1019 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
1020 		} else {
1021 			/*
1022 			 * Initialize a callout and schedule to run
1023 			 * after a delay.
1024 			 */
1025 			dw_callout_init(wq, dw);
1026 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1027 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
1028 		}
1029 		timer_modified = false;
1030 	} else {
1031 		/* It was already on this workqueue.  */
1032 		switch (dw->dw_state) {
1033 		case DELAYED_WORK_IDLE:
1034 			/* On the queue.  */
1035 			if (ticks == 0) {
1036 				/* Leave it be.  */
1037 				SDT_PROBE2(sdt, linux, work, cancel,
1038 				    &dw->work, wq);
1039 				SDT_PROBE2(sdt, linux, work, queue,
1040 				    &dw->work, wq);
1041 			} else {
1042 				/* Remove from the queue and schedule.  */
1043 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1044 				    work_entry);
1045 				dw_callout_init(wq, dw);
1046 				callout_schedule(&dw->dw_callout,
1047 				    MIN(INT_MAX, ticks));
1048 				SDT_PROBE2(sdt, linux, work, cancel,
1049 				    &dw->work, wq);
1050 				SDT_PROBE3(sdt, linux, work, schedule,
1051 				    dw, wq, ticks);
1052 			}
1053 			timer_modified = true;
1054 			break;
1055 		case DELAYED_WORK_SCHEDULED:
1056 			/*
1057 			 * It is scheduled to run after a delay.  Try
1058 			 * to stop it and reschedule it; if we can't,
1059 			 * either reschedule it or cancel it to put it
1060 			 * on the queue, and inform the callout.
1061 			 */
1062 			if (callout_stop(&dw->dw_callout)) {
1063 				/* Can't stop, callout has begun.  */
1064 				if (ticks == 0) {
1065 					/*
1066 					 * We don't actually need to do
1067 					 * anything.  The callout will
1068 					 * queue it as soon as it gets
1069 					 * the lock.
1070 					 */
1071 					SDT_PROBE2(sdt, linux, work, cancel,
1072 					    &dw->work, wq);
1073 					SDT_PROBE2(sdt, linux, work, queue,
1074 					    &dw->work, wq);
1075 				} else {
1076 					/* Ask the callout to reschedule.  */
1077 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1078 					dw->dw_resched = MIN(INT_MAX, ticks);
1079 					SDT_PROBE2(sdt, linux, work, cancel,
1080 					    &dw->work, wq);
1081 					SDT_PROBE3(sdt, linux, work, schedule,
1082 					    dw, wq, ticks);
1083 				}
1084 			} else {
1085 				/* We stopped the callout before it began.  */
1086 				if (ticks == 0) {
1087 					/*
1088 					 * Run immediately: destroy the
1089 					 * callout, put it on the
1090 					 * queue, and signal the worker
1091 					 * thread.
1092 					 */
1093 					dw_callout_destroy(wq, dw);
1094 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1095 					    &dw->work, work_entry);
1096 					cv_broadcast(&wq->wq_cv);
1097 					SDT_PROBE2(sdt, linux, work, cancel,
1098 					    &dw->work, wq);
1099 					SDT_PROBE2(sdt, linux, work, queue,
1100 					    &dw->work, wq);
1101 				} else {
1102 					/*
1103 					 * Reschedule the callout.  No
1104 					 * state change.
1105 					 */
1106 					callout_schedule(&dw->dw_callout,
1107 					    MIN(INT_MAX, ticks));
1108 					SDT_PROBE2(sdt, linux, work, cancel,
1109 					    &dw->work, wq);
1110 					SDT_PROBE3(sdt, linux, work, schedule,
1111 					    dw, wq, ticks);
1112 				}
1113 			}
1114 			timer_modified = true;
1115 			break;
1116 		case DELAYED_WORK_RESCHEDULED:
1117 			/*
1118 			 * Someone rescheduled it after the callout
1119 			 * started but before the poor thing even had a
1120 			 * chance to acquire the lock.
1121 			 */
1122 			if (ticks == 0) {
1123 				/*
1124 				 * We can just switch back to
1125 				 * DELAYED_WORK_SCHEDULED so that the
1126 				 * callout will queue the work as soon
1127 				 * as it gets the lock.
1128 				 */
1129 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1130 				dw->dw_resched = -1;
1131 				SDT_PROBE2(sdt, linux, work, cancel,
1132 				    &dw->work, wq);
1133 				SDT_PROBE2(sdt, linux, work, queue,
1134 				    &dw->work, wq);
1135 			} else {
1136 				/* Change the rescheduled time.  */
1137 				dw->dw_resched = MIN(INT_MAX, ticks);
1138 				SDT_PROBE2(sdt, linux, work, cancel,
1139 				    &dw->work, wq);
1140 				SDT_PROBE3(sdt, linux, work, schedule,
1141 				    dw, wq, ticks);
1142 			}
1143 			timer_modified = true;
1144 			break;
1145 		case DELAYED_WORK_CANCELLED:
1146 			/*
1147 			 * Someone cancelled it after the callout
1148 			 * started but before the poor thing even had a
1149 			 * chance to acquire the lock.
1150 			 */
1151 			if (ticks == 0) {
1152 				/*
1153 				 * We can just switch back to
1154 				 * DELAYED_WORK_SCHEDULED so that the
1155 				 * callout will queue the work as soon
1156 				 * as it gets the lock.
1157 				 */
1158 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1159 				SDT_PROBE2(sdt, linux, work, queue,
1160 				    &dw->work, wq);
1161 			} else {
1162 				/* Ask it to reschedule.  */
1163 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1164 				dw->dw_resched = MIN(INT_MAX, ticks);
1165 				SDT_PROBE3(sdt, linux, work, schedule,
1166 				    dw, wq, ticks);
1167 			}
1168 			timer_modified = false;
1169 			break;
1170 		default:
1171 			panic("invalid delayed work state: %d", dw->dw_state);
1172 		}
1173 	}
1174 	mutex_exit(&wq->wq_lock);
1175 
1176 	return timer_modified;
1177 }
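
/*
 * mod_delayed_work() is the natural primitive for debouncing: each
 * call restarts the countdown, so the handler runs roughly ticks after
 * the last event rather than after the first.  Sketch (hypothetical
 * names):
 *
 *	void
 *	mydrv_event(struct mydrv_softc *sc)
 *	{
 *		mod_delayed_work(system_wq, &sc->sc_dwork, mstohz(100));
 *	}
 */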
1178 
1179 /*
1180  * cancel_delayed_work(dw)
1181  *
1182  *	If work was scheduled or queued, remove it from the schedule or
1183  *	queue and return true.  If work was not scheduled or queued,
1184  *	return false.  Note that work may already be running; if it
1185  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1186  *	will return false, and either way, cancel_delayed_work will NOT
1187  *	wait for the work to complete.
1188  */
1189 bool
1190 cancel_delayed_work(struct delayed_work *dw)
1191 {
1192 	struct workqueue_struct *wq;
1193 	bool cancelled_p;
1194 
1195 	/* If there's no workqueue, nothing to cancel.   */
1196 	if ((wq = work_queue(&dw->work)) == NULL)
1197 		return false;
1198 
1199 	mutex_enter(&wq->wq_lock);
1200 	if (__predict_false(work_queue(&dw->work) != wq)) {
1201 		cancelled_p = false;
1202 	} else {
1203 		switch (dw->dw_state) {
1204 		case DELAYED_WORK_IDLE:
1205 			/*
1206 			 * It is either on the queue or already running
1207 			 * or both.
1208 			 */
1209 			if (work_claimed(&dw->work, wq)) {
1210 				/* On the queue.  Remove and release.  */
1211 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1212 				    work_entry);
1213 				SDT_PROBE2(sdt, linux, work, cancel,
1214 				    &dw->work, wq);
1215 				release_work(&dw->work, wq);
1216 				/* Can't dereference dw after this point.  */
1217 				cancelled_p = true;
1218 			} else {
1219 				/* Not on the queue, so didn't cancel.  */
1220 				cancelled_p = false;
1221 			}
1222 			break;
1223 		case DELAYED_WORK_SCHEDULED:
1224 			/*
1225 			 * If it is scheduled, mark it cancelled and
1226 			 * try to stop the callout before it starts.
1227 			 *
1228 			 * If it's too late and the callout has already
1229 			 * begun to execute, tough.
1230 			 *
1231 			 * If we stopped the callout before it started,
1232 			 * however, then destroy the callout and
1233 			 * dissociate it from the workqueue ourselves.
1234 			 */
1235 			dw->dw_state = DELAYED_WORK_CANCELLED;
1236 			cancelled_p = true;
1237 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1238 			if (!callout_stop(&dw->dw_callout))
1239 				cancel_delayed_work_done(wq, dw);
1240 			break;
1241 		case DELAYED_WORK_RESCHEDULED:
1242 			/*
1243 			 * If it is being rescheduled, the callout has
1244 			 * already fired.  We must ask it to cancel.
1245 			 */
1246 			dw->dw_state = DELAYED_WORK_CANCELLED;
1247 			dw->dw_resched = -1;
1248 			cancelled_p = true;
1249 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1250 			break;
1251 		case DELAYED_WORK_CANCELLED:
1252 			/*
1253 			 * If it is being cancelled, the callout has
1254 			 * already fired.  There is nothing more for us
1255 			 * to do.  Someone else claims credit for
1256 			 * cancelling it.
1257 			 */
1258 			cancelled_p = false;
1259 			break;
1260 		default:
1261 			panic("invalid delayed work state: %d",
1262 			    dw->dw_state);
1263 		}
1264 	}
1265 	mutex_exit(&wq->wq_lock);
1266 
1267 	return cancelled_p;
1268 }
1269 
1270 /*
1271  * cancel_delayed_work_sync(dw)
1272  *
1273  *	If work was scheduled or queued, remove it from the schedule or
1274  *	queue and return true.  If work was not scheduled or queued,
1275  *	return false.  Note that work may already be running; if it
1276  *	hasn't been rescheduled or requeued, then cancel_delayed_work_sync
1277  *	will return false; either way, wait for it to complete.
1278  */
1279 bool
1280 cancel_delayed_work_sync(struct delayed_work *dw)
1281 {
1282 	struct workqueue_struct *wq;
1283 	bool cancelled_p;
1284 
1285 	/* If there's no workqueue, nothing to cancel.  */
1286 	if ((wq = work_queue(&dw->work)) == NULL)
1287 		return false;
1288 
1289 	mutex_enter(&wq->wq_lock);
1290 	if (__predict_false(work_queue(&dw->work) != wq)) {
1291 		cancelled_p = false;
1292 	} else {
1293 		switch (dw->dw_state) {
1294 		case DELAYED_WORK_IDLE:
1295 			/*
1296 			 * It is either on the queue or already running
1297 			 * or both.
1298 			 */
1299 			if (work_claimed(&dw->work, wq)) {
1300 				/* On the queue.  Remove and release.  */
1301 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1302 				    work_entry);
1303 				SDT_PROBE2(sdt, linux, work, cancel,
1304 				    &dw->work, wq);
1305 				release_work(&dw->work, wq);
1306 				/* Can't dereference dw after this point.  */
1307 				cancelled_p = true;
1308 			} else {
1309 				/* Not on the queue, so didn't cancel. */
1310 				cancelled_p = false;
1311 			}
1312 			/* If it's still running, wait for it to complete.  */
1313 			if (wq->wq_current_work == &dw->work)
1314 				wait_for_current_work(&dw->work, wq);
1315 			break;
1316 		case DELAYED_WORK_SCHEDULED:
1317 			/*
1318 			 * If it is scheduled, mark it cancelled and
1319 			 * try to stop the callout before it starts.
1320 			 *
1321 			 * If it's too late and the callout has already
1322 			 * begun to execute, we must wait for it to
1323 			 * complete.  But we got in soon enough to ask
1324 			 * the callout not to run, so we successfully
1325 			 * cancelled it in that case.
1326 			 *
1327 			 * If we stopped the callout before it started,
1328 			 * then we must destroy the callout and
1329 			 * dissociate it from the workqueue ourselves.
1330 			 */
1331 			dw->dw_state = DELAYED_WORK_CANCELLED;
1332 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1333 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1334 				cancel_delayed_work_done(wq, dw);
1335 			cancelled_p = true;
1336 			break;
1337 		case DELAYED_WORK_RESCHEDULED:
1338 			/*
1339 			 * If it is being rescheduled, the callout has
1340 			 * already fired.  We must ask it to cancel and
1341 			 * wait for it to complete.
1342 			 */
1343 			dw->dw_state = DELAYED_WORK_CANCELLED;
1344 			dw->dw_resched = -1;
1345 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1346 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1347 			cancelled_p = true;
1348 			break;
1349 		case DELAYED_WORK_CANCELLED:
1350 			/*
1351 			 * If it is being cancelled, the callout has
1352 			 * already fired.  We need only wait for it to
1353 			 * complete.  Someone else, however, claims
1354 			 * credit for cancelling it.
1355 			 */
1356 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1357 			cancelled_p = false;
1358 			break;
1359 		default:
1360 			panic("invalid delayed work state: %d",
1361 			    dw->dw_state);
1362 		}
1363 	}
1364 	mutex_exit(&wq->wq_lock);
1365 
1366 	return cancelled_p;
1367 }
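
/*
 * Teardown of delayed work mirrors cancel_work_sync() (sketch only;
 * sc_stopping is a hypothetical flag that keeps the handler from
 * rescheduling itself):
 *
 *	sc->sc_stopping = true;
 *	(void)cancel_delayed_work_sync(&sc->sc_dwork);
 *	kmem_free(sc, sizeof(*sc));
 *
 * Once cancel_delayed_work_sync() returns, neither the callout nor the
 * handler is still using the memory that embeds sc_dwork, provided
 * nothing requeues it afterwards.
 */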
1368 
1369 /*
1370  * Flush
1371  */
1372 
1373 /*
1374  * flush_scheduled_work()
1375  *
1376  *	Wait for all work queued on system_wq to complete.  This does
1377  *	not include delayed work.
1378  */
1379 void
1380 flush_scheduled_work(void)
1381 {
1382 
1383 	flush_workqueue(system_wq);
1384 }
1385 
1386 /*
1387  * flush_workqueue_locked(wq)
1388  *
1389  *	Wait for all work queued on wq to complete.  This does not
1390  *	include delayed work.
1391  *
1392  *	Caller must hold wq's lock.
1393  */
1394 static void
1395 flush_workqueue_locked(struct workqueue_struct *wq)
1396 {
1397 	uint64_t gen;
1398 
1399 	KASSERT(mutex_owned(&wq->wq_lock));
1400 
1401 	/* Get the current generation number.  */
1402 	gen = wq->wq_gen;
1403 
1404 	/*
1405 	 * If there's a batch of work in progress, we must wait for the
1406 	 * worker thread to finish that batch.
1407 	 */
1408 	if (wq->wq_current_work != NULL)
1409 		gen++;
1410 
1411 	/*
1412 	 * If there's any work yet to be claimed from the queue by the
1413 	 * worker thread, we must wait for it to finish one more batch
1414 	 * too.
1415 	 */
1416 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue))
1417 		gen++;
1418 
1419 	/* Wait until the generation number has caught up.  */
1420 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1421 	while (wq->wq_gen < gen)
1422 		cv_wait(&wq->wq_cv, &wq->wq_lock);
1423 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1424 }
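
/*
 * A concrete reading of the generation arithmetic above: if wq_gen is
 * 5, the worker is mid-batch, and more work sits on wq_queue, the
 * in-progress batch completes at generation 6 and the already-queued
 * work completes in the following batch at generation 7, so the
 * flusher waits for wq_gen to reach 7.  Work queued after the flush
 * begins is not guaranteed to be waited for.
 */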
1425 
1426 /*
1427  * flush_workqueue(wq)
1428  *
1429  *	Wait for all work queued on wq to complete.  This does not
1430  *	include delayed work.
1431  */
1432 void
1433 flush_workqueue(struct workqueue_struct *wq)
1434 {
1435 
1436 	mutex_enter(&wq->wq_lock);
1437 	flush_workqueue_locked(wq);
1438 	mutex_exit(&wq->wq_lock);
1439 }
1440 
1441 /*
1442  * flush_work(work)
1443  *
1444  *	If work is queued or currently executing, wait for it to
1445  *	complete.
1446  *
1447  *	Return true if we waited to flush it, false if it was already
1448  *	idle.
1449  */
1450 bool
1451 flush_work(struct work_struct *work)
1452 {
1453 	struct workqueue_struct *wq;
1454 
1455 	/* If there's no workqueue, nothing to flush.  */
1456 	if ((wq = work_queue(work)) == NULL)
1457 		return false;
1458 
1459 	flush_workqueue(wq);
1460 	return true;
1461 }
1462 
1463 /*
1464  * flush_delayed_work(dw)
1465  *
1466  *	If dw is scheduled to run after a delay, queue it immediately
1467  *	instead.  Then, if dw is queued or currently executing, wait
1468  *	for it to complete.
1469  */
1470 bool
1471 flush_delayed_work(struct delayed_work *dw)
1472 {
1473 	struct workqueue_struct *wq;
1474 	bool waited = false;
1475 
1476 	/* If there's no workqueue, nothing to flush.  */
1477 	if ((wq = work_queue(&dw->work)) == NULL)
1478 		return false;
1479 
1480 	mutex_enter(&wq->wq_lock);
1481 	if (__predict_false(work_queue(&dw->work) != wq)) {
1482 		/*
1483 		 * Moved off the queue already (and possibly to another
1484 		 * queue, though that would be ill-advised), so it must
1485 		 * have completed, and we have nothing more to do.
1486 		 */
1487 		waited = false;
1488 	} else {
1489 		switch (dw->dw_state) {
1490 		case DELAYED_WORK_IDLE:
1491 			/*
1492 			 * It has a workqueue assigned and the callout
1493 			 * is idle, so it must be in progress or on the
1494 			 * queue.  In that case, we'll wait for it to
1495 			 * complete.
1496 			 */
1497 			break;
1498 		case DELAYED_WORK_SCHEDULED:
1499 		case DELAYED_WORK_RESCHEDULED:
1500 		case DELAYED_WORK_CANCELLED:
1501 			/*
1502 			 * The callout is scheduled, and may have even
1503 			 * started.  Mark it as scheduled so that if
1504 			 * the callout has fired it will queue the work
1505 			 * itself.  Try to stop the callout -- if we
1506 			 * can, queue the work now; if we can't, wait
1507 			 * for the callout to complete, which entails
1508 			 * queueing it.
1509 			 */
1510 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1511 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1512 				/*
1513 				 * We stopped it before it ran.  No
1514 				 * state change in the interim is
1515 				 * possible.  Destroy the callout and
1516 				 * queue it ourselves.
1517 				 */
1518 				KASSERT(dw->dw_state ==
1519 				    DELAYED_WORK_SCHEDULED);
1520 				dw_callout_destroy(wq, dw);
1521 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1522 				    work_entry);
1523 				cv_broadcast(&wq->wq_cv);
1524 				SDT_PROBE2(sdt, linux, work, queue,
1525 				    &dw->work, wq);
1526 			}
1527 			break;
1528 		default:
1529 			panic("invalid delayed work state: %d", dw->dw_state);
1530 		}
1531 		/*
1532 		 * Waiting for the whole queue to flush is overkill,
1533 		 * but doesn't hurt.
1534 		 */
1535 		flush_workqueue_locked(wq);
1536 		waited = true;
1537 	}
1538 	mutex_exit(&wq->wq_lock);
1539 
1540 	return waited;
1541 }
1542 
1543 /*
1544  * delayed_work_pending(dw)
1545  *
1546  *	True if dw is currently scheduled to execute, false if not.
1547  */
1548 bool
1549 delayed_work_pending(struct delayed_work *dw)
1550 {
1551 
1552 	return work_pending(&dw->work);
1553 }
1554