/*	$NetBSD: linux_work.c,v 1.45 2020/02/01 22:38:05 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.45 2020/02/01 22:38:05 riastradh Exp $");

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/callout.h>
#include <sys/condvar.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#ifndef _MODULE
#include <sys/once.h>
#endif
#include <sys/queue.h>
#include <sys/sdt.h>

#include <linux/workqueue.h>

TAILQ_HEAD(work_head, work_struct);
TAILQ_HEAD(dwork_head, delayed_work);

struct workqueue_struct {
	kmutex_t		wq_lock;
	kcondvar_t		wq_cv;
	struct dwork_head	wq_delayed; /* delayed work scheduled */
	struct work_head	wq_queue;   /* work to run */
	struct work_head	wq_dqueue;  /* delayed work to run now */
	struct work_struct	*wq_current_work;
	int			wq_flags;
	bool			wq_dying;
	uint64_t		wq_gen;
	struct lwp		*wq_lwp;
};

static void __dead	linux_workqueue_thread(void *);
static void		linux_workqueue_timeout(void *);
static bool		work_claimed(struct work_struct *,
			    struct workqueue_struct *);
static struct workqueue_struct *
			work_queue(struct work_struct *);
static bool		acquire_work(struct work_struct *,
			    struct workqueue_struct *);
static void		release_work(struct work_struct *,
			    struct workqueue_struct *);
static void		wait_for_current_work(struct work_struct *,
			    struct workqueue_struct *);
static void		dw_callout_init(struct workqueue_struct *,
			    struct delayed_work *);
static void		dw_callout_destroy(struct workqueue_struct *,
			    struct delayed_work *);
static void		cancel_delayed_work_done(struct workqueue_struct *,
			    struct delayed_work *);

SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, release,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, queue,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
    "unsigned long"/*ticks*/);
SDT_PROBE_DEFINE2(sdt, linux, work, timer,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
    "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, run,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE2(sdt, linux, work, done,
    "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
    "struct workqueue_struct *"/*wq*/);
SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
    "struct workqueue_struct *"/*wq*/);
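
/*
 * The probes above can be inspected with dtrace(1).  As a hedged,
 * illustrative sketch (not part of the original source), and assuming
 * these definitions show up as sdt:linux:work:* probes with the work
 * and workqueue pointers as arguments, counting queue and run events
 * per workqueue might look like:
 *
 *	dtrace -n 'sdt:linux:work:queue { @queued[arg1] = count(); }
 *	    sdt:linux:work:run { @run[arg1] = count(); }'
 */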

static specificdata_key_t workqueue_key __read_mostly;

struct workqueue_struct	*system_wq __read_mostly;
struct workqueue_struct	*system_long_wq __read_mostly;
struct workqueue_struct	*system_power_efficient_wq __read_mostly;

static inline uintptr_t
atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
{

	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
}

/*
 * linux_workqueue_init()
 *
 *	Initialize the Linux workqueue subsystem.  Return 0 on success,
 *	NetBSD error on failure.
 */
static int
linux_workqueue_init0(void)
{
	int error;

	error = lwp_specific_key_create(&workqueue_key, NULL);
	if (error)
		goto fail0;

	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
	if (system_wq == NULL) {
		error = ENOMEM;
		goto fail1;
	}

	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
	if (system_long_wq == NULL) {
		error = ENOMEM;
		goto fail2;
	}

	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
	if (system_power_efficient_wq == NULL) {
		error = ENOMEM;
		goto fail3;
	}

	return 0;

fail4: __unused
	destroy_workqueue(system_power_efficient_wq);
fail3:	destroy_workqueue(system_long_wq);
fail2:	destroy_workqueue(system_wq);
fail1:	lwp_specific_key_delete(workqueue_key);
fail0:	KASSERT(error);
	return error;
}

/*
 * linux_workqueue_fini()
 *
 *	Destroy the Linux workqueue subsystem.  Never fails.
 */
static void
linux_workqueue_fini0(void)
{

	destroy_workqueue(system_power_efficient_wq);
	destroy_workqueue(system_long_wq);
	destroy_workqueue(system_wq);
	lwp_specific_key_delete(workqueue_key);
}

#ifndef _MODULE
static ONCE_DECL(linux_workqueue_init_once);
#endif

int
linux_workqueue_init(void)
{
#ifdef _MODULE
	return linux_workqueue_init0();
#else
	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
#endif
}

void
linux_workqueue_fini(void)
{
#ifdef _MODULE
	return linux_workqueue_fini0();
#else
	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
#endif
}

/*
 * Workqueues
 */

/*
 * alloc_ordered_workqueue(name, flags)
 *
 *	Create a workqueue of the given name.  No flags are currently
 *	defined.  Return NULL on failure, pointer to struct
 *	workqueue_struct object on success.
 */
struct workqueue_struct *
alloc_ordered_workqueue(const char *name, int flags)
{
	struct workqueue_struct *wq;
	int error;

	KASSERT(flags == 0);

	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);

	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&wq->wq_cv, name);
	TAILQ_INIT(&wq->wq_delayed);
	TAILQ_INIT(&wq->wq_queue);
	TAILQ_INIT(&wq->wq_dqueue);
	wq->wq_current_work = NULL;
	wq->wq_flags = 0;
	wq->wq_dying = false;
	wq->wq_gen = 0;
	wq->wq_lwp = NULL;

	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
	if (error)
		goto fail0;

	return wq;

fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
	cv_destroy(&wq->wq_cv);
	mutex_destroy(&wq->wq_lock);
	kmem_free(wq, sizeof(*wq));
	return NULL;
}
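
/*
 * A brief usage sketch, not part of the original source; the driver,
 * softc, and names here (mydev_softc, sc_wq, "mydevwq") are
 * hypothetical.  A driver typically creates a private ordered
 * workqueue at attach time and destroys it at detach time:
 *
 *	sc->sc_wq = alloc_ordered_workqueue("mydevwq", 0);
 *	if (sc->sc_wq == NULL)
 *		return ENOMEM;
 *	...
 *	destroy_workqueue(sc->sc_wq);	(at detach time)
 */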

/*
 * destroy_workqueue(wq)
 *
 *	Destroy the workqueue wq.  Cancel any pending
 *	delayed work.  Wait for all queued work to complete.
 *
 *	May sleep.
 */
void
destroy_workqueue(struct workqueue_struct *wq)
{

	/*
	 * Cancel all delayed work.  We do this first because any
	 * delayed work that has already timed out, which we can't
	 * cancel, may have queued new work.
	 */
	mutex_enter(&wq->wq_lock);
	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);

		KASSERT(work_queue(&dw->work) == wq);
		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
			dw->dw_state == DELAYED_WORK_CANCELLED),
		    "delayed work %p in bad state: %d",
		    dw, dw->dw_state);

		/*
		 * Mark it cancelled and try to stop the callout before
		 * it starts.
		 *
		 * If it's too late and the callout has already begun
		 * to execute, then it will notice that we asked to
		 * cancel it and remove itself from the queue before
		 * returning.
		 *
		 * If we stopped the callout before it started,
		 * however, then we can safely destroy the callout and
		 * dissociate it from the workqueue ourselves.
		 */
		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
		dw->dw_state = DELAYED_WORK_CANCELLED;
		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
			cancel_delayed_work_done(wq, dw);
	}
	mutex_exit(&wq->wq_lock);

	/*
	 * At this point, no new work can be put on the queue.
	 */

	/* Tell the thread to exit.  */
	mutex_enter(&wq->wq_lock);
	wq->wq_dying = true;
	cv_broadcast(&wq->wq_cv);
	mutex_exit(&wq->wq_lock);

	/* Wait for it to exit.  */
	(void)kthread_join(wq->wq_lwp);

	KASSERT(wq->wq_dying);
	KASSERT(wq->wq_flags == 0);
	KASSERT(wq->wq_current_work == NULL);
	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
	cv_destroy(&wq->wq_cv);
	mutex_destroy(&wq->wq_lock);

	kmem_free(wq, sizeof(*wq));
}

/*
 * Work thread and callout
 */

/*
 * linux_workqueue_thread(cookie)
 *
 *	Main function for a workqueue's worker thread.  Waits until
 *	there is work queued, grabs a batch of work off the queue,
 *	executes it all, bumps the generation number, and repeats,
 *	until dying.
 */
static void __dead
linux_workqueue_thread(void *cookie)
{
	struct workqueue_struct *const wq = cookie;
	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
	struct work_struct marker, *work;
	unsigned i;

	lwp_setspecific(workqueue_key, wq);

	mutex_enter(&wq->wq_lock);
	for (;;) {
		/*
		 * Wait until there's activity.  If there's no work and
		 * we're dying, stop here.
		 */
		if (TAILQ_EMPTY(&wq->wq_queue) &&
		    TAILQ_EMPTY(&wq->wq_dqueue)) {
			if (wq->wq_dying)
				break;
			cv_wait(&wq->wq_cv, &wq->wq_lock);
			continue;
		}

		/*
		 * Start a batch of work.  Use a marker to delimit when
		 * the batch ends so we can advance the generation
		 * after the batch.
		 */
		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
		for (i = 0; i < 2; i++) {
			if (TAILQ_EMPTY(q[i]))
				continue;
			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
			while ((work = TAILQ_FIRST(q[i])) != &marker) {
				void (*func)(struct work_struct *);

				KASSERT(work_queue(work) == wq);
				KASSERT(work_claimed(work, wq));
				KASSERTMSG((q[i] != &wq->wq_dqueue ||
					container_of(work, struct delayed_work,
					    work)->dw_state ==
					DELAYED_WORK_IDLE),
				    "delayed work %p queued and scheduled",
				    work);

				TAILQ_REMOVE(q[i], work, work_entry);
				KASSERT(wq->wq_current_work == NULL);
				wq->wq_current_work = work;
				func = work->func;
				release_work(work, wq);
				/* Can't dereference work after this point.  */

				mutex_exit(&wq->wq_lock);
				SDT_PROBE2(sdt, linux, work, run,  work, wq);
				(*func)(work);
				SDT_PROBE2(sdt, linux, work, done,  work, wq);
				mutex_enter(&wq->wq_lock);

				KASSERT(wq->wq_current_work == work);
				wq->wq_current_work = NULL;
				cv_broadcast(&wq->wq_cv);
			}
			TAILQ_REMOVE(q[i], &marker, work_entry);
		}

		/* Notify flush that we've completed a batch of work.  */
		wq->wq_gen++;
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
	}
	mutex_exit(&wq->wq_lock);

	kthread_exit(0);
}

/*
 * linux_workqueue_timeout(cookie)
 *
 *	Delayed work timeout callback.
 *
 *	- If scheduled, queue it.
 *	- If rescheduled, callout_schedule ourselves again.
 *	- If cancelled, destroy the callout and release the work from
 *	  the workqueue.
 */
static void
linux_workqueue_timeout(void *cookie)
{
	struct delayed_work *const dw = cookie;
	struct workqueue_struct *const wq = work_queue(&dw->work);

	KASSERTMSG(wq != NULL,
	    "delayed work %p state %d resched %d",
	    dw, dw->dw_state, dw->dw_resched);

	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);

	mutex_enter(&wq->wq_lock);
	KASSERT(work_queue(&dw->work) == wq);
	switch (dw->dw_state) {
	case DELAYED_WORK_IDLE:
		panic("delayed work callout uninitialized: %p", dw);
	case DELAYED_WORK_SCHEDULED:
		dw_callout_destroy(wq, dw);
		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
		break;
	case DELAYED_WORK_RESCHEDULED:
		KASSERT(dw->dw_resched >= 0);
		callout_schedule(&dw->dw_callout, dw->dw_resched);
		dw->dw_state = DELAYED_WORK_SCHEDULED;
		dw->dw_resched = -1;
		break;
	case DELAYED_WORK_CANCELLED:
		cancel_delayed_work_done(wq, dw);
		/* Can't dereference dw after this point.  */
		goto out;
	default:
		panic("delayed work callout in bad state: %p", dw);
	}
	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
	    dw->dw_state == DELAYED_WORK_SCHEDULED);
out:	mutex_exit(&wq->wq_lock);
}

/*
 * current_work()
 *
 *	If in a workqueue worker thread, return the work it is
 *	currently executing.  Otherwise return NULL.
 */
struct work_struct *
current_work(void)
{
	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);

	/* If we're not a workqueue thread, then there's no work.  */
	if (wq == NULL)
		return NULL;

	/*
	 * Otherwise, this should be possible only while work is in
	 * progress.  Return the current work item.
	 */
	KASSERT(wq->wq_current_work != NULL);
	return wq->wq_current_work;
}
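
/*
 * An illustrative sketch (the handler and names are hypothetical, not
 * from this file): a work function may assert that it is running as
 * its worker thread's current work item.
 *
 *	static void
 *	mydev_task(struct work_struct *work)
 *	{
 *
 *		KASSERT(current_work() == work);
 *		...
 *	}
 */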

/*
 * Work
 */

/*
 * INIT_WORK(work, fn)
 *
 *	Initialize work for use with a workqueue to call fn in a worker
 *	thread.  There is no corresponding destruction operation.
 */
void
INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
{

	work->work_owner = 0;
	work->func = fn;
}
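
/*
 * Typical use, as a hedged sketch with hypothetical names (mydev_softc,
 * sc_work, mydev_work): embed the work_struct in a driver structure,
 * recover that structure with container_of in the handler, and
 * initialize the work once, e.g. at attach time.
 *
 *	struct mydev_softc {
 *		struct work_struct	sc_work;
 *		...
 *	};
 *
 *	static void
 *	mydev_work(struct work_struct *work)
 *	{
 *		struct mydev_softc *sc = container_of(work,
 *		    struct mydev_softc, sc_work);
 *		...
 *	}
 *
 *	INIT_WORK(&sc->sc_work, mydev_work);
 */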

/*
 * work_claimed(work, wq)
 *
 *	True if work is currently claimed by a workqueue, meaning it is
 *	either on the queue or scheduled in a callout.  The workqueue
 *	must be wq, and caller must hold wq's lock.
 */
static bool
work_claimed(struct work_struct *work, struct workqueue_struct *wq)
{

	KASSERT(work_queue(work) == wq);
	KASSERT(mutex_owned(&wq->wq_lock));

	return work->work_owner & 1;
}

/*
 * work_queue(work)
 *
 *	Return the last queue that work was queued on, or NULL if it
 *	was never queued.
 */
static struct workqueue_struct *
work_queue(struct work_struct *work)
{

	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
}
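
/*
 * In other words, work_owner is a tagged pointer: the low bit is the
 * "claimed" flag and the remaining bits hold the workqueue pointer.
 * Schematically (a restatement of the code above and below, not text
 * from the original):
 *
 *	work_owner = (uintptr_t)wq | claimed
 *
 * where claimed is 1 while the work is on the queue or scheduled in a
 * callout, and 0 otherwise.
 */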

/*
 * acquire_work(work, wq)
 *
 *	Try to claim work for wq.  If work is already claimed, it must
 *	be claimed by wq; return false.  If work is not already
 *	claimed, claim it, issue a memory barrier to match any prior
 *	release_work, and return true.
 *
 *	Caller must hold wq's lock.
 */
static bool
acquire_work(struct work_struct *work, struct workqueue_struct *wq)
{
	uintptr_t owner0, owner;

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(((uintptr_t)wq & 1) == 0);

	owner = (uintptr_t)wq | 1;
	do {
		owner0 = work->work_owner;
		if (owner0 & 1) {
			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
			return false;
		}
		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
	    owner0);

	KASSERT(work_queue(work) == wq);
	membar_enter();
	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
	return true;
}

/*
 * release_work(work, wq)
 *
 *	Issue a memory barrier to match any subsequent acquire_work and
 *	dissociate work from wq.
 *
 *	Caller must hold wq's lock and work must be associated with wq.
 */
static void
release_work(struct work_struct *work, struct workqueue_struct *wq)
{

	KASSERT(work_queue(work) == wq);
	KASSERT(mutex_owned(&wq->wq_lock));

	SDT_PROBE2(sdt, linux, work, release,  work, wq);
	membar_exit();

	/*
	 * Non-interlocked r/m/w is safe here because nobody else can
	 * write to this while the claimed bit is set and the workqueue
	 * lock is held.
	 */
	work->work_owner &= ~(uintptr_t)1;
}

/*
 * schedule_work(work)
 *
 *	If work is not already queued on system_wq, queue it to be run
 *	by system_wq's worker thread when it next can.  True if it was
 *	newly queued, false if it was already queued.  If the work was
 *	already running, queue it to run again.
 *
 *	Caller must ensure work is not queued to run on a different
 *	workqueue.
 */
bool
schedule_work(struct work_struct *work)
{

	return queue_work(system_wq, work);
}

/*
 * queue_work(wq, work)
 *
 *	If work is not already queued on wq, queue it to be run by wq's
 *	worker thread when it next can.  True if it was newly queued,
 *	false if it was already queued.  If the work was already
 *	running, queue it to run again.
 *
 *	Caller must ensure work is not queued to run on a different
 *	workqueue.
 */
bool
queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	bool newly_queued;

	KASSERT(wq != NULL);

	mutex_enter(&wq->wq_lock);
	if (__predict_true(acquire_work(work, wq))) {
		/*
		 * It wasn't on any workqueue at all.  Put it on this
		 * one, and signal the worker thread that there is work
		 * to do.
		 */
		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
		cv_broadcast(&wq->wq_cv);
		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
		newly_queued = true;
	} else {
		/*
		 * It was already on this workqueue.  Nothing to do
		 * since it is already queued.
		 */
		newly_queued = false;
	}
	mutex_exit(&wq->wq_lock);

	return newly_queued;
}
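
/*
 * Usage sketch (hypothetical interrupt handler, not from this file):
 * because wq_lock is a spin lock at IPL_VM, queue_work can be used
 * from contexts that must not sleep, such as an interrupt handler, to
 * push the real processing out to the worker thread:
 *
 *	static int
 *	mydev_intr(void *arg)
 *	{
 *		struct mydev_softc *sc = arg;
 *
 *		(acknowledge the hardware interrupt, then...)
 *		(void)queue_work(sc->sc_wq, &sc->sc_work);
 *		return 1;
 *	}
 *
 * If the work is already queued this is harmless; queue_work simply
 * returns false.
 */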

/*
 * cancel_work(work)
 *
 *	If work was queued, remove it from the queue and return true.
 *	If work was not queued, return false.  Work may still be
 *	running when this returns.
 */
bool
cancel_work(struct work_struct *work)
{
	struct workqueue_struct *wq;
	bool cancelled_p = false;

	/* If there's no workqueue, nothing to cancel.   */
	if ((wq = work_queue(work)) == NULL)
		goto out;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(work) != wq)) {
		/*
		 * It has finished execution or been cancelled by
		 * another thread, and has been moved off the
		 * workqueue, so it's too late to cancel.
		 */
		cancelled_p = false;
	} else {
		/* Check whether it's on the queue.  */
		if (work_claimed(work, wq)) {
			/*
			 * It is still on the queue.  Take it off the
			 * queue and report successful cancellation.
			 */
			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
			release_work(work, wq);
			/* Can't dereference work after this point.  */
			cancelled_p = true;
		} else {
			/* Not on the queue.  Couldn't cancel it.  */
			cancelled_p = false;
		}
	}
	mutex_exit(&wq->wq_lock);

out:	return cancelled_p;
}

/*
 * cancel_work_sync(work)
 *
 *	If work was queued, remove it from the queue and return true.
 *	If work was not queued, return false.  Either way, if work is
 *	currently running, wait for it to complete.
 *
 *	May sleep.
 */
bool
cancel_work_sync(struct work_struct *work)
{
	struct workqueue_struct *wq;
	bool cancelled_p = false;

	/* If there's no workqueue, nothing to cancel.   */
	if ((wq = work_queue(work)) == NULL)
		goto out;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(work) != wq)) {
		/*
		 * It has finished execution or been cancelled by
		 * another thread, and has been moved off the
		 * workqueue, so it's too late to cancel.
		 */
		cancelled_p = false;
	} else {
		/* Check whether it's on the queue.  */
		if (work_claimed(work, wq)) {
			/*
			 * It is still on the queue.  Take it off the
			 * queue and report successful cancellation.
			 */
			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
			release_work(work, wq);
			/* Can't dereference work after this point.  */
			cancelled_p = true;
		} else {
			/* Not on the queue.  Couldn't cancel it.  */
			cancelled_p = false;
		}
		/* If it's still running, wait for it to complete.  */
		if (wq->wq_current_work == work)
			wait_for_current_work(work, wq);
	}
	mutex_exit(&wq->wq_lock);

out:	return cancelled_p;
}
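
/*
 * A detach-time sketch (hypothetical names): before freeing the
 * structure that embeds the work, make sure the work is neither queued
 * nor still running:
 *
 *	(void)cancel_work_sync(&sc->sc_work);
 *	kmem_free(sc, sizeof(*sc));
 */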

/*
 * wait_for_current_work(work, wq)
 *
 *	wq must be currently executing work.  Wait for it to finish.
 *
 *	Does not dereference work.
 */
static void
wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
{
	uint64_t gen;

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(wq->wq_current_work == work);

	/* Wait only one generation in case it gets requeued quickly.  */
	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
	gen = wq->wq_gen;
	do {
		cv_wait(&wq->wq_cv, &wq->wq_lock);
	} while (wq->wq_current_work == work && wq->wq_gen == gen);
	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
}

/*
 * Delayed work
 */

/*
 * INIT_DELAYED_WORK(dw, fn)
 *
 *	Initialize dw for use with a workqueue to call fn in a worker
 *	thread after a delay.  There is no corresponding destruction
 *	operation.
 */
void
INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
{

	INIT_WORK(&dw->work, fn);
	dw->dw_state = DELAYED_WORK_IDLE;
	dw->dw_resched = -1;

	/*
	 * Defer callout_init until we are going to schedule the
	 * callout, which can then callout_destroy it: since there is
	 * no DESTROY_DELAYED_WORK or anything like it, we would
	 * otherwise have no opportunity to call callout_destroy.
	 */
}

/*
 * schedule_delayed_work(dw, ticks)
 *
 *	If it is not currently scheduled, schedule dw to run after
 *	ticks on system_wq.  If currently executing and not already
 *	rescheduled, reschedule it.  True if it was newly scheduled,
 *	false if it was already scheduled.
 *
 *	If ticks == 0, queue it to run as soon as the worker can,
 *	without waiting for the next callout tick to run.
 */
bool
schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
{

	return queue_delayed_work(system_wq, dw, ticks);
}
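
/*
 * A common pattern, sketched here with hypothetical names (mydev_softc,
 * sc_poll, sc_stopping, mydev_poll): a periodic poll that rearms itself
 * from its own handler until the driver asks it to stop.
 *
 *	static void
 *	mydev_poll(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct mydev_softc *sc = container_of(dw,
 *		    struct mydev_softc, sc_poll);
 *
 *		(poll the hardware, then...)
 *		if (!sc->sc_stopping)
 *			(void)schedule_delayed_work(&sc->sc_poll, hz);
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->sc_poll, mydev_poll);
 *	(void)schedule_delayed_work(&sc->sc_poll, hz);
 */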

/*
 * dw_callout_init(wq, dw)
 *
 *	Initialize the callout of dw and transition to
 *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
 */
static void
dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
{

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(work_queue(&dw->work) == wq);
	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);

	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
	dw->dw_state = DELAYED_WORK_SCHEDULED;
}

/*
 * dw_callout_destroy(wq, dw)
 *
 *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
 */
static void
dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
{

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(work_queue(&dw->work) == wq);
	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
	    dw->dw_state == DELAYED_WORK_CANCELLED);

	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
	callout_destroy(&dw->dw_callout);
	dw->dw_resched = -1;
	dw->dw_state = DELAYED_WORK_IDLE;
}

/*
 * cancel_delayed_work_done(wq, dw)
 *
 *	Complete cancellation of a delayed work: transition from
 *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
 *	workqueue.  Caller must not dereference dw after this returns.
 */
static void
cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
{

	KASSERT(mutex_owned(&wq->wq_lock));
	KASSERT(work_queue(&dw->work) == wq);
	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);

	dw_callout_destroy(wq, dw);
	release_work(&dw->work, wq);
	/* Can't dereference dw after this point.  */
}

/*
 * queue_delayed_work(wq, dw, ticks)
 *
 *	If it is not currently scheduled, schedule dw to run after
 *	ticks on wq.  If it is already queued or scheduled, leave it
 *	alone.  True if it was newly scheduled, false if it was
 *	already queued or scheduled.
 *
 *	If ticks == 0, queue it to run as soon as the worker can,
 *	without waiting for the next callout tick to run.
 */
bool
queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    unsigned long ticks)
{
	bool newly_queued;

	mutex_enter(&wq->wq_lock);
	if (__predict_true(acquire_work(&dw->work, wq))) {
		/*
		 * It wasn't on any workqueue at all.  Schedule it to
		 * run on this one.
		 */
		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
		if (ticks == 0) {
			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
			    work_entry);
			cv_broadcast(&wq->wq_cv);
			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
		} else {
			/*
			 * Initialize a callout and schedule to run
			 * after a delay.
			 */
			dw_callout_init(wq, dw);
			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
		}
		newly_queued = true;
	} else {
		/* It was already on this workqueue.  */
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
		case DELAYED_WORK_SCHEDULED:
		case DELAYED_WORK_RESCHEDULED:
			/* On the queue or already scheduled.  Leave it.  */
			newly_queued = false;
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * Scheduled and the callout began, but it was
			 * cancelled.  Reschedule it.
			 */
			if (ticks == 0) {
				dw->dw_state = DELAYED_WORK_SCHEDULED;
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			} else {
				dw->dw_state = DELAYED_WORK_RESCHEDULED;
				dw->dw_resched = MIN(INT_MAX, ticks);
				SDT_PROBE3(sdt, linux, work, schedule,
				    dw, wq, ticks);
			}
			newly_queued = true;
			break;
		default:
			panic("invalid delayed work state: %d",
			    dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return newly_queued;
}

/*
 * mod_delayed_work(wq, dw, ticks)
 *
 *	Schedule dw to run after ticks.  If scheduled or queued,
 *	reschedule.  If ticks == 0, run without delay.
 *
 *	True if it modified the timer of an already scheduled work,
 *	false if it newly scheduled the work.
 */
bool
mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    unsigned long ticks)
{
	bool timer_modified;

	mutex_enter(&wq->wq_lock);
	if (acquire_work(&dw->work, wq)) {
		/*
		 * It wasn't on any workqueue at all.  Schedule it to
		 * run on this one.
		 */
		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
		if (ticks == 0) {
			/*
			 * Run immediately: put it on the queue and
			 * signal the worker thread.
			 */
			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
			    work_entry);
			cv_broadcast(&wq->wq_cv);
			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
		} else {
			/*
			 * Initialize a callout and schedule to run
			 * after a delay.
			 */
			dw_callout_init(wq, dw);
			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
		}
		timer_modified = false;
	} else {
		/* It was already on this workqueue.  */
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/* On the queue.  */
			if (ticks == 0) {
				/* Leave it be.  */
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			} else {
				/* Remove from the queue and schedule.  */
				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
				    work_entry);
				dw_callout_init(wq, dw);
				callout_schedule(&dw->dw_callout,
				    MIN(INT_MAX, ticks));
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				SDT_PROBE3(sdt, linux, work, schedule,
				    dw, wq, ticks);
			}
			timer_modified = true;
			break;
		case DELAYED_WORK_SCHEDULED:
			/*
			 * It is scheduled to run after a delay.  Try
			 * to stop it and reschedule it; if we can't,
			 * either reschedule it or cancel it to put it
			 * on the queue, and inform the callout.
			 */
			if (callout_stop(&dw->dw_callout)) {
				/* Can't stop, callout has begun.  */
				if (ticks == 0) {
					/*
					 * We don't actually need to do
					 * anything.  The callout will
					 * queue it as soon as it gets
					 * the lock.
					 */
					SDT_PROBE2(sdt, linux, work, cancel,
					    &dw->work, wq);
					SDT_PROBE2(sdt, linux, work, queue,
					    &dw->work, wq);
				} else {
					/* Ask the callout to reschedule.  */
					dw->dw_state = DELAYED_WORK_RESCHEDULED;
					dw->dw_resched = MIN(INT_MAX, ticks);
					SDT_PROBE2(sdt, linux, work, cancel,
					    &dw->work, wq);
					SDT_PROBE3(sdt, linux, work, schedule,
					    dw, wq, ticks);
				}
			} else {
				/* We stopped the callout before it began.  */
				if (ticks == 0) {
					/*
					 * Run immediately: destroy the
					 * callout, put it on the
					 * queue, and signal the worker
					 * thread.
					 */
					dw_callout_destroy(wq, dw);
					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
					    &dw->work, work_entry);
					cv_broadcast(&wq->wq_cv);
					SDT_PROBE2(sdt, linux, work, cancel,
					    &dw->work, wq);
					SDT_PROBE2(sdt, linux, work, queue,
					    &dw->work, wq);
				} else {
					/*
					 * Reschedule the callout.  No
					 * state change.
					 */
					callout_schedule(&dw->dw_callout,
					    MIN(INT_MAX, ticks));
					SDT_PROBE2(sdt, linux, work, cancel,
					    &dw->work, wq);
					SDT_PROBE3(sdt, linux, work, schedule,
					    dw, wq, ticks);
				}
			}
			timer_modified = true;
			break;
		case DELAYED_WORK_RESCHEDULED:
			/*
			 * Someone rescheduled it after the callout
			 * started but before the poor thing even had a
			 * chance to acquire the lock.
			 */
			if (ticks == 0) {
				/*
				 * We can just switch back to
				 * DELAYED_WORK_SCHEDULED so that the
				 * callout will queue the work as soon
				 * as it gets the lock.
				 */
				dw->dw_state = DELAYED_WORK_SCHEDULED;
				dw->dw_resched = -1;
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			} else {
				dw->dw_resched = MIN(INT_MAX, ticks);
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				SDT_PROBE3(sdt, linux, work, schedule,
				    dw, wq, ticks);
			}
			timer_modified = true;
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * Someone cancelled it after the callout
			 * started but before the poor thing even had a
			 * chance to acquire the lock.
			 */
			if (ticks == 0) {
				/*
				 * We can just switch back to
				 * DELAYED_WORK_SCHEDULED so that the
				 * callout will queue the work as soon
				 * as it gets the lock.
				 */
				dw->dw_state = DELAYED_WORK_SCHEDULED;
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			} else {
				/* Ask it to reschedule.  */
				dw->dw_state = DELAYED_WORK_RESCHEDULED;
				dw->dw_resched = MIN(INT_MAX, ticks);
				SDT_PROBE3(sdt, linux, work, schedule,
				    dw, wq, ticks);
			}
			timer_modified = false;
			break;
		default:
			panic("invalid delayed work state: %d", dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return timer_modified;
}
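
/*
 * An illustrative sketch (hypothetical watchdog, not from this file):
 * mod_delayed_work is the natural way to push a timeout further into
 * the future on every bit of activity, whether the timeout is
 * currently idle, scheduled, or queued.  For instance, restarting a
 * five-second watchdog after each completed transfer:
 *
 *	(void)mod_delayed_work(sc->sc_wq, &sc->sc_watchdog, 5*hz);
 */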

/*
 * cancel_delayed_work(dw)
 *
 *	If work was scheduled or queued, remove it from the schedule or
 *	queue and return true.  If work was not scheduled or queued,
 *	return false.  Note that work may already be running; if it
 *	hasn't been rescheduled or requeued, then cancel_delayed_work
 *	will return false, and either way, cancel_delayed_work will NOT
 *	wait for the work to complete.
 */
bool
cancel_delayed_work(struct delayed_work *dw)
{
	struct workqueue_struct *wq;
	bool cancelled_p;

	/* If there's no workqueue, nothing to cancel.   */
	if ((wq = work_queue(&dw->work)) == NULL)
		return false;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(&dw->work) != wq)) {
		cancelled_p = false;
	} else {
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It is either on the queue or already running
			 * or both.
			 */
			if (work_claimed(&dw->work, wq)) {
				/* On the queue.  Remove and release.  */
				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
				    work_entry);
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				release_work(&dw->work, wq);
				/* Can't dereference dw after this point.  */
				cancelled_p = true;
			} else {
				/* Not on the queue, so didn't cancel.  */
				cancelled_p = false;
			}
			break;
		case DELAYED_WORK_SCHEDULED:
			/*
			 * If it is scheduled, mark it cancelled and
			 * try to stop the callout before it starts.
			 *
			 * If it's too late and the callout has already
			 * begun to execute, tough.
			 *
			 * If we stopped the callout before it started,
			 * however, then destroy the callout and
			 * dissociate it from the workqueue ourselves.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			cancelled_p = true;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			if (!callout_stop(&dw->dw_callout))
				cancel_delayed_work_done(wq, dw);
			break;
		case DELAYED_WORK_RESCHEDULED:
			/*
			 * If it is being rescheduled, the callout has
			 * already fired.  We must ask it to cancel.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			dw->dw_resched = -1;
			cancelled_p = true;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * If it is being cancelled, the callout has
			 * already fired.  There is nothing more for us
			 * to do.  Someone else claims credit for
			 * cancelling it.
			 */
			cancelled_p = false;
			break;
		default:
			panic("invalid delayed work state: %d",
			    dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return cancelled_p;
}

/*
 * cancel_delayed_work_sync(dw)
 *
 *	If work was scheduled or queued, remove it from the schedule or
 *	queue and return true.  If work was not scheduled or queued,
 *	return false.  Note that work may already be running; if it
 *	hasn't been rescheduled or requeued, then cancel_delayed_work
 *	will return false; either way, wait for it to complete.
 */
bool
cancel_delayed_work_sync(struct delayed_work *dw)
{
	struct workqueue_struct *wq;
	bool cancelled_p;

	/* If there's no workqueue, nothing to cancel.  */
	if ((wq = work_queue(&dw->work)) == NULL)
		return false;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(&dw->work) != wq)) {
		cancelled_p = false;
	} else {
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It is either on the queue or already running
			 * or both.
			 */
			if (work_claimed(&dw->work, wq)) {
				/* On the queue.  Remove and release.  */
				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
				    work_entry);
				SDT_PROBE2(sdt, linux, work, cancel,
				    &dw->work, wq);
				release_work(&dw->work, wq);
				/* Can't dereference dw after this point.  */
				cancelled_p = true;
			} else {
				/* Not on the queue, so didn't cancel. */
				cancelled_p = false;
			}
			/* If it's still running, wait for it to complete.  */
			if (wq->wq_current_work == &dw->work)
				wait_for_current_work(&dw->work, wq);
			break;
		case DELAYED_WORK_SCHEDULED:
			/*
			 * If it is scheduled, mark it cancelled and
			 * try to stop the callout before it starts.
			 *
			 * If it's too late and the callout has already
			 * begun to execute, we must wait for it to
			 * complete.  But we got in soon enough to ask
			 * the callout not to run, so we successfully
			 * cancelled it in that case.
			 *
			 * If we stopped the callout before it started,
			 * then we must destroy the callout and
			 * dissociate it from the workqueue ourselves.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
				cancel_delayed_work_done(wq, dw);
			cancelled_p = true;
			break;
		case DELAYED_WORK_RESCHEDULED:
			/*
			 * If it is being rescheduled, the callout has
			 * already fired.  We must ask it to cancel and
			 * wait for it to complete.
			 */
			dw->dw_state = DELAYED_WORK_CANCELLED;
			dw->dw_resched = -1;
			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
			cancelled_p = true;
			break;
		case DELAYED_WORK_CANCELLED:
			/*
			 * If it is being cancelled, the callout has
			 * already fired.  We need only wait for it to
			 * complete.  Someone else, however, claims
			 * credit for cancelling it.
			 */
			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
			cancelled_p = false;
			break;
		default:
			panic("invalid delayed work state: %d",
			    dw->dw_state);
		}
	}
	mutex_exit(&wq->wq_lock);

	return cancelled_p;
}
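
/*
 * A detach-time sketch (hypothetical names): stop a self-rearming
 * delayed work for good by raising a stop flag first, then cancelling
 * synchronously, so that neither the callout nor the handler can still
 * be running afterwards:
 *
 *	sc->sc_stopping = true;
 *	(void)cancel_delayed_work_sync(&sc->sc_poll);
 */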

/*
 * Flush
 */

/*
 * flush_scheduled_work()
 *
 *	Wait for all work queued on system_wq to complete.  This does
 *	not include delayed work.
 */
void
flush_scheduled_work(void)
{

	flush_workqueue(system_wq);
}

/*
 * flush_workqueue_locked(wq)
 *
 *	Wait for all work queued on wq to complete.  This does not
 *	include delayed work.
 *
 *	Caller must hold wq's lock.
 */
static void
flush_workqueue_locked(struct workqueue_struct *wq)
{
	uint64_t gen;

	KASSERT(mutex_owned(&wq->wq_lock));

	/* Get the current generation number.  */
	gen = wq->wq_gen;

	/*
	 * If there's a batch of work in progress, we must wait for the
	 * worker thread to finish that batch.
	 */
	if (wq->wq_current_work != NULL)
		gen++;

	/*
	 * If there's any work yet to be claimed from the queue by the
	 * worker thread, we must wait for it to finish one more batch
	 * too.
	 */
	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue))
		gen++;

	/* Wait until the generation number has caught up.  */
	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
	while (wq->wq_gen < gen)
		cv_wait(&wq->wq_cv, &wq->wq_lock);
	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
}

/*
 * flush_workqueue(wq)
 *
 *	Wait for all work queued on wq to complete.  This does not
 *	include delayed work.
 */
void
flush_workqueue(struct workqueue_struct *wq)
{

	mutex_enter(&wq->wq_lock);
	flush_workqueue_locked(wq);
	mutex_exit(&wq->wq_lock);
}

/*
 * flush_work(work)
 *
 *	If work is queued or currently executing, wait for it to
 *	complete.
 */
void
flush_work(struct work_struct *work)
{
	struct workqueue_struct *wq;

	/* If there's no workqueue, nothing to flush.  */
	if ((wq = work_queue(work)) == NULL)
		return;

	flush_workqueue(wq);
}
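
/*
 * Usage sketch (hypothetical names): wait for previously queued work to
 * finish before tearing down a resource it uses, without cancelling it:
 *
 *	(void)queue_work(sc->sc_wq, &sc->sc_work);
 *	...
 *	flush_work(&sc->sc_work);
 *	bus_dmamap_unload(sc->sc_dmat, sc->sc_map);
 */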

/*
 * flush_delayed_work(dw)
 *
 *	If dw is scheduled to run after a delay, queue it immediately
 *	instead.  Then, if dw is queued or currently executing, wait
 *	for it to complete.
 */
void
flush_delayed_work(struct delayed_work *dw)
{
	struct workqueue_struct *wq;

	/* If there's no workqueue, nothing to flush.  */
	if ((wq = work_queue(&dw->work)) == NULL)
		return;

	mutex_enter(&wq->wq_lock);
	if (__predict_false(work_queue(&dw->work) != wq)) {
		/*
		 * Moved off the queue already (and possibly to another
		 * queue, though that would be ill-advised), so it must
		 * have completed, and we have nothing more to do.
		 */
	} else {
		switch (dw->dw_state) {
		case DELAYED_WORK_IDLE:
			/*
			 * It has a workqueue assigned and the callout
			 * is idle, so it must be in progress or on the
			 * queue.  In that case, we'll wait for it to
			 * complete.
			 */
			break;
		case DELAYED_WORK_SCHEDULED:
		case DELAYED_WORK_RESCHEDULED:
		case DELAYED_WORK_CANCELLED:
			/*
			 * The callout is scheduled, and may have even
			 * started.  Mark it as scheduled so that if
			 * the callout has fired it will queue the work
			 * itself.  Try to stop the callout -- if we
			 * can, queue the work now; if we can't, wait
			 * for the callout to complete, which entails
			 * queueing it.
			 */
			dw->dw_state = DELAYED_WORK_SCHEDULED;
			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
				/*
				 * We stopped it before it ran.  No
				 * state change in the interim is
				 * possible.  Destroy the callout and
				 * queue it ourselves.
				 */
				KASSERT(dw->dw_state ==
				    DELAYED_WORK_SCHEDULED);
				dw_callout_destroy(wq, dw);
				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
				    work_entry);
				cv_broadcast(&wq->wq_cv);
				SDT_PROBE2(sdt, linux, work, queue,
				    &dw->work, wq);
			}
			break;
		default:
			panic("invalid delayed work state: %d", dw->dw_state);
		}
		/*
		 * Waiting for the whole queue to flush is overkill,
		 * but doesn't hurt.
		 */
		flush_workqueue_locked(wq);
	}
	mutex_exit(&wq->wq_lock);
}
1512