xref: /netbsd-src/sys/external/bsd/common/linux/linux_work.c (revision 95fc21bf1947698bbdc8bf5ed3813b7250e8ff82)
1 /*	$NetBSD: linux_work.c,v 1.43 2018/08/27 15:25:43 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.43 2018/08/27 15:25:43 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #include <sys/queue.h>
45 #include <sys/sdt.h>
46 
47 #include <linux/workqueue.h>
48 
49 TAILQ_HEAD(work_head, work_struct);
50 TAILQ_HEAD(dwork_head, delayed_work);
51 
52 struct workqueue_struct {
53 	kmutex_t		wq_lock;
54 	kcondvar_t		wq_cv;
55 	struct dwork_head	wq_delayed; /* delayed work scheduled */
56 	struct work_head	wq_queue;   /* work to run */
57 	struct work_head	wq_dqueue;  /* delayed work to run now */
58 	struct work_struct	*wq_current_work;
59 	int			wq_flags;
60 	bool			wq_dying;
61 	uint64_t		wq_gen;
62 	struct lwp		*wq_lwp;
63 };
64 
65 static void __dead	linux_workqueue_thread(void *);
66 static void		linux_workqueue_timeout(void *);
67 static bool		work_claimed(struct work_struct *,
68 			    struct workqueue_struct *);
69 static struct workqueue_struct *
70 			work_queue(struct work_struct *);
71 static bool		acquire_work(struct work_struct *,
72 			    struct workqueue_struct *);
73 static void		release_work(struct work_struct *,
74 			    struct workqueue_struct *);
75 static void		wait_for_current_work(struct work_struct *,
76 			    struct workqueue_struct *);
77 static void		dw_callout_init(struct workqueue_struct *,
78 			    struct delayed_work *);
79 static void		dw_callout_destroy(struct workqueue_struct *,
80 			    struct delayed_work *);
81 static void		cancel_delayed_work_done(struct workqueue_struct *,
82 			    struct delayed_work *);
83 
84 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
85     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
86 SDT_PROBE_DEFINE2(sdt, linux, work, release,
87     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
88 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
89     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
90 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
91     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
92 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
93     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
94     "unsigned long"/*ticks*/);
95 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
96     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
97 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
98     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
99 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
101 SDT_PROBE_DEFINE2(sdt, linux, work, run,
102     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
103 SDT_PROBE_DEFINE2(sdt, linux, work, done,
104     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
105 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
106     "struct workqueue_struct *"/*wq*/);
107 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
108     "struct workqueue_struct *"/*wq*/);
109 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
110     "struct workqueue_struct *"/*wq*/);
111 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
112     "struct workqueue_struct *"/*wq*/);
113 
114 static specificdata_key_t workqueue_key __read_mostly;
115 
116 struct workqueue_struct	*system_wq __read_mostly;
117 struct workqueue_struct	*system_long_wq __read_mostly;
118 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
119 
120 static inline uintptr_t
121 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
122 {
123 
124 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
125 }
126 
127 /*
128  * linux_workqueue_init()
129  *
130  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
131  *	NetBSD error on failure.
132  */
133 int
134 linux_workqueue_init(void)
135 {
136 	int error;
137 
138 	error = lwp_specific_key_create(&workqueue_key, NULL);
139 	if (error)
140 		goto fail0;
141 
142 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
143 	if (system_wq == NULL) {
144 		error = ENOMEM;
145 		goto fail1;
146 	}
147 
148 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
149 	if (system_long_wq == NULL) {
150 		error = ENOMEM;
151 		goto fail2;
152 	}
153 
154 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
155 	if (system_power_efficient_wq == NULL) {
156 		error = ENOMEM;
157 		goto fail3;
158 	}
159 
160 	return 0;
161 
162 fail4: __unused
163 	destroy_workqueue(system_power_efficient_wq);
164 fail3:	destroy_workqueue(system_long_wq);
165 fail2:	destroy_workqueue(system_wq);
166 fail1:	lwp_specific_key_delete(workqueue_key);
167 fail0:	KASSERT(error);
168 	return error;
169 }
170 
171 /*
172  * linux_workqueue_fini()
173  *
174  *	Destroy the Linux workqueue subsystem.  Never fails.
175  */
176 void
177 linux_workqueue_fini(void)
178 {
179 
180 	destroy_workqueue(system_power_efficient_wq);
181 	destroy_workqueue(system_long_wq);
182 	destroy_workqueue(system_wq);
183 	lwp_specific_key_delete(workqueue_key);
184 }
185 
186 /*
187  * Workqueues
188  */
189 
190 /*
191  * alloc_ordered_workqueue(name, flags)
192  *
193  *	Create a workqueue of the given name.  No flags are currently
194  *	defined.  Return NULL on failure, pointer to struct
195  *	workqueue_struct object on success.
196  */
197 struct workqueue_struct *
198 alloc_ordered_workqueue(const char *name, int flags)
199 {
200 	struct workqueue_struct *wq;
201 	int error;
202 
203 	KASSERT(flags == 0);
204 
205 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
206 
207 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
208 	cv_init(&wq->wq_cv, name);
209 	TAILQ_INIT(&wq->wq_delayed);
210 	TAILQ_INIT(&wq->wq_queue);
211 	TAILQ_INIT(&wq->wq_dqueue);
212 	wq->wq_current_work = NULL;
213 	wq->wq_flags = 0;
214 	wq->wq_dying = false;
215 	wq->wq_gen = 0;
216 	wq->wq_lwp = NULL;
217 
218 	error = kthread_create(PRI_NONE,
219 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
220 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
221 	if (error)
222 		goto fail0;
223 
224 	return wq;
225 
226 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
227 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
228 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
229 	cv_destroy(&wq->wq_cv);
230 	mutex_destroy(&wq->wq_lock);
231 	kmem_free(wq, sizeof(*wq));
232 	return NULL;
233 }
234 
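/*
 * Illustrative sketch (editorial addition, not in the original file):
 * the usual pairing of alloc_ordered_workqueue with destroy_workqueue
 * in a driver that wants its own serialized work queue.  The
 * "example_*" names are hypothetical.  Note that destroy_workqueue
 * waits for the worker thread, so it must not be called from work
 * running on the same queue.
 *
 *	struct example_softc {
 *		struct workqueue_struct	*sc_wq;
 *	};
 *
 *	static int
 *	example_attach(struct example_softc *sc)
 *	{
 *
 *		sc->sc_wq = alloc_ordered_workqueue("examplewq", 0);
 *		if (sc->sc_wq == NULL)
 *			return ENOMEM;
 *		return 0;
 *	}
 *
 *	static void
 *	example_detach(struct example_softc *sc)
 *	{
 *
 *		destroy_workqueue(sc->sc_wq);
 *	}
 */
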
235 /*
236  * destroy_workqueue(wq)
237  *
238  *	Destroy a workqueue created with wq.  Cancel any pending
239  *	delayed work.  Wait for all queued work to complete.
240  *
241  *	May sleep.
242  */
243 void
244 destroy_workqueue(struct workqueue_struct *wq)
245 {
246 
247 	/*
248 	 * Cancel all delayed work.  We do this first because any
249 	 * delayed work that has already timed out, which we can't
250 	 * cancel, may have queued new work.
251 	 */
252 	mutex_enter(&wq->wq_lock);
253 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
254 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
255 
256 		KASSERT(work_queue(&dw->work) == wq);
257 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
258 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
259 			dw->dw_state == DELAYED_WORK_CANCELLED),
260 		    "delayed work %p in bad state: %d",
261 		    dw, dw->dw_state);
262 
263 		/*
264 		 * Mark it cancelled and try to stop the callout before
265 		 * it starts.
266 		 *
267 		 * If it's too late and the callout has already begun
268 		 * to execute, then it will notice that we asked to
269 		 * cancel it and remove itself from the queue before
270 		 * returning.
271 		 *
272 		 * If we stopped the callout before it started,
273 		 * however, then we can safely destroy the callout and
274 		 * dissociate it from the workqueue ourselves.
275 		 */
276 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
277 		dw->dw_state = DELAYED_WORK_CANCELLED;
278 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
279 			cancel_delayed_work_done(wq, dw);
280 	}
281 	mutex_exit(&wq->wq_lock);
282 
283 	/*
284 	 * At this point, no new work can be put on the queue.
285 	 */
286 
287 	/* Tell the thread to exit.  */
288 	mutex_enter(&wq->wq_lock);
289 	wq->wq_dying = true;
290 	cv_broadcast(&wq->wq_cv);
291 	mutex_exit(&wq->wq_lock);
292 
293 	/* Wait for it to exit.  */
294 	(void)kthread_join(wq->wq_lwp);
295 
296 	KASSERT(wq->wq_dying);
297 	KASSERT(wq->wq_flags == 0);
298 	KASSERT(wq->wq_current_work == NULL);
299 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
300 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
301 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
302 	cv_destroy(&wq->wq_cv);
303 	mutex_destroy(&wq->wq_lock);
304 
305 	kmem_free(wq, sizeof(*wq));
306 }
307 
308 /*
309  * Work thread and callout
310  */
311 
312 /*
313  * linux_workqueue_thread(cookie)
314  *
315  *	Main function for a workqueue's worker thread.  Waits until
316  *	there is work queued, grabs a batch of work off the queue,
317  *	executes it all, bumps the generation number, and repeats,
318  *	until dying.
319  */
320 static void __dead
321 linux_workqueue_thread(void *cookie)
322 {
323 	struct workqueue_struct *const wq = cookie;
324 	struct work_head queue, dqueue;
325 	struct work_head *const q[2] = { &queue, &dqueue };
326 	unsigned i;
327 
328 	lwp_setspecific(workqueue_key, wq);
329 
330 	mutex_enter(&wq->wq_lock);
331 	for (;;) {
332 		/*
333 		 * Wait until there's activity.  If there's no work and
334 		 * we're dying, stop here.
335 		 */
336 		if (TAILQ_EMPTY(&wq->wq_queue) &&
337 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
338 			if (wq->wq_dying)
339 				break;
340 			cv_wait(&wq->wq_cv, &wq->wq_lock);
341 			continue;
342 		}
343 
344 		/* Grab a batch of work off the queue.  */
345 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
346 		TAILQ_INIT(&queue);
347 		TAILQ_INIT(&dqueue);
348 		TAILQ_CONCAT(&queue, &wq->wq_queue, work_entry);
349 		TAILQ_CONCAT(&dqueue, &wq->wq_dqueue, work_entry);
350 
351 		/* Process each work item in the batch.  */
352 		for (i = 0; i < 2; i++) {
353 			while (!TAILQ_EMPTY(q[i])) {
354 				struct work_struct *work = TAILQ_FIRST(q[i]);
355 				void (*func)(struct work_struct *);
356 
357 				KASSERT(work_queue(work) == wq);
358 				KASSERT(work_claimed(work, wq));
359 				KASSERTMSG((q[i] != &dqueue ||
360 					container_of(work, struct delayed_work,
361 					    work)->dw_state ==
362 					DELAYED_WORK_IDLE),
363 				    "delayed work %p queued and scheduled",
364 				    work);
365 
366 				TAILQ_REMOVE(q[i], work, work_entry);
367 				KASSERT(wq->wq_current_work == NULL);
368 				wq->wq_current_work = work;
369 				func = work->func;
370 				release_work(work, wq);
371 				/* Can't dereference work after this point.  */
372 
373 				mutex_exit(&wq->wq_lock);
374 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
375 				(*func)(work);
376 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
377 				mutex_enter(&wq->wq_lock);
378 
379 				KASSERT(wq->wq_current_work == work);
380 				wq->wq_current_work = NULL;
381 				cv_broadcast(&wq->wq_cv);
382 			}
383 		}
384 
385 		/* Notify flush that we've completed a batch of work.  */
386 		wq->wq_gen++;
387 		cv_broadcast(&wq->wq_cv);
388 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
389 	}
390 	mutex_exit(&wq->wq_lock);
391 
392 	kthread_exit(0);
393 }
394 
395 /*
396  * linux_workqueue_timeout(cookie)
397  *
398  *	Delayed work timeout callback.
399  *
400  *	- If scheduled, queue it.
401  *	- If rescheduled, callout_schedule ourselves again.
402  *	- If cancelled, destroy the callout and release the work from
403  *        the workqueue.
404  */
405 static void
406 linux_workqueue_timeout(void *cookie)
407 {
408 	struct delayed_work *const dw = cookie;
409 	struct workqueue_struct *const wq = work_queue(&dw->work);
410 
411 	KASSERTMSG(wq != NULL,
412 	    "delayed work %p state %d resched %d",
413 	    dw, dw->dw_state, dw->dw_resched);
414 
415 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
416 
417 	mutex_enter(&wq->wq_lock);
418 	KASSERT(work_queue(&dw->work) == wq);
419 	switch (dw->dw_state) {
420 	case DELAYED_WORK_IDLE:
421 		panic("delayed work callout uninitialized: %p", dw);
422 	case DELAYED_WORK_SCHEDULED:
423 		dw_callout_destroy(wq, dw);
424 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
425 		cv_broadcast(&wq->wq_cv);
426 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
427 		break;
428 	case DELAYED_WORK_RESCHEDULED:
429 		KASSERT(dw->dw_resched >= 0);
430 		callout_schedule(&dw->dw_callout, dw->dw_resched);
431 		dw->dw_state = DELAYED_WORK_SCHEDULED;
432 		dw->dw_resched = -1;
433 		break;
434 	case DELAYED_WORK_CANCELLED:
435 		cancel_delayed_work_done(wq, dw);
436 		/* Can't dereference dw after this point.  */
437 		goto out;
438 	default:
439 		panic("delayed work callout in bad state: %p", dw);
440 	}
441 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
442 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
443 out:	mutex_exit(&wq->wq_lock);
444 }
445 
446 /*
447  * current_work()
448  *
449  *	If in a workqueue worker thread, return the work it is
450  *	currently executing.  Otherwise return NULL.
451  */
452 struct work_struct *
453 current_work(void)
454 {
455 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
456 
457 	/* If we're not a workqueue thread, then there's no work.  */
458 	if (wq == NULL)
459 		return NULL;
460 
461 	/*
462 	 * Otherwise, this should be possible only while work is in
463 	 * progress.  Return the current work item.
464 	 */
465 	KASSERT(wq->wq_current_work != NULL);
466 	return wq->wq_current_work;
467 }
468 
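/*
 * Illustrative sketch (editorial addition): current_work can be used
 * to guard against self-deadlock, e.g. refusing to wait synchronously
 * for a work item from inside that same work item.  The "example_*"
 * names are hypothetical; sc_work is a struct work_struct embedded in
 * the softc.
 *
 *	static void
 *	example_stop(struct example_softc *sc)
 *	{
 *
 *		KASSERT(current_work() != &sc->sc_work);
 *		(void)cancel_work_sync(&sc->sc_work);
 *	}
 */
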
469 /*
470  * Work
471  */
472 
473 /*
474  * INIT_WORK(work, fn)
475  *
476  *	Initialize work for use with a workqueue to call fn in a worker
477  *	thread.  There is no corresponding destruction operation.
478  */
479 void
480 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
481 {
482 
483 	work->work_owner = 0;
484 	work->func = fn;
485 }
486 
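/*
 * Illustrative sketch (editorial addition): defining a work function
 * and initializing the work with INIT_WORK.  As in Linux drivers, the
 * work function recovers its containing structure with container_of.
 * The "example_*" names are hypothetical.
 *
 *	struct example_softc {
 *		struct work_struct	sc_reset_work;
 *	};
 *
 *	static void
 *	example_reset_work(struct work_struct *work)
 *	{
 *		struct example_softc *sc = container_of(work,
 *		    struct example_softc, sc_reset_work);
 *
 *		example_do_reset(sc);
 *	}
 *
 *	static void
 *	example_init(struct example_softc *sc)
 *	{
 *
 *		INIT_WORK(&sc->sc_reset_work, &example_reset_work);
 *	}
 */
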
487 /*
488  * work_claimed(work, wq)
489  *
490  *	True if work is currently claimed by a workqueue, meaning it is
491  *	either on the queue or scheduled in a callout.  The workqueue
492  *	must be wq, and caller must hold wq's lock.
493  */
494 static bool
495 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
496 {
497 
498 	KASSERT(work_queue(work) == wq);
499 	KASSERT(mutex_owned(&wq->wq_lock));
500 
501 	return work->work_owner & 1;
502 }
503 
504 /*
505  * work_queue(work)
506  *
507  *	Return the last queue that work was queued on, or NULL if it
508  *	was never queued.
509  */
510 static struct workqueue_struct *
511 work_queue(struct work_struct *work)
512 {
513 
514 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
515 }
516 
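/*
 * Editorial note (not in the original file): work->work_owner packs
 * the owning workqueue pointer and a "claimed" bit into one word,
 * which is why acquire_work below asserts the pointer is even.  The
 * invariant, roughly:
 *
 *	work_owner == 0			never queued anywhere
 *	work_owner == (uintptr_t)wq	last ran on wq, not claimed
 *	work_owner == (uintptr_t)wq | 1	claimed by wq: on its queue or
 *					scheduled in a callout
 */
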
517 /*
518  * acquire_work(work, wq)
519  *
520  *	Try to claim work for wq.  If work is already claimed, it must
521  *	be claimed by wq; return false.  If work is not already
522  *	claimed, claim it, issue a memory barrier to match any prior
523  *	release_work, and return true.
524  *
525  *	Caller must hold wq's lock.
526  */
527 static bool
528 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
529 {
530 	uintptr_t owner0, owner;
531 
532 	KASSERT(mutex_owned(&wq->wq_lock));
533 	KASSERT(((uintptr_t)wq & 1) == 0);
534 
535 	owner = (uintptr_t)wq | 1;
536 	do {
537 		owner0 = work->work_owner;
538 		if (owner0 & 1) {
539 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
540 			return false;
541 		}
542 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
543 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
544 	    owner0);
545 
546 	KASSERT(work_queue(work) == wq);
547 	membar_enter();
548 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
549 	return true;
550 }
551 
552 /*
553  * release_work(work, wq)
554  *
555  *	Issue a memory barrier to match any subsequent acquire_work and
556  *	dissociate work from wq.
557  *
558  *	Caller must hold wq's lock and work must be associated with wq.
559  */
560 static void
561 release_work(struct work_struct *work, struct workqueue_struct *wq)
562 {
563 
564 	KASSERT(work_queue(work) == wq);
565 	KASSERT(mutex_owned(&wq->wq_lock));
566 
567 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
568 	membar_exit();
569 
570 	/*
571 	 * Non-interlocked r/m/w is safe here because nobody else can
572 	 * write to this while the claimed bit is set and the workqueue
573 	 * lock is held.
574 	 */
575 	work->work_owner &= ~(uintptr_t)1;
576 }
577 
578 /*
579  * schedule_work(work)
580  *
581  *	If work is not already queued on system_wq, queue it to be run
582  *	by system_wq's worker thread when it next can.  True if it was
583  *	newly queued, false if it was already queued.  If the work was
584  *	already running, queue it to run again.
585  *
586  *	Caller must ensure work is not queued to run on a different
587  *	workqueue.
588  */
589 bool
590 schedule_work(struct work_struct *work)
591 {
592 
593 	return queue_work(system_wq, work);
594 }
595 
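/*
 * Illustrative sketch (editorial addition): schedule_work may be
 * called repeatedly, e.g. from an interrupt handler; while the work
 * is already queued the call returns false and does nothing more.
 * The "example_*" names are hypothetical.
 *
 *	static int
 *	example_intr(void *cookie)
 *	{
 *		struct example_softc *sc = cookie;
 *
 *		(void)schedule_work(&sc->sc_reset_work);
 *		return 1;
 *	}
 */
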
596 /*
597  * queue_work(wq, work)
598  *
599  *	If work is not already queued on wq, queue it to be run by wq's
600  *	worker thread when it next can.  True if it was newly queued,
601  *	false if it was already queued.  If the work was already
602  *	running, queue it to run again.
603  *
604  *	Caller must ensure work is not queued to run on a different
605  *	workqueue.
606  */
607 bool
608 queue_work(struct workqueue_struct *wq, struct work_struct *work)
609 {
610 	bool newly_queued;
611 
612 	KASSERT(wq != NULL);
613 
614 	mutex_enter(&wq->wq_lock);
615 	if (__predict_true(acquire_work(work, wq))) {
616 		/*
617 		 * It wasn't on any workqueue at all.  Put it on this
618 		 * one, and signal the worker thread that there is work
619 		 * to do.
620 		 */
621 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
622 		cv_broadcast(&wq->wq_cv);
623 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
624 		newly_queued = true;
625 	} else {
626 		/*
627 		 * It was already on this workqueue.  Nothing to do
628 		 * since it is already queued.
629 		 */
630 		newly_queued = false;
631 	}
632 	mutex_exit(&wq->wq_lock);
633 
634 	return newly_queued;
635 }
636 
637 /*
638  * cancel_work(work)
639  *
640  *	If work was queued, remove it from the queue and return true.
641  *	If work was not queued, return false.  Work may still be
642  *	running when this returns.
643  */
644 bool
645 cancel_work(struct work_struct *work)
646 {
647 	struct workqueue_struct *wq;
648 	bool cancelled_p = false;
649 
650 	/* If there's no workqueue, nothing to cancel.   */
651 	if ((wq = work_queue(work)) == NULL)
652 		goto out;
653 
654 	mutex_enter(&wq->wq_lock);
655 	if (__predict_false(work_queue(work) != wq)) {
656 		/*
657 		 * It has finished execution or been cancelled by
658 		 * another thread, and has been moved off the
659 		 * workqueue, so it's too late to cancel.
660 		 */
661 		cancelled_p = false;
662 	} else {
663 		/* Check whether it's on the queue.  */
664 		if (work_claimed(work, wq)) {
665 			/*
666 			 * It is still on the queue.  Take it off the
667 			 * queue and report successful cancellation.
668 			 */
669 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
670 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
671 			release_work(work, wq);
672 			/* Can't dereference work after this point.  */
673 			cancelled_p = true;
674 		} else {
675 			/* Not on the queue.  Couldn't cancel it.  */
676 			cancelled_p = false;
677 		}
678 	}
679 	mutex_exit(&wq->wq_lock);
680 
681 out:	return cancelled_p;
682 }
683 
684 /*
685  * cancel_work_sync(work)
686  *
687  *	If work was queued, remove it from the queue and return true.
688  *	If work was not queued, return false.  Either way, if work is
689  *	currently running, wait for it to complete.
690  *
691  *	May sleep.
692  */
693 bool
694 cancel_work_sync(struct work_struct *work)
695 {
696 	struct workqueue_struct *wq;
697 	bool cancelled_p = false;
698 
699 	/* If there's no workqueue, nothing to cancel.   */
700 	if ((wq = work_queue(work)) == NULL)
701 		goto out;
702 
703 	mutex_enter(&wq->wq_lock);
704 	if (__predict_false(work_queue(work) != wq)) {
705 		/*
706 		 * It has finished execution or been cancelled by
707 		 * another thread, and has been moved off the
708 		 * workqueue, so it's too late to cancel.
709 		 */
710 		cancelled_p = false;
711 	} else {
712 		/* Check whether it's on the queue.  */
713 		if (work_claimed(work, wq)) {
714 			/*
715 			 * It is still on the queue.  Take it off the
716 			 * queue and report successful cancellation.
717 			 */
718 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
719 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
720 			release_work(work, wq);
721 			/* Can't dereference work after this point.  */
722 			cancelled_p = true;
723 		} else {
724 			/* Not on the queue.  Couldn't cancel it.  */
725 			cancelled_p = false;
726 		}
727 		/* If it's still running, wait for it to complete.  */
728 		if (wq->wq_current_work == work)
729 			wait_for_current_work(work, wq);
730 	}
731 	mutex_exit(&wq->wq_lock);
732 
733 out:	return cancelled_p;
734 }
735 
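/*
 * Illustrative sketch (editorial addition): cancel_work_sync is what
 * detach paths normally use to ensure a work item is neither queued
 * nor running before the memory embedding it is freed.  The caller
 * must separately ensure nothing can requeue the work afterwards,
 * e.g. by first disabling the interrupt that schedules it.  The
 * "example_*" names are hypothetical.
 *
 *	static void
 *	example_detach(struct example_softc *sc)
 *	{
 *
 *		example_disable_intr(sc);
 *		(void)cancel_work_sync(&sc->sc_reset_work);
 *		kmem_free(sc, sizeof(*sc));
 *	}
 */
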
736 /*
737  * wait_for_current_work(work, wq)
738  *
739  *	wq must be currently executing work.  Wait for it to finish.
740  *
741  *	Does not dereference work.
742  */
743 static void
744 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
745 {
746 	uint64_t gen;
747 
748 	KASSERT(mutex_owned(&wq->wq_lock));
749 	KASSERT(wq->wq_current_work == work);
750 
751 	/* Wait only one generation in case it gets requeued quickly.  */
752 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
753 	gen = wq->wq_gen;
754 	do {
755 		cv_wait(&wq->wq_cv, &wq->wq_lock);
756 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
757 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
758 }
759 
760 /*
761  * Delayed work
762  */
763 
764 /*
765  * INIT_DELAYED_WORK(dw, fn)
766  *
767  *	Initialize dw for use with a workqueue to call fn in a worker
768  *	thread after a delay.  There is no corresponding destruction
769  *	operation.
770  */
771 void
772 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
773 {
774 
775 	INIT_WORK(&dw->work, fn);
776 	dw->dw_state = DELAYED_WORK_IDLE;
777 	dw->dw_resched = -1;
778 
779 	/*
780 	 * Defer callout_init until we are going to schedule the
781 	 * callout, which can then callout_destroy it: since there is
782 	 * no DESTROY_DELAYED_WORK or anything like it, we otherwise
783 	 * have no opportunity to call callout_destroy.
784 	 */
785 }
786 
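/*
 * Illustrative sketch (editorial addition): a delayed work function
 * is handed the embedded struct work_struct, so it typically recovers
 * the struct delayed_work with container_of and then its softc.  The
 * "example_*" names are hypothetical.
 *
 *	struct example_softc {
 *		struct delayed_work	sc_poll_dw;
 *	};
 *
 *	static void
 *	example_poll(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct example_softc *sc = container_of(dw,
 *		    struct example_softc, sc_poll_dw);
 *
 *		example_poll_hardware(sc);
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->sc_poll_dw, &example_poll);
 */
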
787 /*
788  * schedule_delayed_work(dw, ticks)
789  *
790  *	If it is not currently scheduled, schedule dw to run after
791  *	ticks on system_wq.  If currently executing and not already
792  *	rescheduled, reschedule it.  True if it was newly scheduled,
793  *	false if it was already scheduled.
794  *
795  *	If ticks == 0, queue it to run as soon as the worker can,
796  *	without waiting for the next callout tick to run.
797  */
798 bool
799 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
800 {
801 
802 	return queue_delayed_work(system_wq, dw, ticks);
803 }
804 
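/*
 * Illustrative sketch (editorial addition): delayed work is commonly
 * used for periodic polling by having the work function reschedule
 * itself.  The ticks argument is in callout ticks, so hz is roughly
 * one second.  A variant of the hypothetical example_poll above:
 *
 *	static void
 *	example_poll(struct work_struct *work)
 *	{
 *		struct example_softc *sc = example_poll_softc(work);
 *
 *		example_poll_hardware(sc);
 *		if (!sc->sc_stopping)
 *			(void)schedule_delayed_work(&sc->sc_poll_dw, hz);
 *	}
 *
 * where example_poll_softc is the container_of dance shown above and
 * sc_stopping is set before cancel_delayed_work_sync at teardown.
 */
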
805 /*
806  * dw_callout_init(wq, dw)
807  *
808  *	Initialize the callout of dw and transition to
809  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
810  */
811 static void
812 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
813 {
814 
815 	KASSERT(mutex_owned(&wq->wq_lock));
816 	KASSERT(work_queue(&dw->work) == wq);
817 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
818 
819 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
820 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
821 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
822 	dw->dw_state = DELAYED_WORK_SCHEDULED;
823 }
824 
825 /*
826  * dw_callout_destroy(wq, dw)
827  *
828  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
829  */
830 static void
831 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
832 {
833 
834 	KASSERT(mutex_owned(&wq->wq_lock));
835 	KASSERT(work_queue(&dw->work) == wq);
836 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
837 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
838 	    dw->dw_state == DELAYED_WORK_CANCELLED);
839 
840 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
841 	callout_destroy(&dw->dw_callout);
842 	dw->dw_resched = -1;
843 	dw->dw_state = DELAYED_WORK_IDLE;
844 }
845 
846 /*
847  * cancel_delayed_work_done(wq, dw)
848  *
849  *	Complete cancellation of a delayed work: transition from
850  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
851  *	workqueue.  Caller must not dereference dw after this returns.
852  */
853 static void
854 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
855 {
856 
857 	KASSERT(mutex_owned(&wq->wq_lock));
858 	KASSERT(work_queue(&dw->work) == wq);
859 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
860 
861 	dw_callout_destroy(wq, dw);
862 	release_work(&dw->work, wq);
863 	/* Can't dereference dw after this point.  */
864 }
865 
866 /*
867  * queue_delayed_work(wq, dw, ticks)
868  *
869  *	If it is not currently scheduled, schedule dw to run after
870  *	ticks on wq.  If currently queued, remove it from the queue
871  *	first.
872  *
873  *	If ticks == 0, queue it to run as soon as the worker can,
874  *	without waiting for the next callout tick to run.
875  */
876 bool
877 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
878     unsigned long ticks)
879 {
880 	bool newly_queued;
881 
882 	mutex_enter(&wq->wq_lock);
883 	if (__predict_true(acquire_work(&dw->work, wq))) {
884 		/*
885 		 * It wasn't on any workqueue at all.  Schedule it to
886 		 * run on this one.
887 		 */
888 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
889 		if (ticks == 0) {
890 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
891 			    work_entry);
892 			cv_broadcast(&wq->wq_cv);
893 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
894 		} else {
895 			/*
896 			 * Initialize a callout and schedule to run
897 			 * after a delay.
898 			 */
899 			dw_callout_init(wq, dw);
900 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
901 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
902 		}
903 		newly_queued = true;
904 	} else {
905 		/* It was already on this workqueue.  */
906 		switch (dw->dw_state) {
907 		case DELAYED_WORK_IDLE:
908 		case DELAYED_WORK_SCHEDULED:
909 		case DELAYED_WORK_RESCHEDULED:
910 			/* On the queue or already scheduled.  Leave it.  */
911 			newly_queued = false;
912 			break;
913 		case DELAYED_WORK_CANCELLED:
914 			/*
915 			 * Scheduled and the callout began, but it was
916 			 * cancelled.  Reschedule it.
917 			 */
918 			if (ticks == 0) {
919 				dw->dw_state = DELAYED_WORK_SCHEDULED;
920 				SDT_PROBE2(sdt, linux, work, queue,
921 				    &dw->work, wq);
922 			} else {
923 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
924 				dw->dw_resched = MIN(INT_MAX, ticks);
925 				SDT_PROBE3(sdt, linux, work, schedule,
926 				    dw, wq, ticks);
927 			}
928 			newly_queued = true;
929 			break;
930 		default:
931 			panic("invalid delayed work state: %d",
932 			    dw->dw_state);
933 		}
934 	}
935 	mutex_exit(&wq->wq_lock);
936 
937 	return newly_queued;
938 }
939 
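/*
 * Illustrative sketch (editorial addition): with ticks == 0,
 * queue_delayed_work skips the callout and behaves like queue_work,
 * which lets a caller fold a "now or later" decision into one call.
 * The "example_*" and "urgent" names are hypothetical; mstohz is the
 * usual NetBSD milliseconds-to-ticks conversion.
 *
 *	(void)queue_delayed_work(sc->sc_wq, &sc->sc_refresh_dw,
 *	    urgent ? 0 : mstohz(500));
 */
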
940 /*
941  * mod_delayed_work(wq, dw, ticks)
942  *
943  *	Schedule dw to run after ticks.  If scheduled or queued,
944  *	reschedule.  If ticks == 0, run without delay.
945  *
946  *	True if it modified the timer of an already scheduled work,
947  *	false if it newly scheduled the work.
948  */
949 bool
950 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
951     unsigned long ticks)
952 {
953 	bool timer_modified;
954 
955 	mutex_enter(&wq->wq_lock);
956 	if (acquire_work(&dw->work, wq)) {
957 		/*
958 		 * It wasn't on any workqueue at all.  Schedule it to
959 		 * run on this one.
960 		 */
961 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
962 		if (ticks == 0) {
963 			/*
964 			 * Run immediately: put it on the queue and
965 			 * signal the worker thread.
966 			 */
967 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
968 			    work_entry);
969 			cv_broadcast(&wq->wq_cv);
970 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
971 		} else {
972 			/*
973 			 * Initialize a callout and schedule to run
974 			 * after a delay.
975 			 */
976 			dw_callout_init(wq, dw);
977 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
978 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
979 		}
980 		timer_modified = false;
981 	} else {
982 		/* It was already on this workqueue.  */
983 		switch (dw->dw_state) {
984 		case DELAYED_WORK_IDLE:
985 			/* On the queue.  */
986 			if (ticks == 0) {
987 				/* Leave it be.  */
988 				SDT_PROBE2(sdt, linux, work, cancel,
989 				    &dw->work, wq);
990 				SDT_PROBE2(sdt, linux, work, queue,
991 				    &dw->work, wq);
992 			} else {
993 				/* Remove from the queue and schedule.  */
994 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
995 				    work_entry);
996 				dw_callout_init(wq, dw);
997 				callout_schedule(&dw->dw_callout,
998 				    MIN(INT_MAX, ticks));
999 				SDT_PROBE2(sdt, linux, work, cancel,
1000 				    &dw->work, wq);
1001 				SDT_PROBE3(sdt, linux, work, schedule,
1002 				    dw, wq, ticks);
1003 			}
1004 			timer_modified = true;
1005 			break;
1006 		case DELAYED_WORK_SCHEDULED:
1007 			/*
1008 			 * It is scheduled to run after a delay.  Try
1009 			 * to stop it and reschedule it; if we can't,
1010 			 * either reschedule it or cancel it to put it
1011 			 * on the queue, and inform the callout.
1012 			 */
1013 			if (callout_stop(&dw->dw_callout)) {
1014 				/* Can't stop, callout has begun.  */
1015 				if (ticks == 0) {
1016 					/*
1017 					 * We don't actually need to do
1018 					 * anything.  The callout will
1019 					 * queue it as soon as it gets
1020 					 * the lock.
1021 					 */
1022 					SDT_PROBE2(sdt, linux, work, cancel,
1023 					    &dw->work, wq);
1024 					SDT_PROBE2(sdt, linux, work, queue,
1025 					    &dw->work, wq);
1026 				} else {
1027 					/* Ask the callout to reschedule.  */
1028 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1029 					dw->dw_resched = MIN(INT_MAX, ticks);
1030 					SDT_PROBE2(sdt, linux, work, cancel,
1031 					    &dw->work, wq);
1032 					SDT_PROBE3(sdt, linux, work, schedule,
1033 					    dw, wq, ticks);
1034 				}
1035 			} else {
1036 				/* We stopped the callout before it began.  */
1037 				if (ticks == 0) {
1038 					/*
1039 					 * Run immediately: destroy the
1040 					 * callout, put it on the
1041 					 * queue, and signal the worker
1042 					 * thread.
1043 					 */
1044 					dw_callout_destroy(wq, dw);
1045 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1046 					    &dw->work, work_entry);
1047 					cv_broadcast(&wq->wq_cv);
1048 					SDT_PROBE2(sdt, linux, work, cancel,
1049 					    &dw->work, wq);
1050 					SDT_PROBE2(sdt, linux, work, queue,
1051 					    &dw->work, wq);
1052 				} else {
1053 					/*
1054 					 * Reschedule the callout.  No
1055 					 * state change.
1056 					 */
1057 					callout_schedule(&dw->dw_callout,
1058 					    MIN(INT_MAX, ticks));
1059 					SDT_PROBE2(sdt, linux, work, cancel,
1060 					    &dw->work, wq);
1061 					SDT_PROBE3(sdt, linux, work, schedule,
1062 					    dw, wq, ticks);
1063 				}
1064 			}
1065 			timer_modified = true;
1066 			break;
1067 		case DELAYED_WORK_RESCHEDULED:
1068 			/*
1069 			 * Someone rescheduled it after the callout
1070 			 * started but before the poor thing even had a
1071 			 * chance to acquire the lock.
1072 			 */
1073 			if (ticks == 0) {
1074 				/*
1075 				 * We can just switch back to
1076 				 * DELAYED_WORK_SCHEDULED so that the
1077 				 * callout will queue the work as soon
1078 				 * as it gets the lock.
1079 				 */
1080 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1081 				dw->dw_resched = -1;
1082 				SDT_PROBE2(sdt, linux, work, cancel,
1083 				    &dw->work, wq);
1084 				SDT_PROBE2(sdt, linux, work, queue,
1085 				    &dw->work, wq);
1086 			} else {
1087 				/* Change the rescheduled time.  */
1088 				dw->dw_resched = MIN(INT_MAX, ticks);
1089 				SDT_PROBE2(sdt, linux, work, cancel,
1090 				    &dw->work, wq);
1091 				SDT_PROBE3(sdt, linux, work, schedule,
1092 				    dw, wq, ticks);
1093 			}
1094 			timer_modified = true;
1095 			break;
1096 		case DELAYED_WORK_CANCELLED:
1097 			/*
1098 			 * Someone cancelled it after the callout
1099 			 * started but before the poor thing even had a
1100 			 * chance to acquire the lock.
1101 			 */
1102 			if (ticks == 0) {
1103 				/*
1104 				 * We can just switch back to
1105 				 * DELAYED_WORK_SCHEDULED so that the
1106 				 * callout will queue the work as soon
1107 				 * as it gets the lock.
1108 				 */
1109 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1110 				SDT_PROBE2(sdt, linux, work, queue,
1111 				    &dw->work, wq);
1112 			} else {
1113 				/* Ask it to reschedule.  */
1114 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1115 				dw->dw_resched = MIN(INT_MAX, ticks);
1116 				SDT_PROBE3(sdt, linux, work, schedule,
1117 				    dw, wq, ticks);
1118 			}
1119 			timer_modified = false;
1120 			break;
1121 		default:
1122 			panic("invalid delayed work state: %d", dw->dw_state);
1123 		}
1124 	}
1125 	mutex_exit(&wq->wq_lock);
1126 
1127 	return timer_modified;
1128 }
1129 
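/*
 * Illustrative sketch (editorial addition): mod_delayed_work suits
 * deadman/watchdog style timers, where each event pushes the expiry
 * out regardless of whether the timer was already scheduled.  The
 * "example_*" names are hypothetical.
 *
 *	static void
 *	example_command_submitted(struct example_softc *sc)
 *	{
 *
 *		(void)mod_delayed_work(system_wq, &sc->sc_timeout_dw,
 *		    5 * hz);
 *	}
 */
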
1130 /*
1131  * cancel_delayed_work(dw)
1132  *
1133  *	If work was scheduled or queued, remove it from the schedule or
1134  *	queue and return true.  If work was not scheduled or queued,
1135  *	return false.  Note that work may already be running; if it
1136  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1137  *	will return false, and either way, cancel_delayed_work will NOT
1138  *	wait for the work to complete.
1139  */
1140 bool
1141 cancel_delayed_work(struct delayed_work *dw)
1142 {
1143 	struct workqueue_struct *wq;
1144 	bool cancelled_p;
1145 
1146 	/* If there's no workqueue, nothing to cancel.   */
1147 	if ((wq = work_queue(&dw->work)) == NULL)
1148 		return false;
1149 
1150 	mutex_enter(&wq->wq_lock);
1151 	if (__predict_false(work_queue(&dw->work) != wq)) {
1152 		cancelled_p = false;
1153 	} else {
1154 		switch (dw->dw_state) {
1155 		case DELAYED_WORK_IDLE:
1156 			/*
1157 			 * It is either on the queue or already running
1158 			 * or both.
1159 			 */
1160 			if (work_claimed(&dw->work, wq)) {
1161 				/* On the queue.  Remove and release.  */
1162 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1163 				    work_entry);
1164 				SDT_PROBE2(sdt, linux, work, cancel,
1165 				    &dw->work, wq);
1166 				release_work(&dw->work, wq);
1167 				/* Can't dereference dw after this point.  */
1168 				cancelled_p = true;
1169 			} else {
1170 				/* Not on the queue, so didn't cancel.  */
1171 				cancelled_p = false;
1172 			}
1173 			break;
1174 		case DELAYED_WORK_SCHEDULED:
1175 			/*
1176 			 * If it is scheduled, mark it cancelled and
1177 			 * try to stop the callout before it starts.
1178 			 *
1179 			 * If it's too late and the callout has already
1180 			 * begun to execute, tough.
1181 			 *
1182 			 * If we stopped the callout before it started,
1183 			 * however, then destroy the callout and
1184 			 * dissociate it from the workqueue ourselves.
1185 			 */
1186 			dw->dw_state = DELAYED_WORK_CANCELLED;
1187 			cancelled_p = true;
1188 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1189 			if (!callout_stop(&dw->dw_callout))
1190 				cancel_delayed_work_done(wq, dw);
1191 			break;
1192 		case DELAYED_WORK_RESCHEDULED:
1193 			/*
1194 			 * If it is being rescheduled, the callout has
1195 			 * already fired.  We must ask it to cancel.
1196 			 */
1197 			dw->dw_state = DELAYED_WORK_CANCELLED;
1198 			dw->dw_resched = -1;
1199 			cancelled_p = true;
1200 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1201 			break;
1202 		case DELAYED_WORK_CANCELLED:
1203 			/*
1204 			 * If it is being cancelled, the callout has
1205 			 * already fired.  There is nothing more for us
1206 			 * to do.  Someone else claims credit for
1207 			 * cancelling it.
1208 			 */
1209 			cancelled_p = false;
1210 			break;
1211 		default:
1212 			panic("invalid delayed work state: %d",
1213 			    dw->dw_state);
1214 		}
1215 	}
1216 	mutex_exit(&wq->wq_lock);
1217 
1218 	return cancelled_p;
1219 }
1220 
1221 /*
1222  * cancel_delayed_work_sync(dw)
1223  *
1224  *	If work was scheduled or queued, remove it from the schedule or
1225  *	queue and return true.  If work was not scheduled or queued,
1226  *	return false.  Note that work may already be running; if it
1227 	 *	hasn't been rescheduled or requeued, then cancel_delayed_work_sync
1228  *	will return false; either way, wait for it to complete.
1229  */
1230 bool
1231 cancel_delayed_work_sync(struct delayed_work *dw)
1232 {
1233 	struct workqueue_struct *wq;
1234 	bool cancelled_p;
1235 
1236 	/* If there's no workqueue, nothing to cancel.  */
1237 	if ((wq = work_queue(&dw->work)) == NULL)
1238 		return false;
1239 
1240 	mutex_enter(&wq->wq_lock);
1241 	if (__predict_false(work_queue(&dw->work) != wq)) {
1242 		cancelled_p = false;
1243 	} else {
1244 		switch (dw->dw_state) {
1245 		case DELAYED_WORK_IDLE:
1246 			/*
1247 			 * It is either on the queue or already running
1248 			 * or both.
1249 			 */
1250 			if (work_claimed(&dw->work, wq)) {
1251 				/* On the queue.  Remove and release.  */
1252 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1253 				    work_entry);
1254 				SDT_PROBE2(sdt, linux, work, cancel,
1255 				    &dw->work, wq);
1256 				release_work(&dw->work, wq);
1257 				/* Can't dereference dw after this point.  */
1258 				cancelled_p = true;
1259 			} else {
1260 				/* Not on the queue, so didn't cancel. */
1261 				cancelled_p = false;
1262 			}
1263 			/* If it's still running, wait for it to complete.  */
1264 			if (wq->wq_current_work == &dw->work)
1265 				wait_for_current_work(&dw->work, wq);
1266 			break;
1267 		case DELAYED_WORK_SCHEDULED:
1268 			/*
1269 			 * If it is scheduled, mark it cancelled and
1270 			 * try to stop the callout before it starts.
1271 			 *
1272 			 * If it's too late and the callout has already
1273 			 * begun to execute, we must wait for it to
1274 			 * complete.  But we got in soon enough to ask
1275 			 * the callout not to run, so we successfully
1276 			 * cancelled it in that case.
1277 			 *
1278 			 * If we stopped the callout before it started,
1279 			 * then we must destroy the callout and
1280 			 * dissociate it from the workqueue ourselves.
1281 			 */
1282 			dw->dw_state = DELAYED_WORK_CANCELLED;
1283 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1284 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1285 				cancel_delayed_work_done(wq, dw);
1286 			cancelled_p = true;
1287 			break;
1288 		case DELAYED_WORK_RESCHEDULED:
1289 			/*
1290 			 * If it is being rescheduled, the callout has
1291 			 * already fired.  We must ask it to cancel and
1292 			 * wait for it to complete.
1293 			 */
1294 			dw->dw_state = DELAYED_WORK_CANCELLED;
1295 			dw->dw_resched = -1;
1296 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1297 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1298 			cancelled_p = true;
1299 			break;
1300 		case DELAYED_WORK_CANCELLED:
1301 			/*
1302 			 * If it is being cancelled, the callout has
1303 			 * already fired.  We need only wait for it to
1304 			 * complete.  Someone else, however, claims
1305 			 * credit for cancelling it.
1306 			 */
1307 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1308 			cancelled_p = false;
1309 			break;
1310 		default:
1311 			panic("invalid delayed work state: %d",
1312 			    dw->dw_state);
1313 		}
1314 	}
1315 	mutex_exit(&wq->wq_lock);
1316 
1317 	return cancelled_p;
1318 }
1319 
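/*
 * Illustrative sketch (editorial addition): as with cancel_work_sync,
 * the _sync variant is what teardown paths normally want before
 * freeing the structure embedding the delayed work; the caller must
 * still ensure nothing can reschedule it afterwards.  The "example_*"
 * names are hypothetical.
 *
 *	static void
 *	example_detach(struct example_softc *sc)
 *	{
 *
 *		sc->sc_stopping = true;
 *		(void)cancel_delayed_work_sync(&sc->sc_poll_dw);
 *		kmem_free(sc, sizeof(*sc));
 *	}
 */
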
1320 /*
1321  * Flush
1322  */
1323 
1324 /*
1325  * flush_scheduled_work()
1326  *
1327  *	Wait for all work queued on system_wq to complete.  This does
1328  *	not include delayed work.
1329  */
1330 void
1331 flush_scheduled_work(void)
1332 {
1333 
1334 	flush_workqueue(system_wq);
1335 }
1336 
1337 /*
1338  * flush_workqueue_locked(wq)
1339  *
1340  *	Wait for all work queued on wq to complete.  This does not
1341  *	include delayed work.
1342  *
1343  *	Caller must hold wq's lock.
1344  */
1345 static void
1346 flush_workqueue_locked(struct workqueue_struct *wq)
1347 {
1348 	uint64_t gen;
1349 
1350 	KASSERT(mutex_owned(&wq->wq_lock));
1351 
1352 	/* Get the current generation number.  */
1353 	gen = wq->wq_gen;
1354 
1355 	/*
1356 	 * If there's a batch of work in progress, we must wait for the
1357 	 * worker thread to finish that batch.
1358 	 */
1359 	if (wq->wq_current_work != NULL)
1360 		gen++;
1361 
1362 	/*
1363 	 * If there's any work yet to be claimed from the queue by the
1364 	 * worker thread, we must wait for it to finish one more batch
1365 	 * too.
1366 	 */
1367 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue))
1368 		gen++;
1369 
1370 	/* Wait until the generation number has caught up.  */
1371 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1372 	while (wq->wq_gen < gen)
1373 		cv_wait(&wq->wq_cv, &wq->wq_lock);
1374 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1375 }
1376 
1377 /*
1378  * flush_workqueue(wq)
1379  *
1380  *	Wait for all work queued on wq to complete.  This does not
1381  *	include delayed work.
1382  */
1383 void
1384 flush_workqueue(struct workqueue_struct *wq)
1385 {
1386 
1387 	mutex_enter(&wq->wq_lock);
1388 	flush_workqueue_locked(wq);
1389 	mutex_exit(&wq->wq_lock);
1390 }
1391 
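/*
 * Illustrative sketch (editorial addition): flush_workqueue waits for
 * everything queued so far, which is useful before an operation that
 * must observe the effects of previously queued work, such as
 * suspend.  The "example_*" names are hypothetical.
 *
 *	static bool
 *	example_suspend(struct example_softc *sc)
 *	{
 *
 *		flush_workqueue(sc->sc_wq);
 *		return example_save_state(sc);
 *	}
 */
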
1392 /*
1393  * flush_work(work)
1394  *
1395  *	If work is queued or currently executing, wait for it to
1396  *	complete.
1397  */
1398 void
1399 flush_work(struct work_struct *work)
1400 {
1401 	struct workqueue_struct *wq;
1402 
1403 	/* If there's no workqueue, nothing to flush.  */
1404 	if ((wq = work_queue(work)) == NULL)
1405 		return;
1406 
1407 	flush_workqueue(wq);
1408 }
1409 
1410 /*
1411  * flush_delayed_work(dw)
1412  *
1413  *	If dw is scheduled to run after a delay, queue it immediately
1414  *	instead.  Then, if dw is queued or currently executing, wait
1415  *	for it to complete.
1416  */
1417 void
1418 flush_delayed_work(struct delayed_work *dw)
1419 {
1420 	struct workqueue_struct *wq;
1421 
1422 	/* If there's no workqueue, nothing to flush.  */
1423 	if ((wq = work_queue(&dw->work)) == NULL)
1424 		return;
1425 
1426 	mutex_enter(&wq->wq_lock);
1427 	if (__predict_false(work_queue(&dw->work) != wq)) {
1428 		/*
1429 		 * Moved off the queue already (and possibly to another
1430 		 * queue, though that would be ill-advised), so it must
1431 		 * have completed, and we have nothing more to do.
1432 		 */
1433 	} else {
1434 		switch (dw->dw_state) {
1435 		case DELAYED_WORK_IDLE:
1436 			/*
1437 			 * It has a workqueue assigned and the callout
1438 			 * is idle, so it must be in progress or on the
1439 			 * queue.  In that case, we'll wait for it to
1440 			 * complete.
1441 			 */
1442 			break;
1443 		case DELAYED_WORK_SCHEDULED:
1444 		case DELAYED_WORK_RESCHEDULED:
1445 		case DELAYED_WORK_CANCELLED:
1446 			/*
1447 			 * The callout is scheduled, and may have even
1448 			 * started.  Mark it as scheduled so that if
1449 			 * the callout has fired it will queue the work
1450 			 * itself.  Try to stop the callout -- if we
1451 			 * can, queue the work now; if we can't, wait
1452 			 * for the callout to complete, which entails
1453 			 * queueing it.
1454 			 */
1455 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1456 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1457 				/*
1458 				 * We stopped it before it ran.  No
1459 				 * state change in the interim is
1460 				 * possible.  Destroy the callout and
1461 				 * queue it ourselves.
1462 				 */
1463 				KASSERT(dw->dw_state ==
1464 				    DELAYED_WORK_SCHEDULED);
1465 				dw_callout_destroy(wq, dw);
1466 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1467 				    work_entry);
1468 				cv_broadcast(&wq->wq_cv);
1469 				SDT_PROBE2(sdt, linux, work, queue,
1470 				    &dw->work, wq);
1471 			}
1472 			break;
1473 		default:
1474 			panic("invalid delayed work state: %d", dw->dw_state);
1475 		}
1476 		/*
1477 		 * Waiting for the whole queue to flush is overkill,
1478 		 * but doesn't hurt.
1479 		 */
1480 		flush_workqueue_locked(wq);
1481 	}
1482 	mutex_exit(&wq->wq_lock);
1483 }
1484