1 /*	$NetBSD: linux_work.c,v 1.54 2021/12/19 11:40:05 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.54 2021/12/19 11:40:05 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49 
50 #include <linux/workqueue.h>
51 
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54 
55 struct workqueue_struct {
56 	kmutex_t		wq_lock;
57 	kcondvar_t		wq_cv;
58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
59 	struct work_head	wq_queue;   /* work to run */
60 	struct work_head	wq_dqueue;  /* delayed work to run now */
61 	struct work_struct	*wq_current_work;
62 	int			wq_flags;
63 	bool			wq_dying;
64 	uint64_t		wq_gen;
65 	struct lwp		*wq_lwp;
66 	const char		*wq_name;
67 };
68 
69 static void __dead	linux_workqueue_thread(void *);
70 static void		linux_workqueue_timeout(void *);
71 static bool		work_claimed(struct work_struct *,
72 			    struct workqueue_struct *);
73 static struct workqueue_struct *
74 			work_queue(struct work_struct *);
75 static bool		acquire_work(struct work_struct *,
76 			    struct workqueue_struct *);
77 static void		release_work(struct work_struct *,
78 			    struct workqueue_struct *);
79 static void		wait_for_current_work(struct work_struct *,
80 			    struct workqueue_struct *);
81 static void		dw_callout_init(struct workqueue_struct *,
82 			    struct delayed_work *);
83 static void		dw_callout_destroy(struct workqueue_struct *,
84 			    struct delayed_work *);
85 static void		cancel_delayed_work_done(struct workqueue_struct *,
86 			    struct delayed_work *);
87 
88 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
89     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
90 SDT_PROBE_DEFINE2(sdt, linux, work, release,
91     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
92 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
93     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
94 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
95     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
96 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
97     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
98     "unsigned long"/*ticks*/);
99 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
101 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
102     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
103 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
104     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
105 SDT_PROBE_DEFINE2(sdt, linux, work, run,
106     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
107 SDT_PROBE_DEFINE2(sdt, linux, work, done,
108     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
109 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
110     "struct workqueue_struct *"/*wq*/);
111 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
112     "struct workqueue_struct *"/*wq*/);
113 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
114     "struct workqueue_struct *"/*wq*/);
115 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
116     "struct workqueue_struct *"/*wq*/);
117 
118 static specificdata_key_t workqueue_key __read_mostly;
119 
120 struct workqueue_struct	*system_highpri_wq __read_mostly;
121 struct workqueue_struct	*system_long_wq __read_mostly;
122 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
123 struct workqueue_struct	*system_unbound_wq __read_mostly;
124 struct workqueue_struct	*system_wq __read_mostly;
125 
126 static inline uintptr_t
127 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
128 {
129 
130 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
131 }
132 
133 /*
134  * linux_workqueue_init()
135  *
136  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
137  *	NetBSD error on failure.
138  */
139 static int
140 linux_workqueue_init0(void)
141 {
142 	int error;
143 
144 	error = lwp_specific_key_create(&workqueue_key, NULL);
145 	if (error)
146 		goto out;
147 
148 	system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
149 	if (system_highpri_wq == NULL) {
150 		error = ENOMEM;
151 		goto out;
152 	}
153 
154 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
155 	if (system_long_wq == NULL) {
156 		error = ENOMEM;
157 		goto out;
158 	}
159 
160 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
161 	if (system_power_efficient_wq == NULL) {
162 		error = ENOMEM;
163 		goto out;
164 	}
165 
166 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
167 	if (system_unbound_wq == NULL) {
168 		error = ENOMEM;
169 		goto out;
170 	}
171 
172 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
173 	if (system_wq == NULL) {
174 		error = ENOMEM;
175 		goto out;
176 	}
177 
178 	/* Success!  */
179 	error = 0;
180 
181 out:	if (error) {
182 		if (system_highpri_wq)
183 			destroy_workqueue(system_highpri_wq);
184 		if (system_long_wq)
185 			destroy_workqueue(system_long_wq);
186 		if (system_power_efficient_wq)
187 			destroy_workqueue(system_power_efficient_wq);
188 		if (system_unbound_wq)
189 			destroy_workqueue(system_unbound_wq);
190 		if (system_wq)
191 			destroy_workqueue(system_wq);
192 		if (workqueue_key)
193 			lwp_specific_key_delete(workqueue_key);
194 	}
195 
196 	return error;
197 }
198 
199 /*
200  * linux_workqueue_fini()
201  *
202  *	Destroy the Linux workqueue subsystem.  Never fails.
203  */
204 static void
205 linux_workqueue_fini0(void)
206 {
207 
208 	destroy_workqueue(system_power_efficient_wq);
209 	destroy_workqueue(system_long_wq);
210 	destroy_workqueue(system_wq);
	destroy_workqueue(system_unbound_wq);
	destroy_workqueue(system_highpri_wq);
211 	lwp_specific_key_delete(workqueue_key);
212 }
213 
214 #ifndef _MODULE
215 static ONCE_DECL(linux_workqueue_init_once);
216 #endif
217 
218 int
219 linux_workqueue_init(void)
220 {
221 #ifdef _MODULE
222 	return linux_workqueue_init0();
223 #else
224 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
225 #endif
226 }
227 
228 void
229 linux_workqueue_fini(void)
230 {
231 #ifdef _MODULE
232 	return linux_workqueue_fini0();
233 #else
234 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
235 #endif
236 }
237 
238 /*
239  * Workqueues
240  */
241 
242 /*
243  * alloc_workqueue(name, flags, max_active)
244  *
245  *	Create a workqueue of the given name.  max_active is the
246  *	maximum number of work items in flight, or 0 for the default.
247  *	Return NULL on failure, pointer to struct workqueue_struct
248  *	object on success.
249  */
250 struct workqueue_struct *
251 alloc_workqueue(const char *name, int flags, unsigned max_active)
252 {
253 	struct workqueue_struct *wq;
254 	int error;
255 
256 	KASSERT(max_active == 0 || max_active == 1);
257 
258 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
259 
260 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
261 	cv_init(&wq->wq_cv, name);
262 	TAILQ_INIT(&wq->wq_delayed);
263 	TAILQ_INIT(&wq->wq_queue);
264 	TAILQ_INIT(&wq->wq_dqueue);
265 	wq->wq_current_work = NULL;
266 	wq->wq_flags = 0;
267 	wq->wq_dying = false;
268 	wq->wq_gen = 0;
269 	wq->wq_lwp = NULL;
270 	wq->wq_name = name;
271 
272 	error = kthread_create(PRI_NONE,
273 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
274 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
275 	if (error)
276 		goto fail0;
277 
278 	return wq;
279 
280 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
281 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
282 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
283 	cv_destroy(&wq->wq_cv);
284 	mutex_destroy(&wq->wq_lock);
285 	kmem_free(wq, sizeof(*wq));
286 	return NULL;
287 }
288 
289 /*
290  * alloc_ordered_workqueue(name, flags)
291  *
292  *	Same as alloc_workqueue(name, flags, 1).
293  */
294 struct workqueue_struct *
295 alloc_ordered_workqueue(const char *name, int flags)
296 {
297 
298 	return alloc_workqueue(name, flags, 1);
299 }
300 
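/*
 * Illustrative example (hypothetical consumer code; mydev_softc,
 * mydev_attach, and mydev_detach are made-up names): a driver
 * typically creates a private ordered workqueue at attach time and
 * tears it down, along with any work still on it, at detach time.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct workqueue_struct	*sc_wq;
};

static int
mydev_attach(struct mydev_softc *sc)
{

	sc->sc_wq = alloc_ordered_workqueue("mydevwq", 0);
	if (sc->sc_wq == NULL)
		return ENOMEM;
	return 0;
}

static void
mydev_detach(struct mydev_softc *sc)
{

	/* Cancels pending delayed work and waits for queued work.  */
	destroy_workqueue(sc->sc_wq);
	sc->sc_wq = NULL;
}
#endif	/* illustrative example */
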
301 /*
302  * destroy_workqueue(wq)
303  *
304  *	Destroy a workqueue created with alloc_workqueue.  Cancel any
305  *	pending delayed work.  Wait for all queued work to complete.
306  *
307  *	May sleep.
308  */
309 void
310 destroy_workqueue(struct workqueue_struct *wq)
311 {
312 
313 	/*
314 	 * Cancel all delayed work.  We do this first because any
315 	 * delayed work that has already timed out, which we can't
316 	 * cancel, may have queued new work.
317 	 */
318 	mutex_enter(&wq->wq_lock);
319 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
320 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
321 
322 		KASSERT(work_queue(&dw->work) == wq);
323 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
324 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
325 			dw->dw_state == DELAYED_WORK_CANCELLED),
326 		    "delayed work %p in bad state: %d",
327 		    dw, dw->dw_state);
328 
329 		/*
330 		 * Mark it cancelled and try to stop the callout before
331 		 * it starts.
332 		 *
333 		 * If it's too late and the callout has already begun
334 		 * to execute, then it will notice that we asked to
335 		 * cancel it and remove itself from the queue before
336 		 * returning.
337 		 *
338 		 * If we stopped the callout before it started,
339 		 * however, then we can safely destroy the callout and
340 		 * dissociate it from the workqueue ourselves.
341 		 */
342 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
343 		dw->dw_state = DELAYED_WORK_CANCELLED;
344 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
345 			cancel_delayed_work_done(wq, dw);
346 	}
347 	mutex_exit(&wq->wq_lock);
348 
349 	/*
350 	 * At this point, no new work can be put on the queue.
351 	 */
352 
353 	/* Tell the thread to exit.  */
354 	mutex_enter(&wq->wq_lock);
355 	wq->wq_dying = true;
356 	cv_broadcast(&wq->wq_cv);
357 	mutex_exit(&wq->wq_lock);
358 
359 	/* Wait for it to exit.  */
360 	(void)kthread_join(wq->wq_lwp);
361 
362 	KASSERT(wq->wq_dying);
363 	KASSERT(wq->wq_flags == 0);
364 	KASSERT(wq->wq_current_work == NULL);
365 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
366 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
367 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
368 	cv_destroy(&wq->wq_cv);
369 	mutex_destroy(&wq->wq_lock);
370 
371 	kmem_free(wq, sizeof(*wq));
372 }
373 
374 /*
375  * Work thread and callout
376  */
377 
378 /*
379  * linux_workqueue_thread(cookie)
380  *
381  *	Main function for a workqueue's worker thread.  Waits until
382  *	there is work queued, grabs a batch of work off the queue,
383  *	executes it all, bumps the generation number, and repeats,
384  *	until dying.
385  */
386 static void __dead
387 linux_workqueue_thread(void *cookie)
388 {
389 	struct workqueue_struct *const wq = cookie;
390 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
391 	struct work_struct marker, *work;
392 	unsigned i;
393 
394 	lwp_setspecific(workqueue_key, wq);
395 
396 	mutex_enter(&wq->wq_lock);
397 	for (;;) {
398 		/*
399 		 * Wait until there's activity.  If there's no work and
400 		 * we're dying, stop here.
401 		 */
402 		if (TAILQ_EMPTY(&wq->wq_queue) &&
403 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
404 			if (wq->wq_dying)
405 				break;
406 			cv_wait(&wq->wq_cv, &wq->wq_lock);
407 			continue;
408 		}
409 
410 		/*
411 		 * Start a batch of work.  Use a marker to delimit when
412 		 * the batch ends so we can advance the generation
413 		 * after the batch.
414 		 */
415 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
416 		for (i = 0; i < 2; i++) {
417 			if (TAILQ_EMPTY(q[i]))
418 				continue;
419 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
420 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
421 				void (*func)(struct work_struct *);
422 
423 				KASSERT(work_queue(work) == wq);
424 				KASSERT(work_claimed(work, wq));
425 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
426 					container_of(work, struct delayed_work,
427 					    work)->dw_state ==
428 					DELAYED_WORK_IDLE),
429 				    "delayed work %p queued and scheduled",
430 				    work);
431 
432 				TAILQ_REMOVE(q[i], work, work_entry);
433 				KASSERT(wq->wq_current_work == NULL);
434 				wq->wq_current_work = work;
435 				func = work->func;
436 				release_work(work, wq);
437 				/* Can't dereference work after this point.  */
438 
439 				mutex_exit(&wq->wq_lock);
440 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
441 				(*func)(work);
442 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
443 				mutex_enter(&wq->wq_lock);
444 
445 				KASSERT(wq->wq_current_work == work);
446 				wq->wq_current_work = NULL;
447 				cv_broadcast(&wq->wq_cv);
448 			}
449 			TAILQ_REMOVE(q[i], &marker, work_entry);
450 		}
451 
452 		/* Notify flush that we've completed a batch of work.  */
453 		wq->wq_gen++;
454 		cv_broadcast(&wq->wq_cv);
455 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
456 	}
457 	mutex_exit(&wq->wq_lock);
458 
459 	kthread_exit(0);
460 }
461 
462 /*
463  * linux_workqueue_timeout(cookie)
464  *
465  *	Delayed work timeout callback.
466  *
467  *	- If scheduled, queue it.
468  *	- If rescheduled, callout_schedule ourselves again.
469  *	- If cancelled, destroy the callout and release the work from
470  *	  the workqueue.
471  */
472 static void
473 linux_workqueue_timeout(void *cookie)
474 {
475 	struct delayed_work *const dw = cookie;
476 	struct workqueue_struct *const wq = work_queue(&dw->work);
477 
478 	KASSERTMSG(wq != NULL,
479 	    "delayed work %p state %d resched %d",
480 	    dw, dw->dw_state, dw->dw_resched);
481 
482 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
483 
484 	mutex_enter(&wq->wq_lock);
485 	KASSERT(work_queue(&dw->work) == wq);
486 	switch (dw->dw_state) {
487 	case DELAYED_WORK_IDLE:
488 		panic("delayed work callout uninitialized: %p", dw);
489 	case DELAYED_WORK_SCHEDULED:
490 		dw_callout_destroy(wq, dw);
491 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
492 		cv_broadcast(&wq->wq_cv);
493 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
494 		break;
495 	case DELAYED_WORK_RESCHEDULED:
496 		KASSERT(dw->dw_resched >= 0);
497 		callout_schedule(&dw->dw_callout, dw->dw_resched);
498 		dw->dw_state = DELAYED_WORK_SCHEDULED;
499 		dw->dw_resched = -1;
500 		break;
501 	case DELAYED_WORK_CANCELLED:
502 		cancel_delayed_work_done(wq, dw);
503 		/* Can't dereference dw after this point.  */
504 		goto out;
505 	default:
506 		panic("delayed work callout in bad state: %p", dw);
507 	}
508 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
509 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
510 out:	mutex_exit(&wq->wq_lock);
511 }
512 
513 /*
514  * current_work()
515  *
516  *	If in a workqueue worker thread, return the work it is
517  *	currently executing.  Otherwise return NULL.
518  */
519 struct work_struct *
520 current_work(void)
521 {
522 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
523 
524 	/* If we're not a workqueue thread, then there's no work.  */
525 	if (wq == NULL)
526 		return NULL;
527 
528 	/*
529 	 * Otherwise, this should be possible only while work is in
530 	 * progress.  Return the current work item.
531 	 */
532 	KASSERT(wq->wq_current_work != NULL);
533 	return wq->wq_current_work;
534 }
535 
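/*
 * Illustrative example (hypothetical code; the mydev_* names are made
 * up): current_work() is useful for asserting that a path which waits
 * for a work item is not itself running from that work item, which
 * would deadlock.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct work_struct	sc_work;
};

static void
mydev_quiesce(struct mydev_softc *sc)
{

	/* Waiting for our own work item from inside it would hang.  */
	KASSERT(current_work() != &sc->sc_work);
	(void)cancel_work_sync(&sc->sc_work);
}
#endif	/* illustrative example */
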
536 /*
537  * Work
538  */
539 
540 /*
541  * INIT_WORK(work, fn)
542  *
543  *	Initialize work for use with a workqueue to call fn in a worker
544  *	thread.  There is no corresponding destruction operation.
545  */
546 void
547 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
548 {
549 
550 	work->work_owner = 0;
551 	work->func = fn;
552 }
553 
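/*
 * Illustrative example (hypothetical code; mydev_softc and mydev_task
 * are made-up names): a work_struct is normally embedded in a larger
 * structure, initialized once with INIT_WORK, and recovered in the
 * handler with container_of.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct work_struct	sc_work;
	unsigned		sc_events;
};

static void
mydev_task(struct work_struct *work)
{
	struct mydev_softc *sc = container_of(work, struct mydev_softc,
	    sc_work);

	/* Runs in the workqueue's worker thread, so it may sleep.  */
	sc->sc_events = 0;
}

static void
mydev_init(struct mydev_softc *sc)
{

	INIT_WORK(&sc->sc_work, mydev_task);
}
#endif	/* illustrative example */
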
554 /*
555  * work_claimed(work, wq)
556  *
557  *	True if work is currently claimed by a workqueue, meaning it is
558  *	either on the queue or scheduled in a callout.  The workqueue
559  *	must be wq, and caller must hold wq's lock.
560  */
561 static bool
562 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
563 {
564 
565 	KASSERT(work_queue(work) == wq);
566 	KASSERT(mutex_owned(&wq->wq_lock));
567 
568 	return atomic_load_relaxed(&work->work_owner) & 1;
569 }
570 
571 /*
572  * work_pending(work)
573  *
574  *	True if work is currently claimed by any workqueue, scheduled
575  *	to run on that workqueue.
576  */
577 bool
578 work_pending(const struct work_struct *work)
579 {
580 
581 	return atomic_load_relaxed(&work->work_owner) & 1;
582 }
583 
584 /*
585  * work_queue(work)
586  *
587  *	Return the last queue that work was queued on, or NULL if it
588  *	was never queued.
589  */
590 static struct workqueue_struct *
591 work_queue(struct work_struct *work)
592 {
593 
594 	return (struct workqueue_struct *)
595 	    (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
596 }
597 
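/*
 * Illustrative sketch (not a real consumer; work_owner_example is a
 * made-up name): work_owner is a tagged pointer, the owning
 * workqueue_struct address with bit 0 set while the work is claimed,
 * i.e. queued or scheduled in a callout.
 */
#if 0	/* illustrative example, not compiled */
static void
work_owner_example(struct work_struct *work, struct workqueue_struct *wq)
{
	uintptr_t owner = atomic_load_relaxed(&work->work_owner);

	/* The pointer bits name the last owner, or NULL if never queued.  */
	KASSERT((owner & ~(uintptr_t)1) == (uintptr_t)wq ||
	    (owner & ~(uintptr_t)1) == 0);
	if (owner & 1) {
		/* Claimed: on wq_queue/wq_dqueue or ticking in a callout.  */
	}
}
#endif	/* illustrative example */
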
598 /*
599  * acquire_work(work, wq)
600  *
601  *	Try to claim work for wq.  If work is already claimed, it must
602  *	be claimed by wq; return false.  If work is not already
603  *	claimed, claim it, issue a memory barrier to match any prior
604  *	release_work, and return true.
605  *
606  *	Caller must hold wq's lock.
607  */
608 static bool
609 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
610 {
611 	uintptr_t owner0, owner;
612 
613 	KASSERT(mutex_owned(&wq->wq_lock));
614 	KASSERT(((uintptr_t)wq & 1) == 0);
615 
616 	owner = (uintptr_t)wq | 1;
617 	do {
618 		owner0 = atomic_load_relaxed(&work->work_owner);
619 		if (owner0 & 1) {
620 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
621 			return false;
622 		}
623 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
624 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
625 	    owner0);
626 
627 	KASSERT(work_queue(work) == wq);
628 	membar_enter();
629 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
630 	return true;
631 }
632 
633 /*
634  * release_work(work, wq)
635  *
636  *	Issue a memory barrier to match any subsequent acquire_work and
637  *	dissociate work from wq.
638  *
639  *	Caller must hold wq's lock and work must be associated with wq.
640  */
641 static void
642 release_work(struct work_struct *work, struct workqueue_struct *wq)
643 {
644 
645 	KASSERT(work_queue(work) == wq);
646 	KASSERT(mutex_owned(&wq->wq_lock));
647 
648 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
649 	membar_exit();
650 
651 	/*
652 	 * Non-interlocked r/m/w is safe here because nobody else can
653 	 * write to this while the claimed bit is set and the workqueue
654 	 * lock is held.
655 	 */
656 	atomic_store_relaxed(&work->work_owner,
657 	    atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
658 }
659 
660 /*
661  * schedule_work(work)
662  *
663  *	If work is not already queued on system_wq, queue it to be run
664  *	by system_wq's worker thread when it next can.  True if it was
665  *	newly queued, false if it was already queued.  If the work was
666  *	already running, queue it to run again.
667  *
668  *	Caller must ensure work is not queued to run on a different
669  *	workqueue.
670  */
671 bool
672 schedule_work(struct work_struct *work)
673 {
674 
675 	return queue_work(system_wq, work);
676 }
677 
678 /*
679  * queue_work(wq, work)
680  *
681  *	If work is not already queued on wq, queue it to be run by wq's
682  *	worker thread when it next can.  True if it was newly queued,
683  *	false if it was already queued.  If the work was already
684  *	running, queue it to run again.
685  *
686  *	Caller must ensure work is not queued to run on a different
687  *	workqueue.
688  */
689 bool
690 queue_work(struct workqueue_struct *wq, struct work_struct *work)
691 {
692 	bool newly_queued;
693 
694 	KASSERT(wq != NULL);
695 
696 	mutex_enter(&wq->wq_lock);
697 	if (__predict_true(acquire_work(work, wq))) {
698 		/*
699 		 * It wasn't on any workqueue at all.  Put it on this
700 		 * one, and signal the worker thread that there is work
701 		 * to do.
702 		 */
703 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
704 		cv_broadcast(&wq->wq_cv);
705 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
706 		newly_queued = true;
707 	} else {
708 		/*
709 		 * It was already on this workqueue.  Nothing to do
710 		 * since it is already queued.
711 		 */
712 		newly_queued = false;
713 	}
714 	mutex_exit(&wq->wq_lock);
715 
716 	return newly_queued;
717 }
718 
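/*
 * Illustrative example (hypothetical code; the mydev_* names are made
 * up): queue_work only takes the workqueue's spin lock (IPL_VM), so it
 * can be called from interrupt handlers at or below that priority; a
 * false return simply means the handler was already queued and will
 * see the new state anyway.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct workqueue_struct	*sc_wq;
	struct work_struct	sc_work;
};

static int
mydev_intr(void *cookie)
{
	struct mydev_softc *sc = cookie;

	/* Record the event in the softc, then kick the worker.  */
	(void)queue_work(sc->sc_wq, &sc->sc_work);
	return 1;		/* handled */
}
#endif	/* illustrative example */
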
719 /*
720  * cancel_work(work)
721  *
722  *	If work was queued, remove it from the queue and return true.
723  *	If work was not queued, return false.  Work may still be
724  *	running when this returns.
725  */
726 bool
727 cancel_work(struct work_struct *work)
728 {
729 	struct workqueue_struct *wq;
730 	bool cancelled_p = false;
731 
732 	/* If there's no workqueue, nothing to cancel.   */
733 	if ((wq = work_queue(work)) == NULL)
734 		goto out;
735 
736 	mutex_enter(&wq->wq_lock);
737 	if (__predict_false(work_queue(work) != wq)) {
738 		/*
739 		 * It has finished execution or been cancelled by
740 		 * another thread, and has been moved off the
741 		 * workqueue, so it's too late to cancel.
742 		 */
743 		cancelled_p = false;
744 	} else {
745 		/* Check whether it's on the queue.  */
746 		if (work_claimed(work, wq)) {
747 			/*
748 			 * It is still on the queue.  Take it off the
749 			 * queue and report successful cancellation.
750 			 */
751 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
752 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
753 			release_work(work, wq);
754 			/* Can't dereference work after this point.  */
755 			cancelled_p = true;
756 		} else {
757 			/* Not on the queue.  Couldn't cancel it.  */
758 			cancelled_p = false;
759 		}
760 	}
761 	mutex_exit(&wq->wq_lock);
762 
763 out:	return cancelled_p;
764 }
765 
766 /*
767  * cancel_work_sync(work)
768  *
769  *	If work was queued, remove it from the queue and return true.
770  *	If work was not queued, return false.  Either way, if work is
771  *	currently running, wait for it to complete.
772  *
773  *	May sleep.
774  */
775 bool
776 cancel_work_sync(struct work_struct *work)
777 {
778 	struct workqueue_struct *wq;
779 	bool cancelled_p = false;
780 
781 	/* If there's no workqueue, nothing to cancel.   */
782 	if ((wq = work_queue(work)) == NULL)
783 		goto out;
784 
785 	mutex_enter(&wq->wq_lock);
786 	if (__predict_false(work_queue(work) != wq)) {
787 		/*
788 		 * It has finished execution or been cancelled by
789 		 * another thread, and has been moved off the
790 		 * workqueue, so it's too late to cancel.
791 		 */
792 		cancelled_p = false;
793 	} else {
794 		/* Check whether it's on the queue.  */
795 		if (work_claimed(work, wq)) {
796 			/*
797 			 * It is still on the queue.  Take it off the
798 			 * queue and report successful cancellation.
799 			 */
800 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
801 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
802 			release_work(work, wq);
803 			/* Can't dereference work after this point.  */
804 			cancelled_p = true;
805 		} else {
806 			/* Not on the queue.  Couldn't cancel it.  */
807 			cancelled_p = false;
808 		}
809 		/* If it's still running, wait for it to complete.  */
810 		if (wq->wq_current_work == work)
811 			wait_for_current_work(work, wq);
812 	}
813 	mutex_exit(&wq->wq_lock);
814 
815 out:	return cancelled_p;
816 }
817 
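/*
 * Illustrative example (hypothetical code; the mydev_* names are made
 * up): before freeing a structure that embeds a work_struct, use
 * cancel_work_sync so the handler can neither be queued nor still
 * running when the memory goes away (assuming nothing requeues it).
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct work_struct	sc_work;
};

static void
mydev_free(struct mydev_softc *sc)
{

	/* Dequeues sc_work if pending and waits out a running call.  */
	(void)cancel_work_sync(&sc->sc_work);
	kmem_free(sc, sizeof(*sc));
}
#endif	/* illustrative example */
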
818 /*
819  * wait_for_current_work(work, wq)
820  *
821  *	wq must be currently executing work.  Wait for it to finish.
822  *
823  *	Does not dereference work.
824  */
825 static void
826 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
827 {
828 	uint64_t gen;
829 
830 	KASSERT(mutex_owned(&wq->wq_lock));
831 	KASSERT(wq->wq_current_work == work);
832 
833 	/* Wait only one generation in case it gets requeued quickly.  */
834 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
835 	gen = wq->wq_gen;
836 	do {
837 		cv_wait(&wq->wq_cv, &wq->wq_lock);
838 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
839 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
840 }
841 
842 /*
843  * Delayed work
844  */
845 
846 /*
847  * INIT_DELAYED_WORK(dw, fn)
848  *
849  *	Initialize dw for use with a workqueue to call fn in a worker
850  *	thread after a delay.  There is no corresponding destruction
851  *	operation.
852  */
853 void
854 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
855 {
856 
857 	INIT_WORK(&dw->work, fn);
858 	dw->dw_state = DELAYED_WORK_IDLE;
859 	dw->dw_resched = -1;
860 
861 	/*
862 	 * Defer callout_init until we are going to schedule the
863 	 * callout, so that the same machinery can callout_destroy it
864 	 * later; since there's no DESTROY_DELAYED_WORK or anything like
865 	 * it, we otherwise have no opportunity to call callout_destroy.
866 	 */
867 }
868 
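/*
 * Illustrative example (hypothetical code; the mydev_* names are made
 * up): the handler of a delayed work still receives the embedded
 * work_struct, so it uses container_of twice, once to the
 * delayed_work and once to the enclosing structure.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct delayed_work	sc_tick;
};

static void
mydev_tick(struct work_struct *work)
{
	struct delayed_work *const dw = container_of(work,
	    struct delayed_work, work);
	struct mydev_softc *const sc = container_of(dw,
	    struct mydev_softc, sc_tick);

	/* Periodic housekeeping in thread context.  */
	(void)sc;
}

static void
mydev_init(struct mydev_softc *sc)
{

	INIT_DELAYED_WORK(&sc->sc_tick, mydev_tick);
}
#endif	/* illustrative example */
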
869 /*
870  * schedule_delayed_work(dw, ticks)
871  *
872  *	If it is not currently scheduled, schedule dw to run after
873  *	ticks on system_wq.  If currently executing and not already
874  *	rescheduled, reschedule it.  True if it was newly scheduled,
875  *	false if it was already scheduled.
876  *
877  *	If ticks == 0, queue it to run as soon as the worker can,
878  *	without waiting for the next callout tick to run.
879  */
880 bool
881 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
882 {
883 
884 	return queue_delayed_work(system_wq, dw, ticks);
885 }
886 
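/*
 * Illustrative example (hypothetical code continuing the mydev_tick
 * sketch above): a handler can re-arm itself to get a periodic tick.
 * In this implementation the delay is handed to callout_schedule, so
 * it is in hardclock ticks and hz is roughly one second.
 */
#if 0	/* illustrative example, not compiled */
static void
mydev_tick(struct work_struct *work)
{
	struct delayed_work *const dw = container_of(work,
	    struct delayed_work, work);

	/* ... do the periodic part ... */

	/* Run again in about a second.  */
	(void)schedule_delayed_work(dw, hz);
}
#endif	/* illustrative example */
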
887 /*
888  * dw_callout_init(wq, dw)
889  *
890  *	Initialize the callout of dw and transition to
891  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
892  */
893 static void
894 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
895 {
896 
897 	KASSERT(mutex_owned(&wq->wq_lock));
898 	KASSERT(work_queue(&dw->work) == wq);
899 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
900 
901 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
902 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
903 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
904 	dw->dw_state = DELAYED_WORK_SCHEDULED;
905 }
906 
907 /*
908  * dw_callout_destroy(wq, dw)
909  *
910  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
911  */
912 static void
913 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
914 {
915 
916 	KASSERT(mutex_owned(&wq->wq_lock));
917 	KASSERT(work_queue(&dw->work) == wq);
918 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
919 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
920 	    dw->dw_state == DELAYED_WORK_CANCELLED);
921 
922 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
923 	callout_destroy(&dw->dw_callout);
924 	dw->dw_resched = -1;
925 	dw->dw_state = DELAYED_WORK_IDLE;
926 }
927 
928 /*
929  * cancel_delayed_work_done(wq, dw)
930  *
931  *	Complete cancellation of a delayed work: transition from
932  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
933  *	workqueue.  Caller must not dereference dw after this returns.
934  */
935 static void
936 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
937 {
938 
939 	KASSERT(mutex_owned(&wq->wq_lock));
940 	KASSERT(work_queue(&dw->work) == wq);
941 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
942 
943 	dw_callout_destroy(wq, dw);
944 	release_work(&dw->work, wq);
945 	/* Can't dereference dw after this point.  */
946 }
947 
948 /*
949  * queue_delayed_work(wq, dw, ticks)
950  *
951  *	If it is not currently scheduled, schedule dw to run after
952  *	ticks on wq.  If it is already scheduled or queued on wq, it
953  *	is left unchanged.  True if newly scheduled, false if not.
954  *
955  *	If ticks == 0, queue it to run as soon as the worker can,
956  *	without waiting for the next callout tick to run.
957  */
958 bool
959 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
960     unsigned long ticks)
961 {
962 	bool newly_queued;
963 
964 	mutex_enter(&wq->wq_lock);
965 	if (__predict_true(acquire_work(&dw->work, wq))) {
966 		/*
967 		 * It wasn't on any workqueue at all.  Schedule it to
968 		 * run on this one.
969 		 */
970 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
971 		if (ticks == 0) {
972 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
973 			    work_entry);
974 			cv_broadcast(&wq->wq_cv);
975 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
976 		} else {
977 			/*
978 			 * Initialize a callout and schedule to run
979 			 * after a delay.
980 			 */
981 			dw_callout_init(wq, dw);
982 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
983 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
984 		}
985 		newly_queued = true;
986 	} else {
987 		/* It was already on this workqueue.  */
988 		switch (dw->dw_state) {
989 		case DELAYED_WORK_IDLE:
990 		case DELAYED_WORK_SCHEDULED:
991 		case DELAYED_WORK_RESCHEDULED:
992 			/* On the queue or already scheduled.  Leave it.  */
993 			newly_queued = false;
994 			break;
995 		case DELAYED_WORK_CANCELLED:
996 			/*
997 			 * Scheduled and the callout began, but it was
998 			 * cancelled.  Reschedule it.
999 			 */
1000 			if (ticks == 0) {
1001 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1002 				SDT_PROBE2(sdt, linux, work, queue,
1003 				    &dw->work, wq);
1004 			} else {
1005 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1006 				dw->dw_resched = MIN(INT_MAX, ticks);
1007 				SDT_PROBE3(sdt, linux, work, schedule,
1008 				    dw, wq, ticks);
1009 			}
1010 			newly_queued = true;
1011 			break;
1012 		default:
1013 			panic("invalid delayed work state: %d",
1014 			    dw->dw_state);
1015 		}
1016 	}
1017 	mutex_exit(&wq->wq_lock);
1018 
1019 	return newly_queued;
1020 }
1021 
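/*
 * Illustrative example (hypothetical code; the mydev_* names and the
 * delay are made up): because a call while the work is already
 * scheduled or queued leaves it alone, queue_delayed_work is a cheap
 * way to debounce a burst of events into a single handler run.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct workqueue_struct	*sc_wq;
	struct delayed_work	sc_debounce;
};

static void
mydev_event(struct mydev_softc *sc)
{

	/* Many calls in quick succession yield one run ~hz/10 later.  */
	(void)queue_delayed_work(sc->sc_wq, &sc->sc_debounce, hz / 10);
}
#endif	/* illustrative example */
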
1022 /*
1023  * mod_delayed_work(wq, dw, ticks)
1024  *
1025  *	Schedule dw to run after ticks.  If scheduled or queued,
1026  *	reschedule.  If ticks == 0, run without delay.
1027  *
1028  *	True if it modified the timer of an already scheduled work,
1029  *	false if it newly scheduled the work.
1030  */
1031 bool
1032 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1033     unsigned long ticks)
1034 {
1035 	bool timer_modified;
1036 
1037 	mutex_enter(&wq->wq_lock);
1038 	if (acquire_work(&dw->work, wq)) {
1039 		/*
1040 		 * It wasn't on any workqueue at all.  Schedule it to
1041 		 * run on this one.
1042 		 */
1043 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1044 		if (ticks == 0) {
1045 			/*
1046 			 * Run immediately: put it on the queue and
1047 			 * signal the worker thread.
1048 			 */
1049 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1050 			    work_entry);
1051 			cv_broadcast(&wq->wq_cv);
1052 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
1053 		} else {
1054 			/*
1055 			 * Initialize a callout and schedule to run
1056 			 * after a delay.
1057 			 */
1058 			dw_callout_init(wq, dw);
1059 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1060 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
1061 		}
1062 		timer_modified = false;
1063 	} else {
1064 		/* It was already on this workqueue.  */
1065 		switch (dw->dw_state) {
1066 		case DELAYED_WORK_IDLE:
1067 			/* On the queue.  */
1068 			if (ticks == 0) {
1069 				/* Leave it be.  */
1070 				SDT_PROBE2(sdt, linux, work, cancel,
1071 				    &dw->work, wq);
1072 				SDT_PROBE2(sdt, linux, work, queue,
1073 				    &dw->work, wq);
1074 			} else {
1075 				/* Remove from the queue and schedule.  */
1076 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1077 				    work_entry);
1078 				dw_callout_init(wq, dw);
1079 				callout_schedule(&dw->dw_callout,
1080 				    MIN(INT_MAX, ticks));
1081 				SDT_PROBE2(sdt, linux, work, cancel,
1082 				    &dw->work, wq);
1083 				SDT_PROBE3(sdt, linux, work, schedule,
1084 				    dw, wq, ticks);
1085 			}
1086 			timer_modified = true;
1087 			break;
1088 		case DELAYED_WORK_SCHEDULED:
1089 			/*
1090 			 * It is scheduled to run after a delay.  Try
1091 			 * to stop it and reschedule it; if we can't,
1092 			 * either reschedule it or cancel it to put it
1093 			 * on the queue, and inform the callout.
1094 			 */
1095 			if (callout_stop(&dw->dw_callout)) {
1096 				/* Can't stop, callout has begun.  */
1097 				if (ticks == 0) {
1098 					/*
1099 					 * We don't actually need to do
1100 					 * anything.  The callout will
1101 					 * queue it as soon as it gets
1102 					 * the lock.
1103 					 */
1104 					SDT_PROBE2(sdt, linux, work, cancel,
1105 					    &dw->work, wq);
1106 					SDT_PROBE2(sdt, linux, work, queue,
1107 					    &dw->work, wq);
1108 				} else {
1109 					/* Ask the callout to reschedule.  */
1110 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1111 					dw->dw_resched = MIN(INT_MAX, ticks);
1112 					SDT_PROBE2(sdt, linux, work, cancel,
1113 					    &dw->work, wq);
1114 					SDT_PROBE3(sdt, linux, work, schedule,
1115 					    dw, wq, ticks);
1116 				}
1117 			} else {
1118 				/* We stopped the callout before it began.  */
1119 				if (ticks == 0) {
1120 					/*
1121 					 * Run immediately: destroy the
1122 					 * callout, put it on the
1123 					 * queue, and signal the worker
1124 					 * thread.
1125 					 */
1126 					dw_callout_destroy(wq, dw);
1127 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1128 					    &dw->work, work_entry);
1129 					cv_broadcast(&wq->wq_cv);
1130 					SDT_PROBE2(sdt, linux, work, cancel,
1131 					    &dw->work, wq);
1132 					SDT_PROBE2(sdt, linux, work, queue,
1133 					    &dw->work, wq);
1134 				} else {
1135 					/*
1136 					 * Reschedule the callout.  No
1137 					 * state change.
1138 					 */
1139 					callout_schedule(&dw->dw_callout,
1140 					    MIN(INT_MAX, ticks));
1141 					SDT_PROBE2(sdt, linux, work, cancel,
1142 					    &dw->work, wq);
1143 					SDT_PROBE3(sdt, linux, work, schedule,
1144 					    dw, wq, ticks);
1145 				}
1146 			}
1147 			timer_modified = true;
1148 			break;
1149 		case DELAYED_WORK_RESCHEDULED:
1150 			/*
1151 			 * Someone rescheduled it after the callout
1152 			 * started but before the poor thing even had a
1153 			 * chance to acquire the lock.
1154 			 */
1155 			if (ticks == 0) {
1156 				/*
1157 				 * We can just switch back to
1158 				 * DELAYED_WORK_SCHEDULED so that the
1159 				 * callout will queue the work as soon
1160 				 * as it gets the lock.
1161 				 */
1162 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1163 				dw->dw_resched = -1;
1164 				SDT_PROBE2(sdt, linux, work, cancel,
1165 				    &dw->work, wq);
1166 				SDT_PROBE2(sdt, linux, work, queue,
1167 				    &dw->work, wq);
1168 			} else {
1169 				/* Change the rescheduled time.  */
1170 				dw->dw_resched = ticks;
1171 				SDT_PROBE2(sdt, linux, work, cancel,
1172 				    &dw->work, wq);
1173 				SDT_PROBE3(sdt, linux, work, schedule,
1174 				    dw, wq, ticks);
1175 			}
1176 			timer_modified = true;
1177 			break;
1178 		case DELAYED_WORK_CANCELLED:
1179 			/*
1180 			 * Someone cancelled it after the callout
1181 			 * started but before the poor thing even had a
1182 			 * chance to acquire the lock.
1183 			 */
1184 			if (ticks == 0) {
1185 				/*
1186 				 * We can just switch back to
1187 				 * DELAYED_WORK_SCHEDULED so that the
1188 				 * callout will queue the work as soon
1189 				 * as it gets the lock.
1190 				 */
1191 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1192 				SDT_PROBE2(sdt, linux, work, queue,
1193 				    &dw->work, wq);
1194 			} else {
1195 				/* Ask it to reschedule.  */
1196 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1197 				dw->dw_resched = MIN(INT_MAX, ticks);
1198 				SDT_PROBE3(sdt, linux, work, schedule,
1199 				    dw, wq, ticks);
1200 			}
1201 			timer_modified = false;
1202 			break;
1203 		default:
1204 			panic("invalid delayed work state: %d", dw->dw_state);
1205 		}
1206 	}
1207 	mutex_exit(&wq->wq_lock);
1208 
1209 	return timer_modified;
1210 }
1211 
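/*
 * Illustrative example (hypothetical code; the mydev_* names and the
 * timeout are made up): mod_delayed_work restarts the timer whether or
 * not the work was already scheduled, which is the usual shape of a
 * watchdog that gets pushed out every time progress is made.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct workqueue_struct	*sc_wq;
	struct delayed_work	sc_watchdog;
};

static void
mydev_progress(struct mydev_softc *sc)
{

	/* sc_watchdog fires only if no progress for about 5 seconds.  */
	(void)mod_delayed_work(sc->sc_wq, &sc->sc_watchdog, 5 * hz);
}
#endif	/* illustrative example */
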
1212 /*
1213  * cancel_delayed_work(dw)
1214  *
1215  *	If work was scheduled or queued, remove it from the schedule or
1216  *	queue and return true.  If work was not scheduled or queued,
1217  *	return false.  Note that work may already be running; if it
1218  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1219  *	will return false, and either way, cancel_delayed_work will NOT
1220  *	wait for the work to complete.
1221  */
1222 bool
1223 cancel_delayed_work(struct delayed_work *dw)
1224 {
1225 	struct workqueue_struct *wq;
1226 	bool cancelled_p;
1227 
1228 	/* If there's no workqueue, nothing to cancel.   */
1229 	if ((wq = work_queue(&dw->work)) == NULL)
1230 		return false;
1231 
1232 	mutex_enter(&wq->wq_lock);
1233 	if (__predict_false(work_queue(&dw->work) != wq)) {
1234 		cancelled_p = false;
1235 	} else {
1236 		switch (dw->dw_state) {
1237 		case DELAYED_WORK_IDLE:
1238 			/*
1239 			 * It is either on the queue or already running
1240 			 * or both.
1241 			 */
1242 			if (work_claimed(&dw->work, wq)) {
1243 				/* On the queue.  Remove and release.  */
1244 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1245 				    work_entry);
1246 				SDT_PROBE2(sdt, linux, work, cancel,
1247 				    &dw->work, wq);
1248 				release_work(&dw->work, wq);
1249 				/* Can't dereference dw after this point.  */
1250 				cancelled_p = true;
1251 			} else {
1252 				/* Not on the queue, so didn't cancel.  */
1253 				cancelled_p = false;
1254 			}
1255 			break;
1256 		case DELAYED_WORK_SCHEDULED:
1257 			/*
1258 			 * If it is scheduled, mark it cancelled and
1259 			 * try to stop the callout before it starts.
1260 			 *
1261 			 * If it's too late and the callout has already
1262 			 * begun to execute, tough.
1263 			 *
1264 			 * If we stopped the callout before it started,
1265 			 * however, then destroy the callout and
1266 			 * dissociate it from the workqueue ourselves.
1267 			 */
1268 			dw->dw_state = DELAYED_WORK_CANCELLED;
1269 			cancelled_p = true;
1270 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1271 			if (!callout_stop(&dw->dw_callout))
1272 				cancel_delayed_work_done(wq, dw);
1273 			break;
1274 		case DELAYED_WORK_RESCHEDULED:
1275 			/*
1276 			 * If it is being rescheduled, the callout has
1277 			 * already fired.  We must ask it to cancel.
1278 			 */
1279 			dw->dw_state = DELAYED_WORK_CANCELLED;
1280 			dw->dw_resched = -1;
1281 			cancelled_p = true;
1282 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1283 			break;
1284 		case DELAYED_WORK_CANCELLED:
1285 			/*
1286 			 * If it is being cancelled, the callout has
1287 			 * already fired.  There is nothing more for us
1288 			 * to do.  Someone else claims credit for
1289 			 * cancelling it.
1290 			 */
1291 			cancelled_p = false;
1292 			break;
1293 		default:
1294 			panic("invalid delayed work state: %d",
1295 			    dw->dw_state);
1296 		}
1297 	}
1298 	mutex_exit(&wq->wq_lock);
1299 
1300 	return cancelled_p;
1301 }
1302 
1303 /*
1304  * cancel_delayed_work_sync(dw)
1305  *
1306  *	If work was scheduled or queued, remove it from the schedule or
1307  *	queue and return true.  If work was not scheduled or queued,
1308  *	return false.  Note that work may already be running; if it
1309  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1310  *	will return false; either way, wait for it to complete.
1311  */
1312 bool
1313 cancel_delayed_work_sync(struct delayed_work *dw)
1314 {
1315 	struct workqueue_struct *wq;
1316 	bool cancelled_p;
1317 
1318 	/* If there's no workqueue, nothing to cancel.  */
1319 	if ((wq = work_queue(&dw->work)) == NULL)
1320 		return false;
1321 
1322 	mutex_enter(&wq->wq_lock);
1323 	if (__predict_false(work_queue(&dw->work) != wq)) {
1324 		cancelled_p = false;
1325 	} else {
1326 		switch (dw->dw_state) {
1327 		case DELAYED_WORK_IDLE:
1328 			/*
1329 			 * It is either on the queue or already running
1330 			 * or both.
1331 			 */
1332 			if (work_claimed(&dw->work, wq)) {
1333 				/* On the queue.  Remove and release.  */
1334 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1335 				    work_entry);
1336 				SDT_PROBE2(sdt, linux, work, cancel,
1337 				    &dw->work, wq);
1338 				release_work(&dw->work, wq);
1339 				/* Can't dereference dw after this point.  */
1340 				cancelled_p = true;
1341 			} else {
1342 				/* Not on the queue, so didn't cancel. */
1343 				cancelled_p = false;
1344 			}
1345 			/* If it's still running, wait for it to complete.  */
1346 			if (wq->wq_current_work == &dw->work)
1347 				wait_for_current_work(&dw->work, wq);
1348 			break;
1349 		case DELAYED_WORK_SCHEDULED:
1350 			/*
1351 			 * If it is scheduled, mark it cancelled and
1352 			 * try to stop the callout before it starts.
1353 			 *
1354 			 * If it's too late and the callout has already
1355 			 * begun to execute, we must wait for it to
1356 			 * complete.  But we got in soon enough to ask
1357 			 * the callout not to run, so we successfully
1358 			 * cancelled it in that case.
1359 			 *
1360 			 * If we stopped the callout before it started,
1361 			 * then we must destroy the callout and
1362 			 * dissociate it from the workqueue ourselves.
1363 			 */
1364 			dw->dw_state = DELAYED_WORK_CANCELLED;
1365 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1366 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1367 				cancel_delayed_work_done(wq, dw);
1368 			cancelled_p = true;
1369 			break;
1370 		case DELAYED_WORK_RESCHEDULED:
1371 			/*
1372 			 * If it is being rescheduled, the callout has
1373 			 * already fired.  We must ask it to cancel and
1374 			 * wait for it to complete.
1375 			 */
1376 			dw->dw_state = DELAYED_WORK_CANCELLED;
1377 			dw->dw_resched = -1;
1378 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1379 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1380 			cancelled_p = true;
1381 			break;
1382 		case DELAYED_WORK_CANCELLED:
1383 			/*
1384 			 * If it is being cancelled, the callout has
1385 			 * already fired.  We need only wait for it to
1386 			 * complete.  Someone else, however, claims
1387 			 * credit for cancelling it.
1388 			 */
1389 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1390 			cancelled_p = false;
1391 			break;
1392 		default:
1393 			panic("invalid delayed work state: %d",
1394 			    dw->dw_state);
1395 		}
1396 	}
1397 	mutex_exit(&wq->wq_lock);
1398 
1399 	return cancelled_p;
1400 }
1401 
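/*
 * Illustrative example (hypothetical code; the mydev_* names are made
 * up): stopping a self-re-arming tick uses the _sync variant so that
 * neither the callout nor a running handler survives the call.  The
 * stopping flag is assumed to be checked by the handler under the
 * driver's own lock before it re-arms.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct delayed_work	sc_tick;
	bool			sc_stopping;
};

static void
mydev_stop(struct mydev_softc *sc)
{

	sc->sc_stopping = true;		/* handler must not re-arm */
	(void)cancel_delayed_work_sync(&sc->sc_tick);
}
#endif	/* illustrative example */
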
1402 /*
1403  * Flush
1404  */
1405 
1406 /*
1407  * flush_scheduled_work()
1408  *
1409  *	Wait for all work queued on system_wq to complete.  This does
1410  *	not include delayed work.
1411  */
1412 void
1413 flush_scheduled_work(void)
1414 {
1415 
1416 	flush_workqueue(system_wq);
1417 }
1418 
1419 /*
1420  * flush_workqueue_locked(wq)
1421  *
1422  *	Wait for all work queued on wq to complete.  This does not
1423  *	include delayed work.  True if there was work to be flushed,
1424  *	false if the queue was empty.
1425  *
1426  *	Caller must hold wq's lock.
1427  */
1428 static bool
1429 flush_workqueue_locked(struct workqueue_struct *wq)
1430 {
1431 	uint64_t gen;
1432 	bool work_queued = false;
1433 
1434 	KASSERT(mutex_owned(&wq->wq_lock));
1435 
1436 	/* Get the current generation number.  */
1437 	gen = wq->wq_gen;
1438 
1439 	/*
1440 	 * If there's a batch of work in progress, we must wait for the
1441 	 * worker thread to finish that batch.
1442 	 */
1443 	if (wq->wq_current_work != NULL) {
1444 		gen++;
1445 		work_queued = true;
1446 	}
1447 
1448 	/*
1449 	 * If there's any work yet to be claimed from the queue by the
1450 	 * worker thread, we must wait for it to finish one more batch
1451 	 * too.
1452 	 */
1453 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue)) {
1454 		gen++;
1455 		work_queued = true;
1456 	}
1457 
1458 	/* Wait until the generation number has caught up.  */
1459 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1460 	while (wq->wq_gen < gen)
1461 		cv_wait(&wq->wq_cv, &wq->wq_lock);
1462 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1463 
1464 	/* Return whether we had to wait for anything.  */
1465 	return work_queued;
1466 }
1467 
1468 /*
1469  * flush_workqueue(wq)
1470  *
1471  *	Wait for all work queued on wq to complete.  This does not
1472  *	include delayed work.
1473  */
1474 void
1475 flush_workqueue(struct workqueue_struct *wq)
1476 {
1477 
1478 	mutex_enter(&wq->wq_lock);
1479 	(void)flush_workqueue_locked(wq);
1480 	mutex_exit(&wq->wq_lock);
1481 }
1482 
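/*
 * Illustrative example (hypothetical code; the mydev_* names are made
 * up): flush_workqueue acts as a barrier, so a suspend path can use it
 * to make sure previously queued completions have run before touching
 * the hardware.  Work still sitting in a delayed-work callout is not
 * covered.
 */
#if 0	/* illustrative example, not compiled */
struct mydev_softc {
	struct workqueue_struct	*sc_wq;
};

static void
mydev_suspend(struct mydev_softc *sc)
{

	/* Everything queued before this call has finished on return.  */
	flush_workqueue(sc->sc_wq);
	/* ... now quiesce the hardware ... */
}
#endif	/* illustrative example */
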
1483 /*
1484  * drain_workqueue(wq)
1485  *
1486  *	Repeatedly flush wq until there is no more work.
1487  */
1488 void
1489 drain_workqueue(struct workqueue_struct *wq)
1490 {
1491 	unsigned ntries = 0;
1492 
1493 	mutex_enter(&wq->wq_lock);
1494 	while (flush_workqueue_locked(wq)) {
1495 		if (ntries++ == 10 || (ntries % 100) == 0)
1496 			printf("linux workqueue %s"
1497 			    ": still clogged after %u flushes\n",
1498 			    wq->wq_name, ntries);
1499 	}
1500 	mutex_exit(&wq->wq_lock);
1501 }
1502 
1503 /*
1504  * flush_work(work)
1505  *
1506  *	If work is queued or currently executing, wait for it to
1507  *	complete.
1508  *
1509  *	Return true if we waited to flush it, false if it was already
1510  *	idle.
1511  */
1512 bool
1513 flush_work(struct work_struct *work)
1514 {
1515 	struct workqueue_struct *wq;
1516 
1517 	/* If there's no workqueue, nothing to flush.  */
1518 	if ((wq = work_queue(work)) == NULL)
1519 		return false;
1520 
1521 	flush_workqueue(wq);
1522 	return true;
1523 }
1524 
1525 /*
1526  * flush_delayed_work(dw)
1527  *
1528  *	If dw is scheduled to run after a delay, queue it immediately
1529  *	instead.  Then, if dw is queued or currently executing, wait
1530  *	for it to complete.
1531  */
1532 bool
1533 flush_delayed_work(struct delayed_work *dw)
1534 {
1535 	struct workqueue_struct *wq;
1536 	bool waited = false;
1537 
1538 	/* If there's no workqueue, nothing to flush.  */
1539 	if ((wq = work_queue(&dw->work)) == NULL)
1540 		return false;
1541 
1542 	mutex_enter(&wq->wq_lock);
1543 	if (__predict_false(work_queue(&dw->work) != wq)) {
1544 		/*
1545 		 * Moved off the queue already (and possibly to another
1546 		 * queue, though that would be ill-advised), so it must
1547 		 * have completed, and we have nothing more to do.
1548 		 */
1549 		waited = false;
1550 	} else {
1551 		switch (dw->dw_state) {
1552 		case DELAYED_WORK_IDLE:
1553 			/*
1554 			 * It has a workqueue assigned and the callout
1555 			 * is idle, so it must be in progress or on the
1556 			 * queue.  In that case, we'll wait for it to
1557 			 * complete.
1558 			 */
1559 			break;
1560 		case DELAYED_WORK_SCHEDULED:
1561 		case DELAYED_WORK_RESCHEDULED:
1562 		case DELAYED_WORK_CANCELLED:
1563 			/*
1564 			 * The callout is scheduled, and may have even
1565 			 * started.  Mark it as scheduled so that if
1566 			 * the callout has fired it will queue the work
1567 			 * itself.  Try to stop the callout -- if we
1568 			 * can, queue the work now; if we can't, wait
1569 			 * for the callout to complete, which entails
1570 			 * queueing it.
1571 			 */
1572 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1573 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1574 				/*
1575 				 * We stopped it before it ran.  No
1576 				 * state change in the interim is
1577 				 * possible.  Destroy the callout and
1578 				 * queue it ourselves.
1579 				 */
1580 				KASSERT(dw->dw_state ==
1581 				    DELAYED_WORK_SCHEDULED);
1582 				dw_callout_destroy(wq, dw);
1583 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1584 				    work_entry);
1585 				cv_broadcast(&wq->wq_cv);
1586 				SDT_PROBE2(sdt, linux, work, queue,
1587 				    &dw->work, wq);
1588 			}
1589 			break;
1590 		default:
1591 			panic("invalid delayed work state: %d", dw->dw_state);
1592 		}
1593 		/*
1594 		 * Waiting for the whole queue to flush is overkill,
1595 		 * but doesn't hurt.
1596 		 */
1597 		(void)flush_workqueue_locked(wq);
1598 		waited = true;
1599 	}
1600 	mutex_exit(&wq->wq_lock);
1601 
1602 	return waited;
1603 }
1604 
1605 /*
1606  * delayed_work_pending(dw)
1607  *
1608  *	True if dw is currently scheduled to execute, false if not.
1609  */
1610 bool
1611 delayed_work_pending(const struct delayed_work *dw)
1612 {
1613 
1614 	return work_pending(&dw->work);
1615 }
1616