1 /*	$NetBSD: linux_work.c,v 1.53 2021/12/19 11:38:03 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.53 2021/12/19 11:38:03 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49 
50 #include <linux/workqueue.h>
51 
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54 
55 struct workqueue_struct {
56 	kmutex_t		wq_lock;
57 	kcondvar_t		wq_cv;
58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
59 	struct work_head	wq_queue;   /* work to run */
60 	struct work_head	wq_dqueue;  /* delayed work to run now */
61 	struct work_struct	*wq_current_work;
62 	int			wq_flags;
63 	bool			wq_dying;
64 	uint64_t		wq_gen;
65 	struct lwp		*wq_lwp;
66 	const char		*wq_name;
67 };
68 
69 static void __dead	linux_workqueue_thread(void *);
70 static void		linux_workqueue_timeout(void *);
71 static bool		work_claimed(struct work_struct *,
72 			    struct workqueue_struct *);
73 static struct workqueue_struct *
74 			work_queue(struct work_struct *);
75 static bool		acquire_work(struct work_struct *,
76 			    struct workqueue_struct *);
77 static void		release_work(struct work_struct *,
78 			    struct workqueue_struct *);
79 static void		wait_for_current_work(struct work_struct *,
80 			    struct workqueue_struct *);
81 static void		dw_callout_init(struct workqueue_struct *,
82 			    struct delayed_work *);
83 static void		dw_callout_destroy(struct workqueue_struct *,
84 			    struct delayed_work *);
85 static void		cancel_delayed_work_done(struct workqueue_struct *,
86 			    struct delayed_work *);
87 
88 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
89     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
90 SDT_PROBE_DEFINE2(sdt, linux, work, release,
91     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
92 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
93     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
94 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
95     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
96 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
97     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
98     "unsigned long"/*ticks*/);
99 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
101 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
102     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
103 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
104     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
105 SDT_PROBE_DEFINE2(sdt, linux, work, run,
106     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
107 SDT_PROBE_DEFINE2(sdt, linux, work, done,
108     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
109 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
110     "struct workqueue_struct *"/*wq*/);
111 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
112     "struct workqueue_struct *"/*wq*/);
113 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
114     "struct workqueue_struct *"/*wq*/);
115 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
116     "struct workqueue_struct *"/*wq*/);
117 
118 static specificdata_key_t workqueue_key __read_mostly;
119 
120 struct workqueue_struct	*system_highpri_wq __read_mostly;
121 struct workqueue_struct	*system_long_wq __read_mostly;
122 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
123 struct workqueue_struct	*system_unbound_wq __read_mostly;
124 struct workqueue_struct	*system_wq __read_mostly;
125 
126 static inline uintptr_t
127 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
128 {
129 
130 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
131 }
132 
133 /*
134  * linux_workqueue_init()
135  *
136  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
137  *	NetBSD error on failure.
138  */
139 static int
140 linux_workqueue_init0(void)
141 {
142 	int error;
143 
144 	error = lwp_specific_key_create(&workqueue_key, NULL);
145 	if (error)
146 		goto out;
147 
148 	system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
149 	if (system_highpri_wq == NULL) {
150 		error = ENOMEM;
151 		goto out;
152 	}
153 
154 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
155 	if (system_long_wq == NULL) {
156 		error = ENOMEM;
157 		goto out;
158 	}
159 
160 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
161 	if (system_power_efficient_wq == NULL) {
162 		error = ENOMEM;
163 		goto out;
164 	}
165 
166 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
167 	if (system_unbound_wq == NULL) {
168 		error = ENOMEM;
169 		goto out;
170 	}
171 
172 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
173 	if (system_wq == NULL) {
174 		error = ENOMEM;
175 		goto out;
176 	}
177 
178 	/* Success!  */
179 	error = 0;
180 
181 out:	if (error) {
182 		if (system_highpri_wq)
183 			destroy_workqueue(system_highpri_wq);
184 		if (system_long_wq)
185 			destroy_workqueue(system_long_wq);
186 		if (system_power_efficient_wq)
187 			destroy_workqueue(system_power_efficient_wq);
188 		if (system_unbound_wq)
189 			destroy_workqueue(system_unbound_wq);
190 		if (system_wq)
191 			destroy_workqueue(system_wq);
192 		if (workqueue_key)
193 			lwp_specific_key_delete(workqueue_key);
194 	}
195 
196 	return error;
197 }
198 
199 /*
200  * linux_workqueue_fini()
201  *
202  *	Destroy the Linux workqueue subsystem.  Never fails.
203  */
204 static void
205 linux_workqueue_fini0(void)
206 {
207 
208 	destroy_workqueue(system_power_efficient_wq);
209 	destroy_workqueue(system_long_wq);
210 	destroy_workqueue(system_wq);
	destroy_workqueue(system_unbound_wq);
	destroy_workqueue(system_highpri_wq);
211 	lwp_specific_key_delete(workqueue_key);
212 }
213 
214 #ifndef _MODULE
215 static ONCE_DECL(linux_workqueue_init_once);
216 #endif
217 
218 int
219 linux_workqueue_init(void)
220 {
221 #ifdef _MODULE
222 	return linux_workqueue_init0();
223 #else
224 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
225 #endif
226 }
227 
228 void
229 linux_workqueue_fini(void)
230 {
231 #ifdef _MODULE
232 	return linux_workqueue_fini0();
233 #else
234 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
235 #endif
236 }
237 
238 /*
239  * Workqueues
240  */
241 
242 /*
243  * alloc_workqueue(name, flags, max_active)
244  *
245  *	Create a workqueue of the given name.  max_active is the
246  *	maximum number of work items in flight, or 0 for the default.
247  *	Return NULL on failure, pointer to struct workqueue_struct
248  *	object on success.
249  */
250 struct workqueue_struct *
251 alloc_workqueue(const char *name, int flags, unsigned max_active)
252 {
253 	struct workqueue_struct *wq;
254 	int error;
255 
256 	KASSERT(max_active == 0 || max_active == 1);
257 
258 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
259 
260 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
261 	cv_init(&wq->wq_cv, name);
262 	TAILQ_INIT(&wq->wq_delayed);
263 	TAILQ_INIT(&wq->wq_queue);
264 	TAILQ_INIT(&wq->wq_dqueue);
265 	wq->wq_current_work = NULL;
266 	wq->wq_flags = 0;
267 	wq->wq_dying = false;
268 	wq->wq_gen = 0;
269 	wq->wq_lwp = NULL;
270 	wq->wq_name = name;
271 
272 	error = kthread_create(PRI_NONE,
273 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
274 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
275 	if (error)
276 		goto fail0;
277 
278 	return wq;
279 
280 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
281 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
282 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
283 	cv_destroy(&wq->wq_cv);
284 	mutex_destroy(&wq->wq_lock);
285 	kmem_free(wq, sizeof(*wq));
286 	return NULL;
287 }
288 
289 /*
290  * alloc_ordered_workqueue(name, flags)
291  *
292  *	Same as alloc_workqueue(name, flags, 1).
293  */
294 struct workqueue_struct *
295 alloc_ordered_workqueue(const char *name, int flags)
296 {
297 
298 	return alloc_workqueue(name, flags, 1);
299 }
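
/*
 * Example usage (illustrative sketch; the "foo" driver, foo_wq, and
 * the attach/detach routines are hypothetical):
 *
 *	static struct workqueue_struct *foo_wq;
 *
 *	static int
 *	foo_attach(void)
 *	{
 *
 *		foo_wq = alloc_ordered_workqueue("foowq", 0);
 *		if (foo_wq == NULL)
 *			return ENOMEM;
 *		return 0;
 *	}
 *
 *	static void
 *	foo_detach(void)
 *	{
 *
 *		destroy_workqueue(foo_wq);	(cancels delayed work,
 *						 waits for queued work)
 *		foo_wq = NULL;
 *	}
 */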
300 
301 /*
302  * destroy_workqueue(wq)
303  *
304  *	Destroy a workqueue created with alloc_workqueue().  Cancel any pending
305  *	delayed work.  Wait for all queued work to complete.
306  *
307  *	May sleep.
308  */
309 void
310 destroy_workqueue(struct workqueue_struct *wq)
311 {
312 
313 	/*
314 	 * Cancel all delayed work.  We do this first because any
315 	 * delayed work that has already timed out, which we can't
316 	 * cancel, may have queued new work.
317 	 */
318 	mutex_enter(&wq->wq_lock);
319 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
320 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
321 
322 		KASSERT(work_queue(&dw->work) == wq);
323 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
324 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
325 			dw->dw_state == DELAYED_WORK_CANCELLED),
326 		    "delayed work %p in bad state: %d",
327 		    dw, dw->dw_state);
328 
329 		/*
330 		 * Mark it cancelled and try to stop the callout before
331 		 * it starts.
332 		 *
333 		 * If it's too late and the callout has already begun
334 		 * to execute, then it will notice that we asked to
335 		 * cancel it and remove itself from the queue before
336 		 * returning.
337 		 *
338 		 * If we stopped the callout before it started,
339 		 * however, then we can safely destroy the callout and
340 		 * dissociate it from the workqueue ourselves.
341 		 */
342 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
343 		dw->dw_state = DELAYED_WORK_CANCELLED;
344 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
345 			cancel_delayed_work_done(wq, dw);
346 	}
347 	mutex_exit(&wq->wq_lock);
348 
349 	/*
350 	 * At this point, no new work can be put on the queue.
351 	 */
352 
353 	/* Tell the thread to exit.  */
354 	mutex_enter(&wq->wq_lock);
355 	wq->wq_dying = true;
356 	cv_broadcast(&wq->wq_cv);
357 	mutex_exit(&wq->wq_lock);
358 
359 	/* Wait for it to exit.  */
360 	(void)kthread_join(wq->wq_lwp);
361 
362 	KASSERT(wq->wq_dying);
363 	KASSERT(wq->wq_flags == 0);
364 	KASSERT(wq->wq_current_work == NULL);
365 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
366 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
367 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
368 	cv_destroy(&wq->wq_cv);
369 	mutex_destroy(&wq->wq_lock);
370 
371 	kmem_free(wq, sizeof(*wq));
372 }
373 
374 /*
375  * Work thread and callout
376  */
377 
378 /*
379  * linux_workqueue_thread(cookie)
380  *
381  *	Main function for a workqueue's worker thread.  Waits until
382  *	there is work queued, grabs a batch of work off the queue,
383  *	executes it all, bumps the generation number, and repeats,
384  *	until dying.
385  */
386 static void __dead
387 linux_workqueue_thread(void *cookie)
388 {
389 	struct workqueue_struct *const wq = cookie;
390 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
391 	struct work_struct marker, *work;
392 	unsigned i;
393 
394 	lwp_setspecific(workqueue_key, wq);
395 
396 	mutex_enter(&wq->wq_lock);
397 	for (;;) {
398 		/*
399 		 * Wait until there's activity.  If there's no work and
400 		 * we're dying, stop here.
401 		 */
402 		if (TAILQ_EMPTY(&wq->wq_queue) &&
403 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
404 			if (wq->wq_dying)
405 				break;
406 			cv_wait(&wq->wq_cv, &wq->wq_lock);
407 			continue;
408 		}
409 
410 		/*
411 		 * Start a batch of work.  Use a marker to delimit when
412 		 * the batch ends so we can advance the generation
413 		 * after the batch.
414 		 */
415 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
416 		for (i = 0; i < 2; i++) {
417 			if (TAILQ_EMPTY(q[i]))
418 				continue;
419 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
420 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
421 				void (*func)(struct work_struct *);
422 
423 				KASSERT(work_queue(work) == wq);
424 				KASSERT(work_claimed(work, wq));
425 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
426 					container_of(work, struct delayed_work,
427 					    work)->dw_state ==
428 					DELAYED_WORK_IDLE),
429 				    "delayed work %p queued and scheduled",
430 				    work);
431 
432 				TAILQ_REMOVE(q[i], work, work_entry);
433 				KASSERT(wq->wq_current_work == NULL);
434 				wq->wq_current_work = work;
435 				func = work->func;
436 				release_work(work, wq);
437 				/* Can't dereference work after this point.  */
438 
439 				mutex_exit(&wq->wq_lock);
440 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
441 				(*func)(work);
442 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
443 				mutex_enter(&wq->wq_lock);
444 
445 				KASSERT(wq->wq_current_work == work);
446 				wq->wq_current_work = NULL;
447 				cv_broadcast(&wq->wq_cv);
448 			}
449 			TAILQ_REMOVE(q[i], &marker, work_entry);
450 		}
451 
452 		/* Notify flush that we've completed a batch of work.  */
453 		wq->wq_gen++;
454 		cv_broadcast(&wq->wq_cv);
455 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
456 	}
457 	mutex_exit(&wq->wq_lock);
458 
459 	kthread_exit(0);
460 }
461 
462 /*
463  * linux_workqueue_timeout(cookie)
464  *
465  *	Delayed work timeout callback.
466  *
467  *	- If scheduled, queue it.
468  *	- If rescheduled, callout_schedule ourselves again.
469  *	- If cancelled, destroy the callout and release the work from
470  *        the workqueue.
471  */
472 static void
473 linux_workqueue_timeout(void *cookie)
474 {
475 	struct delayed_work *const dw = cookie;
476 	struct workqueue_struct *const wq = work_queue(&dw->work);
477 
478 	KASSERTMSG(wq != NULL,
479 	    "delayed work %p state %d resched %d",
480 	    dw, dw->dw_state, dw->dw_resched);
481 
482 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
483 
484 	mutex_enter(&wq->wq_lock);
485 	KASSERT(work_queue(&dw->work) == wq);
486 	switch (dw->dw_state) {
487 	case DELAYED_WORK_IDLE:
488 		panic("delayed work callout uninitialized: %p", dw);
489 	case DELAYED_WORK_SCHEDULED:
490 		dw_callout_destroy(wq, dw);
491 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
492 		cv_broadcast(&wq->wq_cv);
493 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
494 		break;
495 	case DELAYED_WORK_RESCHEDULED:
496 		KASSERT(dw->dw_resched >= 0);
497 		callout_schedule(&dw->dw_callout, dw->dw_resched);
498 		dw->dw_state = DELAYED_WORK_SCHEDULED;
499 		dw->dw_resched = -1;
500 		break;
501 	case DELAYED_WORK_CANCELLED:
502 		cancel_delayed_work_done(wq, dw);
503 		/* Can't dereference dw after this point.  */
504 		goto out;
505 	default:
506 		panic("delayed work callout in bad state: %p", dw);
507 	}
508 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
509 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
510 out:	mutex_exit(&wq->wq_lock);
511 }
512 
513 /*
514  * current_work()
515  *
516  *	If in a workqueue worker thread, return the work it is
517  *	currently executing.  Otherwise return NULL.
518  */
519 struct work_struct *
520 current_work(void)
521 {
522 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
523 
524 	/* If we're not a workqueue thread, then there's no work.  */
525 	if (wq == NULL)
526 		return NULL;
527 
528 	/*
529 	 * Otherwise, this should be possible only while work is in
530 	 * progress.  Return the current work item.
531 	 */
532 	KASSERT(wq->wq_current_work != NULL);
533 	return wq->wq_current_work;
534 }
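
/*
 * Illustrative use (sketch; sc and sc_work are hypothetical): a work
 * handler, or a routine that may only be called from one, can assert
 * that it is running as the expected work item:
 *
 *	KASSERT(current_work() == &sc->sc_work);
 */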
535 
536 /*
537  * Work
538  */
539 
540 /*
541  * INIT_WORK(work, fn)
542  *
543  *	Initialize work for use with a workqueue to call fn in a worker
544  *	thread.  There is no corresponding destruction operation.
545  */
546 void
547 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
548 {
549 
550 	work->work_owner = 0;
551 	work->func = fn;
552 }
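
/*
 * Example usage (illustrative sketch; struct foo_softc and foo_work
 * are hypothetical).  The usual pattern is to embed the work_struct in
 * a larger object and recover the object with container_of in the
 * handler:
 *
 *	struct foo_softc {
 *		struct work_struct	sc_work;
 *		unsigned		sc_events;
 *	};
 *
 *	static void
 *	foo_work(struct work_struct *work)
 *	{
 *		struct foo_softc *sc = container_of(work,
 *		    struct foo_softc, sc_work);
 *
 *		(process sc->sc_events in thread context)
 *	}
 *
 *	INIT_WORK(&sc->sc_work, foo_work);	(once, at attach time)
 */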
553 
554 /*
555  * work_claimed(work, wq)
556  *
557  *	True if work is currently claimed by a workqueue, meaning it is
558  *	either on the queue or scheduled in a callout.  The workqueue
559  *	must be wq, and caller must hold wq's lock.
560  */
561 static bool
562 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
563 {
564 
565 	KASSERT(work_queue(work) == wq);
566 	KASSERT(mutex_owned(&wq->wq_lock));
567 
568 	return work->work_owner & 1;
569 }
570 
571 /*
572  * work_pending(work)
573  *
574  *	True if work is currently claimed by any workqueue, scheduled
575  *	to run on that workqueue.
576  */
577 bool
578 work_pending(const struct work_struct *work)
579 {
580 
581 	return work->work_owner & 1;
582 }
583 
584 /*
585  * work_queue(work)
586  *
587  *	Return the last queue that work was queued on, or NULL if it
588  *	was never queued.
589  */
590 static struct workqueue_struct *
591 work_queue(struct work_struct *work)
592 {
593 
594 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
595 }
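
/*
 * Note on the representation (see acquire_work/release_work below):
 * work_owner packs the owning workqueue pointer and a "claimed" bit
 * into one word, relying on the pointer having bit 0 clear, which
 * acquire_work asserts:
 *
 *	work_owner == 0			never queued
 *	work_owner == (uintptr_t)wq	associated with wq, not pending
 *	work_owner == (uintptr_t)wq | 1	claimed: on wq's queue or
 *					scheduled in a callout
 *
 * work_pending and work_claimed test bit 0; work_queue masks it off.
 */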
596 
597 /*
598  * acquire_work(work, wq)
599  *
600  *	Try to claim work for wq.  If work is already claimed, it must
601  *	be claimed by wq; return false.  If work is not already
602  *	claimed, claim it, issue a memory barrier to match any prior
603  *	release_work, and return true.
604  *
605  *	Caller must hold wq's lock.
606  */
607 static bool
608 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
609 {
610 	uintptr_t owner0, owner;
611 
612 	KASSERT(mutex_owned(&wq->wq_lock));
613 	KASSERT(((uintptr_t)wq & 1) == 0);
614 
615 	owner = (uintptr_t)wq | 1;
616 	do {
617 		owner0 = work->work_owner;
618 		if (owner0 & 1) {
619 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
620 			return false;
621 		}
622 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
623 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
624 	    owner0);
625 
626 	KASSERT(work_queue(work) == wq);
627 	membar_enter();
628 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
629 	return true;
630 }
631 
632 /*
633  * release_work(work, wq)
634  *
635  *	Issue a memory barrier to match any subsequent acquire_work and
636  *	dissociate work from wq.
637  *
638  *	Caller must hold wq's lock and work must be associated with wq.
639  */
640 static void
641 release_work(struct work_struct *work, struct workqueue_struct *wq)
642 {
643 
644 	KASSERT(work_queue(work) == wq);
645 	KASSERT(mutex_owned(&wq->wq_lock));
646 
647 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
648 	membar_exit();
649 
650 	/*
651 	 * Non-interlocked r/m/w is safe here because nobody else can
652 	 * write to this while the claimed bit is set and the workqueue
653 	 * lock is held.
654 	 */
655 	work->work_owner &= ~(uintptr_t)1;
656 }
657 
658 /*
659  * schedule_work(work)
660  *
661  *	If work is not already queued on system_wq, queue it to be run
662  *	by system_wq's worker thread when it next can.  True if it was
663  *	newly queued, false if it was already queued.  If the work was
664  *	already running, queue it to run again.
665  *
666  *	Caller must ensure work is not queued to run on a different
667  *	workqueue.
668  */
669 bool
670 schedule_work(struct work_struct *work)
671 {
672 
673 	return queue_work(system_wq, work);
674 }
675 
676 /*
677  * queue_work(wq, work)
678  *
679  *	If work is not already queued on wq, queue it to be run by wq's
680  *	worker thread when it next can.  True if it was newly queued,
681  *	false if it was already queued.  If the work was already
682  *	running, queue it to run again.
683  *
684  *	Caller must ensure work is not queued to run on a different
685  *	workqueue.
686  */
687 bool
688 queue_work(struct workqueue_struct *wq, struct work_struct *work)
689 {
690 	bool newly_queued;
691 
692 	KASSERT(wq != NULL);
693 
694 	mutex_enter(&wq->wq_lock);
695 	if (__predict_true(acquire_work(work, wq))) {
696 		/*
697 		 * It wasn't on any workqueue at all.  Put it on this
698 		 * one, and signal the worker thread that there is work
699 		 * to do.
700 		 */
701 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
702 		cv_broadcast(&wq->wq_cv);
703 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
704 		newly_queued = true;
705 	} else {
706 		/*
707 		 * It was already on this workqueue.  Nothing to do
708 		 * since it is already queued.
709 		 */
710 		newly_queued = false;
711 	}
712 	mutex_exit(&wq->wq_lock);
713 
714 	return newly_queued;
715 }
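
/*
 * Example usage (illustrative sketch; the foo names are hypothetical):
 *
 *	sc->sc_events |= FOO_EVENT;	(under the caller's own lock)
 *	if (!queue_work(foo_wq, &sc->sc_work)) {
 *		(already queued; the handler will see the new event
 *		 when it runs)
 *	}
 *
 * Callers content with the shared system workqueue can use
 * schedule_work(&sc->sc_work) instead.
 */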
716 
717 /*
718  * cancel_work(work)
719  *
720  *	If work was queued, remove it from the queue and return true.
721  *	If work was not queued, return false.  Work may still be
722  *	running when this returns.
723  */
724 bool
725 cancel_work(struct work_struct *work)
726 {
727 	struct workqueue_struct *wq;
728 	bool cancelled_p = false;
729 
730 	/* If there's no workqueue, nothing to cancel.   */
731 	if ((wq = work_queue(work)) == NULL)
732 		goto out;
733 
734 	mutex_enter(&wq->wq_lock);
735 	if (__predict_false(work_queue(work) != wq)) {
736 		/*
737 		 * It has finished execution or been cancelled by
738 		 * another thread, and has been moved off the
739 		 * workqueue, so it's too late to cancel.
740 		 */
741 		cancelled_p = false;
742 	} else {
743 		/* Check whether it's on the queue.  */
744 		if (work_claimed(work, wq)) {
745 			/*
746 			 * It is still on the queue.  Take it off the
747 			 * queue and report successful cancellation.
748 			 */
749 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
750 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
751 			release_work(work, wq);
752 			/* Can't dereference work after this point.  */
753 			cancelled_p = true;
754 		} else {
755 			/* Not on the queue.  Couldn't cancel it.  */
756 			cancelled_p = false;
757 		}
758 	}
759 	mutex_exit(&wq->wq_lock);
760 
761 out:	return cancelled_p;
762 }
763 
764 /*
765  * cancel_work_sync(work)
766  *
767  *	If work was queued, remove it from the queue and return true.
768  *	If work was not queued, return false.  Either way, if work is
769  *	currently running, wait for it to complete.
770  *
771  *	May sleep.
772  */
773 bool
774 cancel_work_sync(struct work_struct *work)
775 {
776 	struct workqueue_struct *wq;
777 	bool cancelled_p = false;
778 
779 	/* If there's no workqueue, nothing to cancel.   */
780 	if ((wq = work_queue(work)) == NULL)
781 		goto out;
782 
783 	mutex_enter(&wq->wq_lock);
784 	if (__predict_false(work_queue(work) != wq)) {
785 		/*
786 		 * It has finished execution or been cancelled by
787 		 * another thread, and has been moved off the
788 		 * workqueue, so it's too late to cancel.
789 		 */
790 		cancelled_p = false;
791 	} else {
792 		/* Check whether it's on the queue.  */
793 		if (work_claimed(work, wq)) {
794 			/*
795 			 * It is still on the queue.  Take it off the
796 			 * queue and report successful cancellation.
797 			 */
798 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
799 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
800 			release_work(work, wq);
801 			/* Can't dereference work after this point.  */
802 			cancelled_p = true;
803 		} else {
804 			/* Not on the queue.  Couldn't cancel it.  */
805 			cancelled_p = false;
806 		}
807 		/* If it's still running, wait for it to complete.  */
808 		if (wq->wq_current_work == work)
809 			wait_for_current_work(work, wq);
810 	}
811 	mutex_exit(&wq->wq_lock);
812 
813 out:	return cancelled_p;
814 }
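
/*
 * Example teardown pattern (illustrative sketch; sc is hypothetical):
 * before freeing an object that embeds a work_struct, make sure the
 * work is neither queued nor running, and that nothing can requeue it:
 *
 *	cancel_work_sync(&sc->sc_work);
 *	kmem_free(sc, sizeof(*sc));
 */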
815 
816 /*
817  * wait_for_current_work(work, wq)
818  *
819  *	wq must be currently executing work.  Wait for it to finish.
820  *
821  *	Does not dereference work.
822  */
823 static void
824 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
825 {
826 	uint64_t gen;
827 
828 	KASSERT(mutex_owned(&wq->wq_lock));
829 	KASSERT(wq->wq_current_work == work);
830 
831 	/* Wait only one generation in case it gets requeued quickly.  */
832 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
833 	gen = wq->wq_gen;
834 	do {
835 		cv_wait(&wq->wq_cv, &wq->wq_lock);
836 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
837 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
838 }
839 
840 /*
841  * Delayed work
842  */
843 
844 /*
845  * INIT_DELAYED_WORK(dw, fn)
846  *
847  *	Initialize dw for use with a workqueue to call fn in a worker
848  *	thread after a delay.  There is no corresponding destruction
849  *	operation.
850  */
851 void
852 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
853 {
854 
855 	INIT_WORK(&dw->work, fn);
856 	dw->dw_state = DELAYED_WORK_IDLE;
857 	dw->dw_resched = -1;
858 
859 	/*
860 	 * Defer callout_init until we are going to schedule the
861 	 * callout, which can then callout_destroy it, because
862 	 * otherwise since there's no DESTROY_DELAYED_WORK or anything
863 	 * we have no opportunity to call callout_destroy.
864 	 */
865 }
866 
867 /*
868  * schedule_delayed_work(dw, ticks)
869  *
870  *	If it is not currently scheduled, schedule dw to run after
871  *	ticks on system_wq.  If currently executing and not already
872  *	rescheduled, reschedule it.  True if it was newly scheduled,
873  *	false if it was already scheduled.
874  *
875  *	If ticks == 0, queue it to run as soon as the worker can,
876  *	without waiting for the next callout tick to run.
877  */
878 bool
879 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
880 {
881 
882 	return queue_delayed_work(system_wq, dw, ticks);
883 }
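
/*
 * Example usage (illustrative sketch; the foo names are hypothetical).
 * The delay is in callout ticks, so mstohz() or multiples of hz are
 * the usual ways to express it.  A handler that wants to run
 * periodically can reschedule itself:
 *
 *	static void
 *	foo_tick(struct work_struct *work)
 *	{
 *		struct delayed_work *dw = container_of(work,
 *		    struct delayed_work, work);
 *		struct foo_softc *sc = container_of(dw,
 *		    struct foo_softc, sc_dwork);
 *
 *		(do periodic housekeeping)
 *		schedule_delayed_work(&sc->sc_dwork, mstohz(500));
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->sc_dwork, foo_tick);
 *	schedule_delayed_work(&sc->sc_dwork, mstohz(500));
 */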
884 
885 /*
886  * dw_callout_init(wq, dw)
887  *
888  *	Initialize the callout of dw and transition to
889  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
890  */
891 static void
892 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
893 {
894 
895 	KASSERT(mutex_owned(&wq->wq_lock));
896 	KASSERT(work_queue(&dw->work) == wq);
897 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
898 
899 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
900 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
901 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
902 	dw->dw_state = DELAYED_WORK_SCHEDULED;
903 }
904 
905 /*
906  * dw_callout_destroy(wq, dw)
907  *
908  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
909  */
910 static void
911 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
912 {
913 
914 	KASSERT(mutex_owned(&wq->wq_lock));
915 	KASSERT(work_queue(&dw->work) == wq);
916 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
917 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
918 	    dw->dw_state == DELAYED_WORK_CANCELLED);
919 
920 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
921 	callout_destroy(&dw->dw_callout);
922 	dw->dw_resched = -1;
923 	dw->dw_state = DELAYED_WORK_IDLE;
924 }
925 
926 /*
927  * cancel_delayed_work_done(wq, dw)
928  *
929  *	Complete cancellation of a delayed work: transition from
930  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
931  *	workqueue.  Caller must not dereference dw after this returns.
932  */
933 static void
934 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
935 {
936 
937 	KASSERT(mutex_owned(&wq->wq_lock));
938 	KASSERT(work_queue(&dw->work) == wq);
939 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
940 
941 	dw_callout_destroy(wq, dw);
942 	release_work(&dw->work, wq);
943 	/* Can't dereference dw after this point.  */
944 }
945 
946 /*
947  * queue_delayed_work(wq, dw, ticks)
948  *
949  *	If it is not currently scheduled or queued, schedule dw to run
950  *	after ticks on wq.  True if it was newly scheduled, false if it
951  *	was already scheduled or queued and is left unchanged.
952  *
953  *	If ticks == 0, queue it to run as soon as the worker can,
954  *	without waiting for the next callout tick to run.
955  */
956 bool
957 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
958     unsigned long ticks)
959 {
960 	bool newly_queued;
961 
962 	mutex_enter(&wq->wq_lock);
963 	if (__predict_true(acquire_work(&dw->work, wq))) {
964 		/*
965 		 * It wasn't on any workqueue at all.  Schedule it to
966 		 * run on this one.
967 		 */
968 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
969 		if (ticks == 0) {
970 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
971 			    work_entry);
972 			cv_broadcast(&wq->wq_cv);
973 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
974 		} else {
975 			/*
976 			 * Initialize a callout and schedule to run
977 			 * after a delay.
978 			 */
979 			dw_callout_init(wq, dw);
980 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
981 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
982 		}
983 		newly_queued = true;
984 	} else {
985 		/* It was already on this workqueue.  */
986 		switch (dw->dw_state) {
987 		case DELAYED_WORK_IDLE:
988 		case DELAYED_WORK_SCHEDULED:
989 		case DELAYED_WORK_RESCHEDULED:
990 			/* On the queue or already scheduled.  Leave it.  */
991 			newly_queued = false;
992 			break;
993 		case DELAYED_WORK_CANCELLED:
994 			/*
995 			 * Scheduled and the callout began, but it was
996 			 * cancelled.  Reschedule it.
997 			 */
998 			if (ticks == 0) {
999 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1000 				SDT_PROBE2(sdt, linux, work, queue,
1001 				    &dw->work, wq);
1002 			} else {
1003 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1004 				dw->dw_resched = MIN(INT_MAX, ticks);
1005 				SDT_PROBE3(sdt, linux, work, schedule,
1006 				    dw, wq, ticks);
1007 			}
1008 			newly_queued = true;
1009 			break;
1010 		default:
1011 			panic("invalid delayed work state: %d",
1012 			    dw->dw_state);
1013 		}
1014 	}
1015 	mutex_exit(&wq->wq_lock);
1016 
1017 	return newly_queued;
1018 }
1019 
1020 /*
1021  * mod_delayed_work(wq, dw, ticks)
1022  *
1023  *	Schedule dw to run after ticks.  If scheduled or queued,
1024  *	reschedule.  If ticks == 0, run without delay.
1025  *
1026  *	True if it modified the timer of an already scheduled work,
1027  *	false if it newly scheduled the work.
1028  */
1029 bool
1030 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1031     unsigned long ticks)
1032 {
1033 	bool timer_modified;
1034 
1035 	mutex_enter(&wq->wq_lock);
1036 	if (acquire_work(&dw->work, wq)) {
1037 		/*
1038 		 * It wasn't on any workqueue at all.  Schedule it to
1039 		 * run on this one.
1040 		 */
1041 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1042 		if (ticks == 0) {
1043 			/*
1044 			 * Run immediately: put it on the queue and
1045 			 * signal the worker thread.
1046 			 */
1047 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1048 			    work_entry);
1049 			cv_broadcast(&wq->wq_cv);
1050 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
1051 		} else {
1052 			/*
1053 			 * Initialize a callout and schedule to run
1054 			 * after a delay.
1055 			 */
1056 			dw_callout_init(wq, dw);
1057 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1058 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
1059 		}
1060 		timer_modified = false;
1061 	} else {
1062 		/* It was already on this workqueue.  */
1063 		switch (dw->dw_state) {
1064 		case DELAYED_WORK_IDLE:
1065 			/* On the queue.  */
1066 			if (ticks == 0) {
1067 				/* Leave it be.  */
1068 				SDT_PROBE2(sdt, linux, work, cancel,
1069 				    &dw->work, wq);
1070 				SDT_PROBE2(sdt, linux, work, queue,
1071 				    &dw->work, wq);
1072 			} else {
1073 				/* Remove from the queue and schedule.  */
1074 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1075 				    work_entry);
1076 				dw_callout_init(wq, dw);
1077 				callout_schedule(&dw->dw_callout,
1078 				    MIN(INT_MAX, ticks));
1079 				SDT_PROBE2(sdt, linux, work, cancel,
1080 				    &dw->work, wq);
1081 				SDT_PROBE3(sdt, linux, work, schedule,
1082 				    dw, wq, ticks);
1083 			}
1084 			timer_modified = true;
1085 			break;
1086 		case DELAYED_WORK_SCHEDULED:
1087 			/*
1088 			 * It is scheduled to run after a delay.  Try
1089 			 * to stop it and reschedule it; if we can't,
1090 			 * either reschedule it or cancel it to put it
1091 			 * on the queue, and inform the callout.
1092 			 */
1093 			if (callout_stop(&dw->dw_callout)) {
1094 				/* Can't stop, callout has begun.  */
1095 				if (ticks == 0) {
1096 					/*
1097 					 * We don't actually need to do
1098 					 * anything.  The callout will
1099 					 * queue it as soon as it gets
1100 					 * the lock.
1101 					 */
1102 					SDT_PROBE2(sdt, linux, work, cancel,
1103 					    &dw->work, wq);
1104 					SDT_PROBE2(sdt, linux, work, queue,
1105 					    &dw->work, wq);
1106 				} else {
1107 					/* Ask the callout to reschedule.  */
1108 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1109 					dw->dw_resched = MIN(INT_MAX, ticks);
1110 					SDT_PROBE2(sdt, linux, work, cancel,
1111 					    &dw->work, wq);
1112 					SDT_PROBE3(sdt, linux, work, schedule,
1113 					    dw, wq, ticks);
1114 				}
1115 			} else {
1116 				/* We stopped the callout before it began.  */
1117 				if (ticks == 0) {
1118 					/*
1119 					 * Run immediately: destroy the
1120 					 * callout, put it on the
1121 					 * queue, and signal the worker
1122 					 * thread.
1123 					 */
1124 					dw_callout_destroy(wq, dw);
1125 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1126 					    &dw->work, work_entry);
1127 					cv_broadcast(&wq->wq_cv);
1128 					SDT_PROBE2(sdt, linux, work, cancel,
1129 					    &dw->work, wq);
1130 					SDT_PROBE2(sdt, linux, work, queue,
1131 					    &dw->work, wq);
1132 				} else {
1133 					/*
1134 					 * Reschedule the callout.  No
1135 					 * state change.
1136 					 */
1137 					callout_schedule(&dw->dw_callout,
1138 					    MIN(INT_MAX, ticks));
1139 					SDT_PROBE2(sdt, linux, work, cancel,
1140 					    &dw->work, wq);
1141 					SDT_PROBE3(sdt, linux, work, schedule,
1142 					    dw, wq, ticks);
1143 				}
1144 			}
1145 			timer_modified = true;
1146 			break;
1147 		case DELAYED_WORK_RESCHEDULED:
1148 			/*
1149 			 * Someone rescheduled it after the callout
1150 			 * started but before the poor thing even had a
1151 			 * chance to acquire the lock.
1152 			 */
1153 			if (ticks == 0) {
1154 				/*
1155 				 * We can just switch back to
1156 				 * DELAYED_WORK_SCHEDULED so that the
1157 				 * callout will queue the work as soon
1158 				 * as it gets the lock.
1159 				 */
1160 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1161 				dw->dw_resched = -1;
1162 				SDT_PROBE2(sdt, linux, work, cancel,
1163 				    &dw->work, wq);
1164 				SDT_PROBE2(sdt, linux, work, queue,
1165 				    &dw->work, wq);
1166 			} else {
1167 				/* Change the rescheduled time.  */
1168 				dw->dw_resched = MIN(INT_MAX, ticks);
1169 				SDT_PROBE2(sdt, linux, work, cancel,
1170 				    &dw->work, wq);
1171 				SDT_PROBE3(sdt, linux, work, schedule,
1172 				    dw, wq, ticks);
1173 			}
1174 			timer_modified = true;
1175 			break;
1176 		case DELAYED_WORK_CANCELLED:
1177 			/*
1178 			 * Someone cancelled it after the callout
1179 			 * started but before the poor thing even had a
1180 			 * chance to acquire the lock.
1181 			 */
1182 			if (ticks == 0) {
1183 				/*
1184 				 * We can just switch back to
1185 				 * DELAYED_WORK_SCHEDULED so that the
1186 				 * callout will queue the work as soon
1187 				 * as it gets the lock.
1188 				 */
1189 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1190 				SDT_PROBE2(sdt, linux, work, queue,
1191 				    &dw->work, wq);
1192 			} else {
1193 				/* Ask it to reschedule.  */
1194 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1195 				dw->dw_resched = MIN(INT_MAX, ticks);
1196 				SDT_PROBE3(sdt, linux, work, schedule,
1197 				    dw, wq, ticks);
1198 			}
1199 			timer_modified = false;
1200 			break;
1201 		default:
1202 			panic("invalid delayed work state: %d", dw->dw_state);
1203 		}
1204 	}
1205 	mutex_exit(&wq->wq_lock);
1206 
1207 	return timer_modified;
1208 }
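
/*
 * Example usage (illustrative sketch; foo_wq and sc_watchdog are
 * hypothetical): mod_delayed_work suits watchdog-style timers that
 * should be pushed back on every activity, whether or not they are
 * already scheduled:
 *
 *	mod_delayed_work(foo_wq, &sc->sc_watchdog, mstohz(1000));
 *
 * Unlike queue_delayed_work, this reprograms the delay even if the
 * work was already scheduled or queued.
 */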
1209 
1210 /*
1211  * cancel_delayed_work(dw)
1212  *
1213  *	If work was scheduled or queued, remove it from the schedule or
1214  *	queue and return true.  If work was not scheduled or queued,
1215  *	return false.  Note that work may already be running; if it
1216  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1217  *	will return false, and either way, cancel_delayed_work will NOT
1218  *	wait for the work to complete.
1219  */
1220 bool
1221 cancel_delayed_work(struct delayed_work *dw)
1222 {
1223 	struct workqueue_struct *wq;
1224 	bool cancelled_p;
1225 
1226 	/* If there's no workqueue, nothing to cancel.   */
1227 	if ((wq = work_queue(&dw->work)) == NULL)
1228 		return false;
1229 
1230 	mutex_enter(&wq->wq_lock);
1231 	if (__predict_false(work_queue(&dw->work) != wq)) {
1232 		cancelled_p = false;
1233 	} else {
1234 		switch (dw->dw_state) {
1235 		case DELAYED_WORK_IDLE:
1236 			/*
1237 			 * It is either on the queue or already running
1238 			 * or both.
1239 			 */
1240 			if (work_claimed(&dw->work, wq)) {
1241 				/* On the queue.  Remove and release.  */
1242 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1243 				    work_entry);
1244 				SDT_PROBE2(sdt, linux, work, cancel,
1245 				    &dw->work, wq);
1246 				release_work(&dw->work, wq);
1247 				/* Can't dereference dw after this point.  */
1248 				cancelled_p = true;
1249 			} else {
1250 				/* Not on the queue, so didn't cancel.  */
1251 				cancelled_p = false;
1252 			}
1253 			break;
1254 		case DELAYED_WORK_SCHEDULED:
1255 			/*
1256 			 * If it is scheduled, mark it cancelled and
1257 			 * try to stop the callout before it starts.
1258 			 *
1259 			 * If it's too late and the callout has already
1260 			 * begun to execute, tough.
1261 			 *
1262 			 * If we stopped the callout before it started,
1263 			 * however, then destroy the callout and
1264 			 * dissociate it from the workqueue ourselves.
1265 			 */
1266 			dw->dw_state = DELAYED_WORK_CANCELLED;
1267 			cancelled_p = true;
1268 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1269 			if (!callout_stop(&dw->dw_callout))
1270 				cancel_delayed_work_done(wq, dw);
1271 			break;
1272 		case DELAYED_WORK_RESCHEDULED:
1273 			/*
1274 			 * If it is being rescheduled, the callout has
1275 			 * already fired.  We must ask it to cancel.
1276 			 */
1277 			dw->dw_state = DELAYED_WORK_CANCELLED;
1278 			dw->dw_resched = -1;
1279 			cancelled_p = true;
1280 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1281 			break;
1282 		case DELAYED_WORK_CANCELLED:
1283 			/*
1284 			 * If it is being cancelled, the callout has
1285 			 * already fired.  There is nothing more for us
1286 			 * to do.  Someone else claims credit for
1287 			 * cancelling it.
1288 			 */
1289 			cancelled_p = false;
1290 			break;
1291 		default:
1292 			panic("invalid delayed work state: %d",
1293 			    dw->dw_state);
1294 		}
1295 	}
1296 	mutex_exit(&wq->wq_lock);
1297 
1298 	return cancelled_p;
1299 }
1300 
1301 /*
1302  * cancel_delayed_work_sync(dw)
1303  *
1304  *	If work was scheduled or queued, remove it from the schedule or
1305  *	queue and return true.  If work was not scheduled or queued,
1306  *	return false.  Note that work may already be running; if it
1307  *	hasn't been rescheduled or requeued, cancel_delayed_work_sync
1308  *	will return false; either way, wait for it to complete.
1309  */
1310 bool
1311 cancel_delayed_work_sync(struct delayed_work *dw)
1312 {
1313 	struct workqueue_struct *wq;
1314 	bool cancelled_p;
1315 
1316 	/* If there's no workqueue, nothing to cancel.  */
1317 	if ((wq = work_queue(&dw->work)) == NULL)
1318 		return false;
1319 
1320 	mutex_enter(&wq->wq_lock);
1321 	if (__predict_false(work_queue(&dw->work) != wq)) {
1322 		cancelled_p = false;
1323 	} else {
1324 		switch (dw->dw_state) {
1325 		case DELAYED_WORK_IDLE:
1326 			/*
1327 			 * It is either on the queue or already running
1328 			 * or both.
1329 			 */
1330 			if (work_claimed(&dw->work, wq)) {
1331 				/* On the queue.  Remove and release.  */
1332 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1333 				    work_entry);
1334 				SDT_PROBE2(sdt, linux, work, cancel,
1335 				    &dw->work, wq);
1336 				release_work(&dw->work, wq);
1337 				/* Can't dereference dw after this point.  */
1338 				cancelled_p = true;
1339 			} else {
1340 				/* Not on the queue, so didn't cancel. */
1341 				cancelled_p = false;
1342 			}
1343 			/* If it's still running, wait for it to complete.  */
1344 			if (wq->wq_current_work == &dw->work)
1345 				wait_for_current_work(&dw->work, wq);
1346 			break;
1347 		case DELAYED_WORK_SCHEDULED:
1348 			/*
1349 			 * If it is scheduled, mark it cancelled and
1350 			 * try to stop the callout before it starts.
1351 			 *
1352 			 * If it's too late and the callout has already
1353 			 * begun to execute, we must wait for it to
1354 			 * complete.  But we got in soon enough to ask
1355 			 * the callout not to run, so we successfully
1356 			 * cancelled it in that case.
1357 			 *
1358 			 * If we stopped the callout before it started,
1359 			 * then we must destroy the callout and
1360 			 * dissociate it from the workqueue ourselves.
1361 			 */
1362 			dw->dw_state = DELAYED_WORK_CANCELLED;
1363 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1364 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1365 				cancel_delayed_work_done(wq, dw);
1366 			cancelled_p = true;
1367 			break;
1368 		case DELAYED_WORK_RESCHEDULED:
1369 			/*
1370 			 * If it is being rescheduled, the callout has
1371 			 * already fired.  We must ask it to cancel and
1372 			 * wait for it to complete.
1373 			 */
1374 			dw->dw_state = DELAYED_WORK_CANCELLED;
1375 			dw->dw_resched = -1;
1376 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1377 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1378 			cancelled_p = true;
1379 			break;
1380 		case DELAYED_WORK_CANCELLED:
1381 			/*
1382 			 * If it is being cancelled, the callout has
1383 			 * already fired.  We need only wait for it to
1384 			 * complete.  Someone else, however, claims
1385 			 * credit for cancelling it.
1386 			 */
1387 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1388 			cancelled_p = false;
1389 			break;
1390 		default:
1391 			panic("invalid delayed work state: %d",
1392 			    dw->dw_state);
1393 		}
1394 	}
1395 	mutex_exit(&wq->wq_lock);
1396 
1397 	return cancelled_p;
1398 }
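
/*
 * Example teardown pattern (illustrative sketch; sc_dying and sc_dwork
 * are hypothetical): a self-rescheduling handler needs a stop flag in
 * addition to cancellation, or it may requeue itself after the
 * cancellation returns:
 *
 *	sc->sc_dying = true;		(checked by the handler before
 *					 it calls schedule_delayed_work)
 *	cancel_delayed_work_sync(&sc->sc_dwork);
 */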
1399 
1400 /*
1401  * Flush
1402  */
1403 
1404 /*
1405  * flush_scheduled_work()
1406  *
1407  *	Wait for all work queued on system_wq to complete.  This does
1408  *	not include delayed work.
1409  */
1410 void
1411 flush_scheduled_work(void)
1412 {
1413 
1414 	flush_workqueue(system_wq);
1415 }
1416 
1417 /*
1418  * flush_workqueue_locked(wq)
1419  *
1420  *	Wait for all work queued on wq to complete.  This does not
1421  *	include delayed work.  True if there was work to be flushed,
1422  *	false if the queue was empty.
1423  *
1424  *	Caller must hold wq's lock.
1425  */
1426 static bool
1427 flush_workqueue_locked(struct workqueue_struct *wq)
1428 {
1429 	uint64_t gen;
1430 	bool work_queued = false;
1431 
1432 	KASSERT(mutex_owned(&wq->wq_lock));
1433 
1434 	/* Get the current generation number.  */
1435 	gen = wq->wq_gen;
1436 
1437 	/*
1438 	 * If there's a batch of work in progress, we must wait for the
1439 	 * worker thread to finish that batch.
1440 	 */
1441 	if (wq->wq_current_work != NULL) {
1442 		gen++;
1443 		work_queued = true;
1444 	}
1445 
1446 	/*
1447 	 * If there's any work yet to be claimed from the queue by the
1448 	 * worker thread, we must wait for it to finish one more batch
1449 	 * too.
1450 	 */
1451 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue)) {
1452 		gen++;
1453 		work_queued = true;
1454 	}
1455 
1456 	/* Wait until the generation number has caught up.  */
1457 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1458 	while (wq->wq_gen < gen)
1459 		cv_wait(&wq->wq_cv, &wq->wq_lock);
1460 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1461 
1462 	/* Return whether we had to wait for anything.  */
1463 	return work_queued;
1464 }
1465 
1466 /*
1467  * flush_workqueue(wq)
1468  *
1469  *	Wait for all work queued on wq to complete.  This does not
1470  *	include delayed work.
1471  */
1472 void
1473 flush_workqueue(struct workqueue_struct *wq)
1474 {
1475 
1476 	mutex_enter(&wq->wq_lock);
1477 	(void)flush_workqueue_locked(wq);
1478 	mutex_exit(&wq->wq_lock);
1479 }
1480 
1481 /*
1482  * drain_workqueue(wq)
1483  *
1484  *	Repeatedly flush wq until there is no more work.
1485  */
1486 void
1487 drain_workqueue(struct workqueue_struct *wq)
1488 {
1489 	unsigned ntries = 0;
1490 
1491 	mutex_enter(&wq->wq_lock);
1492 	while (flush_workqueue_locked(wq)) {
1493 		if (ntries++ == 10 || (ntries % 100) == 0)
1494 			printf("linux workqueue %s"
1495 			    ": still clogged after %u flushes\n",
1496 			    wq->wq_name, ntries);
1497 	}
1498 	mutex_exit(&wq->wq_lock);
1499 }
1500 
1501 /*
1502  * flush_work(work)
1503  *
1504  *	If work is queued or currently executing, wait for it to
1505  *	complete.
1506  *
1507  *	Return true if we waited to flush it, false if it was already
1508  *	idle.
1509  */
1510 bool
1511 flush_work(struct work_struct *work)
1512 {
1513 	struct workqueue_struct *wq;
1514 
1515 	/* If there's no workqueue, nothing to flush.  */
1516 	if ((wq = work_queue(work)) == NULL)
1517 		return false;
1518 
1519 	flush_workqueue(wq);
1520 	return true;
1521 }
1522 
1523 /*
1524  * flush_delayed_work(dw)
1525  *
1526  *	If dw is scheduled to run after a delay, queue it immediately
1527  *	instead.  Then, if dw is queued or currently executing, wait
1528  *	for it to complete.
1529  */
1530 bool
1531 flush_delayed_work(struct delayed_work *dw)
1532 {
1533 	struct workqueue_struct *wq;
1534 	bool waited = false;
1535 
1536 	/* If there's no workqueue, nothing to flush.  */
1537 	if ((wq = work_queue(&dw->work)) == NULL)
1538 		return false;
1539 
1540 	mutex_enter(&wq->wq_lock);
1541 	if (__predict_false(work_queue(&dw->work) != wq)) {
1542 		/*
1543 		 * Moved off the queue already (and possibly to another
1544 		 * queue, though that would be ill-advised), so it must
1545 		 * have completed, and we have nothing more to do.
1546 		 */
1547 		waited = false;
1548 	} else {
1549 		switch (dw->dw_state) {
1550 		case DELAYED_WORK_IDLE:
1551 			/*
1552 			 * It has a workqueue assigned and the callout
1553 			 * is idle, so it must be in progress or on the
1554 			 * queue.  In that case, we'll wait for it to
1555 			 * complete.
1556 			 */
1557 			break;
1558 		case DELAYED_WORK_SCHEDULED:
1559 		case DELAYED_WORK_RESCHEDULED:
1560 		case DELAYED_WORK_CANCELLED:
1561 			/*
1562 			 * The callout is scheduled, and may have even
1563 			 * started.  Mark it as scheduled so that if
1564 			 * the callout has fired it will queue the work
1565 			 * itself.  Try to stop the callout -- if we
1566 			 * can, queue the work now; if we can't, wait
1567 			 * for the callout to complete, which entails
1568 			 * queueing it.
1569 			 */
1570 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1571 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1572 				/*
1573 				 * We stopped it before it ran.  No
1574 				 * state change in the interim is
1575 				 * possible.  Destroy the callout and
1576 				 * queue it ourselves.
1577 				 */
1578 				KASSERT(dw->dw_state ==
1579 				    DELAYED_WORK_SCHEDULED);
1580 				dw_callout_destroy(wq, dw);
1581 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1582 				    work_entry);
1583 				cv_broadcast(&wq->wq_cv);
1584 				SDT_PROBE2(sdt, linux, work, queue,
1585 				    &dw->work, wq);
1586 			}
1587 			break;
1588 		default:
1589 			panic("invalid delayed work state: %d", dw->dw_state);
1590 		}
1591 		/*
1592 		 * Waiting for the whole queue to flush is overkill,
1593 		 * but doesn't hurt.
1594 		 */
1595 		(void)flush_workqueue_locked(wq);
1596 		waited = true;
1597 	}
1598 	mutex_exit(&wq->wq_lock);
1599 
1600 	return waited;
1601 }
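
/*
 * Example usage (illustrative sketch; sc_dwork is hypothetical): to
 * make a deferred update take effect immediately, e.g. before
 * suspending a device, queue it now and wait for it:
 *
 *	flush_delayed_work(&sc->sc_dwork);
 */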
1602 
1603 /*
1604  * delayed_work_pending(dw)
1605  *
1606  *	True if dw is currently scheduled to execute, false if not.
1607  */
1608 bool
1609 delayed_work_pending(const struct delayed_work *dw)
1610 {
1611 
1612 	return work_pending(&dw->work);
1613 }
1614