1 /*	$NetBSD: linux_work.c,v 1.52 2021/12/19 01:51:02 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.52 2021/12/19 01:51:02 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49 
50 #include <linux/workqueue.h>
51 
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54 
55 struct workqueue_struct {
56 	kmutex_t		wq_lock;
57 	kcondvar_t		wq_cv;
58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
59 	struct work_head	wq_queue;   /* work to run */
60 	struct work_head	wq_dqueue;  /* delayed work to run now */
61 	struct work_struct	*wq_current_work;
62 	int			wq_flags;
63 	bool			wq_dying;
64 	uint64_t		wq_gen;
65 	struct lwp		*wq_lwp;
66 	const char		*wq_name;
67 };
68 
69 static void __dead	linux_workqueue_thread(void *);
70 static void		linux_workqueue_timeout(void *);
71 static bool		work_claimed(struct work_struct *,
72 			    struct workqueue_struct *);
73 static struct workqueue_struct *
74 			work_queue(struct work_struct *);
75 static bool		acquire_work(struct work_struct *,
76 			    struct workqueue_struct *);
77 static void		release_work(struct work_struct *,
78 			    struct workqueue_struct *);
79 static void		wait_for_current_work(struct work_struct *,
80 			    struct workqueue_struct *);
81 static void		dw_callout_init(struct workqueue_struct *,
82 			    struct delayed_work *);
83 static void		dw_callout_destroy(struct workqueue_struct *,
84 			    struct delayed_work *);
85 static void		cancel_delayed_work_done(struct workqueue_struct *,
86 			    struct delayed_work *);
87 
88 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
89     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
90 SDT_PROBE_DEFINE2(sdt, linux, work, release,
91     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
92 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
93     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
94 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
95     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
96 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
97     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
98     "unsigned long"/*ticks*/);
99 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
101 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
102     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
103 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
104     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
105 SDT_PROBE_DEFINE2(sdt, linux, work, run,
106     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
107 SDT_PROBE_DEFINE2(sdt, linux, work, done,
108     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
109 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
110     "struct workqueue_struct *"/*wq*/);
111 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
112     "struct workqueue_struct *"/*wq*/);
113 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
114     "struct workqueue_struct *"/*wq*/);
115 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
116     "struct workqueue_struct *"/*wq*/);
117 
118 static specificdata_key_t workqueue_key __read_mostly;
119 
120 struct workqueue_struct	*system_wq __read_mostly;
121 struct workqueue_struct	*system_long_wq __read_mostly;
122 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
123 struct workqueue_struct	*system_unbound_wq __read_mostly;
124 
125 static inline uintptr_t
126 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
127 {
128 
129 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
130 }
131 
132 /*
133  * linux_workqueue_init()
134  *
135  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
136  *	NetBSD error on failure.
137  */
138 static int
139 linux_workqueue_init0(void)
140 {
141 	int error;
142 
143 	error = lwp_specific_key_create(&workqueue_key, NULL);
144 	if (error)
145 		goto fail0;
146 
147 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
148 	if (system_wq == NULL) {
149 		error = ENOMEM;
150 		goto fail1;
151 	}
152 
153 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
154 	if (system_long_wq == NULL) {
155 		error = ENOMEM;
156 		goto fail2;
157 	}
158 
159 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
160 	if (system_power_efficient_wq == NULL) {
161 		error = ENOMEM;
162 		goto fail3;
163 	}
164 
165 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
166 	if (system_unbound_wq == NULL) {
167 		error = ENOMEM;
168 		goto fail4;
169 	}
170 
171 	return 0;
172 
173 fail5: __unused
174 	destroy_workqueue(system_unbound_wq);
175 fail4:	destroy_workqueue(system_power_efficient_wq);
176 fail3:	destroy_workqueue(system_long_wq);
177 fail2:	destroy_workqueue(system_wq);
178 fail1:	lwp_specific_key_delete(workqueue_key);
179 fail0:	KASSERT(error);
180 	return error;
181 }
182 
183 /*
184  * linux_workqueue_fini()
185  *
186  *	Destroy the Linux workqueue subsystem.  Never fails.
187  */
188 static void
189 linux_workqueue_fini0(void)
190 {
191 
	destroy_workqueue(system_unbound_wq);
192 	destroy_workqueue(system_power_efficient_wq);
193 	destroy_workqueue(system_long_wq);
194 	destroy_workqueue(system_wq);
195 	lwp_specific_key_delete(workqueue_key);
196 }
197 
198 #ifndef _MODULE
199 static ONCE_DECL(linux_workqueue_init_once);
200 #endif
201 
202 int
203 linux_workqueue_init(void)
204 {
205 #ifdef _MODULE
206 	return linux_workqueue_init0();
207 #else
208 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
209 #endif
210 }
211 
212 void
213 linux_workqueue_fini(void)
214 {
215 #ifdef _MODULE
216 	return linux_workqueue_fini0();
217 #else
218 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
219 #endif
220 }
221 
222 /*
223  * Workqueues
224  */
225 
226 /*
227  * alloc_workqueue(name, flags, max_active)
228  *
229  *	Create a workqueue of the given name.  max_active is the
230  *	maximum number of work items in flight, or 0 for the default.
231  *	Return NULL on failure, pointer to struct workqueue_struct
232  *	object on success.
233  */
234 struct workqueue_struct *
235 alloc_workqueue(const char *name, int flags, unsigned max_active)
236 {
237 	struct workqueue_struct *wq;
238 	int error;
239 
240 	KASSERT(max_active == 0 || max_active == 1);
241 
242 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
243 
244 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
245 	cv_init(&wq->wq_cv, name);
246 	TAILQ_INIT(&wq->wq_delayed);
247 	TAILQ_INIT(&wq->wq_queue);
248 	TAILQ_INIT(&wq->wq_dqueue);
249 	wq->wq_current_work = NULL;
250 	wq->wq_flags = 0;
251 	wq->wq_dying = false;
252 	wq->wq_gen = 0;
253 	wq->wq_lwp = NULL;
254 	wq->wq_name = name;
255 
256 	error = kthread_create(PRI_NONE,
257 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
258 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
259 	if (error)
260 		goto fail0;
261 
262 	return wq;
263 
264 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
265 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
266 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
267 	cv_destroy(&wq->wq_cv);
268 	mutex_destroy(&wq->wq_lock);
269 	kmem_free(wq, sizeof(*wq));
270 	return NULL;
271 }
272 
273 /*
274  * alloc_ordered_workqueue(name, flags)
275  *
276  *	Same as alloc_workqueue(name, flags, 1).
277  */
278 struct workqueue_struct *
279 alloc_ordered_workqueue(const char *name, int flags)
280 {
281 
282 	return alloc_workqueue(name, flags, 1);
283 }
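
/*
 * Illustrative sketch, not part of the original file (the mydrv_*
 * names are hypothetical): a driver typically creates its workqueue
 * at attach time and destroys it at detach time.
 *
 *	sc->sc_wq = alloc_ordered_workqueue("mydrvwq", 0);
 *	if (sc->sc_wq == NULL)
 *		return ENOMEM;
 *	...
 *	destroy_workqueue(sc->sc_wq);
 */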
284 
285 /*
286  * destroy_workqueue(wq)
287  *
288  *	Destroy a workqueue created with alloc_workqueue.  Cancel any
289  *	pending delayed work.  Wait for all queued work to complete.
290  *
291  *	May sleep.
292  */
293 void
294 destroy_workqueue(struct workqueue_struct *wq)
295 {
296 
297 	/*
298 	 * Cancel all delayed work.  We do this first because any
299 	 * delayed work that has already timed out, which we can't
300 	 * cancel, may have queued new work.
301 	 */
302 	mutex_enter(&wq->wq_lock);
303 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
304 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
305 
306 		KASSERT(work_queue(&dw->work) == wq);
307 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
308 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
309 			dw->dw_state == DELAYED_WORK_CANCELLED),
310 		    "delayed work %p in bad state: %d",
311 		    dw, dw->dw_state);
312 
313 		/*
314 		 * Mark it cancelled and try to stop the callout before
315 		 * it starts.
316 		 *
317 		 * If it's too late and the callout has already begun
318 		 * to execute, then it will notice that we asked to
319 		 * cancel it and remove itself from the queue before
320 		 * returning.
321 		 *
322 		 * If we stopped the callout before it started,
323 		 * however, then we can safely destroy the callout and
324 		 * dissociate it from the workqueue ourselves.
325 		 */
326 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
327 		dw->dw_state = DELAYED_WORK_CANCELLED;
328 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
329 			cancel_delayed_work_done(wq, dw);
330 	}
331 	mutex_exit(&wq->wq_lock);
332 
333 	/*
334 	 * At this point, no new work can be put on the queue.
335 	 */
336 
337 	/* Tell the thread to exit.  */
338 	mutex_enter(&wq->wq_lock);
339 	wq->wq_dying = true;
340 	cv_broadcast(&wq->wq_cv);
341 	mutex_exit(&wq->wq_lock);
342 
343 	/* Wait for it to exit.  */
344 	(void)kthread_join(wq->wq_lwp);
345 
346 	KASSERT(wq->wq_dying);
347 	KASSERT(wq->wq_flags == 0);
348 	KASSERT(wq->wq_current_work == NULL);
349 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
350 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
351 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
352 	cv_destroy(&wq->wq_cv);
353 	mutex_destroy(&wq->wq_lock);
354 
355 	kmem_free(wq, sizeof(*wq));
356 }
357 
358 /*
359  * Work thread and callout
360  */
361 
362 /*
363  * linux_workqueue_thread(cookie)
364  *
365  *	Main function for a workqueue's worker thread.  Waits until
366  *	there is work queued, grabs a batch of work off the queue,
367  *	executes it all, bumps the generation number, and repeats,
368  *	until dying.
369  */
370 static void __dead
371 linux_workqueue_thread(void *cookie)
372 {
373 	struct workqueue_struct *const wq = cookie;
374 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
375 	struct work_struct marker, *work;
376 	unsigned i;
377 
378 	lwp_setspecific(workqueue_key, wq);
379 
380 	mutex_enter(&wq->wq_lock);
381 	for (;;) {
382 		/*
383 		 * Wait until there's activity.  If there's no work and
384 		 * we're dying, stop here.
385 		 */
386 		if (TAILQ_EMPTY(&wq->wq_queue) &&
387 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
388 			if (wq->wq_dying)
389 				break;
390 			cv_wait(&wq->wq_cv, &wq->wq_lock);
391 			continue;
392 		}
393 
394 		/*
395 		 * Start a batch of work.  Use a marker to delimit when
396 		 * the batch ends so we can advance the generation
397 		 * after the batch.
398 		 */
399 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
400 		for (i = 0; i < 2; i++) {
401 			if (TAILQ_EMPTY(q[i]))
402 				continue;
403 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
404 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
405 				void (*func)(struct work_struct *);
406 
407 				KASSERT(work_queue(work) == wq);
408 				KASSERT(work_claimed(work, wq));
409 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
410 					container_of(work, struct delayed_work,
411 					    work)->dw_state ==
412 					DELAYED_WORK_IDLE),
413 				    "delayed work %p queued and scheduled",
414 				    work);
415 
416 				TAILQ_REMOVE(q[i], work, work_entry);
417 				KASSERT(wq->wq_current_work == NULL);
418 				wq->wq_current_work = work;
419 				func = work->func;
420 				release_work(work, wq);
421 				/* Can't dereference work after this point.  */
422 
423 				mutex_exit(&wq->wq_lock);
424 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
425 				(*func)(work);
426 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
427 				mutex_enter(&wq->wq_lock);
428 
429 				KASSERT(wq->wq_current_work == work);
430 				wq->wq_current_work = NULL;
431 				cv_broadcast(&wq->wq_cv);
432 			}
433 			TAILQ_REMOVE(q[i], &marker, work_entry);
434 		}
435 
436 		/* Notify flush that we've completed a batch of work.  */
437 		wq->wq_gen++;
438 		cv_broadcast(&wq->wq_cv);
439 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
440 	}
441 	mutex_exit(&wq->wq_lock);
442 
443 	kthread_exit(0);
444 }
445 
446 /*
447  * linux_workqueue_timeout(cookie)
448  *
449  *	Delayed work timeout callback.
450  *
451  *	- If scheduled, queue it.
452  *	- If rescheduled, callout_schedule ourselves again.
453  *	- If cancelled, destroy the callout and release the work from
454  *        the workqueue.
455  */
456 static void
457 linux_workqueue_timeout(void *cookie)
458 {
459 	struct delayed_work *const dw = cookie;
460 	struct workqueue_struct *const wq = work_queue(&dw->work);
461 
462 	KASSERTMSG(wq != NULL,
463 	    "delayed work %p state %d resched %d",
464 	    dw, dw->dw_state, dw->dw_resched);
465 
466 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
467 
468 	mutex_enter(&wq->wq_lock);
469 	KASSERT(work_queue(&dw->work) == wq);
470 	switch (dw->dw_state) {
471 	case DELAYED_WORK_IDLE:
472 		panic("delayed work callout uninitialized: %p", dw);
473 	case DELAYED_WORK_SCHEDULED:
474 		dw_callout_destroy(wq, dw);
475 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
476 		cv_broadcast(&wq->wq_cv);
477 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
478 		break;
479 	case DELAYED_WORK_RESCHEDULED:
480 		KASSERT(dw->dw_resched >= 0);
481 		callout_schedule(&dw->dw_callout, dw->dw_resched);
482 		dw->dw_state = DELAYED_WORK_SCHEDULED;
483 		dw->dw_resched = -1;
484 		break;
485 	case DELAYED_WORK_CANCELLED:
486 		cancel_delayed_work_done(wq, dw);
487 		/* Can't dereference dw after this point.  */
488 		goto out;
489 	default:
490 		panic("delayed work callout in bad state: %p", dw);
491 	}
492 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
493 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
494 out:	mutex_exit(&wq->wq_lock);
495 }
496 
497 /*
498  * current_work()
499  *
500  *	If in a workqueue worker thread, return the work it is
501  *	currently executing.  Otherwise return NULL.
502  */
503 struct work_struct *
504 current_work(void)
505 {
506 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
507 
508 	/* If we're not a workqueue thread, then there's no work.  */
509 	if (wq == NULL)
510 		return NULL;
511 
512 	/*
513 	 * Otherwise, this should be possible only while work is in
514 	 * progress.  Return the current work item.
515 	 */
516 	KASSERT(wq->wq_current_work != NULL);
517 	return wq->wq_current_work;
518 }
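
/*
 * Illustrative sketch, not part of the original file (hypothetical
 * names): one use of current_work is to guard against flushing the
 * work item we are currently running inside of, which would deadlock
 * in flush_workqueue_locked below because the worker thread cannot
 * advance the generation number while it is blocked in the flush.
 *
 *	KASSERT(current_work() != &sc->sc_reset_work);
 *	(void)flush_work(&sc->sc_reset_work);
 */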
519 
520 /*
521  * Work
522  */
523 
524 /*
525  * INIT_WORK(work, fn)
526  *
527  *	Initialize work for use with a workqueue to call fn in a worker
528  *	thread.  There is no corresponding destruction operation.
529  */
530 void
531 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
532 {
533 
534 	work->work_owner = 0;
535 	work->func = fn;
536 }
537 
538 /*
539  * work_claimed(work, wq)
540  *
541  *	True if work is currently claimed by a workqueue, meaning it is
542  *	either on the queue or scheduled in a callout.  The workqueue
543  *	must be wq, and caller must hold wq's lock.
544  */
545 static bool
546 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
547 {
548 
549 	KASSERT(work_queue(work) == wq);
550 	KASSERT(mutex_owned(&wq->wq_lock));
551 
552 	return work->work_owner & 1;
553 }
554 
555 /*
556  * work_pending(work)
557  *
558  *	True if work is currently claimed by any workqueue, scheduled
559  *	to run on that workqueue.
560  */
561 bool
562 work_pending(const struct work_struct *work)
563 {
564 
565 	return work->work_owner & 1;
566 }
567 
568 /*
569  * work_queue(work)
570  *
571  *	Return the last queue that work was queued on, or NULL if it
572  *	was never queued.
573  */
574 static struct workqueue_struct *
575 work_queue(struct work_struct *work)
576 {
577 
578 	return (struct workqueue_struct *)(work->work_owner & ~(uintptr_t)1);
579 }
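
/*
 * The work_owner word used by work_claimed, work_pending, and
 * work_queue above is a tagged pointer: bit 0 is the `claimed' bit
 * and the remaining bits are the owning workqueue, i.e.
 *
 *	work_owner == 0				never queued anywhere
 *	work_owner == (uintptr_t)wq		last ran on wq, not claimed
 *	work_owner == (uintptr_t)wq | 1		claimed by wq (on the queue
 *						or scheduled in a callout)
 *
 * acquire_work below relies on workqueue_struct pointers having the
 * low bit clear, which it asserts before setting the tag.
 */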
580 
581 /*
582  * acquire_work(work, wq)
583  *
584  *	Try to claim work for wq.  If work is already claimed, it must
585  *	be claimed by wq; return false.  If work is not already
586  *	claimed, claim it, issue a memory barrier to match any prior
587  *	release_work, and return true.
588  *
589  *	Caller must hold wq's lock.
590  */
591 static bool
592 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
593 {
594 	uintptr_t owner0, owner;
595 
596 	KASSERT(mutex_owned(&wq->wq_lock));
597 	KASSERT(((uintptr_t)wq & 1) == 0);
598 
599 	owner = (uintptr_t)wq | 1;
600 	do {
601 		owner0 = work->work_owner;
602 		if (owner0 & 1) {
603 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
604 			return false;
605 		}
606 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
607 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
608 	    owner0);
609 
610 	KASSERT(work_queue(work) == wq);
611 	membar_enter();
612 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
613 	return true;
614 }
615 
616 /*
617  * release_work(work, wq)
618  *
619  *	Issue a memory barrier to match any subsequent acquire_work and
620  *	dissociate work from wq.
621  *
622  *	Caller must hold wq's lock and work must be associated with wq.
623  */
624 static void
625 release_work(struct work_struct *work, struct workqueue_struct *wq)
626 {
627 
628 	KASSERT(work_queue(work) == wq);
629 	KASSERT(mutex_owned(&wq->wq_lock));
630 
631 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
632 	membar_exit();
633 
634 	/*
635 	 * Non-interlocked r/m/w is safe here because nobody else can
636 	 * write to this while the claimed bit is set and the workqueue
637 	 * lock is held.
638 	 */
639 	work->work_owner &= ~(uintptr_t)1;
640 }
641 
642 /*
643  * schedule_work(work)
644  *
645  *	If work is not already queued on system_wq, queue it to be run
646  *	by system_wq's worker thread when it next can.  True if it was
647  *	newly queued, false if it was already queued.  If the work was
648  *	already running, queue it to run again.
649  *
650  *	Caller must ensure work is not queued to run on a different
651  *	workqueue.
652  */
653 bool
654 schedule_work(struct work_struct *work)
655 {
656 
657 	return queue_work(system_wq, work);
658 }
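
/*
 * Illustrative sketch, not part of the original file (the mydrv_*
 * names are hypothetical): the usual pattern is to embed the
 * work_struct in a driver structure, recover it with container_of in
 * the handler, and kick it with schedule_work when the event occurs.
 *
 *	static void
 *	mydrv_reset_task(struct work_struct *work)
 *	{
 *		struct mydrv_softc *sc =
 *		    container_of(work, struct mydrv_softc, sc_reset_work);
 *
 *		... runs later in system_wq's worker thread ...
 *	}
 *
 *	INIT_WORK(&sc->sc_reset_work, mydrv_reset_task);
 *	...
 *	(void)schedule_work(&sc->sc_reset_work);
 */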
659 
660 /*
661  * queue_work(wq, work)
662  *
663  *	If work is not already queued on wq, queue it to be run by wq's
664  *	worker thread when it next can.  True if it was newly queued,
665  *	false if it was already queued.  If the work was already
666  *	running, queue it to run again.
667  *
668  *	Caller must ensure work is not queued to run on a different
669  *	workqueue.
670  */
671 bool
672 queue_work(struct workqueue_struct *wq, struct work_struct *work)
673 {
674 	bool newly_queued;
675 
676 	KASSERT(wq != NULL);
677 
678 	mutex_enter(&wq->wq_lock);
679 	if (__predict_true(acquire_work(work, wq))) {
680 		/*
681 		 * It wasn't on any workqueue at all.  Put it on this
682 		 * one, and signal the worker thread that there is work
683 		 * to do.
684 		 */
685 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
686 		cv_broadcast(&wq->wq_cv);
687 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
688 		newly_queued = true;
689 	} else {
690 		/*
691 		 * It was already on this workqueue.  Nothing to do
692 		 * since it is already queued.
693 		 */
694 		newly_queued = false;
695 	}
696 	mutex_exit(&wq->wq_lock);
697 
698 	return newly_queued;
699 }
700 
701 /*
702  * cancel_work(work)
703  *
704  *	If work was queued, remove it from the queue and return true.
705  *	If work was not queued, return false.  Work may still be
706  *	running when this returns.
707  */
708 bool
709 cancel_work(struct work_struct *work)
710 {
711 	struct workqueue_struct *wq;
712 	bool cancelled_p = false;
713 
714 	/* If there's no workqueue, nothing to cancel.   */
715 	if ((wq = work_queue(work)) == NULL)
716 		goto out;
717 
718 	mutex_enter(&wq->wq_lock);
719 	if (__predict_false(work_queue(work) != wq)) {
720 		/*
721 		 * It has finished execution or been cancelled by
722 		 * another thread, and has been moved off the
723 		 * workqueue, so it's too late to cancel.
724 		 */
725 		cancelled_p = false;
726 	} else {
727 		/* Check whether it's on the queue.  */
728 		if (work_claimed(work, wq)) {
729 			/*
730 			 * It is still on the queue.  Take it off the
731 			 * queue and report successful cancellation.
732 			 */
733 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
734 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
735 			release_work(work, wq);
736 			/* Can't dereference work after this point.  */
737 			cancelled_p = true;
738 		} else {
739 			/* Not on the queue.  Couldn't cancel it.  */
740 			cancelled_p = false;
741 		}
742 	}
743 	mutex_exit(&wq->wq_lock);
744 
745 out:	return cancelled_p;
746 }
747 
748 /*
749  * cancel_work_sync(work)
750  *
751  *	If work was queued, remove it from the queue and return true.
752  *	If work was not queued, return false.  Either way, if work is
753  *	currently running, wait for it to complete.
754  *
755  *	May sleep.
756  */
757 bool
758 cancel_work_sync(struct work_struct *work)
759 {
760 	struct workqueue_struct *wq;
761 	bool cancelled_p = false;
762 
763 	/* If there's no workqueue, nothing to cancel.   */
764 	if ((wq = work_queue(work)) == NULL)
765 		goto out;
766 
767 	mutex_enter(&wq->wq_lock);
768 	if (__predict_false(work_queue(work) != wq)) {
769 		/*
770 		 * It has finished execution or been cancelled by
771 		 * another thread, and has been moved off the
772 		 * workqueue, so it's too late to cancel.
773 		 */
774 		cancelled_p = false;
775 	} else {
776 		/* Check whether it's on the queue.  */
777 		if (work_claimed(work, wq)) {
778 			/*
779 			 * It is still on the queue.  Take it off the
780 			 * queue and report successful cancellation.
781 			 */
782 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
783 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
784 			release_work(work, wq);
785 			/* Can't dereference work after this point.  */
786 			cancelled_p = true;
787 		} else {
788 			/* Not on the queue.  Couldn't cancel it.  */
789 			cancelled_p = false;
790 		}
791 		/* If it's still running, wait for it to complete.  */
792 		if (wq->wq_current_work == work)
793 			wait_for_current_work(work, wq);
794 	}
795 	mutex_exit(&wq->wq_lock);
796 
797 out:	return cancelled_p;
798 }
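
/*
 * Illustrative teardown sketch, not part of the original file
 * (hypothetical names): use the _sync variant before freeing the
 * structure that embeds the work, since it also waits for a handler
 * that has already started running.
 *
 *	(void)cancel_work_sync(&sc->sc_reset_work);
 *	... it is now safe to free sc ...
 */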
799 
800 /*
801  * wait_for_current_work(work, wq)
802  *
803  *	wq must be currently executing work.  Wait for it to finish.
804  *
805  *	Does not dereference work.
806  */
807 static void
808 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
809 {
810 	uint64_t gen;
811 
812 	KASSERT(mutex_owned(&wq->wq_lock));
813 	KASSERT(wq->wq_current_work == work);
814 
815 	/* Wait only one generation in case it gets requeued quickly.  */
816 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
817 	gen = wq->wq_gen;
818 	do {
819 		cv_wait(&wq->wq_cv, &wq->wq_lock);
820 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
821 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
822 }
823 
824 /*
825  * Delayed work
826  */
827 
828 /*
829  * INIT_DELAYED_WORK(dw, fn)
830  *
831  *	Initialize dw for use with a workqueue to call fn in a worker
832  *	thread after a delay.  There is no corresponding destruction
833  *	operation.
834  */
835 void
836 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
837 {
838 
839 	INIT_WORK(&dw->work, fn);
840 	dw->dw_state = DELAYED_WORK_IDLE;
841 	dw->dw_resched = -1;
842 
843 	/*
844 	 * Defer callout_init until we are going to schedule the
845 	 * callout, so the code that runs or cancels the work can then
846 	 * callout_destroy it; since there's no DESTROY_DELAYED_WORK or
847 	 * anything, we have no other opportunity to call callout_destroy.
848 	 */
849 }
850 
851 /*
852  * schedule_delayed_work(dw, ticks)
853  *
854  *	If it is not currently scheduled, schedule dw to run after
855  *	ticks on system_wq.  If currently executing and not already
856  *	rescheduled, reschedule it.  True if it was newly scheduled,
857  *	false if it was already scheduled.
858  *
859  *	If ticks == 0, queue it to run as soon as the worker can,
860  *	without waiting for the next callout tick to run.
861  */
862 bool
863 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
864 {
865 
866 	return queue_delayed_work(system_wq, dw, ticks);
867 }
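
/*
 * Illustrative sketch, not part of the original file (hypothetical
 * names): a self-rearming poller on system_wq.  The delay is in
 * callout ticks, so a delay of hz is roughly one second.
 *
 *	static void
 *	mydrv_poll(struct work_struct *work)
 *	{
 *		struct delayed_work *dw =
 *		    container_of(work, struct delayed_work, work);
 *
 *		... do the periodic work ...
 *		(void)schedule_delayed_work(dw, hz);
 *	}
 *
 *	INIT_DELAYED_WORK(&mydrv_poll_dw, mydrv_poll);
 *	(void)schedule_delayed_work(&mydrv_poll_dw, hz);
 */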
868 
869 /*
870  * dw_callout_init(wq, dw)
871  *
872  *	Initialize the callout of dw and transition to
873  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
874  */
875 static void
876 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
877 {
878 
879 	KASSERT(mutex_owned(&wq->wq_lock));
880 	KASSERT(work_queue(&dw->work) == wq);
881 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
882 
883 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
884 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
885 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
886 	dw->dw_state = DELAYED_WORK_SCHEDULED;
887 }
888 
889 /*
890  * dw_callout_destroy(wq, dw)
891  *
892  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
893  */
894 static void
895 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
896 {
897 
898 	KASSERT(mutex_owned(&wq->wq_lock));
899 	KASSERT(work_queue(&dw->work) == wq);
900 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
901 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
902 	    dw->dw_state == DELAYED_WORK_CANCELLED);
903 
904 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
905 	callout_destroy(&dw->dw_callout);
906 	dw->dw_resched = -1;
907 	dw->dw_state = DELAYED_WORK_IDLE;
908 }
909 
910 /*
911  * cancel_delayed_work_done(wq, dw)
912  *
913  *	Complete cancellation of a delayed work: transition from
914  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
915  *	workqueue.  Caller must not dereference dw after this returns.
916  */
917 static void
918 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
919 {
920 
921 	KASSERT(mutex_owned(&wq->wq_lock));
922 	KASSERT(work_queue(&dw->work) == wq);
923 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
924 
925 	dw_callout_destroy(wq, dw);
926 	release_work(&dw->work, wq);
927 	/* Can't dereference dw after this point.  */
928 }
929 
930 /*
931  * queue_delayed_work(wq, dw, ticks)
932  *
933  *	If it is not currently scheduled or queued, schedule dw to run
934  *	after ticks on wq; otherwise leave it as is.  True if it was
935  *	newly scheduled, false if it was already scheduled or queued.
936  *
937  *	If ticks == 0, queue it to run as soon as the worker can,
938  *	without waiting for the next callout tick to run.
939  */
940 bool
941 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
942     unsigned long ticks)
943 {
944 	bool newly_queued;
945 
946 	mutex_enter(&wq->wq_lock);
947 	if (__predict_true(acquire_work(&dw->work, wq))) {
948 		/*
949 		 * It wasn't on any workqueue at all.  Schedule it to
950 		 * run on this one.
951 		 */
952 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
953 		if (ticks == 0) {
954 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
955 			    work_entry);
956 			cv_broadcast(&wq->wq_cv);
957 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
958 		} else {
959 			/*
960 			 * Initialize a callout and schedule to run
961 			 * after a delay.
962 			 */
963 			dw_callout_init(wq, dw);
964 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
965 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
966 		}
967 		newly_queued = true;
968 	} else {
969 		/* It was already on this workqueue.  */
970 		switch (dw->dw_state) {
971 		case DELAYED_WORK_IDLE:
972 		case DELAYED_WORK_SCHEDULED:
973 		case DELAYED_WORK_RESCHEDULED:
974 			/* On the queue or already scheduled.  Leave it.  */
975 			newly_queued = false;
976 			break;
977 		case DELAYED_WORK_CANCELLED:
978 			/*
979 			 * Scheduled and the callout began, but it was
980 			 * cancelled.  Reschedule it.
981 			 */
982 			if (ticks == 0) {
983 				dw->dw_state = DELAYED_WORK_SCHEDULED;
984 				SDT_PROBE2(sdt, linux, work, queue,
985 				    &dw->work, wq);
986 			} else {
987 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
988 				dw->dw_resched = MIN(INT_MAX, ticks);
989 				SDT_PROBE3(sdt, linux, work, schedule,
990 				    dw, wq, ticks);
991 			}
992 			newly_queued = true;
993 			break;
994 		default:
995 			panic("invalid delayed work state: %d",
996 			    dw->dw_state);
997 		}
998 	}
999 	mutex_exit(&wq->wq_lock);
1000 
1001 	return newly_queued;
1002 }
1003 
1004 /*
1005  * mod_delayed_work(wq, dw, ticks)
1006  *
1007  *	Schedule dw to run after ticks.  If scheduled or queued,
1008  *	reschedule.  If ticks == 0, run without delay.
1009  *
1010  *	True if it modified the timer of an already scheduled work,
1011  *	false if it newly scheduled the work.
1012  */
1013 bool
1014 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1015     unsigned long ticks)
1016 {
1017 	bool timer_modified;
1018 
1019 	mutex_enter(&wq->wq_lock);
1020 	if (acquire_work(&dw->work, wq)) {
1021 		/*
1022 		 * It wasn't on any workqueue at all.  Schedule it to
1023 		 * run on this one.
1024 		 */
1025 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1026 		if (ticks == 0) {
1027 			/*
1028 			 * Run immediately: put it on the queue and
1029 			 * signal the worker thread.
1030 			 */
1031 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1032 			    work_entry);
1033 			cv_broadcast(&wq->wq_cv);
1034 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
1035 		} else {
1036 			/*
1037 			 * Initialize a callout and schedule to run
1038 			 * after a delay.
1039 			 */
1040 			dw_callout_init(wq, dw);
1041 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1042 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
1043 		}
1044 		timer_modified = false;
1045 	} else {
1046 		/* It was already on this workqueue.  */
1047 		switch (dw->dw_state) {
1048 		case DELAYED_WORK_IDLE:
1049 			/* On the queue.  */
1050 			if (ticks == 0) {
1051 				/* Leave it be.  */
1052 				SDT_PROBE2(sdt, linux, work, cancel,
1053 				    &dw->work, wq);
1054 				SDT_PROBE2(sdt, linux, work, queue,
1055 				    &dw->work, wq);
1056 			} else {
1057 				/* Remove from the queue and schedule.  */
1058 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1059 				    work_entry);
1060 				dw_callout_init(wq, dw);
1061 				callout_schedule(&dw->dw_callout,
1062 				    MIN(INT_MAX, ticks));
1063 				SDT_PROBE2(sdt, linux, work, cancel,
1064 				    &dw->work, wq);
1065 				SDT_PROBE3(sdt, linux, work, schedule,
1066 				    dw, wq, ticks);
1067 			}
1068 			timer_modified = true;
1069 			break;
1070 		case DELAYED_WORK_SCHEDULED:
1071 			/*
1072 			 * It is scheduled to run after a delay.  Try
1073 			 * to stop it and reschedule it; if we can't,
1074 			 * either reschedule it or cancel it to put it
1075 			 * on the queue, and inform the callout.
1076 			 */
1077 			if (callout_stop(&dw->dw_callout)) {
1078 				/* Can't stop, callout has begun.  */
1079 				if (ticks == 0) {
1080 					/*
1081 					 * We don't actually need to do
1082 					 * anything.  The callout will
1083 					 * queue it as soon as it gets
1084 					 * the lock.
1085 					 */
1086 					SDT_PROBE2(sdt, linux, work, cancel,
1087 					    &dw->work, wq);
1088 					SDT_PROBE2(sdt, linux, work, queue,
1089 					    &dw->work, wq);
1090 				} else {
1091 					/* Ask the callout to reschedule.  */
1092 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1093 					dw->dw_resched = MIN(INT_MAX, ticks);
1094 					SDT_PROBE2(sdt, linux, work, cancel,
1095 					    &dw->work, wq);
1096 					SDT_PROBE3(sdt, linux, work, schedule,
1097 					    dw, wq, ticks);
1098 				}
1099 			} else {
1100 				/* We stopped the callout before it began.  */
1101 				if (ticks == 0) {
1102 					/*
1103 					 * Run immediately: destroy the
1104 					 * callout, put it on the
1105 					 * queue, and signal the worker
1106 					 * thread.
1107 					 */
1108 					dw_callout_destroy(wq, dw);
1109 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1110 					    &dw->work, work_entry);
1111 					cv_broadcast(&wq->wq_cv);
1112 					SDT_PROBE2(sdt, linux, work, cancel,
1113 					    &dw->work, wq);
1114 					SDT_PROBE2(sdt, linux, work, queue,
1115 					    &dw->work, wq);
1116 				} else {
1117 					/*
1118 					 * Reschedule the callout.  No
1119 					 * state change.
1120 					 */
1121 					callout_schedule(&dw->dw_callout,
1122 					    MIN(INT_MAX, ticks));
1123 					SDT_PROBE2(sdt, linux, work, cancel,
1124 					    &dw->work, wq);
1125 					SDT_PROBE3(sdt, linux, work, schedule,
1126 					    dw, wq, ticks);
1127 				}
1128 			}
1129 			timer_modified = true;
1130 			break;
1131 		case DELAYED_WORK_RESCHEDULED:
1132 			/*
1133 			 * Someone rescheduled it after the callout
1134 			 * started but before the poor thing even had a
1135 			 * chance to acquire the lock.
1136 			 */
1137 			if (ticks == 0) {
1138 				/*
1139 				 * We can just switch back to
1140 				 * DELAYED_WORK_SCHEDULED so that the
1141 				 * callout will queue the work as soon
1142 				 * as it gets the lock.
1143 				 */
1144 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1145 				dw->dw_resched = -1;
1146 				SDT_PROBE2(sdt, linux, work, cancel,
1147 				    &dw->work, wq);
1148 				SDT_PROBE2(sdt, linux, work, queue,
1149 				    &dw->work, wq);
1150 			} else {
1151 				/* Change the rescheduled time.  */
1152 				dw->dw_resched = MIN(INT_MAX, ticks);
1153 				SDT_PROBE2(sdt, linux, work, cancel,
1154 				    &dw->work, wq);
1155 				SDT_PROBE3(sdt, linux, work, schedule,
1156 				    dw, wq, ticks);
1157 			}
1158 			timer_modified = true;
1159 			break;
1160 		case DELAYED_WORK_CANCELLED:
1161 			/*
1162 			 * Someone cancelled it after the callout
1163 			 * started but before the poor thing even had a
1164 			 * chance to acquire the lock.
1165 			 */
1166 			if (ticks == 0) {
1167 				/*
1168 				 * We can just switch back to
1169 				 * DELAYED_WORK_SCHEDULED so that the
1170 				 * callout will queue the work as soon
1171 				 * as it gets the lock.
1172 				 */
1173 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1174 				SDT_PROBE2(sdt, linux, work, queue,
1175 				    &dw->work, wq);
1176 			} else {
1177 				/* Ask it to reschedule.  */
1178 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1179 				dw->dw_resched = MIN(INT_MAX, ticks);
1180 				SDT_PROBE3(sdt, linux, work, schedule,
1181 				    dw, wq, ticks);
1182 			}
1183 			timer_modified = false;
1184 			break;
1185 		default:
1186 			panic("invalid delayed work state: %d", dw->dw_state);
1187 		}
1188 	}
1189 	mutex_exit(&wq->wq_lock);
1190 
1191 	return timer_modified;
1192 }
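
/*
 * Illustrative sketch, not part of the original file (hypothetical
 * names): mod_delayed_work suits a watchdog-style timeout that should
 * be pushed back on every event, whether or not it is already
 * scheduled, queued, or running.
 *
 *	(void)mod_delayed_work(system_wq, &sc->sc_watchdog_dw, 5*hz);
 */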
1193 
1194 /*
1195  * cancel_delayed_work(dw)
1196  *
1197  *	If work was scheduled or queued, remove it from the schedule or
1198  *	queue and return true.  If work was not scheduled or queued,
1199  *	return false.  Note that work may already be running; if it
1200  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1201  *	will return false, and either way, cancel_delayed_work will NOT
1202  *	wait for the work to complete.
1203  */
1204 bool
1205 cancel_delayed_work(struct delayed_work *dw)
1206 {
1207 	struct workqueue_struct *wq;
1208 	bool cancelled_p;
1209 
1210 	/* If there's no workqueue, nothing to cancel.   */
1211 	if ((wq = work_queue(&dw->work)) == NULL)
1212 		return false;
1213 
1214 	mutex_enter(&wq->wq_lock);
1215 	if (__predict_false(work_queue(&dw->work) != wq)) {
1216 		cancelled_p = false;
1217 	} else {
1218 		switch (dw->dw_state) {
1219 		case DELAYED_WORK_IDLE:
1220 			/*
1221 			 * It is either on the queue or already running
1222 			 * or both.
1223 			 */
1224 			if (work_claimed(&dw->work, wq)) {
1225 				/* On the queue.  Remove and release.  */
1226 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1227 				    work_entry);
1228 				SDT_PROBE2(sdt, linux, work, cancel,
1229 				    &dw->work, wq);
1230 				release_work(&dw->work, wq);
1231 				/* Can't dereference dw after this point.  */
1232 				cancelled_p = true;
1233 			} else {
1234 				/* Not on the queue, so didn't cancel.  */
1235 				cancelled_p = false;
1236 			}
1237 			break;
1238 		case DELAYED_WORK_SCHEDULED:
1239 			/*
1240 			 * If it is scheduled, mark it cancelled and
1241 			 * try to stop the callout before it starts.
1242 			 *
1243 			 * If it's too late and the callout has already
1244 			 * begun to execute, tough.
1245 			 *
1246 			 * If we stopped the callout before it started,
1247 			 * however, then destroy the callout and
1248 			 * dissociate it from the workqueue ourselves.
1249 			 */
1250 			dw->dw_state = DELAYED_WORK_CANCELLED;
1251 			cancelled_p = true;
1252 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1253 			if (!callout_stop(&dw->dw_callout))
1254 				cancel_delayed_work_done(wq, dw);
1255 			break;
1256 		case DELAYED_WORK_RESCHEDULED:
1257 			/*
1258 			 * If it is being rescheduled, the callout has
1259 			 * already fired.  We must ask it to cancel.
1260 			 */
1261 			dw->dw_state = DELAYED_WORK_CANCELLED;
1262 			dw->dw_resched = -1;
1263 			cancelled_p = true;
1264 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1265 			break;
1266 		case DELAYED_WORK_CANCELLED:
1267 			/*
1268 			 * If it is being cancelled, the callout has
1269 			 * already fired.  There is nothing more for us
1270 			 * to do.  Someone else claims credit for
1271 			 * cancelling it.
1272 			 */
1273 			cancelled_p = false;
1274 			break;
1275 		default:
1276 			panic("invalid delayed work state: %d",
1277 			    dw->dw_state);
1278 		}
1279 	}
1280 	mutex_exit(&wq->wq_lock);
1281 
1282 	return cancelled_p;
1283 }
1284 
1285 /*
1286  * cancel_delayed_work_sync(dw)
1287  *
1288  *	If work was scheduled or queued, remove it from the schedule or
1289  *	queue and return true.  If work was not scheduled or queued,
1290  *	return false.  Note that work may already be running; if it
1291  *	hasn't been rescheduled or requeued, then cancel_delayed_work_sync
1292  *	will return false; either way, wait for it to complete.
1293  */
1294 bool
1295 cancel_delayed_work_sync(struct delayed_work *dw)
1296 {
1297 	struct workqueue_struct *wq;
1298 	bool cancelled_p;
1299 
1300 	/* If there's no workqueue, nothing to cancel.  */
1301 	if ((wq = work_queue(&dw->work)) == NULL)
1302 		return false;
1303 
1304 	mutex_enter(&wq->wq_lock);
1305 	if (__predict_false(work_queue(&dw->work) != wq)) {
1306 		cancelled_p = false;
1307 	} else {
1308 		switch (dw->dw_state) {
1309 		case DELAYED_WORK_IDLE:
1310 			/*
1311 			 * It is either on the queue or already running
1312 			 * or both.
1313 			 */
1314 			if (work_claimed(&dw->work, wq)) {
1315 				/* On the queue.  Remove and release.  */
1316 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1317 				    work_entry);
1318 				SDT_PROBE2(sdt, linux, work, cancel,
1319 				    &dw->work, wq);
1320 				release_work(&dw->work, wq);
1321 				/* Can't dereference dw after this point.  */
1322 				cancelled_p = true;
1323 			} else {
1324 				/* Not on the queue, so didn't cancel. */
1325 				cancelled_p = false;
1326 			}
1327 			/* If it's still running, wait for it to complete.  */
1328 			if (wq->wq_current_work == &dw->work)
1329 				wait_for_current_work(&dw->work, wq);
1330 			break;
1331 		case DELAYED_WORK_SCHEDULED:
1332 			/*
1333 			 * If it is scheduled, mark it cancelled and
1334 			 * try to stop the callout before it starts.
1335 			 *
1336 			 * If it's too late and the callout has already
1337 			 * begun to execute, we must wait for it to
1338 			 * complete.  But we got in soon enough to ask
1339 			 * the callout not to run, so we successfully
1340 			 * cancelled it in that case.
1341 			 *
1342 			 * If we stopped the callout before it started,
1343 			 * then we must destroy the callout and
1344 			 * dissociate it from the workqueue ourselves.
1345 			 */
1346 			dw->dw_state = DELAYED_WORK_CANCELLED;
1347 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1348 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1349 				cancel_delayed_work_done(wq, dw);
1350 			cancelled_p = true;
1351 			break;
1352 		case DELAYED_WORK_RESCHEDULED:
1353 			/*
1354 			 * If it is being rescheduled, the callout has
1355 			 * already fired.  We must ask it to cancel and
1356 			 * wait for it to complete.
1357 			 */
1358 			dw->dw_state = DELAYED_WORK_CANCELLED;
1359 			dw->dw_resched = -1;
1360 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1361 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1362 			cancelled_p = true;
1363 			break;
1364 		case DELAYED_WORK_CANCELLED:
1365 			/*
1366 			 * If it is being cancelled, the callout has
1367 			 * already fired.  We need only wait for it to
1368 			 * complete.  Someone else, however, claims
1369 			 * credit for cancelling it.
1370 			 */
1371 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1372 			cancelled_p = false;
1373 			break;
1374 		default:
1375 			panic("invalid delayed work state: %d",
1376 			    dw->dw_state);
1377 		}
1378 	}
1379 	mutex_exit(&wq->wq_lock);
1380 
1381 	return cancelled_p;
1382 }
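
/*
 * Illustrative teardown sketch, not part of the original file
 * (hypothetical names): as with cancel_work_sync, the _sync variant
 * is the one to use before freeing the structure embedding the
 * delayed_work, since it waits for both the callout and the handler.
 *
 *	(void)cancel_delayed_work_sync(&sc->sc_watchdog_dw);
 *	... it is now safe to free sc ...
 */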
1383 
1384 /*
1385  * Flush
1386  */
1387 
1388 /*
1389  * flush_scheduled_work()
1390  *
1391  *	Wait for all work queued on system_wq to complete.  This does
1392  *	not include delayed work.
1393  */
1394 void
1395 flush_scheduled_work(void)
1396 {
1397 
1398 	flush_workqueue(system_wq);
1399 }
1400 
1401 /*
1402  * flush_workqueue_locked(wq)
1403  *
1404  *	Wait for all work queued on wq to complete.  This does not
1405  *	include delayed work.  True if there was work to be flushed,
1406  *	false if the queue was empty.
1407  *
1408  *	Caller must hold wq's lock.
1409  */
1410 static bool
1411 flush_workqueue_locked(struct workqueue_struct *wq)
1412 {
1413 	uint64_t gen;
1414 	bool work_queued = false;
1415 
1416 	KASSERT(mutex_owned(&wq->wq_lock));
1417 
1418 	/* Get the current generation number.  */
1419 	gen = wq->wq_gen;
1420 
1421 	/*
1422 	 * If there's a batch of work in progress, we must wait for the
1423 	 * worker thread to finish that batch.
1424 	 */
1425 	if (wq->wq_current_work != NULL) {
1426 		gen++;
1427 		work_queued = true;
1428 	}
1429 
1430 	/*
1431 	 * If there's any work yet to be claimed from the queue by the
1432 	 * worker thread, we must wait for it to finish one more batch
1433 	 * too.
1434 	 */
1435 	if (!TAILQ_EMPTY(&wq->wq_queue) || !TAILQ_EMPTY(&wq->wq_dqueue)) {
1436 		gen++;
1437 		work_queued = true;
1438 	}
1439 
1440 	/* Wait until the generation number has caught up.  */
1441 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1442 	while (wq->wq_gen < gen)
1443 		cv_wait(&wq->wq_cv, &wq->wq_lock);
1444 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1445 
1446 	/* Return whether we had to wait for anything.  */
1447 	return work_queued;
1448 }
1449 
1450 /*
1451  * flush_workqueue(wq)
1452  *
1453  *	Wait for all work queued on wq to complete.  This does not
1454  *	include delayed work.
1455  */
1456 void
1457 flush_workqueue(struct workqueue_struct *wq)
1458 {
1459 
1460 	mutex_enter(&wq->wq_lock);
1461 	(void)flush_workqueue_locked(wq);
1462 	mutex_exit(&wq->wq_lock);
1463 }
1464 
1465 /*
1466  * drain_workqueue(wq)
1467  *
1468  *	Repeatedly flush wq until there is no more work.
1469  */
1470 void
1471 drain_workqueue(struct workqueue_struct *wq)
1472 {
1473 	unsigned ntries = 0;
1474 
1475 	mutex_enter(&wq->wq_lock);
1476 	while (flush_workqueue_locked(wq)) {
1477 		if (ntries++ == 10 || (ntries % 100) == 0)
1478 			printf("linux workqueue %s"
1479 			    ": still clogged after %u flushes\n",
1480 			    wq->wq_name, ntries);
1481 	}
1482 	mutex_exit(&wq->wq_lock);
1483 }
1484 
1485 /*
1486  * flush_work(work)
1487  *
1488  *	If work is queued or currently executing, wait for it to
1489  *	complete.
1490  *
1491  *	Return true if we waited to flush it, false if it was already
1492  *	idle.
1493  */
1494 bool
1495 flush_work(struct work_struct *work)
1496 {
1497 	struct workqueue_struct *wq;
1498 
1499 	/* If there's no workqueue, nothing to flush.  */
1500 	if ((wq = work_queue(work)) == NULL)
1501 		return false;
1502 
1503 	flush_workqueue(wq);
1504 	return true;
1505 }
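
/*
 * Illustrative sketch, not part of the original file (hypothetical
 * names): flush_work is used when the caller needs the side effects
 * of previously queued work to be visible before proceeding, without
 * cancelling it.  This implementation flushes the whole workqueue the
 * work was last queued on, which is stronger than waiting for that
 * one work item, so the pattern still holds.
 *
 *	(void)queue_work(sc->sc_wq, &sc->sc_commit_work);
 *	...
 *	(void)flush_work(&sc->sc_commit_work);
 *	... the commit handler, if it was queued, has now completed ...
 */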
1506 
1507 /*
1508  * flush_delayed_work(dw)
1509  *
1510  *	If dw is scheduled to run after a delay, queue it immediately
1511  *	instead.  Then, if dw is queued or currently executing, wait
1512  *	for it to complete.
1513  */
1514 bool
1515 flush_delayed_work(struct delayed_work *dw)
1516 {
1517 	struct workqueue_struct *wq;
1518 	bool waited = false;
1519 
1520 	/* If there's no workqueue, nothing to flush.  */
1521 	if ((wq = work_queue(&dw->work)) == NULL)
1522 		return false;
1523 
1524 	mutex_enter(&wq->wq_lock);
1525 	if (__predict_false(work_queue(&dw->work) != wq)) {
1526 		/*
1527 		 * Moved off the queue already (and possibly to another
1528 		 * queue, though that would be ill-advised), so it must
1529 		 * have completed, and we have nothing more to do.
1530 		 */
1531 		waited = false;
1532 	} else {
1533 		switch (dw->dw_state) {
1534 		case DELAYED_WORK_IDLE:
1535 			/*
1536 			 * It has a workqueue assigned and the callout
1537 			 * is idle, so it must be in progress or on the
1538 			 * queue.  In that case, we'll wait for it to
1539 			 * complete.
1540 			 */
1541 			break;
1542 		case DELAYED_WORK_SCHEDULED:
1543 		case DELAYED_WORK_RESCHEDULED:
1544 		case DELAYED_WORK_CANCELLED:
1545 			/*
1546 			 * The callout is scheduled, and may have even
1547 			 * started.  Mark it as scheduled so that if
1548 			 * the callout has fired it will queue the work
1549 			 * itself.  Try to stop the callout -- if we
1550 			 * can, queue the work now; if we can't, wait
1551 			 * for the callout to complete, which entails
1552 			 * queueing it.
1553 			 */
1554 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1555 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1556 				/*
1557 				 * We stopped it before it ran.  No
1558 				 * state change in the interim is
1559 				 * possible.  Destroy the callout and
1560 				 * queue it ourselves.
1561 				 */
1562 				KASSERT(dw->dw_state ==
1563 				    DELAYED_WORK_SCHEDULED);
1564 				dw_callout_destroy(wq, dw);
1565 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1566 				    work_entry);
1567 				cv_broadcast(&wq->wq_cv);
1568 				SDT_PROBE2(sdt, linux, work, queue,
1569 				    &dw->work, wq);
1570 			}
1571 			break;
1572 		default:
1573 			panic("invalid delayed work state: %d", dw->dw_state);
1574 		}
1575 		/*
1576 		 * Waiting for the whole queue to flush is overkill,
1577 		 * but doesn't hurt.
1578 		 */
1579 		(void)flush_workqueue_locked(wq);
1580 		waited = true;
1581 	}
1582 	mutex_exit(&wq->wq_lock);
1583 
1584 	return waited;
1585 }
1586 
1587 /*
1588  * delayed_work_pending(dw)
1589  *
1590  *	True if dw is currently scheduled to execute, false if not.
1591  */
1592 bool
1593 delayed_work_pending(const struct delayed_work *dw)
1594 {
1595 
1596 	return work_pending(&dw->work);
1597 }
1598