1 /*	$NetBSD: linux_work.c,v 1.58 2021/12/19 12:11:36 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.58 2021/12/19 12:11:36 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49 
50 #include <linux/workqueue.h>
51 
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54 
55 struct workqueue_struct {
56 	kmutex_t		wq_lock;
57 	kcondvar_t		wq_cv;
58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
59 	struct work_head	wq_rcu;	    /* RCU work scheduled */
60 	struct work_head	wq_queue;   /* work to run */
61 	struct work_head	wq_dqueue;  /* delayed work to run now */
62 	struct work_struct	*wq_current_work;
63 	int			wq_flags;
64 	bool			wq_dying;
65 	uint64_t		wq_gen;
66 	struct lwp		*wq_lwp;
67 	const char		*wq_name;
68 };
69 
70 static void __dead	linux_workqueue_thread(void *);
71 static void		linux_workqueue_timeout(void *);
72 static bool		work_claimed(struct work_struct *,
73 			    struct workqueue_struct *);
74 static struct workqueue_struct *
75 			work_queue(struct work_struct *);
76 static bool		acquire_work(struct work_struct *,
77 			    struct workqueue_struct *);
78 static void		release_work(struct work_struct *,
79 			    struct workqueue_struct *);
80 static void		wait_for_current_work(struct work_struct *,
81 			    struct workqueue_struct *);
82 static void		dw_callout_init(struct workqueue_struct *,
83 			    struct delayed_work *);
84 static void		dw_callout_destroy(struct workqueue_struct *,
85 			    struct delayed_work *);
86 static void		cancel_delayed_work_done(struct workqueue_struct *,
87 			    struct delayed_work *);
88 
89 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
90     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
91 SDT_PROBE_DEFINE2(sdt, linux, work, release,
92     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
93 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
94     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
95 SDT_PROBE_DEFINE2(sdt, linux, work, rcu,
96     "struct rcu_work *"/*work*/, "struct workqueue_struct *"/*wq*/);
97 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
98     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
99 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
101     "unsigned long"/*ticks*/);
102 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
103     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
104 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
105     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
106 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
107     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
108 SDT_PROBE_DEFINE2(sdt, linux, work, run,
109     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
110 SDT_PROBE_DEFINE2(sdt, linux, work, done,
111     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
112 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
113     "struct workqueue_struct *"/*wq*/);
114 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
115     "struct workqueue_struct *"/*wq*/);
116 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
117     "struct workqueue_struct *"/*wq*/);
118 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
119     "struct workqueue_struct *"/*wq*/);
120 
121 static specificdata_key_t workqueue_key __read_mostly;
122 
123 struct workqueue_struct	*system_highpri_wq __read_mostly;
124 struct workqueue_struct	*system_long_wq __read_mostly;
125 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
126 struct workqueue_struct	*system_unbound_wq __read_mostly;
127 struct workqueue_struct	*system_wq __read_mostly;
128 
129 static inline uintptr_t
130 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
131 {
132 
133 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
134 }
135 
136 /*
137  * linux_workqueue_init()
138  *
139  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
140  *	NetBSD error on failure.
141  */
142 static int
143 linux_workqueue_init0(void)
144 {
145 	int error;
146 
147 	error = lwp_specific_key_create(&workqueue_key, NULL);
148 	if (error)
149 		goto out;
150 
151 	system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
152 	if (system_highpri_wq == NULL) {
153 		error = ENOMEM;
154 		goto out;
155 	}
156 
157 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
158 	if (system_long_wq == NULL) {
159 		error = ENOMEM;
160 		goto out;
161 	}
162 
163 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
164 	if (system_power_efficient_wq == NULL) {
165 		error = ENOMEM;
166 		goto out;
167 	}
168 
169 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
170 	if (system_unbound_wq == NULL) {
171 		error = ENOMEM;
172 		goto out;
173 	}
174 
175 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
176 	if (system_wq == NULL) {
177 		error = ENOMEM;
178 		goto out;
179 	}
180 
181 	/* Success!  */
182 	error = 0;
183 
184 out:	if (error) {
185 		if (system_highpri_wq)
186 			destroy_workqueue(system_highpri_wq);
187 		if (system_long_wq)
188 			destroy_workqueue(system_long_wq);
189 		if (system_power_efficient_wq)
190 			destroy_workqueue(system_power_efficient_wq);
191 		if (system_unbound_wq)
192 			destroy_workqueue(system_unbound_wq);
193 		if (system_wq)
194 			destroy_workqueue(system_wq);
195 		if (workqueue_key)
196 			lwp_specific_key_delete(workqueue_key);
197 	}
198 
199 	return error;
200 }
201 
202 /*
203  * linux_workqueue_fini()
204  *
205  *	Destroy the Linux workqueue subsystem.  Never fails.
206  */
207 static void
208 linux_workqueue_fini0(void)
209 {
210 
211 	destroy_workqueue(system_power_efficient_wq);
212 	destroy_workqueue(system_long_wq);
213 	destroy_workqueue(system_wq);
	destroy_workqueue(system_unbound_wq);
	destroy_workqueue(system_highpri_wq);
214 	lwp_specific_key_delete(workqueue_key);
215 }
216 
217 #ifndef _MODULE
218 static ONCE_DECL(linux_workqueue_init_once);
219 #endif
220 
221 int
222 linux_workqueue_init(void)
223 {
224 #ifdef _MODULE
225 	return linux_workqueue_init0();
226 #else
227 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
228 #endif
229 }
230 
231 void
232 linux_workqueue_fini(void)
233 {
234 #ifdef _MODULE
235 	return linux_workqueue_fini0();
236 #else
237 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
238 #endif
239 }
240 
241 /*
242  * Workqueues
243  */
244 
245 /*
246  * alloc_workqueue(name, flags, max_active)
247  *
248  *	Create a workqueue of the given name.  max_active is the
249  *	maximum number of work items in flight, or 0 for the default.
250  *	Return NULL on failure, pointer to struct workqueue_struct
251  *	object on success.
252  */
253 struct workqueue_struct *
254 alloc_workqueue(const char *name, int flags, unsigned max_active)
255 {
256 	struct workqueue_struct *wq;
257 	int error;
258 
259 	KASSERT(max_active == 0 || max_active == 1);
260 
261 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
262 
263 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
264 	cv_init(&wq->wq_cv, name);
265 	TAILQ_INIT(&wq->wq_delayed);
266 	TAILQ_INIT(&wq->wq_rcu);
267 	TAILQ_INIT(&wq->wq_queue);
268 	TAILQ_INIT(&wq->wq_dqueue);
269 	wq->wq_current_work = NULL;
270 	wq->wq_flags = 0;
271 	wq->wq_dying = false;
272 	wq->wq_gen = 0;
273 	wq->wq_lwp = NULL;
274 	wq->wq_name = name;
275 
276 	error = kthread_create(PRI_NONE,
277 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
278 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
279 	if (error)
280 		goto fail0;
281 
282 	return wq;
283 
284 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
285 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
286 	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
287 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
288 	cv_destroy(&wq->wq_cv);
289 	mutex_destroy(&wq->wq_lock);
290 	kmem_free(wq, sizeof(*wq));
291 	return NULL;
292 }
293 
294 /*
295  * alloc_ordered_workqueue(name, flags)
296  *
297  *	Same as alloc_workqueue(name, flags, 1).
298  */
299 struct workqueue_struct *
300 alloc_ordered_workqueue(const char *name, int flags)
301 {
302 
303 	return alloc_workqueue(name, flags, 1);
304 }
305 
306 /*
307  * destroy_workqueue(wq)
308  *
309  *	Destroy the workqueue wq, created with alloc_workqueue.  Cancel
310  *	any pending delayed work.  Wait for all queued work to complete.
311  *
312  *	May sleep.
313  */
314 void
315 destroy_workqueue(struct workqueue_struct *wq)
316 {
317 
318 	/*
319 	 * Cancel all delayed work.  We do this first because any
320 	 * delayed work that has already timed out, which we can't
321 	 * cancel, may have queued new work.
322 	 */
323 	mutex_enter(&wq->wq_lock);
324 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
325 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
326 
327 		KASSERT(work_queue(&dw->work) == wq);
328 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
329 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
330 			dw->dw_state == DELAYED_WORK_CANCELLED),
331 		    "delayed work %p in bad state: %d",
332 		    dw, dw->dw_state);
333 
334 		/*
335 		 * Mark it cancelled and try to stop the callout before
336 		 * it starts.
337 		 *
338 		 * If it's too late and the callout has already begun
339 		 * to execute, then it will notice that we asked to
340 		 * cancel it and remove itself from the queue before
341 		 * returning.
342 		 *
343 		 * If we stopped the callout before it started,
344 		 * however, then we can safely destroy the callout and
345 		 * dissociate it from the workqueue ourselves.
346 		 */
347 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
348 		dw->dw_state = DELAYED_WORK_CANCELLED;
349 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
350 			cancel_delayed_work_done(wq, dw);
351 	}
352 	mutex_exit(&wq->wq_lock);
353 
354 	/* Wait for all scheduled RCU work to complete.  */
355 	mutex_enter(&wq->wq_lock);
356 	while (!TAILQ_EMPTY(&wq->wq_rcu))
357 		cv_wait(&wq->wq_cv, &wq->wq_lock);
358 	mutex_exit(&wq->wq_lock);
359 
360 	/*
361 	 * At this point, no new work can be put on the queue.
362 	 */
363 
364 	/* Tell the thread to exit.  */
365 	mutex_enter(&wq->wq_lock);
366 	wq->wq_dying = true;
367 	cv_broadcast(&wq->wq_cv);
368 	mutex_exit(&wq->wq_lock);
369 
370 	/* Wait for it to exit.  */
371 	(void)kthread_join(wq->wq_lwp);
372 
373 	KASSERT(wq->wq_dying);
374 	KASSERT(wq->wq_flags == 0);
375 	KASSERT(wq->wq_current_work == NULL);
376 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
377 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
378 	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
379 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
380 	cv_destroy(&wq->wq_cv);
381 	mutex_destroy(&wq->wq_lock);
382 
383 	kmem_free(wq, sizeof(*wq));
384 }
385 
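/*
 * Example usage (editorial sketch; the foo_* names are hypothetical
 * and not part of this file): a driver that owns a private ordered
 * workqueue, created at attach time and destroyed at detach time.
 *
 *	struct foo_softc {
 *		struct workqueue_struct	*sc_wq;
 *	};
 *
 *	static int
 *	foo_attach(struct foo_softc *sc)
 *	{
 *
 *		sc->sc_wq = alloc_ordered_workqueue("foowq", 0);
 *		if (sc->sc_wq == NULL)
 *			return ENOMEM;
 *		return 0;
 *	}
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *
 *		destroy_workqueue(sc->sc_wq);
 *		sc->sc_wq = NULL;
 *	}
 *
 * destroy_workqueue cancels any still-pending delayed work and waits
 * for queued work to finish, so foo_detach may sleep.
 */
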
386 /*
387  * Work thread and callout
388  */
389 
390 /*
391  * linux_workqueue_thread(cookie)
392  *
393  *	Main function for a workqueue's worker thread.  Waits until
394  *	there is work queued, grabs a batch of work off the queue,
395  *	executes it all, bumps the generation number, and repeats,
396  *	until dying.
397  */
398 static void __dead
399 linux_workqueue_thread(void *cookie)
400 {
401 	struct workqueue_struct *const wq = cookie;
402 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
403 	struct work_struct marker, *work;
404 	unsigned i;
405 
406 	lwp_setspecific(workqueue_key, wq);
407 
408 	mutex_enter(&wq->wq_lock);
409 	for (;;) {
410 		/*
411 		 * Wait until there's activity.  If there's no work and
412 		 * we're dying, stop here.
413 		 */
414 		if (TAILQ_EMPTY(&wq->wq_queue) &&
415 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
416 			if (wq->wq_dying)
417 				break;
418 			cv_wait(&wq->wq_cv, &wq->wq_lock);
419 			continue;
420 		}
421 
422 		/*
423 		 * Start a batch of work.  Use a marker to delimit when
424 		 * the batch ends so we can advance the generation
425 		 * after the batch.
426 		 */
427 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
428 		for (i = 0; i < 2; i++) {
429 			if (TAILQ_EMPTY(q[i]))
430 				continue;
431 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
432 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
433 				void (*func)(struct work_struct *);
434 
435 				KASSERT(work_queue(work) == wq);
436 				KASSERT(work_claimed(work, wq));
437 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
438 					container_of(work, struct delayed_work,
439 					    work)->dw_state ==
440 					DELAYED_WORK_IDLE),
441 				    "delayed work %p queued and scheduled",
442 				    work);
443 
444 				TAILQ_REMOVE(q[i], work, work_entry);
445 				KASSERT(wq->wq_current_work == NULL);
446 				wq->wq_current_work = work;
447 				func = work->func;
448 				release_work(work, wq);
449 				/* Can't dereference work after this point.  */
450 
451 				mutex_exit(&wq->wq_lock);
452 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
453 				(*func)(work);
454 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
455 				mutex_enter(&wq->wq_lock);
456 
457 				KASSERT(wq->wq_current_work == work);
458 				wq->wq_current_work = NULL;
459 				cv_broadcast(&wq->wq_cv);
460 			}
461 			TAILQ_REMOVE(q[i], &marker, work_entry);
462 		}
463 
464 		/* Notify cancel that we've completed a batch of work.  */
465 		wq->wq_gen++;
466 		cv_broadcast(&wq->wq_cv);
467 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
468 	}
469 	mutex_exit(&wq->wq_lock);
470 
471 	kthread_exit(0);
472 }
473 
474 /*
475  * linux_workqueue_timeout(cookie)
476  *
477  *	Delayed work timeout callback.
478  *
479  *	- If scheduled, queue it.
480  *	- If rescheduled, callout_schedule ourselves again.
481  *	- If cancelled, destroy the callout and release the work from
482  *        the workqueue.
483  */
484 static void
485 linux_workqueue_timeout(void *cookie)
486 {
487 	struct delayed_work *const dw = cookie;
488 	struct workqueue_struct *const wq = work_queue(&dw->work);
489 
490 	KASSERTMSG(wq != NULL,
491 	    "delayed work %p state %d resched %d",
492 	    dw, dw->dw_state, dw->dw_resched);
493 
494 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
495 
496 	mutex_enter(&wq->wq_lock);
497 	KASSERT(work_queue(&dw->work) == wq);
498 	switch (dw->dw_state) {
499 	case DELAYED_WORK_IDLE:
500 		panic("delayed work callout uninitialized: %p", dw);
501 	case DELAYED_WORK_SCHEDULED:
502 		dw_callout_destroy(wq, dw);
503 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
504 		cv_broadcast(&wq->wq_cv);
505 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
506 		break;
507 	case DELAYED_WORK_RESCHEDULED:
508 		KASSERT(dw->dw_resched >= 0);
509 		callout_schedule(&dw->dw_callout, dw->dw_resched);
510 		dw->dw_state = DELAYED_WORK_SCHEDULED;
511 		dw->dw_resched = -1;
512 		break;
513 	case DELAYED_WORK_CANCELLED:
514 		cancel_delayed_work_done(wq, dw);
515 		/* Can't dereference dw after this point.  */
516 		goto out;
517 	default:
518 		panic("delayed work callout in bad state: %p", dw);
519 	}
520 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
521 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
522 out:	mutex_exit(&wq->wq_lock);
523 }
524 
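/*
 * Editorial summary of the delayed-work states used above and below
 * (derived from the code in this file):
 *
 *	IDLE        -> SCHEDULED	dw_callout_init; callout armed
 *	SCHEDULED   -> IDLE		callout fired and queued the work,
 *					or a caller stopped the callout
 *					in time and queued/cancelled it
 *	SCHEDULED   -> RESCHEDULED	callout already fired; asked to
 *					re-arm with dw_resched ticks
 *	SCHEDULED   -> CANCELLED	callout already fired; asked to
 *					cancel
 *	RESCHEDULED -> SCHEDULED	callout re-arms itself
 *	CANCELLED   -> SCHEDULED or	re-queued before the callout ran
 *		       RESCHEDULED
 *	CANCELLED   -> IDLE		cancel_delayed_work_done
 */
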
525 /*
526  * current_work()
527  *
528  *	If in a workqueue worker thread, return the work it is
529  *	currently executing.  Otherwise return NULL.
530  */
531 struct work_struct *
532 current_work(void)
533 {
534 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
535 
536 	/* If we're not a workqueue thread, then there's no work.  */
537 	if (wq == NULL)
538 		return NULL;
539 
540 	/*
541 	 * Otherwise, this should be possible only while work is in
542 	 * progress.  Return the current work item.
543 	 */
544 	KASSERT(wq->wq_current_work != NULL);
545 	return wq->wq_current_work;
546 }
547 
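/*
 * Example usage (editorial sketch, hypothetical names): a work
 * callback can assert that it is running as the current work item of
 * its workqueue thread.
 *
 *	static void
 *	foo_work(struct work_struct *work)
 *	{
 *
 *		KASSERT(current_work() == work);
 *		...
 *	}
 */
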
548 /*
549  * Work
550  */
551 
552 /*
553  * INIT_WORK(work, fn)
554  *
555  *	Initialize work for use with a workqueue to call fn in a worker
556  *	thread.  There is no corresponding destruction operation.
557  */
558 void
559 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
560 {
561 
562 	work->work_owner = 0;
563 	work->func = fn;
564 }
565 
566 /*
567  * work_claimed(work, wq)
568  *
569  *	True if work is currently claimed by a workqueue, meaning it is
570  *	either on the queue or scheduled in a callout.  The workqueue
571  *	must be wq, and caller must hold wq's lock.
572  */
573 static bool
574 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
575 {
576 
577 	KASSERT(work_queue(work) == wq);
578 	KASSERT(mutex_owned(&wq->wq_lock));
579 
580 	return atomic_load_relaxed(&work->work_owner) & 1;
581 }
582 
583 /*
584  * work_pending(work)
585  *
586  *	True if work is currently claimed by any workqueue, scheduled
587  *	to run on that workqueue.
588  */
589 bool
590 work_pending(const struct work_struct *work)
591 {
592 
593 	return atomic_load_relaxed(&work->work_owner) & 1;
594 }
595 
596 /*
597  * work_queue(work)
598  *
599  *	Return the last queue that work was queued on, or NULL if it
600  *	was never queued.
601  */
602 static struct workqueue_struct *
603 work_queue(struct work_struct *work)
604 {
605 
606 	return (struct workqueue_struct *)
607 	    (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
608 }
609 
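/*
 * Editorial illustration of the work_owner encoding used by
 * work_claimed, work_pending, and work_queue above: the owning
 * workqueue pointer and the "claimed" flag share one word, with the
 * flag in bit 0.
 *
 *	work_owner = (uintptr_t)wq | claimed
 *	work_pending(work) == (work_owner & 1)
 *	work_queue(work)   == (struct workqueue_struct *)(work_owner & ~1)
 *
 * This relies on struct workqueue_struct pointers being at least
 * 2-byte aligned so that bit 0 is always free, which acquire_work
 * asserts below.
 */
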
610 /*
611  * acquire_work(work, wq)
612  *
613  *	Try to claim work for wq.  If work is already claimed, it must
614  *	be claimed by wq; return false.  If work is not already
615  *	claimed, claim it, issue a memory barrier to match any prior
616  *	release_work, and return true.
617  *
618  *	Caller must hold wq's lock.
619  */
620 static bool
621 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
622 {
623 	uintptr_t owner0, owner;
624 
625 	KASSERT(mutex_owned(&wq->wq_lock));
626 	KASSERT(((uintptr_t)wq & 1) == 0);
627 
628 	owner = (uintptr_t)wq | 1;
629 	do {
630 		owner0 = atomic_load_relaxed(&work->work_owner);
631 		if (owner0 & 1) {
632 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
633 			return false;
634 		}
635 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
636 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
637 	    owner0);
638 
639 	KASSERT(work_queue(work) == wq);
640 	membar_enter();
641 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
642 	return true;
643 }
644 
645 /*
646  * release_work(work, wq)
647  *
648  *	Issue a memory barrier to match any subsequent acquire_work and
649  *	dissociate work from wq.
650  *
651  *	Caller must hold wq's lock and work must be associated with wq.
652  */
653 static void
654 release_work(struct work_struct *work, struct workqueue_struct *wq)
655 {
656 
657 	KASSERT(work_queue(work) == wq);
658 	KASSERT(mutex_owned(&wq->wq_lock));
659 
660 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
661 	membar_exit();
662 
663 	/*
664 	 * Non-interlocked r/m/w is safe here because nobody else can
665 	 * write to this while the claimed bit is set and the workqueue
666 	 * lock is held.
667 	 */
668 	atomic_store_relaxed(&work->work_owner,
669 	    atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
670 }
671 
672 /*
673  * schedule_work(work)
674  *
675  *	If work is not already queued on system_wq, queue it to be run
676  *	by system_wq's worker thread when it next can.  True if it was
677  *	newly queued, false if it was already queued.  If the work was
678  *	already running, queue it to run again.
679  *
680  *	Caller must ensure work is not queued to run on a different
681  *	workqueue.
682  */
683 bool
684 schedule_work(struct work_struct *work)
685 {
686 
687 	return queue_work(system_wq, work);
688 }
689 
690 /*
691  * queue_work(wq, work)
692  *
693  *	If work is not already queued on wq, queue it to be run by wq's
694  *	worker thread when it next can.  True if it was newly queued,
695  *	false if it was already queued.  If the work was already
696  *	running, queue it to run again.
697  *
698  *	Caller must ensure work is not queued to run on a different
699  *	workqueue.
700  */
701 bool
702 queue_work(struct workqueue_struct *wq, struct work_struct *work)
703 {
704 	bool newly_queued;
705 
706 	KASSERT(wq != NULL);
707 
708 	mutex_enter(&wq->wq_lock);
709 	if (__predict_true(acquire_work(work, wq))) {
710 		/*
711 		 * It wasn't on any workqueue at all.  Put it on this
712 		 * one, and signal the worker thread that there is work
713 		 * to do.
714 		 */
715 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
716 		cv_broadcast(&wq->wq_cv);
717 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
718 		newly_queued = true;
719 	} else {
720 		/*
721 		 * It was already on this workqueue.  Nothing to do
722 		 * since it is already queued.
723 		 */
724 		newly_queued = false;
725 	}
726 	mutex_exit(&wq->wq_lock);
727 
728 	return newly_queued;
729 }
730 
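/*
 * Example usage (editorial sketch; foo_* is hypothetical): embed a
 * work_struct in a softc, initialize it once, and queue it, e.g. from
 * an interrupt handler; the callback recovers the softc with
 * container_of.
 *
 *	struct foo_softc {
 *		struct work_struct	sc_work;
 *	};
 *
 *	static void
 *	foo_work(struct work_struct *work)
 *	{
 *		struct foo_softc *sc = container_of(work,
 *		    struct foo_softc, sc_work);
 *
 *		...
 *	}
 *
 *	INIT_WORK(&sc->sc_work, foo_work);
 *	...
 *	(void)queue_work(system_wq, &sc->sc_work);
 */
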
731 /*
732  * cancel_work(work)
733  *
734  *	If work was queued, remove it from the queue and return true.
735  *	If work was not queued, return false.  Work may still be
736  *	running when this returns.
737  */
738 bool
739 cancel_work(struct work_struct *work)
740 {
741 	struct workqueue_struct *wq;
742 	bool cancelled_p = false;
743 
744 	/* If there's no workqueue, nothing to cancel.   */
745 	if ((wq = work_queue(work)) == NULL)
746 		goto out;
747 
748 	mutex_enter(&wq->wq_lock);
749 	if (__predict_false(work_queue(work) != wq)) {
750 		/*
751 		 * It has finished execution or been cancelled by
752 		 * another thread, and has been moved off the
753 		 * workqueue, so it's too late to cancel.
754 		 */
755 		cancelled_p = false;
756 	} else {
757 		/* Check whether it's on the queue.  */
758 		if (work_claimed(work, wq)) {
759 			/*
760 			 * It is still on the queue.  Take it off the
761 			 * queue and report successful cancellation.
762 			 */
763 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
764 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
765 			release_work(work, wq);
766 			/* Can't dereference work after this point.  */
767 			cancelled_p = true;
768 		} else {
769 			/* Not on the queue.  Couldn't cancel it.  */
770 			cancelled_p = false;
771 		}
772 	}
773 	mutex_exit(&wq->wq_lock);
774 
775 out:	return cancelled_p;
776 }
777 
778 /*
779  * cancel_work_sync(work)
780  *
781  *	If work was queued, remove it from the queue and return true.
782  *	If work was not queued, return false.  Either way, if work is
783  *	currently running, wait for it to complete.
784  *
785  *	May sleep.
786  */
787 bool
788 cancel_work_sync(struct work_struct *work)
789 {
790 	struct workqueue_struct *wq;
791 	bool cancelled_p = false;
792 
793 	/* If there's no workqueue, nothing to cancel.   */
794 	if ((wq = work_queue(work)) == NULL)
795 		goto out;
796 
797 	mutex_enter(&wq->wq_lock);
798 	if (__predict_false(work_queue(work) != wq)) {
799 		/*
800 		 * It has finished execution or been cancelled by
801 		 * another thread, and has been moved off the
802 		 * workqueue, so it's too late to cancel.
803 		 */
804 		cancelled_p = false;
805 	} else {
806 		/* Check whether it's on the queue.  */
807 		if (work_claimed(work, wq)) {
808 			/*
809 			 * It is still on the queue.  Take it off the
810 			 * queue and report successful cancellation.
811 			 */
812 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
813 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
814 			release_work(work, wq);
815 			/* Can't dereference work after this point.  */
816 			cancelled_p = true;
817 		} else {
818 			/* Not on the queue.  Couldn't cancel it.  */
819 			cancelled_p = false;
820 		}
821 		/* If it's still running, wait for it to complete.  */
822 		if (wq->wq_current_work == work)
823 			wait_for_current_work(work, wq);
824 	}
825 	mutex_exit(&wq->wq_lock);
826 
827 out:	return cancelled_p;
828 }
829 
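/*
 * Example usage (editorial sketch, hypothetical names): a detach path
 * must make sure the callback is neither queued nor still running
 * before freeing the memory that embeds the work_struct.
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *
 *		(void)cancel_work_sync(&sc->sc_work);
 *		kmem_free(sc, sizeof(*sc));
 *	}
 */
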
830 /*
831  * wait_for_current_work(work, wq)
832  *
833  *	wq must be currently executing work.  Wait for it to finish.
834  *
835  *	Does not dereference work.
836  */
837 static void
838 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
839 {
840 	uint64_t gen;
841 
842 	KASSERT(mutex_owned(&wq->wq_lock));
843 	KASSERT(wq->wq_current_work == work);
844 
845 	/* Wait only one generation in case it gets requeued quickly.  */
846 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
847 	gen = wq->wq_gen;
848 	do {
849 		cv_wait(&wq->wq_cv, &wq->wq_lock);
850 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
851 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
852 }
853 
854 /*
855  * Delayed work
856  */
857 
858 /*
859  * INIT_DELAYED_WORK(dw, fn)
860  *
861  *	Initialize dw for use with a workqueue to call fn in a worker
862  *	thread after a delay.  There is no corresponding destruction
863  *	operation.
864  */
865 void
866 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
867 {
868 
869 	INIT_WORK(&dw->work, fn);
870 	dw->dw_state = DELAYED_WORK_IDLE;
871 	dw->dw_resched = -1;
872 
873 	/*
874 	 * Defer callout_init until we are about to schedule the
875 	 * callout; it is then callout_destroy'd when it fires or is
876 	 * cancelled.  If we initialized it here, nothing would ever
877 	 * destroy it, since there is no DESTROY_DELAYED_WORK.
878 	 */
879 }
880 
881 /*
882  * schedule_delayed_work(dw, ticks)
883  *
884  *	If it is not currently scheduled, schedule dw to run after
885  *	ticks on system_wq.  If currently executing and not already
886  *	rescheduled, reschedule it.  True if it was newly scheduled,
887  *	false if it was already scheduled.
888  *
889  *	If ticks == 0, queue it to run as soon as the worker can,
890  *	without waiting for the next callout tick to run.
891  */
892 bool
893 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
894 {
895 
896 	return queue_delayed_work(system_wq, dw, ticks);
897 }
898 
899 /*
900  * dw_callout_init(wq, dw)
901  *
902  *	Initialize the callout of dw and transition to
903  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
904  */
905 static void
906 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
907 {
908 
909 	KASSERT(mutex_owned(&wq->wq_lock));
910 	KASSERT(work_queue(&dw->work) == wq);
911 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
912 
913 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
914 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
915 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
916 	dw->dw_state = DELAYED_WORK_SCHEDULED;
917 }
918 
919 /*
920  * dw_callout_destroy(wq, dw)
921  *
922  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
923  */
924 static void
925 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
926 {
927 
928 	KASSERT(mutex_owned(&wq->wq_lock));
929 	KASSERT(work_queue(&dw->work) == wq);
930 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
931 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
932 	    dw->dw_state == DELAYED_WORK_CANCELLED);
933 
934 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
935 	callout_destroy(&dw->dw_callout);
936 	dw->dw_resched = -1;
937 	dw->dw_state = DELAYED_WORK_IDLE;
938 }
939 
940 /*
941  * cancel_delayed_work_done(wq, dw)
942  *
943  *	Complete cancellation of a delayed work: transition from
944  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
945  *	workqueue.  Caller must not dereference dw after this returns.
946  */
947 static void
948 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
949 {
950 
951 	KASSERT(mutex_owned(&wq->wq_lock));
952 	KASSERT(work_queue(&dw->work) == wq);
953 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
954 
955 	dw_callout_destroy(wq, dw);
956 	release_work(&dw->work, wq);
957 	/* Can't dereference dw after this point.  */
958 }
959 
960 /*
961  * queue_delayed_work(wq, dw, ticks)
962  *
963  *	If it is not currently scheduled, schedule dw to run after
964  *	ticks on wq.  If already scheduled or queued, leave it alone.
965  *	True if newly scheduled or re-armed, false if left alone.
966  *
967  *	If ticks == 0, queue it to run as soon as the worker can,
968  *	without waiting for the next callout tick to run.
969  */
970 bool
971 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
972     unsigned long ticks)
973 {
974 	bool newly_queued;
975 
976 	mutex_enter(&wq->wq_lock);
977 	if (__predict_true(acquire_work(&dw->work, wq))) {
978 		/*
979 		 * It wasn't on any workqueue at all.  Schedule it to
980 		 * run on this one.
981 		 */
982 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
983 		if (ticks == 0) {
984 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
985 			    work_entry);
986 			cv_broadcast(&wq->wq_cv);
987 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
988 		} else {
989 			/*
990 			 * Initialize a callout and schedule to run
991 			 * after a delay.
992 			 */
993 			dw_callout_init(wq, dw);
994 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
995 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
996 		}
997 		newly_queued = true;
998 	} else {
999 		/* It was already on this workqueue.  */
1000 		switch (dw->dw_state) {
1001 		case DELAYED_WORK_IDLE:
1002 		case DELAYED_WORK_SCHEDULED:
1003 		case DELAYED_WORK_RESCHEDULED:
1004 			/* On the queue or already scheduled.  Leave it.  */
1005 			newly_queued = false;
1006 			break;
1007 		case DELAYED_WORK_CANCELLED:
1008 			/*
1009 			 * Scheduled and the callout began, but it was
1010 			 * cancelled.  Reschedule it.
1011 			 */
1012 			if (ticks == 0) {
1013 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1014 				SDT_PROBE2(sdt, linux, work, queue,
1015 				    &dw->work, wq);
1016 			} else {
1017 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1018 				dw->dw_resched = MIN(INT_MAX, ticks);
1019 				SDT_PROBE3(sdt, linux, work, schedule,
1020 				    dw, wq, ticks);
1021 			}
1022 			newly_queued = true;
1023 			break;
1024 		default:
1025 			panic("invalid delayed work state: %d",
1026 			    dw->dw_state);
1027 		}
1028 	}
1029 	mutex_exit(&wq->wq_lock);
1030 
1031 	return newly_queued;
1032 }
1033 
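/*
 * Example usage (editorial sketch, hypothetical names): a periodic
 * tick implemented with delayed work; mstohz(9) converts milliseconds
 * to callout ticks.
 *
 *	static void
 *	foo_tick(struct work_struct *work)
 *	{
 *		struct foo_softc *sc = container_of(work,
 *		    struct foo_softc, sc_tick.work);
 *
 *		...
 *		(void)queue_delayed_work(system_wq, &sc->sc_tick,
 *		    mstohz(1000));
 *	}
 *
 *	INIT_DELAYED_WORK(&sc->sc_tick, foo_tick);
 *	(void)queue_delayed_work(system_wq, &sc->sc_tick, mstohz(1000));
 */
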
1034 /*
1035  * mod_delayed_work(wq, dw, ticks)
1036  *
1037  *	Schedule dw to run after ticks.  If scheduled or queued,
1038  *	reschedule.  If ticks == 0, run without delay.
1039  *
1040  *	True if it modified the timer of an already scheduled work,
1041  *	false if it newly scheduled the work.
1042  */
1043 bool
1044 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1045     unsigned long ticks)
1046 {
1047 	bool timer_modified;
1048 
1049 	mutex_enter(&wq->wq_lock);
1050 	if (acquire_work(&dw->work, wq)) {
1051 		/*
1052 		 * It wasn't on any workqueue at all.  Schedule it to
1053 		 * run on this one.
1054 		 */
1055 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1056 		if (ticks == 0) {
1057 			/*
1058 			 * Run immediately: put it on the queue and
1059 			 * signal the worker thread.
1060 			 */
1061 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1062 			    work_entry);
1063 			cv_broadcast(&wq->wq_cv);
1064 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
1065 		} else {
1066 			/*
1067 			 * Initialize a callout and schedule to run
1068 			 * after a delay.
1069 			 */
1070 			dw_callout_init(wq, dw);
1071 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1072 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
1073 		}
1074 		timer_modified = false;
1075 	} else {
1076 		/* It was already on this workqueue.  */
1077 		switch (dw->dw_state) {
1078 		case DELAYED_WORK_IDLE:
1079 			/* On the queue.  */
1080 			if (ticks == 0) {
1081 				/* Leave it be.  */
1082 				SDT_PROBE2(sdt, linux, work, cancel,
1083 				    &dw->work, wq);
1084 				SDT_PROBE2(sdt, linux, work, queue,
1085 				    &dw->work, wq);
1086 			} else {
1087 				/* Remove from the queue and schedule.  */
1088 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1089 				    work_entry);
1090 				dw_callout_init(wq, dw);
1091 				callout_schedule(&dw->dw_callout,
1092 				    MIN(INT_MAX, ticks));
1093 				SDT_PROBE2(sdt, linux, work, cancel,
1094 				    &dw->work, wq);
1095 				SDT_PROBE3(sdt, linux, work, schedule,
1096 				    dw, wq, ticks);
1097 			}
1098 			timer_modified = true;
1099 			break;
1100 		case DELAYED_WORK_SCHEDULED:
1101 			/*
1102 			 * It is scheduled to run after a delay.  Try
1103 			 * to stop it and reschedule it; if we can't,
1104 			 * either reschedule it or cancel it to put it
1105 			 * on the queue, and inform the callout.
1106 			 */
1107 			if (callout_stop(&dw->dw_callout)) {
1108 				/* Can't stop, callout has begun.  */
1109 				if (ticks == 0) {
1110 					/*
1111 					 * We don't actually need to do
1112 					 * anything.  The callout will
1113 					 * queue it as soon as it gets
1114 					 * the lock.
1115 					 */
1116 					SDT_PROBE2(sdt, linux, work, cancel,
1117 					    &dw->work, wq);
1118 					SDT_PROBE2(sdt, linux, work, queue,
1119 					    &dw->work, wq);
1120 				} else {
1121 					/* Ask the callout to reschedule.  */
1122 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1123 					dw->dw_resched = MIN(INT_MAX, ticks);
1124 					SDT_PROBE2(sdt, linux, work, cancel,
1125 					    &dw->work, wq);
1126 					SDT_PROBE3(sdt, linux, work, schedule,
1127 					    dw, wq, ticks);
1128 				}
1129 			} else {
1130 				/* We stopped the callout before it began.  */
1131 				if (ticks == 0) {
1132 					/*
1133 					 * Run immediately: destroy the
1134 					 * callout, put it on the
1135 					 * queue, and signal the worker
1136 					 * thread.
1137 					 */
1138 					dw_callout_destroy(wq, dw);
1139 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1140 					    &dw->work, work_entry);
1141 					cv_broadcast(&wq->wq_cv);
1142 					SDT_PROBE2(sdt, linux, work, cancel,
1143 					    &dw->work, wq);
1144 					SDT_PROBE2(sdt, linux, work, queue,
1145 					    &dw->work, wq);
1146 				} else {
1147 					/*
1148 					 * Reschedule the callout.  No
1149 					 * state change.
1150 					 */
1151 					callout_schedule(&dw->dw_callout,
1152 					    MIN(INT_MAX, ticks));
1153 					SDT_PROBE2(sdt, linux, work, cancel,
1154 					    &dw->work, wq);
1155 					SDT_PROBE3(sdt, linux, work, schedule,
1156 					    dw, wq, ticks);
1157 				}
1158 			}
1159 			timer_modified = true;
1160 			break;
1161 		case DELAYED_WORK_RESCHEDULED:
1162 			/*
1163 			 * Someone rescheduled it after the callout
1164 			 * started but before the poor thing even had a
1165 			 * chance to acquire the lock.
1166 			 */
1167 			if (ticks == 0) {
1168 				/*
1169 				 * We can just switch back to
1170 				 * DELAYED_WORK_SCHEDULED so that the
1171 				 * callout will queue the work as soon
1172 				 * as it gets the lock.
1173 				 */
1174 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1175 				dw->dw_resched = -1;
1176 				SDT_PROBE2(sdt, linux, work, cancel,
1177 				    &dw->work, wq);
1178 				SDT_PROBE2(sdt, linux, work, queue,
1179 				    &dw->work, wq);
1180 			} else {
1181 				/* Change the rescheduled time.  */
1182 				dw->dw_resched = ticks;
1183 				SDT_PROBE2(sdt, linux, work, cancel,
1184 				    &dw->work, wq);
1185 				SDT_PROBE3(sdt, linux, work, schedule,
1186 				    dw, wq, ticks);
1187 			}
1188 			timer_modified = true;
1189 			break;
1190 		case DELAYED_WORK_CANCELLED:
1191 			/*
1192 			 * Someone cancelled it after the callout
1193 			 * started but before the poor thing even had a
1194 			 * chance to acquire the lock.
1195 			 */
1196 			if (ticks == 0) {
1197 				/*
1198 				 * We can just switch back to
1199 				 * DELAYED_WORK_SCHEDULED so that the
1200 				 * callout will queue the work as soon
1201 				 * as it gets the lock.
1202 				 */
1203 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1204 				SDT_PROBE2(sdt, linux, work, queue,
1205 				    &dw->work, wq);
1206 			} else {
1207 				/* Ask it to reschedule.  */
1208 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1209 				dw->dw_resched = MIN(INT_MAX, ticks);
1210 				SDT_PROBE3(sdt, linux, work, schedule,
1211 				    dw, wq, ticks);
1212 			}
1213 			timer_modified = false;
1214 			break;
1215 		default:
1216 			panic("invalid delayed work state: %d", dw->dw_state);
1217 		}
1218 	}
1219 	mutex_exit(&wq->wq_lock);
1220 
1221 	return timer_modified;
1222 }
1223 
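/*
 * Example usage (editorial sketch, hypothetical names): a watchdog
 * style timeout that is pushed back on every bit of activity, whether
 * or not it is already scheduled.
 *
 *	static void
 *	foo_kick_watchdog(struct foo_softc *sc)
 *	{
 *
 *		(void)mod_delayed_work(system_wq, &sc->sc_watchdog,
 *		    mstohz(5000));
 *	}
 */
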
1224 /*
1225  * cancel_delayed_work(dw)
1226  *
1227  *	If work was scheduled or queued, remove it from the schedule or
1228  *	queue and return true.  If work was not scheduled or queued,
1229  *	return false.  Note that work may already be running; if it
1230  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1231  *	will return false, and either way, cancel_delayed_work will NOT
1232  *	wait for the work to complete.
1233  */
1234 bool
1235 cancel_delayed_work(struct delayed_work *dw)
1236 {
1237 	struct workqueue_struct *wq;
1238 	bool cancelled_p;
1239 
1240 	/* If there's no workqueue, nothing to cancel.   */
1241 	if ((wq = work_queue(&dw->work)) == NULL)
1242 		return false;
1243 
1244 	mutex_enter(&wq->wq_lock);
1245 	if (__predict_false(work_queue(&dw->work) != wq)) {
1246 		cancelled_p = false;
1247 	} else {
1248 		switch (dw->dw_state) {
1249 		case DELAYED_WORK_IDLE:
1250 			/*
1251 			 * It is either on the queue or already running
1252 			 * or both.
1253 			 */
1254 			if (work_claimed(&dw->work, wq)) {
1255 				/* On the queue.  Remove and release.  */
1256 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1257 				    work_entry);
1258 				SDT_PROBE2(sdt, linux, work, cancel,
1259 				    &dw->work, wq);
1260 				release_work(&dw->work, wq);
1261 				/* Can't dereference dw after this point.  */
1262 				cancelled_p = true;
1263 			} else {
1264 				/* Not on the queue, so didn't cancel.  */
1265 				cancelled_p = false;
1266 			}
1267 			break;
1268 		case DELAYED_WORK_SCHEDULED:
1269 			/*
1270 			 * If it is scheduled, mark it cancelled and
1271 			 * try to stop the callout before it starts.
1272 			 *
1273 			 * If it's too late and the callout has already
1274 			 * begun to execute, tough.
1275 			 *
1276 			 * If we stopped the callout before it started,
1277 			 * however, then destroy the callout and
1278 			 * dissociate it from the workqueue ourselves.
1279 			 */
1280 			dw->dw_state = DELAYED_WORK_CANCELLED;
1281 			cancelled_p = true;
1282 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1283 			if (!callout_stop(&dw->dw_callout))
1284 				cancel_delayed_work_done(wq, dw);
1285 			break;
1286 		case DELAYED_WORK_RESCHEDULED:
1287 			/*
1288 			 * If it is being rescheduled, the callout has
1289 			 * already fired.  We must ask it to cancel.
1290 			 */
1291 			dw->dw_state = DELAYED_WORK_CANCELLED;
1292 			dw->dw_resched = -1;
1293 			cancelled_p = true;
1294 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1295 			break;
1296 		case DELAYED_WORK_CANCELLED:
1297 			/*
1298 			 * If it is being cancelled, the callout has
1299 			 * already fired.  There is nothing more for us
1300 			 * to do.  Someone else claims credit for
1301 			 * cancelling it.
1302 			 */
1303 			cancelled_p = false;
1304 			break;
1305 		default:
1306 			panic("invalid delayed work state: %d",
1307 			    dw->dw_state);
1308 		}
1309 	}
1310 	mutex_exit(&wq->wq_lock);
1311 
1312 	return cancelled_p;
1313 }
1314 
1315 /*
1316  * cancel_delayed_work_sync(dw)
1317  *
1318  *	If work was scheduled or queued, remove it from the schedule or
1319  *	queue and return true.  If work was not scheduled or queued,
1320  *	return false.  Note that work may already be running; if it
1321  *	hasn't been rescheduled or requeued, then cancel_delayed_work_sync
1322  *	will return false; either way, wait for it to complete.
1323  */
1324 bool
1325 cancel_delayed_work_sync(struct delayed_work *dw)
1326 {
1327 	struct workqueue_struct *wq;
1328 	bool cancelled_p;
1329 
1330 	/* If there's no workqueue, nothing to cancel.  */
1331 	if ((wq = work_queue(&dw->work)) == NULL)
1332 		return false;
1333 
1334 	mutex_enter(&wq->wq_lock);
1335 	if (__predict_false(work_queue(&dw->work) != wq)) {
1336 		cancelled_p = false;
1337 	} else {
1338 		switch (dw->dw_state) {
1339 		case DELAYED_WORK_IDLE:
1340 			/*
1341 			 * It is either on the queue or already running
1342 			 * or both.
1343 			 */
1344 			if (work_claimed(&dw->work, wq)) {
1345 				/* On the queue.  Remove and release.  */
1346 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1347 				    work_entry);
1348 				SDT_PROBE2(sdt, linux, work, cancel,
1349 				    &dw->work, wq);
1350 				release_work(&dw->work, wq);
1351 				/* Can't dereference dw after this point.  */
1352 				cancelled_p = true;
1353 			} else {
1354 				/* Not on the queue, so didn't cancel. */
1355 				cancelled_p = false;
1356 			}
1357 			/* If it's still running, wait for it to complete.  */
1358 			if (wq->wq_current_work == &dw->work)
1359 				wait_for_current_work(&dw->work, wq);
1360 			break;
1361 		case DELAYED_WORK_SCHEDULED:
1362 			/*
1363 			 * If it is scheduled, mark it cancelled and
1364 			 * try to stop the callout before it starts.
1365 			 *
1366 			 * If it's too late and the callout has already
1367 			 * begun to execute, we must wait for it to
1368 			 * complete.  But we got in soon enough to ask
1369 			 * the callout not to run, so we successfully
1370 			 * cancelled it in that case.
1371 			 *
1372 			 * If we stopped the callout before it started,
1373 			 * then we must destroy the callout and
1374 			 * dissociate it from the workqueue ourselves.
1375 			 */
1376 			dw->dw_state = DELAYED_WORK_CANCELLED;
1377 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1378 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1379 				cancel_delayed_work_done(wq, dw);
1380 			cancelled_p = true;
1381 			break;
1382 		case DELAYED_WORK_RESCHEDULED:
1383 			/*
1384 			 * If it is being rescheduled, the callout has
1385 			 * already fired.  We must ask it to cancel and
1386 			 * wait for it to complete.
1387 			 */
1388 			dw->dw_state = DELAYED_WORK_CANCELLED;
1389 			dw->dw_resched = -1;
1390 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1391 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1392 			cancelled_p = true;
1393 			break;
1394 		case DELAYED_WORK_CANCELLED:
1395 			/*
1396 			 * If it is being cancelled, the callout has
1397 			 * already fired.  We need only wait for it to
1398 			 * complete.  Someone else, however, claims
1399 			 * credit for cancelling it.
1400 			 */
1401 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1402 			cancelled_p = false;
1403 			break;
1404 		default:
1405 			panic("invalid delayed work state: %d",
1406 			    dw->dw_state);
1407 		}
1408 	}
1409 	mutex_exit(&wq->wq_lock);
1410 
1411 	return cancelled_p;
1412 }
1413 
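/*
 * Example usage (editorial sketch, hypothetical names): tearing down a
 * periodic tick before freeing its softc; afterwards neither the
 * callout nor the work callback can still be running.
 *
 *	(void)cancel_delayed_work_sync(&sc->sc_tick);
 *	kmem_free(sc, sizeof(*sc));
 */
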
1414 /*
1415  * Flush
1416  */
1417 
1418 /*
1419  * flush_scheduled_work()
1420  *
1421  *	Wait for all work queued on system_wq to complete.  This does
1422  *	not include delayed work.
1423  */
1424 void
1425 flush_scheduled_work(void)
1426 {
1427 
1428 	flush_workqueue(system_wq);
1429 }
1430 
1431 struct flush_work {
1432 	kmutex_t		fw_lock;
1433 	kcondvar_t		fw_cv;
1434 	struct work_struct	fw_work;
1435 	bool			fw_done;
1436 };
1437 
1438 static void
1439 flush_work_cb(struct work_struct *work)
1440 {
1441 	struct flush_work *fw = container_of(work, struct flush_work, fw_work);
1442 
1443 	mutex_enter(&fw->fw_lock);
1444 	fw->fw_done = true;
1445 	cv_broadcast(&fw->fw_cv);
1446 	mutex_exit(&fw->fw_lock);
1447 }
1448 
1449 /*
1450  * flush_workqueue(wq)
1451  *
1452  *	Wait for all work queued on wq to complete.  This does not
1453  *	include delayed work.
1454  */
1455 void
1456 flush_workqueue(struct workqueue_struct *wq)
1457 {
1458 	struct flush_work fw;
1459 
1460 	if (lwp_getspecific(workqueue_key) == wq) {
1461 		printf("%s: running from workqueue %s\n", __func__,
1462 		    wq->wq_name);
1463 		return;
1464 	}
1465 
1466 	mutex_init(&fw.fw_lock, MUTEX_DEFAULT, IPL_VM);
1467 	cv_init(&fw.fw_cv, "lxwqflsh");
1468 	INIT_WORK(&fw.fw_work, &flush_work_cb);
1469 	fw.fw_done = false;
1470 
1471 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1472 	queue_work(wq, &fw.fw_work);
1473 
1474 	mutex_enter(&fw.fw_lock);
1475 	while (!fw.fw_done)
1476 		cv_wait(&fw.fw_cv, &fw.fw_lock);
1477 	mutex_exit(&fw.fw_lock);
1478 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1479 
1480 	KASSERT(fw.fw_done);
1481 	/* no DESTROY_WORK */
1482 	cv_destroy(&fw.fw_cv);
1483 	mutex_destroy(&fw.fw_lock);
1484 }
1485 
1486 /*
1487  * drain_workqueue(wq)
1488  *
1489  *	Repeatedly flush wq until there is no more work.
1490  */
1491 void
1492 drain_workqueue(struct workqueue_struct *wq)
1493 {
1494 	unsigned ntries = 0;
1495 	bool done;
1496 
1497 	do {
1498 		if (ntries++ == 10 || (ntries % 100) == 0)
1499 			printf("linux workqueue %s"
1500 			    ": still clogged after %u flushes\n",
1501 			    wq->wq_name, ntries);
1502 		flush_workqueue(wq);
1503 		mutex_enter(&wq->wq_lock);
1504 		done = wq->wq_current_work == NULL;
1505 		done &= TAILQ_EMPTY(&wq->wq_queue);
1506 		done &= TAILQ_EMPTY(&wq->wq_dqueue);
1507 		mutex_exit(&wq->wq_lock);
1508 	} while (!done);
1509 }
1510 
1511 /*
1512  * flush_work(work)
1513  *
1514  *	If work is queued or currently executing, wait for it to
1515  *	complete.
1516  *
1517  *	Return true if we waited to flush it, false if it was already
1518  *	idle.
1519  */
1520 bool
1521 flush_work(struct work_struct *work)
1522 {
1523 	struct workqueue_struct *wq;
1524 
1525 	/* If there's no workqueue, nothing to flush.  */
1526 	if ((wq = work_queue(work)) == NULL)
1527 		return false;
1528 
1529 	flush_workqueue(wq);
1530 	return true;
1531 }
1532 
1533 /*
1534  * flush_delayed_work(dw)
1535  *
1536  *	If dw is scheduled to run after a delay, queue it immediately
1537  *	instead.  Then, if dw is queued or currently executing, wait
1538  *	for it to complete.  True if we waited, false otherwise.
1539  */
1540 bool
1541 flush_delayed_work(struct delayed_work *dw)
1542 {
1543 	struct workqueue_struct *wq;
1544 	bool waited = false;
1545 
1546 	/* If there's no workqueue, nothing to flush.  */
1547 	if ((wq = work_queue(&dw->work)) == NULL)
1548 		return false;
1549 
1550 	mutex_enter(&wq->wq_lock);
1551 	if (__predict_false(work_queue(&dw->work) != wq)) {
1552 		/*
1553 		 * Moved off the queue already (and possibly to another
1554 		 * queue, though that would be ill-advised), so it must
1555 		 * have completed, and we have nothing more to do.
1556 		 */
1557 		waited = false;
1558 	} else {
1559 		switch (dw->dw_state) {
1560 		case DELAYED_WORK_IDLE:
1561 			/*
1562 			 * It has a workqueue assigned and the callout
1563 			 * is idle, so it must be in progress or on the
1564 			 * queue.  In that case, we'll wait for it to
1565 			 * complete.
1566 			 */
1567 			break;
1568 		case DELAYED_WORK_SCHEDULED:
1569 		case DELAYED_WORK_RESCHEDULED:
1570 		case DELAYED_WORK_CANCELLED:
1571 			/*
1572 			 * The callout is scheduled, and may have even
1573 			 * started.  Mark it as scheduled so that if
1574 			 * the callout has fired it will queue the work
1575 			 * itself.  Try to stop the callout -- if we
1576 			 * can, queue the work now; if we can't, wait
1577 			 * for the callout to complete, which entails
1578 			 * queueing it.
1579 			 */
1580 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1581 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1582 				/*
1583 				 * We stopped it before it ran.  No
1584 				 * state change in the interim is
1585 				 * possible.  Destroy the callout and
1586 				 * queue it ourselves.
1587 				 */
1588 				KASSERT(dw->dw_state ==
1589 				    DELAYED_WORK_SCHEDULED);
1590 				dw_callout_destroy(wq, dw);
1591 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1592 				    work_entry);
1593 				cv_broadcast(&wq->wq_cv);
1594 				SDT_PROBE2(sdt, linux, work, queue,
1595 				    &dw->work, wq);
1596 			}
1597 			break;
1598 		default:
1599 			panic("invalid delayed work state: %d", dw->dw_state);
1600 		}
1601 		/*
1602 		 * Waiting for the whole queue to flush is overkill,
1603 		 * but doesn't hurt.
1604 		 */
1605 		mutex_exit(&wq->wq_lock);
1606 		flush_workqueue(wq);
1607 		mutex_enter(&wq->wq_lock);
1608 		waited = true;
1609 	}
1610 	mutex_exit(&wq->wq_lock);
1611 
1612 	return waited;
1613 }
1614 
1615 /*
1616  * delayed_work_pending(dw)
1617  *
1618  *	True if dw is currently scheduled to execute, false if not.
1619  */
1620 bool
1621 delayed_work_pending(const struct delayed_work *dw)
1622 {
1623 
1624 	return work_pending(&dw->work);
1625 }
1626 
1627 /*
1628  * INIT_RCU_WORK(rw, fn)
1629  *
1630  *	Initialize rw for use with a workqueue to call fn in a worker
1631  *	thread after an RCU grace period.  There is no corresponding
1632  *	destruction operation.
1633  */
1634 void
1635 INIT_RCU_WORK(struct rcu_work *rw, void (*fn)(struct work_struct *))
1636 {
1637 
1638 	INIT_WORK(&rw->work, fn);
1639 }
1640 
1641 static void
1642 queue_rcu_work_cb(struct rcu_head *r)
1643 {
1644 	struct rcu_work *rw = container_of(r, struct rcu_work, rw_rcu);
1645 	struct workqueue_struct *wq = work_queue(&rw->work);
1646 
1647 	mutex_enter(&wq->wq_lock);
1648 	KASSERT(work_pending(&rw->work));
1649 	KASSERT(work_queue(&rw->work) == wq);
1650 	destroy_rcu_head(&rw->rw_rcu);
1651 	TAILQ_REMOVE(&wq->wq_rcu, &rw->work, work_entry);
1652 	TAILQ_INSERT_TAIL(&wq->wq_queue, &rw->work, work_entry);
1653 	cv_broadcast(&wq->wq_cv);
1654 	SDT_PROBE2(sdt, linux, work, queue,  &rw->work, wq);
1655 	mutex_exit(&wq->wq_lock);
1656 }
1657 
1658 /*
1659  * queue_rcu_work(wq, rw)
1660  *
1661  *	Schedule rw to run on wq after an RCU grace period.
1662  */
1663 void
1664 queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rw)
1665 {
1666 
1667 	mutex_enter(&wq->wq_lock);
1668 	if (acquire_work(&rw->work, wq)) {
1669 		init_rcu_head(&rw->rw_rcu);
1670 		SDT_PROBE2(sdt, linux, work, rcu,  rw, wq);
1671 		TAILQ_INSERT_TAIL(&wq->wq_rcu, &rw->work, work_entry);
1672 		call_rcu(&rw->rw_rcu, &queue_rcu_work_cb);
1673 	}
1674 	mutex_exit(&wq->wq_lock);
1675 }
1676
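/*
 * Example usage (editorial sketch, hypothetical names): free an object
 * from thread context only after an RCU grace period has elapsed, so
 * that concurrent RCU readers are done with it.
 *
 *	struct foo_node {
 *		struct rcu_work		fn_rcuwork;
 *		...
 *	};
 *
 *	static void
 *	foo_node_free_work(struct work_struct *work)
 *	{
 *		struct foo_node *fn = container_of(work,
 *		    struct foo_node, fn_rcuwork.work);
 *
 *		kmem_free(fn, sizeof(*fn));
 *	}
 *
 *	INIT_RCU_WORK(&fn->fn_rcuwork, foo_node_free_work);
 *	queue_rcu_work(system_wq, &fn->fn_rcuwork);
 */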