1 /*	$NetBSD: linux_work.c,v 1.61 2022/04/09 23:43:31 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Taylor R. Campbell.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.61 2022/04/09 23:43:31 riastradh Exp $");
34 
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49 
50 #include <linux/workqueue.h>
51 
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54 
55 struct workqueue_struct {
56 	kmutex_t		wq_lock;
57 	kcondvar_t		wq_cv;
58 	struct dwork_head	wq_delayed; /* delayed work scheduled */
59 	struct work_head	wq_rcu;	    /* RCU work scheduled */
60 	struct work_head	wq_queue;   /* work to run */
61 	struct work_head	wq_dqueue;  /* delayed work to run now */
62 	struct work_struct	*wq_current_work;
63 	int			wq_flags;
64 	bool			wq_dying;
65 	uint64_t		wq_gen;
66 	struct lwp		*wq_lwp;
67 	const char		*wq_name;
68 };
69 
70 static void __dead	linux_workqueue_thread(void *);
71 static void		linux_workqueue_timeout(void *);
72 static bool		work_claimed(struct work_struct *,
73 			    struct workqueue_struct *);
74 static struct workqueue_struct *
75 			work_queue(struct work_struct *);
76 static bool		acquire_work(struct work_struct *,
77 			    struct workqueue_struct *);
78 static void		release_work(struct work_struct *,
79 			    struct workqueue_struct *);
80 static void		wait_for_current_work(struct work_struct *,
81 			    struct workqueue_struct *);
82 static void		dw_callout_init(struct workqueue_struct *,
83 			    struct delayed_work *);
84 static void		dw_callout_destroy(struct workqueue_struct *,
85 			    struct delayed_work *);
86 static void		cancel_delayed_work_done(struct workqueue_struct *,
87 			    struct delayed_work *);
88 
89 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
90     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
91 SDT_PROBE_DEFINE2(sdt, linux, work, release,
92     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
93 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
94     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
95 SDT_PROBE_DEFINE2(sdt, linux, work, rcu,
96     "struct rcu_work *"/*work*/, "struct workqueue_struct *"/*wq*/);
97 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
98     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
99 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
100     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
101     "unsigned long"/*ticks*/);
102 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
103     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
104 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
105     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
106 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
107     "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
108 SDT_PROBE_DEFINE2(sdt, linux, work, run,
109     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
110 SDT_PROBE_DEFINE2(sdt, linux, work, done,
111     "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
112 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
113     "struct workqueue_struct *"/*wq*/);
114 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
115     "struct workqueue_struct *"/*wq*/);
116 SDT_PROBE_DEFINE1(sdt, linux, work, flush__self,
117     "struct workqueue_struct *"/*wq*/);
118 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
119     "struct workqueue_struct *"/*wq*/);
120 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
121     "struct workqueue_struct *"/*wq*/);
122 
123 static specificdata_key_t workqueue_key __read_mostly;
124 
125 struct workqueue_struct	*system_highpri_wq __read_mostly;
126 struct workqueue_struct	*system_long_wq __read_mostly;
127 struct workqueue_struct	*system_power_efficient_wq __read_mostly;
128 struct workqueue_struct	*system_unbound_wq __read_mostly;
129 struct workqueue_struct	*system_wq __read_mostly;
130 
131 static inline uintptr_t
132 atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
133 {
134 
135 	return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
136 }
137 
138 /*
139  * linux_workqueue_init()
140  *
141  *	Initialize the Linux workqueue subsystem.  Return 0 on success,
142  *	NetBSD error on failure.
143  */
144 static int
145 linux_workqueue_init0(void)
146 {
147 	int error;
148 
149 	error = lwp_specific_key_create(&workqueue_key, NULL);
150 	if (error)
151 		goto out;
152 
153 	system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
154 	if (system_highpri_wq == NULL) {
155 		error = ENOMEM;
156 		goto out;
157 	}
158 
159 	system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
160 	if (system_long_wq == NULL) {
161 		error = ENOMEM;
162 		goto out;
163 	}
164 
165 	system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
166 	if (system_power_efficient_wq == NULL) {
167 		error = ENOMEM;
168 		goto out;
169 	}
170 
171 	system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
172 	if (system_unbound_wq == NULL) {
173 		error = ENOMEM;
174 		goto out;
175 	}
176 
177 	system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
178 	if (system_wq == NULL) {
179 		error = ENOMEM;
180 		goto out;
181 	}
182 
183 	/* Success!  */
184 	error = 0;
185 
186 out:	if (error) {
187 		if (system_highpri_wq)
188 			destroy_workqueue(system_highpri_wq);
189 		if (system_long_wq)
190 			destroy_workqueue(system_long_wq);
191 		if (system_power_efficient_wq)
192 			destroy_workqueue(system_power_efficient_wq);
193 		if (system_unbound_wq)
194 			destroy_workqueue(system_unbound_wq);
195 		if (system_wq)
196 			destroy_workqueue(system_wq);
197 		if (workqueue_key)
198 			lwp_specific_key_delete(workqueue_key);
199 	}
200 
201 	return error;
202 }
203 
204 /*
205  * linux_workqueue_fini()
206  *
207  *	Destroy the Linux workqueue subsystem.  Never fails.
208  */
209 static void
210 linux_workqueue_fini0(void)
211 {
212 
213 	destroy_workqueue(system_power_efficient_wq);
214 	destroy_workqueue(system_long_wq);
215 	destroy_workqueue(system_wq);
	destroy_workqueue(system_unbound_wq);
	destroy_workqueue(system_highpri_wq);
216 	lwp_specific_key_delete(workqueue_key);
217 }
218 
219 #ifndef _MODULE
220 static ONCE_DECL(linux_workqueue_init_once);
221 #endif
222 
223 int
224 linux_workqueue_init(void)
225 {
226 #ifdef _MODULE
227 	return linux_workqueue_init0();
228 #else
229 	return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
230 #endif
231 }
232 
233 void
234 linux_workqueue_fini(void)
235 {
236 #ifdef _MODULE
237 	return linux_workqueue_fini0();
238 #else
239 	return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
240 #endif
241 }
242 
243 /*
244  * Workqueues
245  */
246 
247 /*
248  * alloc_workqueue(name, flags, max_active)
249  *
250  *	Create a workqueue of the given name.  max_active is the
251  *	maximum number of work items in flight, or 0 for the default.
252  *	Return NULL on failure, pointer to struct workqueue_struct
253  *	object on success.
254  */
255 struct workqueue_struct *
256 alloc_workqueue(const char *name, int flags, unsigned max_active)
257 {
258 	struct workqueue_struct *wq;
259 	int error;
260 
261 	KASSERT(max_active == 0 || max_active == 1);
262 
263 	wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
264 
265 	mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
266 	cv_init(&wq->wq_cv, name);
267 	TAILQ_INIT(&wq->wq_delayed);
268 	TAILQ_INIT(&wq->wq_rcu);
269 	TAILQ_INIT(&wq->wq_queue);
270 	TAILQ_INIT(&wq->wq_dqueue);
271 	wq->wq_current_work = NULL;
272 	wq->wq_flags = 0;
273 	wq->wq_dying = false;
274 	wq->wq_gen = 0;
275 	wq->wq_lwp = NULL;
276 	wq->wq_name = name;
277 
278 	error = kthread_create(PRI_NONE,
279 	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
280 	    &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
281 	if (error)
282 		goto fail0;
283 
284 	return wq;
285 
286 fail0:	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
287 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
288 	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
289 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
290 	cv_destroy(&wq->wq_cv);
291 	mutex_destroy(&wq->wq_lock);
292 	kmem_free(wq, sizeof(*wq));
293 	return NULL;
294 }
295 
296 /*
297  * alloc_ordered_workqueue(name, flags)
298  *
299  *	Same as alloc_workqueue(name, flags, 1).
300  */
301 struct workqueue_struct *
302 alloc_ordered_workqueue(const char *name, int flags)
303 {
304 
305 	return alloc_workqueue(name, flags, 1);
306 }
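
/*
 * Illustrative sketch (not part of this file; sc and "mydrvwq" are
 * hypothetical): a typical consumer pairs allocation at attach time
 * with destroy_workqueue at detach time.
 *
 *	sc->sc_wq = alloc_ordered_workqueue("mydrvwq", 0);
 *	if (sc->sc_wq == NULL)
 *		return ENOMEM;
 *	...
 *	destroy_workqueue(sc->sc_wq);
 */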
307 
308 /*
309  * destroy_workqueue(wq)
310  *
311  *	Destroy a workqueue created with alloc_workqueue.  Cancel any pending
312  *	delayed work.  Wait for all queued work to complete.
313  *
314  *	May sleep.
315  */
316 void
317 destroy_workqueue(struct workqueue_struct *wq)
318 {
319 
320 	/*
321 	 * Cancel all delayed work.  We do this first because any
322 	 * delayed work that has already timed out, which we can't
323 	 * cancel, may have queued new work.
324 	 */
325 	mutex_enter(&wq->wq_lock);
326 	while (!TAILQ_EMPTY(&wq->wq_delayed)) {
327 		struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
328 
329 		KASSERT(work_queue(&dw->work) == wq);
330 		KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
331 			dw->dw_state == DELAYED_WORK_RESCHEDULED ||
332 			dw->dw_state == DELAYED_WORK_CANCELLED),
333 		    "delayed work %p in bad state: %d",
334 		    dw, dw->dw_state);
335 
336 		/*
337 		 * Mark it cancelled and try to stop the callout before
338 		 * it starts.
339 		 *
340 		 * If it's too late and the callout has already begun
341 		 * to execute, then it will notice that we asked to
342 		 * cancel it and remove itself from the queue before
343 		 * returning.
344 		 *
345 		 * If we stopped the callout before it started,
346 		 * however, then we can safely destroy the callout and
347 		 * dissociate it from the workqueue ourselves.
348 		 */
349 		SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
350 		dw->dw_state = DELAYED_WORK_CANCELLED;
351 		if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
352 			cancel_delayed_work_done(wq, dw);
353 	}
354 	mutex_exit(&wq->wq_lock);
355 
356 	/* Wait for all scheduled RCU work to complete.  */
357 	mutex_enter(&wq->wq_lock);
358 	while (!TAILQ_EMPTY(&wq->wq_rcu))
359 		cv_wait(&wq->wq_cv, &wq->wq_lock);
360 	mutex_exit(&wq->wq_lock);
361 
362 	/*
363 	 * At this point, no new work can be put on the queue.
364 	 */
365 
366 	/* Tell the thread to exit.  */
367 	mutex_enter(&wq->wq_lock);
368 	wq->wq_dying = true;
369 	cv_broadcast(&wq->wq_cv);
370 	mutex_exit(&wq->wq_lock);
371 
372 	/* Wait for it to exit.  */
373 	(void)kthread_join(wq->wq_lwp);
374 
375 	KASSERT(wq->wq_dying);
376 	KASSERT(wq->wq_flags == 0);
377 	KASSERT(wq->wq_current_work == NULL);
378 	KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
379 	KASSERT(TAILQ_EMPTY(&wq->wq_queue));
380 	KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
381 	KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
382 	cv_destroy(&wq->wq_cv);
383 	mutex_destroy(&wq->wq_lock);
384 
385 	kmem_free(wq, sizeof(*wq));
386 }
387 
388 /*
389  * Work thread and callout
390  */
391 
392 /*
393  * linux_workqueue_thread(cookie)
394  *
395  *	Main function for a workqueue's worker thread.  Waits until
396  *	there is work queued, grabs a batch of work off the queue,
397  *	executes it all, bumps the generation number, and repeats,
398  *	until dying.
399  */
400 static void __dead
401 linux_workqueue_thread(void *cookie)
402 {
403 	struct workqueue_struct *const wq = cookie;
404 	struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
405 	struct work_struct marker, *work;
406 	unsigned i;
407 
408 	lwp_setspecific(workqueue_key, wq);
409 
410 	mutex_enter(&wq->wq_lock);
411 	for (;;) {
412 		/*
413 		 * Wait until there's activity.  If there's no work and
414 		 * we're dying, stop here.
415 		 */
416 		if (TAILQ_EMPTY(&wq->wq_queue) &&
417 		    TAILQ_EMPTY(&wq->wq_dqueue)) {
418 			if (wq->wq_dying)
419 				break;
420 			cv_wait(&wq->wq_cv, &wq->wq_lock);
421 			continue;
422 		}
423 
424 		/*
425 		 * Start a batch of work.  Use a marker to delimit when
426 		 * the batch ends so we can advance the generation
427 		 * after the batch.
428 		 */
429 		SDT_PROBE1(sdt, linux, work, batch__start,  wq);
430 		for (i = 0; i < 2; i++) {
431 			if (TAILQ_EMPTY(q[i]))
432 				continue;
433 			TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
434 			while ((work = TAILQ_FIRST(q[i])) != &marker) {
435 				void (*func)(struct work_struct *);
436 
437 				KASSERT(work_queue(work) == wq);
438 				KASSERT(work_claimed(work, wq));
439 				KASSERTMSG((q[i] != &wq->wq_dqueue ||
440 					container_of(work, struct delayed_work,
441 					    work)->dw_state ==
442 					DELAYED_WORK_IDLE),
443 				    "delayed work %p queued and scheduled",
444 				    work);
445 
446 				TAILQ_REMOVE(q[i], work, work_entry);
447 				KASSERT(wq->wq_current_work == NULL);
448 				wq->wq_current_work = work;
449 				func = work->func;
450 				release_work(work, wq);
451 				/* Can't dereference work after this point.  */
452 
453 				mutex_exit(&wq->wq_lock);
454 				SDT_PROBE2(sdt, linux, work, run,  work, wq);
455 				(*func)(work);
456 				SDT_PROBE2(sdt, linux, work, done,  work, wq);
457 				mutex_enter(&wq->wq_lock);
458 
459 				KASSERT(wq->wq_current_work == work);
460 				wq->wq_current_work = NULL;
461 				cv_broadcast(&wq->wq_cv);
462 			}
463 			TAILQ_REMOVE(q[i], &marker, work_entry);
464 		}
465 
466 		/* Notify cancel that we've completed a batch of work.  */
467 		wq->wq_gen++;
468 		cv_broadcast(&wq->wq_cv);
469 		SDT_PROBE1(sdt, linux, work, batch__done,  wq);
470 	}
471 	mutex_exit(&wq->wq_lock);
472 
473 	kthread_exit(0);
474 }
475 
476 /*
477  * linux_workqueue_timeout(cookie)
478  *
479  *	Delayed work timeout callback.
480  *
481  *	- If scheduled, queue it.
482  *	- If rescheduled, callout_schedule ourselves again.
483  *	- If cancelled, destroy the callout and release the work from
484  *        the workqueue.
485  */
486 static void
487 linux_workqueue_timeout(void *cookie)
488 {
489 	struct delayed_work *const dw = cookie;
490 	struct workqueue_struct *const wq = work_queue(&dw->work);
491 
492 	KASSERTMSG(wq != NULL,
493 	    "delayed work %p state %d resched %d",
494 	    dw, dw->dw_state, dw->dw_resched);
495 
496 	SDT_PROBE2(sdt, linux, work, timer,  dw, wq);
497 
498 	mutex_enter(&wq->wq_lock);
499 	KASSERT(work_queue(&dw->work) == wq);
500 	switch (dw->dw_state) {
501 	case DELAYED_WORK_IDLE:
502 		panic("delayed work callout uninitialized: %p", dw);
503 	case DELAYED_WORK_SCHEDULED:
504 		dw_callout_destroy(wq, dw);
505 		TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
506 		cv_broadcast(&wq->wq_cv);
507 		SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
508 		break;
509 	case DELAYED_WORK_RESCHEDULED:
510 		KASSERT(dw->dw_resched >= 0);
511 		callout_schedule(&dw->dw_callout, dw->dw_resched);
512 		dw->dw_state = DELAYED_WORK_SCHEDULED;
513 		dw->dw_resched = -1;
514 		break;
515 	case DELAYED_WORK_CANCELLED:
516 		cancel_delayed_work_done(wq, dw);
517 		/* Can't dereference dw after this point.  */
518 		goto out;
519 	default:
520 		panic("delayed work callout in bad state: %p", dw);
521 	}
522 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
523 	    dw->dw_state == DELAYED_WORK_SCHEDULED);
524 out:	mutex_exit(&wq->wq_lock);
525 }
526 
527 /*
528  * current_work()
529  *
530  *	If in a workqueue worker thread, return the work it is
531  *	currently executing.  Otherwise return NULL.
532  */
533 struct work_struct *
534 current_work(void)
535 {
536 	struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
537 
538 	/* If we're not a workqueue thread, then there's no work.  */
539 	if (wq == NULL)
540 		return NULL;
541 
542 	/*
543 	 * Otherwise, this should be possible only while work is in
544 	 * progress.  Return the current work item.
545 	 */
546 	KASSERT(wq->wq_current_work != NULL);
547 	return wq->wq_current_work;
548 }
549 
550 /*
551  * Work
552  */
553 
554 /*
555  * INIT_WORK(work, fn)
556  *
557  *	Initialize work for use with a workqueue to call fn in a worker
558  *	thread.  There is no corresponding destruction operation.
559  */
560 void
561 INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
562 {
563 
564 	work->work_owner = 0;
565 	work->func = fn;
566 }
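
/*
 * Illustrative sketch (hypothetical driver names): embed the
 * work_struct in a driver structure, initialize it once, and recover
 * the enclosing structure with container_of in the callback.
 *
 *	struct mydrv_softc {
 *		struct work_struct	sc_task;
 *		...
 *	};
 *
 *	static void
 *	mydrv_task(struct work_struct *work)
 *	{
 *		struct mydrv_softc *sc =
 *		    container_of(work, struct mydrv_softc, sc_task);
 *		...
 *	}
 *
 *	INIT_WORK(&sc->sc_task, &mydrv_task);
 *	queue_work(sc->sc_wq, &sc->sc_task);
 */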
567 
568 /*
569  * work_claimed(work, wq)
570  *
571  *	True if work is currently claimed by a workqueue, meaning it is
572  *	either on the queue or scheduled in a callout.  The workqueue
573  *	must be wq, and caller must hold wq's lock.
574  */
575 static bool
576 work_claimed(struct work_struct *work, struct workqueue_struct *wq)
577 {
578 
579 	KASSERT(work_queue(work) == wq);
580 	KASSERT(mutex_owned(&wq->wq_lock));
581 
582 	return atomic_load_relaxed(&work->work_owner) & 1;
583 }
584 
585 /*
586  * work_pending(work)
587  *
588  *	True if work is currently claimed by any workqueue, scheduled
589  *	to run on that workqueue.
590  */
591 bool
592 work_pending(const struct work_struct *work)
593 {
594 
595 	return atomic_load_relaxed(&work->work_owner) & 1;
596 }
597 
598 /*
599  * work_queue(work)
600  *
601  *	Return the last queue that work was queued on, or NULL if it
602  *	was never queued.
603  */
604 static struct workqueue_struct *
605 work_queue(struct work_struct *work)
606 {
607 
608 	return (struct workqueue_struct *)
609 	    (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
610 }
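
/*
 * Layout of work->work_owner, as used by work_claimed, work_pending,
 * work_queue, acquire_work, and release_work:
 *
 *	bit 0		1 if the work is claimed (on the queue or
 *			scheduled in a callout), 0 otherwise
 *	bits 1..N	pointer to the owning workqueue_struct, which
 *			must therefore be at least 2-byte aligned
 *
 * acquire_work stores (uintptr_t)wq | 1; work_queue recovers wq by
 * masking off the low bit.
 */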
611 
612 /*
613  * acquire_work(work, wq)
614  *
615  *	Try to claim work for wq.  If work is already claimed, it must
616  *	be claimed by wq; return false.  If work is not already
617  *	claimed, claim it, issue a memory barrier to match any prior
618  *	release_work, and return true.
619  *
620  *	Caller must hold wq's lock.
621  */
622 static bool
623 acquire_work(struct work_struct *work, struct workqueue_struct *wq)
624 {
625 	uintptr_t owner0, owner;
626 
627 	KASSERT(mutex_owned(&wq->wq_lock));
628 	KASSERT(((uintptr_t)wq & 1) == 0);
629 
630 	owner = (uintptr_t)wq | 1;
631 	do {
632 		owner0 = atomic_load_relaxed(&work->work_owner);
633 		if (owner0 & 1) {
634 			KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
635 			return false;
636 		}
637 		KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
638 	} while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
639 	    owner0);
640 
641 	KASSERT(work_queue(work) == wq);
642 	membar_acquire();
643 	SDT_PROBE2(sdt, linux, work, acquire,  work, wq);
644 	return true;
645 }
646 
647 /*
648  * release_work(work, wq)
649  *
650  *	Issue a memory barrier to match any subsequent acquire_work and
651  *	dissociate work from wq.
652  *
653  *	Caller must hold wq's lock and work must be associated with wq.
654  */
655 static void
656 release_work(struct work_struct *work, struct workqueue_struct *wq)
657 {
658 
659 	KASSERT(work_queue(work) == wq);
660 	KASSERT(mutex_owned(&wq->wq_lock));
661 
662 	SDT_PROBE2(sdt, linux, work, release,  work, wq);
663 	membar_release();
664 
665 	/*
666 	 * Non-interlocked r/m/w is safe here because nobody else can
667 	 * write to this while the claimed bit is set and the workqueue
668 	 * lock is held.
669 	 */
670 	atomic_store_relaxed(&work->work_owner,
671 	    atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
672 }
673 
674 /*
675  * schedule_work(work)
676  *
677  *	If work is not already queued on system_wq, queue it to be run
678  *	by system_wq's worker thread when it next can.  True if it was
679  *	newly queued, false if it was already queued.  If the work was
680  *	already running, queue it to run again.
681  *
682  *	Caller must ensure work is not queued to run on a different
683  *	workqueue.
684  */
685 bool
686 schedule_work(struct work_struct *work)
687 {
688 
689 	return queue_work(system_wq, work);
690 }
691 
692 /*
693  * queue_work(wq, work)
694  *
695  *	If work is not already queued on wq, queue it to be run by wq's
696  *	worker thread when it next can.  True if it was newly queued,
697  *	false if it was already queued.  If the work was already
698  *	running, queue it to run again.
699  *
700  *	Caller must ensure work is not queued to run on a different
701  *	workqueue.
702  */
703 bool
704 queue_work(struct workqueue_struct *wq, struct work_struct *work)
705 {
706 	bool newly_queued;
707 
708 	KASSERT(wq != NULL);
709 
710 	mutex_enter(&wq->wq_lock);
711 	if (__predict_true(acquire_work(work, wq))) {
712 		/*
713 		 * It wasn't on any workqueue at all.  Put it on this
714 		 * one, and signal the worker thread that there is work
715 		 * to do.
716 		 */
717 		TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
718 		cv_broadcast(&wq->wq_cv);
719 		SDT_PROBE2(sdt, linux, work, queue,  work, wq);
720 		newly_queued = true;
721 	} else {
722 		/*
723 		 * It was already on this workqueue.  Nothing to do
724 		 * since it is already queued.
725 		 */
726 		newly_queued = false;
727 	}
728 	mutex_exit(&wq->wq_lock);
729 
730 	return newly_queued;
731 }
732 
733 /*
734  * cancel_work(work)
735  *
736  *	If work was queued, remove it from the queue and return true.
737  *	If work was not queued, return false.  Work may still be
738  *	running when this returns.
739  */
740 bool
741 cancel_work(struct work_struct *work)
742 {
743 	struct workqueue_struct *wq;
744 	bool cancelled_p = false;
745 
746 	/* If there's no workqueue, nothing to cancel.   */
747 	if ((wq = work_queue(work)) == NULL)
748 		goto out;
749 
750 	mutex_enter(&wq->wq_lock);
751 	if (__predict_false(work_queue(work) != wq)) {
752 		/*
753 		 * It has finished execution or been cancelled by
754 		 * another thread, and has been moved off the
755 		 * workqueue, so it's too late to cancel.
756 		 */
757 		cancelled_p = false;
758 	} else {
759 		/* Check whether it's on the queue.  */
760 		if (work_claimed(work, wq)) {
761 			/*
762 			 * It is still on the queue.  Take it off the
763 			 * queue and report successful cancellation.
764 			 */
765 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
766 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
767 			release_work(work, wq);
768 			/* Can't dereference work after this point.  */
769 			cancelled_p = true;
770 		} else {
771 			/* Not on the queue.  Couldn't cancel it.  */
772 			cancelled_p = false;
773 		}
774 	}
775 	mutex_exit(&wq->wq_lock);
776 
777 out:	return cancelled_p;
778 }
779 
780 /*
781  * cancel_work_sync(work)
782  *
783  *	If work was queued, remove it from the queue and return true.
784  *	If work was not queued, return false.  Either way, if work is
785  *	currently running, wait for it to complete.
786  *
787  *	May sleep.
788  */
789 bool
790 cancel_work_sync(struct work_struct *work)
791 {
792 	struct workqueue_struct *wq;
793 	bool cancelled_p = false;
794 
795 	/* If there's no workqueue, nothing to cancel.   */
796 	if ((wq = work_queue(work)) == NULL)
797 		goto out;
798 
799 	mutex_enter(&wq->wq_lock);
800 	if (__predict_false(work_queue(work) != wq)) {
801 		/*
802 		 * It has finished execution or been cancelled by
803 		 * another thread, and has been moved off the
804 		 * workqueue, so it's too late to cancel.
805 		 */
806 		cancelled_p = false;
807 	} else {
808 		/* Check whether it's on the queue.  */
809 		if (work_claimed(work, wq)) {
810 			/*
811 			 * It is still on the queue.  Take it off the
812 			 * queue and report successful cancellation.
813 			 */
814 			TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
815 			SDT_PROBE2(sdt, linux, work, cancel,  work, wq);
816 			release_work(work, wq);
817 			/* Can't dereference work after this point.  */
818 			cancelled_p = true;
819 		} else {
820 			/* Not on the queue.  Couldn't cancel it.  */
821 			cancelled_p = false;
822 		}
823 		/* If it's still running, wait for it to complete.  */
824 		if (wq->wq_current_work == work)
825 			wait_for_current_work(work, wq);
826 	}
827 	mutex_exit(&wq->wq_lock);
828 
829 out:	return cancelled_p;
830 }
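
/*
 * Illustrative sketch (hypothetical names): at detach time, prevent new
 * submissions first, then call cancel_work_sync so the callback is
 * guaranteed not to be running afterwards.
 *
 *	sc->sc_dying = true;
 *	(void)cancel_work_sync(&sc->sc_task);
 *	... now it is safe to free what the callback uses ...
 */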
831 
832 /*
833  * wait_for_current_work(work, wq)
834  *
835  *	wq must be currently executing work.  Wait for it to finish.
836  *
837  *	Does not dereference work.
838  */
839 static void
840 wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
841 {
842 	uint64_t gen;
843 
844 	KASSERT(mutex_owned(&wq->wq_lock));
845 	KASSERT(wq->wq_current_work == work);
846 
847 	/* Wait only one generation in case it gets requeued quickly.  */
848 	SDT_PROBE2(sdt, linux, work, wait__start,  work, wq);
849 	gen = wq->wq_gen;
850 	do {
851 		cv_wait(&wq->wq_cv, &wq->wq_lock);
852 	} while (wq->wq_current_work == work && wq->wq_gen == gen);
853 	SDT_PROBE2(sdt, linux, work, wait__done,  work, wq);
854 }
855 
856 /*
857  * Delayed work
858  */
859 
860 /*
861  * INIT_DELAYED_WORK(dw, fn)
862  *
863  *	Initialize dw for use with a workqueue to call fn in a worker
864  *	thread after a delay.  There is no corresponding destruction
865  *	operation.
866  */
867 void
868 INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
869 {
870 
871 	INIT_WORK(&dw->work, fn);
872 	dw->dw_state = DELAYED_WORK_IDLE;
873 	dw->dw_resched = -1;
874 
875 	/*
876 	 * Defer callout_init until we are actually going to schedule
877 	 * the callout, whose teardown path can then callout_destroy it;
878 	 * there is no DESTROY_DELAYED_WORK or anything like it, so we
879 	 * get no other opportunity to call callout_destroy.
880 	 */
881 }
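
/*
 * Illustrative sketch (hypothetical names): delayed work is initialized
 * once and then armed with a delay in ticks, e.g. one second is hz
 * ticks.  Passing 0 ticks queues it for the worker's next pass without
 * going through the callout at all.
 *
 *	INIT_DELAYED_WORK(&sc->sc_tick, &mydrv_tick);
 *	schedule_delayed_work(&sc->sc_tick, hz);
 */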
882 
883 /*
884  * schedule_delayed_work(dw, ticks)
885  *
886  *	If it is not currently scheduled, schedule dw to run after
887  *	ticks on system_wq.  If currently executing and not already
888  *	rescheduled, reschedule it.  True if it was newly scheduled,
889  *	false if it was already scheduled.
890  *
891  *	If ticks == 0, queue it to run as soon as the worker can,
892  *	without waiting for the next callout tick to run.
893  */
894 bool
895 schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
896 {
897 
898 	return queue_delayed_work(system_wq, dw, ticks);
899 }
900 
901 /*
902  * dw_callout_init(wq, dw)
903  *
904  *	Initialize the callout of dw and transition to
905  *	DELAYED_WORK_SCHEDULED.  Caller must use callout_schedule.
906  */
907 static void
908 dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
909 {
910 
911 	KASSERT(mutex_owned(&wq->wq_lock));
912 	KASSERT(work_queue(&dw->work) == wq);
913 	KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
914 
915 	callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
916 	callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
917 	TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
918 	dw->dw_state = DELAYED_WORK_SCHEDULED;
919 }
920 
921 /*
922  * dw_callout_destroy(wq, dw)
923  *
924  *	Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
925  */
926 static void
927 dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
928 {
929 
930 	KASSERT(mutex_owned(&wq->wq_lock));
931 	KASSERT(work_queue(&dw->work) == wq);
932 	KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
933 	    dw->dw_state == DELAYED_WORK_RESCHEDULED ||
934 	    dw->dw_state == DELAYED_WORK_CANCELLED);
935 
936 	TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
937 	callout_destroy(&dw->dw_callout);
938 	dw->dw_resched = -1;
939 	dw->dw_state = DELAYED_WORK_IDLE;
940 }
941 
942 /*
943  * cancel_delayed_work_done(wq, dw)
944  *
945  *	Complete cancellation of a delayed work: transition from
946  *	DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
947  *	workqueue.  Caller must not dereference dw after this returns.
948  */
949 static void
950 cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
951 {
952 
953 	KASSERT(mutex_owned(&wq->wq_lock));
954 	KASSERT(work_queue(&dw->work) == wq);
955 	KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
956 
957 	dw_callout_destroy(wq, dw);
958 	release_work(&dw->work, wq);
959 	/* Can't dereference dw after this point.  */
960 }
961 
962 /*
963  * queue_delayed_work(wq, dw, ticks)
964  *
965  *	If it is not currently scheduled, schedule dw to run after
966  *	ticks on wq.  If it is already scheduled or queued, leave it
967  *	alone (a cancelled-but-pending callout is rescheduled).
968  *
969  *	If ticks == 0, queue it to run as soon as the worker can,
970  *	without waiting for the next callout tick to run.
971  */
972 bool
973 queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
974     unsigned long ticks)
975 {
976 	bool newly_queued;
977 
978 	mutex_enter(&wq->wq_lock);
979 	if (__predict_true(acquire_work(&dw->work, wq))) {
980 		/*
981 		 * It wasn't on any workqueue at all.  Schedule it to
982 		 * run on this one.
983 		 */
984 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
985 		if (ticks == 0) {
986 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
987 			    work_entry);
988 			cv_broadcast(&wq->wq_cv);
989 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
990 		} else {
991 			/*
992 			 * Initialize a callout and schedule to run
993 			 * after a delay.
994 			 */
995 			dw_callout_init(wq, dw);
996 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
997 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
998 		}
999 		newly_queued = true;
1000 	} else {
1001 		/* It was already on this workqueue.  */
1002 		switch (dw->dw_state) {
1003 		case DELAYED_WORK_IDLE:
1004 		case DELAYED_WORK_SCHEDULED:
1005 		case DELAYED_WORK_RESCHEDULED:
1006 			/* On the queue or already scheduled.  Leave it.  */
1007 			newly_queued = false;
1008 			break;
1009 		case DELAYED_WORK_CANCELLED:
1010 			/*
1011 			 * Scheduled and the callout began, but it was
1012 			 * cancelled.  Reschedule it.
1013 			 */
1014 			if (ticks == 0) {
1015 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1016 				SDT_PROBE2(sdt, linux, work, queue,
1017 				    &dw->work, wq);
1018 			} else {
1019 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1020 				dw->dw_resched = MIN(INT_MAX, ticks);
1021 				SDT_PROBE3(sdt, linux, work, schedule,
1022 				    dw, wq, ticks);
1023 			}
1024 			newly_queued = true;
1025 			break;
1026 		default:
1027 			panic("invalid delayed work state: %d",
1028 			    dw->dw_state);
1029 		}
1030 	}
1031 	mutex_exit(&wq->wq_lock);
1032 
1033 	return newly_queued;
1034 }
1035 
1036 /*
1037  * mod_delayed_work(wq, dw, ticks)
1038  *
1039  *	Schedule dw to run after ticks.  If scheduled or queued,
1040  *	reschedule.  If ticks == 0, run without delay.
1041  *
1042  *	True if it modified the timer of an already scheduled work,
1043  *	false if it newly scheduled the work.
1044  */
1045 bool
1046 mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
1047     unsigned long ticks)
1048 {
1049 	bool timer_modified;
1050 
1051 	mutex_enter(&wq->wq_lock);
1052 	if (acquire_work(&dw->work, wq)) {
1053 		/*
1054 		 * It wasn't on any workqueue at all.  Schedule it to
1055 		 * run on this one.
1056 		 */
1057 		KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1058 		if (ticks == 0) {
1059 			/*
1060 			 * Run immediately: put it on the queue and
1061 			 * signal the worker thread.
1062 			 */
1063 			TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1064 			    work_entry);
1065 			cv_broadcast(&wq->wq_cv);
1066 			SDT_PROBE2(sdt, linux, work, queue,  &dw->work, wq);
1067 		} else {
1068 			/*
1069 			 * Initialize a callout and schedule to run
1070 			 * after a delay.
1071 			 */
1072 			dw_callout_init(wq, dw);
1073 			callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1074 			SDT_PROBE3(sdt, linux, work, schedule,  dw, wq, ticks);
1075 		}
1076 		timer_modified = false;
1077 	} else {
1078 		/* It was already on this workqueue.  */
1079 		switch (dw->dw_state) {
1080 		case DELAYED_WORK_IDLE:
1081 			/* On the queue.  */
1082 			if (ticks == 0) {
1083 				/* Leave it be.  */
1084 				SDT_PROBE2(sdt, linux, work, cancel,
1085 				    &dw->work, wq);
1086 				SDT_PROBE2(sdt, linux, work, queue,
1087 				    &dw->work, wq);
1088 			} else {
1089 				/* Remove from the queue and schedule.  */
1090 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1091 				    work_entry);
1092 				dw_callout_init(wq, dw);
1093 				callout_schedule(&dw->dw_callout,
1094 				    MIN(INT_MAX, ticks));
1095 				SDT_PROBE2(sdt, linux, work, cancel,
1096 				    &dw->work, wq);
1097 				SDT_PROBE3(sdt, linux, work, schedule,
1098 				    dw, wq, ticks);
1099 			}
1100 			timer_modified = true;
1101 			break;
1102 		case DELAYED_WORK_SCHEDULED:
1103 			/*
1104 			 * It is scheduled to run after a delay.  Try
1105 			 * to stop it and reschedule it; if we can't,
1106 			 * either reschedule it or cancel it to put it
1107 			 * on the queue, and inform the callout.
1108 			 */
1109 			if (callout_stop(&dw->dw_callout)) {
1110 				/* Can't stop, callout has begun.  */
1111 				if (ticks == 0) {
1112 					/*
1113 					 * We don't actually need to do
1114 					 * anything.  The callout will
1115 					 * queue it as soon as it gets
1116 					 * the lock.
1117 					 */
1118 					SDT_PROBE2(sdt, linux, work, cancel,
1119 					    &dw->work, wq);
1120 					SDT_PROBE2(sdt, linux, work, queue,
1121 					    &dw->work, wq);
1122 				} else {
1123 					/* Ask the callout to reschedule.  */
1124 					dw->dw_state = DELAYED_WORK_RESCHEDULED;
1125 					dw->dw_resched = MIN(INT_MAX, ticks);
1126 					SDT_PROBE2(sdt, linux, work, cancel,
1127 					    &dw->work, wq);
1128 					SDT_PROBE3(sdt, linux, work, schedule,
1129 					    dw, wq, ticks);
1130 				}
1131 			} else {
1132 				/* We stopped the callout before it began.  */
1133 				if (ticks == 0) {
1134 					/*
1135 					 * Run immediately: destroy the
1136 					 * callout, put it on the
1137 					 * queue, and signal the worker
1138 					 * thread.
1139 					 */
1140 					dw_callout_destroy(wq, dw);
1141 					TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1142 					    &dw->work, work_entry);
1143 					cv_broadcast(&wq->wq_cv);
1144 					SDT_PROBE2(sdt, linux, work, cancel,
1145 					    &dw->work, wq);
1146 					SDT_PROBE2(sdt, linux, work, queue,
1147 					    &dw->work, wq);
1148 				} else {
1149 					/*
1150 					 * Reschedule the callout.  No
1151 					 * state change.
1152 					 */
1153 					callout_schedule(&dw->dw_callout,
1154 					    MIN(INT_MAX, ticks));
1155 					SDT_PROBE2(sdt, linux, work, cancel,
1156 					    &dw->work, wq);
1157 					SDT_PROBE3(sdt, linux, work, schedule,
1158 					    dw, wq, ticks);
1159 				}
1160 			}
1161 			timer_modified = true;
1162 			break;
1163 		case DELAYED_WORK_RESCHEDULED:
1164 			/*
1165 			 * Someone rescheduled it after the callout
1166 			 * started but before the poor thing even had a
1167 			 * chance to acquire the lock.
1168 			 */
1169 			if (ticks == 0) {
1170 				/*
1171 				 * We can just switch back to
1172 				 * DELAYED_WORK_SCHEDULED so that the
1173 				 * callout will queue the work as soon
1174 				 * as it gets the lock.
1175 				 */
1176 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1177 				dw->dw_resched = -1;
1178 				SDT_PROBE2(sdt, linux, work, cancel,
1179 				    &dw->work, wq);
1180 				SDT_PROBE2(sdt, linux, work, queue,
1181 				    &dw->work, wq);
1182 			} else {
1183 				/* Change the rescheduled time.  */
1184 				dw->dw_resched = ticks;
1185 				SDT_PROBE2(sdt, linux, work, cancel,
1186 				    &dw->work, wq);
1187 				SDT_PROBE3(sdt, linux, work, schedule,
1188 				    dw, wq, ticks);
1189 			}
1190 			timer_modified = true;
1191 			break;
1192 		case DELAYED_WORK_CANCELLED:
1193 			/*
1194 			 * Someone cancelled it after the callout
1195 			 * started but before the poor thing even had a
1196 			 * chance to acquire the lock.
1197 			 */
1198 			if (ticks == 0) {
1199 				/*
1200 				 * We can just switch back to
1201 				 * DELAYED_WORK_SCHEDULED so that the
1202 				 * callout will queue the work as soon
1203 				 * as it gets the lock.
1204 				 */
1205 				dw->dw_state = DELAYED_WORK_SCHEDULED;
1206 				SDT_PROBE2(sdt, linux, work, queue,
1207 				    &dw->work, wq);
1208 			} else {
1209 				/* Ask it to reschedule.  */
1210 				dw->dw_state = DELAYED_WORK_RESCHEDULED;
1211 				dw->dw_resched = MIN(INT_MAX, ticks);
1212 				SDT_PROBE3(sdt, linux, work, schedule,
1213 				    dw, wq, ticks);
1214 			}
1215 			timer_modified = false;
1216 			break;
1217 		default:
1218 			panic("invalid delayed work state: %d", dw->dw_state);
1219 		}
1220 	}
1221 	mutex_exit(&wq->wq_lock);
1222 
1223 	return timer_modified;
1224 }
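
/*
 * Illustrative sketch (hypothetical names): mod_delayed_work suits
 * watchdog-style timers, where each event pushes the deadline back
 * regardless of whether the work is idle, queued, or scheduled.  For
 * example, restarting a five-second watchdog on every completion:
 *
 *	(void)mod_delayed_work(sc->sc_wq, &sc->sc_watchdog, 5*hz);
 */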
1225 
1226 /*
1227  * cancel_delayed_work(dw)
1228  *
1229  *	If work was scheduled or queued, remove it from the schedule or
1230  *	queue and return true.  If work was not scheduled or queued,
1231  *	return false.  Note that work may already be running; if it
1232  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1233  *	will return false, and either way, cancel_delayed_work will NOT
1234  *	wait for the work to complete.
1235  */
1236 bool
1237 cancel_delayed_work(struct delayed_work *dw)
1238 {
1239 	struct workqueue_struct *wq;
1240 	bool cancelled_p;
1241 
1242 	/* If there's no workqueue, nothing to cancel.   */
1243 	if ((wq = work_queue(&dw->work)) == NULL)
1244 		return false;
1245 
1246 	mutex_enter(&wq->wq_lock);
1247 	if (__predict_false(work_queue(&dw->work) != wq)) {
1248 		cancelled_p = false;
1249 	} else {
1250 		switch (dw->dw_state) {
1251 		case DELAYED_WORK_IDLE:
1252 			/*
1253 			 * It is either on the queue or already running
1254 			 * or both.
1255 			 */
1256 			if (work_claimed(&dw->work, wq)) {
1257 				/* On the queue.  Remove and release.  */
1258 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1259 				    work_entry);
1260 				SDT_PROBE2(sdt, linux, work, cancel,
1261 				    &dw->work, wq);
1262 				release_work(&dw->work, wq);
1263 				/* Can't dereference dw after this point.  */
1264 				cancelled_p = true;
1265 			} else {
1266 				/* Not on the queue, so didn't cancel.  */
1267 				cancelled_p = false;
1268 			}
1269 			break;
1270 		case DELAYED_WORK_SCHEDULED:
1271 			/*
1272 			 * If it is scheduled, mark it cancelled and
1273 			 * try to stop the callout before it starts.
1274 			 *
1275 			 * If it's too late and the callout has already
1276 			 * begun to execute, tough.
1277 			 *
1278 			 * If we stopped the callout before it started,
1279 			 * however, then destroy the callout and
1280 			 * dissociate it from the workqueue ourselves.
1281 			 */
1282 			dw->dw_state = DELAYED_WORK_CANCELLED;
1283 			cancelled_p = true;
1284 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1285 			if (!callout_stop(&dw->dw_callout))
1286 				cancel_delayed_work_done(wq, dw);
1287 			break;
1288 		case DELAYED_WORK_RESCHEDULED:
1289 			/*
1290 			 * If it is being rescheduled, the callout has
1291 			 * already fired.  We must ask it to cancel.
1292 			 */
1293 			dw->dw_state = DELAYED_WORK_CANCELLED;
1294 			dw->dw_resched = -1;
1295 			cancelled_p = true;
1296 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1297 			break;
1298 		case DELAYED_WORK_CANCELLED:
1299 			/*
1300 			 * If it is being cancelled, the callout has
1301 			 * already fired.  There is nothing more for us
1302 			 * to do.  Someone else claims credit for
1303 			 * cancelling it.
1304 			 */
1305 			cancelled_p = false;
1306 			break;
1307 		default:
1308 			panic("invalid delayed work state: %d",
1309 			    dw->dw_state);
1310 		}
1311 	}
1312 	mutex_exit(&wq->wq_lock);
1313 
1314 	return cancelled_p;
1315 }
1316 
1317 /*
1318  * cancel_delayed_work_sync(dw)
1319  *
1320  *	If work was scheduled or queued, remove it from the schedule or
1321  *	queue and return true.  If work was not scheduled or queued,
1322  *	return false.  Note that work may already be running; if it
1323  *	hasn't been rescheduled or requeued, then cancel_delayed_work
1324  *	will return false; either way, wait for it to complete.
1325  */
1326 bool
1327 cancel_delayed_work_sync(struct delayed_work *dw)
1328 {
1329 	struct workqueue_struct *wq;
1330 	bool cancelled_p;
1331 
1332 	/* If there's no workqueue, nothing to cancel.  */
1333 	if ((wq = work_queue(&dw->work)) == NULL)
1334 		return false;
1335 
1336 	mutex_enter(&wq->wq_lock);
1337 	if (__predict_false(work_queue(&dw->work) != wq)) {
1338 		cancelled_p = false;
1339 	} else {
1340 		switch (dw->dw_state) {
1341 		case DELAYED_WORK_IDLE:
1342 			/*
1343 			 * It is either on the queue or already running
1344 			 * or both.
1345 			 */
1346 			if (work_claimed(&dw->work, wq)) {
1347 				/* On the queue.  Remove and release.  */
1348 				TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1349 				    work_entry);
1350 				SDT_PROBE2(sdt, linux, work, cancel,
1351 				    &dw->work, wq);
1352 				release_work(&dw->work, wq);
1353 				/* Can't dereference dw after this point.  */
1354 				cancelled_p = true;
1355 			} else {
1356 				/* Not on the queue, so didn't cancel. */
1357 				cancelled_p = false;
1358 			}
1359 			/* If it's still running, wait for it to complete.  */
1360 			if (wq->wq_current_work == &dw->work)
1361 				wait_for_current_work(&dw->work, wq);
1362 			break;
1363 		case DELAYED_WORK_SCHEDULED:
1364 			/*
1365 			 * If it is scheduled, mark it cancelled and
1366 			 * try to stop the callout before it starts.
1367 			 *
1368 			 * If it's too late and the callout has already
1369 			 * begun to execute, we must wait for it to
1370 			 * complete.  But we got in soon enough to ask
1371 			 * the callout not to run, so we successfully
1372 			 * cancelled it in that case.
1373 			 *
1374 			 * If we stopped the callout before it started,
1375 			 * then we must destroy the callout and
1376 			 * dissociate it from the workqueue ourselves.
1377 			 */
1378 			dw->dw_state = DELAYED_WORK_CANCELLED;
1379 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1380 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1381 				cancel_delayed_work_done(wq, dw);
1382 			cancelled_p = true;
1383 			break;
1384 		case DELAYED_WORK_RESCHEDULED:
1385 			/*
1386 			 * If it is being rescheduled, the callout has
1387 			 * already fired.  We must ask it to cancel and
1388 			 * wait for it to complete.
1389 			 */
1390 			dw->dw_state = DELAYED_WORK_CANCELLED;
1391 			dw->dw_resched = -1;
1392 			SDT_PROBE2(sdt, linux, work, cancel,  &dw->work, wq);
1393 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1394 			cancelled_p = true;
1395 			break;
1396 		case DELAYED_WORK_CANCELLED:
1397 			/*
1398 			 * If it is being cancelled, the callout has
1399 			 * already fired.  We need only wait for it to
1400 			 * complete.  Someone else, however, claims
1401 			 * credit for cancelling it.
1402 			 */
1403 			(void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1404 			cancelled_p = false;
1405 			break;
1406 		default:
1407 			panic("invalid delayed work state: %d",
1408 			    dw->dw_state);
1409 		}
1410 	}
1411 	mutex_exit(&wq->wq_lock);
1412 
1413 	return cancelled_p;
1414 }
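
/*
 * Illustrative sketch (hypothetical names): cancel_delayed_work_sync is
 * the usual teardown call because, unlike cancel_delayed_work, it also
 * waits out a callout or callback that has already started.
 *
 *	(void)cancel_delayed_work_sync(&sc->sc_tick);
 *	... nothing may requeue sc_tick after this point ...
 */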
1415 
1416 /*
1417  * Flush
1418  */
1419 
1420 /*
1421  * flush_scheduled_work()
1422  *
1423  *	Wait for all work queued on system_wq to complete.  This does
1424  *	not include delayed work.
1425  */
1426 void
1427 flush_scheduled_work(void)
1428 {
1429 
1430 	flush_workqueue(system_wq);
1431 }
1432 
1433 struct flush_work {
1434 	kmutex_t		fw_lock;
1435 	kcondvar_t		fw_cv;
1436 	struct work_struct	fw_work;
1437 	bool			fw_done;
1438 };
1439 
1440 static void
1441 flush_work_cb(struct work_struct *work)
1442 {
1443 	struct flush_work *fw = container_of(work, struct flush_work, fw_work);
1444 
1445 	mutex_enter(&fw->fw_lock);
1446 	fw->fw_done = true;
1447 	cv_broadcast(&fw->fw_cv);
1448 	mutex_exit(&fw->fw_lock);
1449 }
1450 
1451 /*
1452  * flush_workqueue(wq)
1453  *
1454  *	Wait for all work queued on wq to complete.  This does not
1455  *	include delayed work.
1456  */
1457 void
1458 flush_workqueue(struct workqueue_struct *wq)
1459 {
1460 	struct flush_work fw;
1461 
1462 	if (lwp_getspecific(workqueue_key) == wq) {
1463 		SDT_PROBE1(sdt, linux, work, flush__self,  wq);
1464 		return;
1465 	}
1466 
1467 	mutex_init(&fw.fw_lock, MUTEX_DEFAULT, IPL_VM);
1468 	cv_init(&fw.fw_cv, "lxwqflsh");
1469 	INIT_WORK(&fw.fw_work, &flush_work_cb);
1470 	fw.fw_done = false;
1471 
1472 	SDT_PROBE1(sdt, linux, work, flush__start,  wq);
1473 	queue_work(wq, &fw.fw_work);
1474 
1475 	mutex_enter(&fw.fw_lock);
1476 	while (!fw.fw_done)
1477 		cv_wait(&fw.fw_cv, &fw.fw_lock);
1478 	mutex_exit(&fw.fw_lock);
1479 	SDT_PROBE1(sdt, linux, work, flush__done,  wq);
1480 
1481 	KASSERT(fw.fw_done);
1482 	/* no DESTROY_WORK */
1483 	cv_destroy(&fw.fw_cv);
1484 	mutex_destroy(&fw.fw_lock);
1485 }
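
/*
 * Note: the flush above works by queueing a sentinel flush_work item
 * and waiting for its callback to run, so by FIFO ordering every
 * work_struct queued with queue_work before the flush has completed by
 * the time the sentinel fires.  Flushing a workqueue from its own
 * worker thread would deadlock, hence the flush__self short-circuit.
 */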
1486 
1487 /*
1488  * drain_workqueue(wq)
1489  *
1490  *	Repeatedly flush wq until there is no more work.
1491  */
1492 void
1493 drain_workqueue(struct workqueue_struct *wq)
1494 {
1495 	unsigned ntries = 0;
1496 	bool done;
1497 
1498 	do {
1499 		if (ntries++ == 10 || (ntries % 100) == 0)
1500 			printf("linux workqueue %s"
1501 			    ": still clogged after %u flushes\n",
1502 			    wq->wq_name, ntries);
1503 		flush_workqueue(wq);
1504 		mutex_enter(&wq->wq_lock);
1505 		done = wq->wq_current_work == NULL;
1506 		done &= TAILQ_EMPTY(&wq->wq_queue);
1507 		done &= TAILQ_EMPTY(&wq->wq_dqueue);
1508 		mutex_exit(&wq->wq_lock);
1509 	} while (!done);
1510 }
1511 
1512 /*
1513  * flush_work(work)
1514  *
1515  *	If work is queued or currently executing, wait for it to
1516  *	complete.
1517  *
1518  *	Return true if we waited to flush it, false if it was already
1519  *	idle.
1520  */
1521 bool
1522 flush_work(struct work_struct *work)
1523 {
1524 	struct workqueue_struct *wq;
1525 
1526 	/* If there's no workqueue, nothing to flush.  */
1527 	if ((wq = work_queue(work)) == NULL)
1528 		return false;
1529 
1530 	flush_workqueue(wq);
1531 	return true;
1532 }
1533 
1534 /*
1535  * flush_delayed_work(dw)
1536  *
1537  *	If dw is scheduled to run after a delay, queue it immediately
1538  *	instead.  Then, if dw is queued or currently executing, wait
1539  *	for it to complete.
1540  */
1541 bool
1542 flush_delayed_work(struct delayed_work *dw)
1543 {
1544 	struct workqueue_struct *wq;
1545 	bool waited = false;
1546 
1547 	/* If there's no workqueue, nothing to flush.  */
1548 	if ((wq = work_queue(&dw->work)) == NULL)
1549 		return false;
1550 
1551 	mutex_enter(&wq->wq_lock);
1552 	if (__predict_false(work_queue(&dw->work) != wq)) {
1553 		/*
1554 		 * Moved off the queue already (and possibly to another
1555 		 * queue, though that would be ill-advised), so it must
1556 		 * have completed, and we have nothing more to do.
1557 		 */
1558 		waited = false;
1559 	} else {
1560 		switch (dw->dw_state) {
1561 		case DELAYED_WORK_IDLE:
1562 			/*
1563 			 * It has a workqueue assigned and the callout
1564 			 * is idle, so it must be in progress or on the
1565 			 * queue.  In that case, we'll wait for it to
1566 			 * complete.
1567 			 */
1568 			break;
1569 		case DELAYED_WORK_SCHEDULED:
1570 		case DELAYED_WORK_RESCHEDULED:
1571 		case DELAYED_WORK_CANCELLED:
1572 			/*
1573 			 * The callout is scheduled, and may have even
1574 			 * started.  Mark it as scheduled so that if
1575 			 * the callout has fired it will queue the work
1576 			 * itself.  Try to stop the callout -- if we
1577 			 * can, queue the work now; if we can't, wait
1578 			 * for the callout to complete, which entails
1579 			 * queueing it.
1580 			 */
1581 			dw->dw_state = DELAYED_WORK_SCHEDULED;
1582 			if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1583 				/*
1584 				 * We stopped it before it ran.  No
1585 				 * state change in the interim is
1586 				 * possible.  Destroy the callout and
1587 				 * queue it ourselves.
1588 				 */
1589 				KASSERT(dw->dw_state ==
1590 				    DELAYED_WORK_SCHEDULED);
1591 				dw_callout_destroy(wq, dw);
1592 				TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1593 				    work_entry);
1594 				cv_broadcast(&wq->wq_cv);
1595 				SDT_PROBE2(sdt, linux, work, queue,
1596 				    &dw->work, wq);
1597 			}
1598 			break;
1599 		default:
1600 			panic("invalid delayed work state: %d", dw->dw_state);
1601 		}
1602 		/*
1603 		 * Waiting for the whole queue to flush is overkill,
1604 		 * but doesn't hurt.
1605 		 */
1606 		mutex_exit(&wq->wq_lock);
1607 		flush_workqueue(wq);
1608 		mutex_enter(&wq->wq_lock);
1609 		waited = true;
1610 	}
1611 	mutex_exit(&wq->wq_lock);
1612 
1613 	return waited;
1614 }
1615 
1616 /*
1617  * delayed_work_pending(dw)
1618  *
1619  *	True if dw is currently scheduled to execute, false if not.
1620  */
1621 bool
1622 delayed_work_pending(const struct delayed_work *dw)
1623 {
1624 
1625 	return work_pending(&dw->work);
1626 }
1627 
1628 /*
1629  * INIT_RCU_WORK(rw, fn)
1630  *
1631  *	Initialize rw for use with a workqueue to call fn in a worker
1632  *	thread after an RCU grace period.  There is no corresponding
1633  *	destruction operation.
1634  */
1635 void
1636 INIT_RCU_WORK(struct rcu_work *rw, void (*fn)(struct work_struct *))
1637 {
1638 
1639 	INIT_WORK(&rw->work, fn);
1640 }
1641 
1642 static void
1643 queue_rcu_work_cb(struct rcu_head *r)
1644 {
1645 	struct rcu_work *rw = container_of(r, struct rcu_work, rw_rcu);
1646 	struct workqueue_struct *wq = work_queue(&rw->work);
1647 
1648 	mutex_enter(&wq->wq_lock);
1649 	KASSERT(work_pending(&rw->work));
1650 	KASSERT(work_queue(&rw->work) == wq);
1651 	destroy_rcu_head(&rw->rw_rcu);
1652 	TAILQ_REMOVE(&wq->wq_rcu, &rw->work, work_entry);
1653 	TAILQ_INSERT_TAIL(&wq->wq_queue, &rw->work, work_entry);
1654 	cv_broadcast(&wq->wq_cv);
1655 	SDT_PROBE2(sdt, linux, work, queue,  &rw->work, wq);
1656 	mutex_exit(&wq->wq_lock);
1657 }
1658 
1659 /*
1660  * queue_rcu_work(wq, rw)
1661  *
1662  *	Schedule rw to run on wq after an RCU grace period.
1663  */
1664 void
1665 queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rw)
1666 {
1667 
1668 	mutex_enter(&wq->wq_lock);
1669 	if (acquire_work(&rw->work, wq)) {
1670 		init_rcu_head(&rw->rw_rcu);
1671 		SDT_PROBE2(sdt, linux, work, rcu,  rw, wq);
1672 		TAILQ_INSERT_TAIL(&wq->wq_rcu, &rw->work, work_entry);
1673 		call_rcu(&rw->rw_rcu, &queue_rcu_work_cb);
1674 	}
1675 	mutex_exit(&wq->wq_lock);
1676 }
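
/*
 * Illustrative sketch (hypothetical names): queue_rcu_work defers the
 * callback until after an RCU grace period, which makes it a convenient
 * way to free an object that readers may still be traversing under
 * rcu_read_lock.
 *
 *	INIT_RCU_WORK(&obj->obj_rcu_work, &mydrv_free_obj);
 *	queue_rcu_work(system_wq, &obj->obj_rcu_work);
 */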
1677