1 /* $NetBSD: linux_work.c,v 1.61 2022/04/09 23:43:31 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Taylor R. Campbell.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_work.c,v 1.61 2022/04/09 23:43:31 riastradh Exp $");
34
35 #include <sys/types.h>
36 #include <sys/atomic.h>
37 #include <sys/callout.h>
38 #include <sys/condvar.h>
39 #include <sys/errno.h>
40 #include <sys/kmem.h>
41 #include <sys/kthread.h>
42 #include <sys/lwp.h>
43 #include <sys/mutex.h>
44 #ifndef _MODULE
45 #include <sys/once.h>
46 #endif
47 #include <sys/queue.h>
48 #include <sys/sdt.h>
49
50 #include <linux/workqueue.h>
51
52 TAILQ_HEAD(work_head, work_struct);
53 TAILQ_HEAD(dwork_head, delayed_work);
54
55 struct workqueue_struct {
56 kmutex_t wq_lock;
57 kcondvar_t wq_cv;
58 struct dwork_head wq_delayed; /* delayed work scheduled */
59 struct work_head wq_rcu; /* RCU work scheduled */
60 struct work_head wq_queue; /* work to run */
61 struct work_head wq_dqueue; /* delayed work to run now */
62 struct work_struct *wq_current_work;
63 int wq_flags;
64 bool wq_dying;
65 uint64_t wq_gen;
66 struct lwp *wq_lwp;
67 const char *wq_name;
68 };
69
70 static void __dead linux_workqueue_thread(void *);
71 static void linux_workqueue_timeout(void *);
72 static bool work_claimed(struct work_struct *,
73 struct workqueue_struct *);
74 static struct workqueue_struct *
75 work_queue(struct work_struct *);
76 static bool acquire_work(struct work_struct *,
77 struct workqueue_struct *);
78 static void release_work(struct work_struct *,
79 struct workqueue_struct *);
80 static void wait_for_current_work(struct work_struct *,
81 struct workqueue_struct *);
82 static void dw_callout_init(struct workqueue_struct *,
83 struct delayed_work *);
84 static void dw_callout_destroy(struct workqueue_struct *,
85 struct delayed_work *);
86 static void cancel_delayed_work_done(struct workqueue_struct *,
87 struct delayed_work *);
88
89 SDT_PROBE_DEFINE2(sdt, linux, work, acquire,
90 "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
91 SDT_PROBE_DEFINE2(sdt, linux, work, release,
92 "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
93 SDT_PROBE_DEFINE2(sdt, linux, work, queue,
94 "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
95 SDT_PROBE_DEFINE2(sdt, linux, work, rcu,
96 "struct rcu_work *"/*work*/, "struct workqueue_struct *"/*wq*/);
97 SDT_PROBE_DEFINE2(sdt, linux, work, cancel,
98 "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
99 SDT_PROBE_DEFINE3(sdt, linux, work, schedule,
100 "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/,
101 "unsigned long"/*ticks*/);
102 SDT_PROBE_DEFINE2(sdt, linux, work, timer,
103 "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
104 SDT_PROBE_DEFINE2(sdt, linux, work, wait__start,
105 "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
106 SDT_PROBE_DEFINE2(sdt, linux, work, wait__done,
107 "struct delayed_work *"/*dw*/, "struct workqueue_struct *"/*wq*/);
108 SDT_PROBE_DEFINE2(sdt, linux, work, run,
109 "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
110 SDT_PROBE_DEFINE2(sdt, linux, work, done,
111 "struct work_struct *"/*work*/, "struct workqueue_struct *"/*wq*/);
112 SDT_PROBE_DEFINE1(sdt, linux, work, batch__start,
113 "struct workqueue_struct *"/*wq*/);
114 SDT_PROBE_DEFINE1(sdt, linux, work, batch__done,
115 "struct workqueue_struct *"/*wq*/);
116 SDT_PROBE_DEFINE1(sdt, linux, work, flush__self,
117 "struct workqueue_struct *"/*wq*/);
118 SDT_PROBE_DEFINE1(sdt, linux, work, flush__start,
119 "struct workqueue_struct *"/*wq*/);
120 SDT_PROBE_DEFINE1(sdt, linux, work, flush__done,
121 "struct workqueue_struct *"/*wq*/);
122
123 static specificdata_key_t workqueue_key __read_mostly;
124
125 struct workqueue_struct *system_highpri_wq __read_mostly;
126 struct workqueue_struct *system_long_wq __read_mostly;
127 struct workqueue_struct *system_power_efficient_wq __read_mostly;
128 struct workqueue_struct *system_unbound_wq __read_mostly;
129 struct workqueue_struct *system_wq __read_mostly;
130
131 static inline uintptr_t
atomic_cas_uintptr(volatile uintptr_t *p, uintptr_t old, uintptr_t new)
133 {
134
135 return (uintptr_t)atomic_cas_ptr(p, (void *)old, (void *)new);
136 }
137
138 /*
139 * linux_workqueue_init()
140 *
141 * Initialize the Linux workqueue subsystem. Return 0 on success,
142 * NetBSD error on failure.
143 */
144 static int
linux_workqueue_init0(void)
146 {
147 int error;
148
149 error = lwp_specific_key_create(&workqueue_key, NULL);
150 if (error)
151 goto out;
152
153 system_highpri_wq = alloc_ordered_workqueue("lnxhipwq", 0);
154 if (system_highpri_wq == NULL) {
155 error = ENOMEM;
156 goto out;
157 }
158
159 system_long_wq = alloc_ordered_workqueue("lnxlngwq", 0);
160 if (system_long_wq == NULL) {
161 error = ENOMEM;
162 goto out;
163 }
164
165 system_power_efficient_wq = alloc_ordered_workqueue("lnxpwrwq", 0);
166 if (system_power_efficient_wq == NULL) {
167 error = ENOMEM;
168 goto out;
169 }
170
171 system_unbound_wq = alloc_ordered_workqueue("lnxubdwq", 0);
172 if (system_unbound_wq == NULL) {
173 error = ENOMEM;
174 goto out;
175 }
176
177 system_wq = alloc_ordered_workqueue("lnxsyswq", 0);
178 if (system_wq == NULL) {
179 error = ENOMEM;
180 goto out;
181 }
182
183 /* Success! */
184 error = 0;
185
186 out: if (error) {
187 if (system_highpri_wq)
188 destroy_workqueue(system_highpri_wq);
189 if (system_long_wq)
190 destroy_workqueue(system_long_wq);
191 if (system_power_efficient_wq)
192 destroy_workqueue(system_power_efficient_wq);
193 if (system_unbound_wq)
194 destroy_workqueue(system_unbound_wq);
195 if (system_wq)
196 destroy_workqueue(system_wq);
197 if (workqueue_key)
198 lwp_specific_key_delete(workqueue_key);
199 }
200
201 return error;
202 }
203
204 /*
205 * linux_workqueue_fini()
206 *
207 * Destroy the Linux workqueue subsystem. Never fails.
208 */
209 static void
linux_workqueue_fini0(void)
{

	destroy_workqueue(system_wq);
	destroy_workqueue(system_unbound_wq);
	destroy_workqueue(system_power_efficient_wq);
	destroy_workqueue(system_long_wq);
	destroy_workqueue(system_highpri_wq);
	lwp_specific_key_delete(workqueue_key);
}
218
219 #ifndef _MODULE
220 static ONCE_DECL(linux_workqueue_init_once);
221 #endif
222
223 int
linux_workqueue_init(void)
225 {
226 #ifdef _MODULE
227 return linux_workqueue_init0();
228 #else
229 return INIT_ONCE(&linux_workqueue_init_once, &linux_workqueue_init0);
230 #endif
231 }
232
233 void
linux_workqueue_fini(void)
235 {
236 #ifdef _MODULE
237 return linux_workqueue_fini0();
238 #else
239 return FINI_ONCE(&linux_workqueue_init_once, &linux_workqueue_fini0);
240 #endif
241 }
242
243 /*
244 * Workqueues
245 */
246
247 /*
248 * alloc_workqueue(name, flags, max_active)
249 *
 *	Create a workqueue of the given name.  max_active is the
 *	maximum number of work items in flight, or 0 for the default;
 *	this implementation supports only 0 or 1, i.e. a single
 *	ordered worker thread.  Return NULL on failure, pointer to
 *	struct workqueue_struct object on success.
254 */
255 struct workqueue_struct *
alloc_workqueue(const char *name, int flags, unsigned max_active)
257 {
258 struct workqueue_struct *wq;
259 int error;
260
261 KASSERT(max_active == 0 || max_active == 1);
262
263 wq = kmem_zalloc(sizeof(*wq), KM_SLEEP);
264
265 mutex_init(&wq->wq_lock, MUTEX_DEFAULT, IPL_VM);
266 cv_init(&wq->wq_cv, name);
267 TAILQ_INIT(&wq->wq_delayed);
268 TAILQ_INIT(&wq->wq_rcu);
269 TAILQ_INIT(&wq->wq_queue);
270 TAILQ_INIT(&wq->wq_dqueue);
271 wq->wq_current_work = NULL;
272 wq->wq_flags = 0;
273 wq->wq_dying = false;
274 wq->wq_gen = 0;
275 wq->wq_lwp = NULL;
276 wq->wq_name = name;
277
278 error = kthread_create(PRI_NONE,
279 KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL,
280 &linux_workqueue_thread, wq, &wq->wq_lwp, "%s", name);
281 if (error)
282 goto fail0;
283
284 return wq;
285
286 fail0: KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
287 KASSERT(TAILQ_EMPTY(&wq->wq_queue));
288 KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
289 KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
290 cv_destroy(&wq->wq_cv);
291 mutex_destroy(&wq->wq_lock);
292 kmem_free(wq, sizeof(*wq));
293 return NULL;
294 }
295
296 /*
297 * alloc_ordered_workqueue(name, flags)
298 *
299 * Same as alloc_workqueue(name, flags, 1).
300 */
301 struct workqueue_struct *
alloc_ordered_workqueue(const char *name, int flags)
303 {
304
305 return alloc_workqueue(name, flags, 1);
306 }
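
/*
 * Illustrative usage sketch (not part of the original source; the
 * softc field sc_wq and the name "examplewq" are hypothetical):
 *
 *	sc->sc_wq = alloc_ordered_workqueue("examplewq", 0);
 *	if (sc->sc_wq == NULL)
 *		return ENOMEM;
 *	...
 *	queue_work(sc->sc_wq, &sc->sc_task);
 *	...
 *	destroy_workqueue(sc->sc_wq);
 *
 * destroy_workqueue cancels pending delayed work and waits for queued
 * work to finish, so it must only be called where sleeping is allowed.
 */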
307
308 /*
309 * destroy_workqueue(wq)
310 *
311 * Destroy a workqueue created with wq. Cancel any pending
312 * delayed work. Wait for all queued work to complete.
313 *
314 * May sleep.
315 */
316 void
destroy_workqueue(struct workqueue_struct *wq)
318 {
319
	/*
	 * Cancel all delayed work.  We do this first because any
	 * delayed work that has already timed out, which we can't
	 * cancel, may have queued new work.
	 */
325 mutex_enter(&wq->wq_lock);
326 while (!TAILQ_EMPTY(&wq->wq_delayed)) {
327 struct delayed_work *const dw = TAILQ_FIRST(&wq->wq_delayed);
328
329 KASSERT(work_queue(&dw->work) == wq);
330 KASSERTMSG((dw->dw_state == DELAYED_WORK_SCHEDULED ||
331 dw->dw_state == DELAYED_WORK_RESCHEDULED ||
332 dw->dw_state == DELAYED_WORK_CANCELLED),
333 "delayed work %p in bad state: %d",
334 dw, dw->dw_state);
335
336 /*
337 * Mark it cancelled and try to stop the callout before
338 * it starts.
339 *
340 * If it's too late and the callout has already begun
341 * to execute, then it will notice that we asked to
342 * cancel it and remove itself from the queue before
343 * returning.
344 *
345 * If we stopped the callout before it started,
346 * however, then we can safely destroy the callout and
347 * dissociate it from the workqueue ourselves.
348 */
349 SDT_PROBE2(sdt, linux, work, cancel, &dw->work, wq);
350 dw->dw_state = DELAYED_WORK_CANCELLED;
351 if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
352 cancel_delayed_work_done(wq, dw);
353 }
354 mutex_exit(&wq->wq_lock);
355
356 /* Wait for all scheduled RCU work to complete. */
357 mutex_enter(&wq->wq_lock);
358 while (!TAILQ_EMPTY(&wq->wq_rcu))
359 cv_wait(&wq->wq_cv, &wq->wq_lock);
360 mutex_exit(&wq->wq_lock);
361
362 /*
363 * At this point, no new work can be put on the queue.
364 */
365
366 /* Tell the thread to exit. */
367 mutex_enter(&wq->wq_lock);
368 wq->wq_dying = true;
369 cv_broadcast(&wq->wq_cv);
370 mutex_exit(&wq->wq_lock);
371
372 /* Wait for it to exit. */
373 (void)kthread_join(wq->wq_lwp);
374
375 KASSERT(wq->wq_dying);
376 KASSERT(wq->wq_flags == 0);
377 KASSERT(wq->wq_current_work == NULL);
378 KASSERT(TAILQ_EMPTY(&wq->wq_dqueue));
379 KASSERT(TAILQ_EMPTY(&wq->wq_queue));
380 KASSERT(TAILQ_EMPTY(&wq->wq_rcu));
381 KASSERT(TAILQ_EMPTY(&wq->wq_delayed));
382 cv_destroy(&wq->wq_cv);
383 mutex_destroy(&wq->wq_lock);
384
385 kmem_free(wq, sizeof(*wq));
386 }
387
388 /*
389 * Work thread and callout
390 */
391
392 /*
393 * linux_workqueue_thread(cookie)
394 *
395 * Main function for a workqueue's worker thread. Waits until
396 * there is work queued, grabs a batch of work off the queue,
397 * executes it all, bumps the generation number, and repeats,
398 * until dying.
399 */
400 static void __dead
linux_workqueue_thread(void *cookie)
402 {
403 struct workqueue_struct *const wq = cookie;
404 struct work_head *const q[2] = { &wq->wq_queue, &wq->wq_dqueue };
405 struct work_struct marker, *work;
406 unsigned i;
407
408 lwp_setspecific(workqueue_key, wq);
409
410 mutex_enter(&wq->wq_lock);
411 for (;;) {
412 /*
413 * Wait until there's activity. If there's no work and
414 * we're dying, stop here.
415 */
416 if (TAILQ_EMPTY(&wq->wq_queue) &&
417 TAILQ_EMPTY(&wq->wq_dqueue)) {
418 if (wq->wq_dying)
419 break;
420 cv_wait(&wq->wq_cv, &wq->wq_lock);
421 continue;
422 }
423
424 /*
425 * Start a batch of work. Use a marker to delimit when
426 * the batch ends so we can advance the generation
427 * after the batch.
428 */
429 SDT_PROBE1(sdt, linux, work, batch__start, wq);
430 for (i = 0; i < 2; i++) {
431 if (TAILQ_EMPTY(q[i]))
432 continue;
433 TAILQ_INSERT_TAIL(q[i], &marker, work_entry);
434 while ((work = TAILQ_FIRST(q[i])) != &marker) {
435 void (*func)(struct work_struct *);
436
437 KASSERT(work_queue(work) == wq);
438 KASSERT(work_claimed(work, wq));
439 KASSERTMSG((q[i] != &wq->wq_dqueue ||
440 container_of(work, struct delayed_work,
441 work)->dw_state ==
442 DELAYED_WORK_IDLE),
443 "delayed work %p queued and scheduled",
444 work);
445
446 TAILQ_REMOVE(q[i], work, work_entry);
447 KASSERT(wq->wq_current_work == NULL);
448 wq->wq_current_work = work;
449 func = work->func;
450 release_work(work, wq);
451 /* Can't dereference work after this point. */
452
453 mutex_exit(&wq->wq_lock);
454 SDT_PROBE2(sdt, linux, work, run, work, wq);
455 (*func)(work);
456 SDT_PROBE2(sdt, linux, work, done, work, wq);
457 mutex_enter(&wq->wq_lock);
458
459 KASSERT(wq->wq_current_work == work);
460 wq->wq_current_work = NULL;
461 cv_broadcast(&wq->wq_cv);
462 }
463 TAILQ_REMOVE(q[i], &marker, work_entry);
464 }
465
466 /* Notify cancel that we've completed a batch of work. */
467 wq->wq_gen++;
468 cv_broadcast(&wq->wq_cv);
469 SDT_PROBE1(sdt, linux, work, batch__done, wq);
470 }
471 mutex_exit(&wq->wq_lock);
472
473 kthread_exit(0);
474 }
475
476 /*
477 * linux_workqueue_timeout(cookie)
478 *
479 * Delayed work timeout callback.
480 *
481 * - If scheduled, queue it.
482 * - If rescheduled, callout_schedule ourselves again.
483 * - If cancelled, destroy the callout and release the work from
484 * the workqueue.
485 */
486 static void
linux_workqueue_timeout(void *cookie)
488 {
489 struct delayed_work *const dw = cookie;
490 struct workqueue_struct *const wq = work_queue(&dw->work);
491
492 KASSERTMSG(wq != NULL,
493 "delayed work %p state %d resched %d",
494 dw, dw->dw_state, dw->dw_resched);
495
496 SDT_PROBE2(sdt, linux, work, timer, dw, wq);
497
498 mutex_enter(&wq->wq_lock);
499 KASSERT(work_queue(&dw->work) == wq);
500 switch (dw->dw_state) {
501 case DELAYED_WORK_IDLE:
502 panic("delayed work callout uninitialized: %p", dw);
503 case DELAYED_WORK_SCHEDULED:
504 dw_callout_destroy(wq, dw);
505 TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work, work_entry);
506 cv_broadcast(&wq->wq_cv);
507 SDT_PROBE2(sdt, linux, work, queue, &dw->work, wq);
508 break;
509 case DELAYED_WORK_RESCHEDULED:
510 KASSERT(dw->dw_resched >= 0);
511 callout_schedule(&dw->dw_callout, dw->dw_resched);
512 dw->dw_state = DELAYED_WORK_SCHEDULED;
513 dw->dw_resched = -1;
514 break;
515 case DELAYED_WORK_CANCELLED:
516 cancel_delayed_work_done(wq, dw);
517 /* Can't dereference dw after this point. */
518 goto out;
519 default:
520 panic("delayed work callout in bad state: %p", dw);
521 }
522 KASSERT(dw->dw_state == DELAYED_WORK_IDLE ||
523 dw->dw_state == DELAYED_WORK_SCHEDULED);
524 out: mutex_exit(&wq->wq_lock);
525 }
526
527 /*
528 * current_work()
529 *
530 * If in a workqueue worker thread, return the work it is
531 * currently executing. Otherwise return NULL.
532 */
533 struct work_struct *
current_work(void)
535 {
536 struct workqueue_struct *wq = lwp_getspecific(workqueue_key);
537
538 /* If we're not a workqueue thread, then there's no work. */
539 if (wq == NULL)
540 return NULL;
541
542 /*
543 * Otherwise, this should be possible only while work is in
544 * progress. Return the current work item.
545 */
546 KASSERT(wq->wq_current_work != NULL);
547 return wq->wq_current_work;
548 }
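
/*
 * Illustrative sketch (not part of the original source; sc_task is a
 * hypothetical field): current_work can back an assertion that a
 * helper routine is reached only from its own work function:
 *
 *	KASSERT(current_work() == &sc->sc_task);
 */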
549
550 /*
551 * Work
552 */
553
554 /*
555 * INIT_WORK(work, fn)
556 *
557 * Initialize work for use with a workqueue to call fn in a worker
558 * thread. There is no corresponding destruction operation.
559 */
560 void
INIT_WORK(struct work_struct *work, void (*fn)(struct work_struct *))
562 {
563
564 work->work_owner = 0;
565 work->func = fn;
566 }
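
/*
 * Illustrative sketch (not part of the original source; example_softc,
 * example_task, and sc_task are hypothetical):
 *
 *	struct example_softc {
 *		struct work_struct	sc_task;
 *		...
 *	};
 *
 *	static void
 *	example_task(struct work_struct *work)
 *	{
 *		struct example_softc *sc =
 *		    container_of(work, struct example_softc, sc_task);
 *
 *		... runs later in the worker thread; may sleep ...
 *	}
 *
 *	INIT_WORK(&sc->sc_task, example_task);
 *	schedule_work(&sc->sc_task);
 */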
567
568 /*
569 * work_claimed(work, wq)
570 *
571 * True if work is currently claimed by a workqueue, meaning it is
572 * either on the queue or scheduled in a callout. The workqueue
573 * must be wq, and caller must hold wq's lock.
574 */
575 static bool
work_claimed(struct work_struct *work, struct workqueue_struct *wq)
577 {
578
579 KASSERT(work_queue(work) == wq);
580 KASSERT(mutex_owned(&wq->wq_lock));
581
582 return atomic_load_relaxed(&work->work_owner) & 1;
583 }
584
585 /*
586 * work_pending(work)
587 *
588 * True if work is currently claimed by any workqueue, scheduled
589 * to run on that workqueue.
590 */
591 bool
work_pending(const struct work_struct *work)
593 {
594
595 return atomic_load_relaxed(&work->work_owner) & 1;
596 }
597
598 /*
599 * work_queue(work)
600 *
601 * Return the last queue that work was queued on, or NULL if it
602 * was never queued.
603 */
604 static struct workqueue_struct *
work_queue(struct work_struct *work)
606 {
607
608 return (struct workqueue_struct *)
609 (atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
610 }
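
/*
 * Added commentary (not in the original source): work_owner packs the
 * owning workqueue pointer and a "claimed" flag into one word.  For a
 * work item last queued on workqueue wq, work_owner is
 * (uintptr_t)wq | 1 while the item sits on the queue or in a callout,
 * and plain (uintptr_t)wq once the worker thread or a cancel has
 * released it.  work_queue() masks off the low bit to recover the
 * pointer; work_claimed()/work_pending() test it.  This relies on
 * struct workqueue_struct being at least 2-byte aligned, which
 * acquire_work asserts.
 */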
611
612 /*
613 * acquire_work(work, wq)
614 *
615 * Try to claim work for wq. If work is already claimed, it must
616 * be claimed by wq; return false. If work is not already
617 * claimed, claim it, issue a memory barrier to match any prior
618 * release_work, and return true.
619 *
620 * Caller must hold wq's lock.
621 */
622 static bool
acquire_work(struct work_struct *work, struct workqueue_struct *wq)
624 {
625 uintptr_t owner0, owner;
626
627 KASSERT(mutex_owned(&wq->wq_lock));
628 KASSERT(((uintptr_t)wq & 1) == 0);
629
630 owner = (uintptr_t)wq | 1;
631 do {
632 owner0 = atomic_load_relaxed(&work->work_owner);
633 if (owner0 & 1) {
634 KASSERT((owner0 & ~(uintptr_t)1) == (uintptr_t)wq);
635 return false;
636 }
637 KASSERT(owner0 == (uintptr_t)NULL || owner0 == (uintptr_t)wq);
638 } while (atomic_cas_uintptr(&work->work_owner, owner0, owner) !=
639 owner0);
640
641 KASSERT(work_queue(work) == wq);
642 membar_acquire();
643 SDT_PROBE2(sdt, linux, work, acquire, work, wq);
644 return true;
645 }
646
647 /*
648 * release_work(work, wq)
649 *
650 * Issue a memory barrier to match any subsequent acquire_work and
651 * dissociate work from wq.
652 *
653 * Caller must hold wq's lock and work must be associated with wq.
654 */
655 static void
release_work(struct work_struct *work, struct workqueue_struct *wq)
657 {
658
659 KASSERT(work_queue(work) == wq);
660 KASSERT(mutex_owned(&wq->wq_lock));
661
662 SDT_PROBE2(sdt, linux, work, release, work, wq);
663 membar_release();
664
665 /*
666 * Non-interlocked r/m/w is safe here because nobody else can
667 * write to this while the claimed bit is set and the workqueue
668 * lock is held.
669 */
670 atomic_store_relaxed(&work->work_owner,
671 atomic_load_relaxed(&work->work_owner) & ~(uintptr_t)1);
672 }
673
674 /*
675 * schedule_work(work)
676 *
677 * If work is not already queued on system_wq, queue it to be run
678 * by system_wq's worker thread when it next can. True if it was
679 * newly queued, false if it was already queued. If the work was
680 * already running, queue it to run again.
681 *
682 * Caller must ensure work is not queued to run on a different
683 * workqueue.
684 */
685 bool
schedule_work(struct work_struct *work)
687 {
688
689 return queue_work(system_wq, work);
690 }
691
692 /*
693 * queue_work(wq, work)
694 *
695 * If work is not already queued on wq, queue it to be run by wq's
696 * worker thread when it next can. True if it was newly queued,
697 * false if it was already queued. If the work was already
698 * running, queue it to run again.
699 *
700 * Caller must ensure work is not queued to run on a different
701 * workqueue.
702 */
703 bool
queue_work(struct workqueue_struct *wq, struct work_struct *work)
705 {
706 bool newly_queued;
707
708 KASSERT(wq != NULL);
709
710 mutex_enter(&wq->wq_lock);
711 if (__predict_true(acquire_work(work, wq))) {
712 /*
713 * It wasn't on any workqueue at all. Put it on this
714 * one, and signal the worker thread that there is work
715 * to do.
716 */
717 TAILQ_INSERT_TAIL(&wq->wq_queue, work, work_entry);
718 cv_broadcast(&wq->wq_cv);
719 SDT_PROBE2(sdt, linux, work, queue, work, wq);
720 newly_queued = true;
721 } else {
722 /*
723 * It was already on this workqueue. Nothing to do
724 * since it is already queued.
725 */
726 newly_queued = false;
727 }
728 mutex_exit(&wq->wq_lock);
729
730 return newly_queued;
731 }
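
/*
 * Illustrative sketch (not part of the original source; example_intr,
 * sc_wq, and sc_task are hypothetical): because an item that is
 * already queued is not queued twice, a handler may call queue_work
 * repeatedly and the work function still runs once per batch of
 * events.  The queue lock is a spin mutex at IPL_VM, so this may be
 * done from interrupt context at or below that IPL.
 *
 *	static int
 *	example_intr(void *arg)
 *	{
 *		struct example_softc *sc = arg;
 *
 *		...
 *		queue_work(sc->sc_wq, &sc->sc_task);
 *		return 1;
 *	}
 */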
732
733 /*
734 * cancel_work(work)
735 *
736 * If work was queued, remove it from the queue and return true.
737 * If work was not queued, return false. Work may still be
738 * running when this returns.
739 */
740 bool
cancel_work(struct work_struct *work)
742 {
743 struct workqueue_struct *wq;
744 bool cancelled_p = false;
745
746 /* If there's no workqueue, nothing to cancel. */
747 if ((wq = work_queue(work)) == NULL)
748 goto out;
749
750 mutex_enter(&wq->wq_lock);
751 if (__predict_false(work_queue(work) != wq)) {
		/*
		 * It has finished execution or been cancelled by
		 * another thread, and has been moved off the
		 * workqueue, so it's too late to cancel.
		 */
757 cancelled_p = false;
758 } else {
759 /* Check whether it's on the queue. */
760 if (work_claimed(work, wq)) {
761 /*
762 * It is still on the queue. Take it off the
763 * queue and report successful cancellation.
764 */
765 TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
766 SDT_PROBE2(sdt, linux, work, cancel, work, wq);
767 release_work(work, wq);
768 /* Can't dereference work after this point. */
769 cancelled_p = true;
770 } else {
771 /* Not on the queue. Couldn't cancel it. */
772 cancelled_p = false;
773 }
774 }
775 mutex_exit(&wq->wq_lock);
776
777 out: return cancelled_p;
778 }
779
780 /*
781 * cancel_work_sync(work)
782 *
783 * If work was queued, remove it from the queue and return true.
784 * If work was not queued, return false. Either way, if work is
785 * currently running, wait for it to complete.
786 *
787 * May sleep.
788 */
789 bool
cancel_work_sync(struct work_struct *work)
791 {
792 struct workqueue_struct *wq;
793 bool cancelled_p = false;
794
795 /* If there's no workqueue, nothing to cancel. */
796 if ((wq = work_queue(work)) == NULL)
797 goto out;
798
799 mutex_enter(&wq->wq_lock);
800 if (__predict_false(work_queue(work) != wq)) {
801 /*
802 * It has finished execution or been cancelled by
803 * another thread, and has been moved off the
804 * workqueue, so it's too late to cancel.
805 */
806 cancelled_p = false;
807 } else {
808 /* Check whether it's on the queue. */
809 if (work_claimed(work, wq)) {
810 /*
811 * It is still on the queue. Take it off the
812 * queue and report successful cancellation.
813 */
814 TAILQ_REMOVE(&wq->wq_queue, work, work_entry);
815 SDT_PROBE2(sdt, linux, work, cancel, work, wq);
816 release_work(work, wq);
817 /* Can't dereference work after this point. */
818 cancelled_p = true;
819 } else {
820 /* Not on the queue. Couldn't cancel it. */
821 cancelled_p = false;
822 }
823 /* If it's still running, wait for it to complete. */
824 if (wq->wq_current_work == work)
825 wait_for_current_work(work, wq);
826 }
827 mutex_exit(&wq->wq_lock);
828
829 out: return cancelled_p;
830 }
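
/*
 * Illustrative teardown sketch (not part of the original source; the
 * detach function and softc fields are hypothetical):
 *
 *	static int
 *	example_detach(device_t self, int flags)
 *	{
 *		struct example_softc *sc = device_private(self);
 *
 *		(void)cancel_work_sync(&sc->sc_task);
 *		...
 *	}
 *
 * Provided nothing can requeue it afterwards, the work is neither
 * queued nor running once cancel_work_sync returns, so the memory
 * containing the work_struct may then be freed.
 */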
831
832 /*
833 * wait_for_current_work(work, wq)
834 *
835 * wq must be currently executing work. Wait for it to finish.
836 *
837 * Does not dereference work.
838 */
839 static void
wait_for_current_work(struct work_struct *work, struct workqueue_struct *wq)
841 {
842 uint64_t gen;
843
844 KASSERT(mutex_owned(&wq->wq_lock));
845 KASSERT(wq->wq_current_work == work);
846
847 /* Wait only one generation in case it gets requeued quickly. */
848 SDT_PROBE2(sdt, linux, work, wait__start, work, wq);
849 gen = wq->wq_gen;
850 do {
851 cv_wait(&wq->wq_cv, &wq->wq_lock);
852 } while (wq->wq_current_work == work && wq->wq_gen == gen);
853 SDT_PROBE2(sdt, linux, work, wait__done, work, wq);
854 }
855
856 /*
857 * Delayed work
858 */
859
860 /*
861 * INIT_DELAYED_WORK(dw, fn)
862 *
863 * Initialize dw for use with a workqueue to call fn in a worker
864 * thread after a delay. There is no corresponding destruction
865 * operation.
866 */
867 void
INIT_DELAYED_WORK(struct delayed_work *dw, void (*fn)(struct work_struct *))
869 {
870
871 INIT_WORK(&dw->work, fn);
872 dw->dw_state = DELAYED_WORK_IDLE;
873 dw->dw_resched = -1;
874
875 /*
876 * Defer callout_init until we are going to schedule the
877 * callout, which can then callout_destroy it, because
878 * otherwise since there's no DESTROY_DELAYED_WORK or anything
879 * we have no opportunity to call callout_destroy.
880 */
881 }
882
883 /*
884 * schedule_delayed_work(dw, ticks)
885 *
886 * If it is not currently scheduled, schedule dw to run after
887 * ticks on system_wq. If currently executing and not already
888 * rescheduled, reschedule it. True if it was newly scheduled,
889 * false if it was already scheduled.
890 *
891 * If ticks == 0, queue it to run as soon as the worker can,
892 * without waiting for the next callout tick to run.
893 */
894 bool
schedule_delayed_work(struct delayed_work *dw, unsigned long ticks)
896 {
897
898 return queue_delayed_work(system_wq, dw, ticks);
899 }
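
/*
 * Illustrative sketch (not part of the original source; example_tick,
 * sc_tick, and the one-second interval are hypothetical):
 *
 *	INIT_DELAYED_WORK(&sc->sc_tick, example_tick);
 *	schedule_delayed_work(&sc->sc_tick, mstohz(1000));
 *
 * A periodic task can reschedule itself from its own work function:
 *
 *	static void
 *	example_tick(struct work_struct *work)
 *	{
 *		struct example_softc *sc = container_of(work,
 *		    struct example_softc, sc_tick.work);
 *
 *		...
 *		schedule_delayed_work(&sc->sc_tick, mstohz(1000));
 *	}
 *
 * On teardown, cancel_delayed_work_sync(&sc->sc_tick) stops it.
 */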
900
901 /*
902 * dw_callout_init(wq, dw)
903 *
904 * Initialize the callout of dw and transition to
905 * DELAYED_WORK_SCHEDULED. Caller must use callout_schedule.
906 */
907 static void
dw_callout_init(struct workqueue_struct *wq, struct delayed_work *dw)
909 {
910
911 KASSERT(mutex_owned(&wq->wq_lock));
912 KASSERT(work_queue(&dw->work) == wq);
913 KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
914
915 callout_init(&dw->dw_callout, CALLOUT_MPSAFE);
916 callout_setfunc(&dw->dw_callout, &linux_workqueue_timeout, dw);
917 TAILQ_INSERT_HEAD(&wq->wq_delayed, dw, dw_entry);
918 dw->dw_state = DELAYED_WORK_SCHEDULED;
919 }
920
921 /*
922 * dw_callout_destroy(wq, dw)
923 *
924 * Destroy the callout of dw and transition to DELAYED_WORK_IDLE.
925 */
926 static void
dw_callout_destroy(struct workqueue_struct *wq, struct delayed_work *dw)
928 {
929
930 KASSERT(mutex_owned(&wq->wq_lock));
931 KASSERT(work_queue(&dw->work) == wq);
932 KASSERT(dw->dw_state == DELAYED_WORK_SCHEDULED ||
933 dw->dw_state == DELAYED_WORK_RESCHEDULED ||
934 dw->dw_state == DELAYED_WORK_CANCELLED);
935
936 TAILQ_REMOVE(&wq->wq_delayed, dw, dw_entry);
937 callout_destroy(&dw->dw_callout);
938 dw->dw_resched = -1;
939 dw->dw_state = DELAYED_WORK_IDLE;
940 }
941
942 /*
943 * cancel_delayed_work_done(wq, dw)
944 *
945 * Complete cancellation of a delayed work: transition from
946 * DELAYED_WORK_CANCELLED to DELAYED_WORK_IDLE and off the
947 * workqueue. Caller must not dereference dw after this returns.
948 */
949 static void
cancel_delayed_work_done(struct workqueue_struct *wq, struct delayed_work *dw)
951 {
952
953 KASSERT(mutex_owned(&wq->wq_lock));
954 KASSERT(work_queue(&dw->work) == wq);
955 KASSERT(dw->dw_state == DELAYED_WORK_CANCELLED);
956
957 dw_callout_destroy(wq, dw);
958 release_work(&dw->work, wq);
959 /* Can't dereference dw after this point. */
960 }
961
962 /*
963 * queue_delayed_work(wq, dw, ticks)
964 *
 *	If it is not currently scheduled, schedule dw to run after
 *	ticks on wq.  If it is already scheduled or queued on wq, it
 *	is left alone (except that a pending cancellation is undone).
968 *
969 * If ticks == 0, queue it to run as soon as the worker can,
970 * without waiting for the next callout tick to run.
971 */
972 bool
queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    unsigned long ticks)
975 {
976 bool newly_queued;
977
978 mutex_enter(&wq->wq_lock);
979 if (__predict_true(acquire_work(&dw->work, wq))) {
980 /*
981 * It wasn't on any workqueue at all. Schedule it to
982 * run on this one.
983 */
984 KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
985 if (ticks == 0) {
986 TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
987 work_entry);
988 cv_broadcast(&wq->wq_cv);
989 SDT_PROBE2(sdt, linux, work, queue, &dw->work, wq);
990 } else {
991 /*
992 * Initialize a callout and schedule to run
993 * after a delay.
994 */
995 dw_callout_init(wq, dw);
996 callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
997 SDT_PROBE3(sdt, linux, work, schedule, dw, wq, ticks);
998 }
999 newly_queued = true;
1000 } else {
1001 /* It was already on this workqueue. */
1002 switch (dw->dw_state) {
1003 case DELAYED_WORK_IDLE:
1004 case DELAYED_WORK_SCHEDULED:
1005 case DELAYED_WORK_RESCHEDULED:
1006 /* On the queue or already scheduled. Leave it. */
1007 newly_queued = false;
1008 break;
1009 case DELAYED_WORK_CANCELLED:
1010 /*
1011 * Scheduled and the callout began, but it was
1012 * cancelled. Reschedule it.
1013 */
1014 if (ticks == 0) {
1015 dw->dw_state = DELAYED_WORK_SCHEDULED;
1016 SDT_PROBE2(sdt, linux, work, queue,
1017 &dw->work, wq);
1018 } else {
1019 dw->dw_state = DELAYED_WORK_RESCHEDULED;
1020 dw->dw_resched = MIN(INT_MAX, ticks);
1021 SDT_PROBE3(sdt, linux, work, schedule,
1022 dw, wq, ticks);
1023 }
1024 newly_queued = true;
1025 break;
1026 default:
1027 panic("invalid delayed work state: %d",
1028 dw->dw_state);
1029 }
1030 }
1031 mutex_exit(&wq->wq_lock);
1032
1033 return newly_queued;
1034 }
1035
1036 /*
1037 * mod_delayed_work(wq, dw, ticks)
1038 *
1039 * Schedule dw to run after ticks. If scheduled or queued,
1040 * reschedule. If ticks == 0, run without delay.
1041 *
1042 * True if it modified the timer of an already scheduled work,
1043 * false if it newly scheduled the work.
1044 */
1045 bool
mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dw,
    unsigned long ticks)
1048 {
1049 bool timer_modified;
1050
1051 mutex_enter(&wq->wq_lock);
1052 if (acquire_work(&dw->work, wq)) {
1053 /*
1054 * It wasn't on any workqueue at all. Schedule it to
1055 * run on this one.
1056 */
1057 KASSERT(dw->dw_state == DELAYED_WORK_IDLE);
1058 if (ticks == 0) {
1059 /*
1060 * Run immediately: put it on the queue and
1061 * signal the worker thread.
1062 */
1063 TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1064 work_entry);
1065 cv_broadcast(&wq->wq_cv);
1066 SDT_PROBE2(sdt, linux, work, queue, &dw->work, wq);
1067 } else {
1068 /*
1069 * Initialize a callout and schedule to run
1070 * after a delay.
1071 */
1072 dw_callout_init(wq, dw);
1073 callout_schedule(&dw->dw_callout, MIN(INT_MAX, ticks));
1074 SDT_PROBE3(sdt, linux, work, schedule, dw, wq, ticks);
1075 }
1076 timer_modified = false;
1077 } else {
1078 /* It was already on this workqueue. */
1079 switch (dw->dw_state) {
1080 case DELAYED_WORK_IDLE:
1081 /* On the queue. */
1082 if (ticks == 0) {
1083 /* Leave it be. */
1084 SDT_PROBE2(sdt, linux, work, cancel,
1085 &dw->work, wq);
1086 SDT_PROBE2(sdt, linux, work, queue,
1087 &dw->work, wq);
1088 } else {
1089 /* Remove from the queue and schedule. */
1090 TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1091 work_entry);
1092 dw_callout_init(wq, dw);
1093 callout_schedule(&dw->dw_callout,
1094 MIN(INT_MAX, ticks));
1095 SDT_PROBE2(sdt, linux, work, cancel,
1096 &dw->work, wq);
1097 SDT_PROBE3(sdt, linux, work, schedule,
1098 dw, wq, ticks);
1099 }
1100 timer_modified = true;
1101 break;
1102 case DELAYED_WORK_SCHEDULED:
1103 /*
1104 * It is scheduled to run after a delay. Try
1105 * to stop it and reschedule it; if we can't,
1106 * either reschedule it or cancel it to put it
1107 * on the queue, and inform the callout.
1108 */
1109 if (callout_stop(&dw->dw_callout)) {
1110 /* Can't stop, callout has begun. */
1111 if (ticks == 0) {
1112 /*
1113 * We don't actually need to do
1114 * anything. The callout will
1115 * queue it as soon as it gets
1116 * the lock.
1117 */
1118 SDT_PROBE2(sdt, linux, work, cancel,
1119 &dw->work, wq);
1120 SDT_PROBE2(sdt, linux, work, queue,
1121 &dw->work, wq);
1122 } else {
1123 /* Ask the callout to reschedule. */
1124 dw->dw_state = DELAYED_WORK_RESCHEDULED;
1125 dw->dw_resched = MIN(INT_MAX, ticks);
1126 SDT_PROBE2(sdt, linux, work, cancel,
1127 &dw->work, wq);
1128 SDT_PROBE3(sdt, linux, work, schedule,
1129 dw, wq, ticks);
1130 }
1131 } else {
1132 /* We stopped the callout before it began. */
1133 if (ticks == 0) {
1134 /*
1135 * Run immediately: destroy the
1136 * callout, put it on the
1137 * queue, and signal the worker
1138 * thread.
1139 */
1140 dw_callout_destroy(wq, dw);
1141 TAILQ_INSERT_TAIL(&wq->wq_dqueue,
1142 &dw->work, work_entry);
1143 cv_broadcast(&wq->wq_cv);
1144 SDT_PROBE2(sdt, linux, work, cancel,
1145 &dw->work, wq);
1146 SDT_PROBE2(sdt, linux, work, queue,
1147 &dw->work, wq);
1148 } else {
1149 /*
1150 * Reschedule the callout. No
1151 * state change.
1152 */
1153 callout_schedule(&dw->dw_callout,
1154 MIN(INT_MAX, ticks));
1155 SDT_PROBE2(sdt, linux, work, cancel,
1156 &dw->work, wq);
1157 SDT_PROBE3(sdt, linux, work, schedule,
1158 dw, wq, ticks);
1159 }
1160 }
1161 timer_modified = true;
1162 break;
1163 case DELAYED_WORK_RESCHEDULED:
1164 /*
1165 * Someone rescheduled it after the callout
1166 * started but before the poor thing even had a
1167 * chance to acquire the lock.
1168 */
1169 if (ticks == 0) {
1170 /*
1171 * We can just switch back to
1172 * DELAYED_WORK_SCHEDULED so that the
1173 * callout will queue the work as soon
1174 * as it gets the lock.
1175 */
1176 dw->dw_state = DELAYED_WORK_SCHEDULED;
1177 dw->dw_resched = -1;
1178 SDT_PROBE2(sdt, linux, work, cancel,
1179 &dw->work, wq);
1180 SDT_PROBE2(sdt, linux, work, queue,
1181 &dw->work, wq);
1182 } else {
1183 /* Change the rescheduled time. */
1184 dw->dw_resched = ticks;
1185 SDT_PROBE2(sdt, linux, work, cancel,
1186 &dw->work, wq);
1187 SDT_PROBE3(sdt, linux, work, schedule,
1188 dw, wq, ticks);
1189 }
1190 timer_modified = true;
1191 break;
1192 case DELAYED_WORK_CANCELLED:
1193 /*
1194 * Someone cancelled it after the callout
1195 * started but before the poor thing even had a
1196 * chance to acquire the lock.
1197 */
1198 if (ticks == 0) {
1199 /*
1200 * We can just switch back to
1201 * DELAYED_WORK_SCHEDULED so that the
1202 * callout will queue the work as soon
1203 * as it gets the lock.
1204 */
1205 dw->dw_state = DELAYED_WORK_SCHEDULED;
1206 SDT_PROBE2(sdt, linux, work, queue,
1207 &dw->work, wq);
1208 } else {
1209 /* Ask it to reschedule. */
1210 dw->dw_state = DELAYED_WORK_RESCHEDULED;
1211 dw->dw_resched = MIN(INT_MAX, ticks);
1212 SDT_PROBE3(sdt, linux, work, schedule,
1213 dw, wq, ticks);
1214 }
1215 timer_modified = false;
1216 break;
1217 default:
1218 panic("invalid delayed work state: %d", dw->dw_state);
1219 }
1220 }
1221 mutex_exit(&wq->wq_lock);
1222
1223 return timer_modified;
1224 }
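
/*
 * Illustrative sketch (not part of the original source; sc_wq and
 * sc_idle are hypothetical): mod_delayed_work works as a
 * watchdog/debounce timer.  Each call pushes the deadline back, so
 * the work runs only after activity stops for a full second:
 *
 *	mod_delayed_work(sc->sc_wq, &sc->sc_idle, mstohz(1000));
 *
 * Unlike queue_delayed_work, this reschedules even if the work is
 * already scheduled; it returns true when an existing timer was
 * modified.
 */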
1225
1226 /*
1227 * cancel_delayed_work(dw)
1228 *
1229 * If work was scheduled or queued, remove it from the schedule or
1230 * queue and return true. If work was not scheduled or queued,
1231 * return false. Note that work may already be running; if it
1232 * hasn't been rescheduled or requeued, then cancel_delayed_work
1233 * will return false, and either way, cancel_delayed_work will NOT
1234 * wait for the work to complete.
1235 */
1236 bool
cancel_delayed_work(struct delayed_work *dw)
1238 {
1239 struct workqueue_struct *wq;
1240 bool cancelled_p;
1241
1242 /* If there's no workqueue, nothing to cancel. */
1243 if ((wq = work_queue(&dw->work)) == NULL)
1244 return false;
1245
1246 mutex_enter(&wq->wq_lock);
1247 if (__predict_false(work_queue(&dw->work) != wq)) {
1248 cancelled_p = false;
1249 } else {
1250 switch (dw->dw_state) {
1251 case DELAYED_WORK_IDLE:
1252 /*
1253 * It is either on the queue or already running
1254 * or both.
1255 */
1256 if (work_claimed(&dw->work, wq)) {
1257 /* On the queue. Remove and release. */
1258 TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1259 work_entry);
1260 SDT_PROBE2(sdt, linux, work, cancel,
1261 &dw->work, wq);
1262 release_work(&dw->work, wq);
1263 /* Can't dereference dw after this point. */
1264 cancelled_p = true;
1265 } else {
1266 /* Not on the queue, so didn't cancel. */
1267 cancelled_p = false;
1268 }
1269 break;
1270 case DELAYED_WORK_SCHEDULED:
1271 /*
1272 * If it is scheduled, mark it cancelled and
1273 * try to stop the callout before it starts.
1274 *
1275 * If it's too late and the callout has already
1276 * begun to execute, tough.
1277 *
1278 * If we stopped the callout before it started,
1279 * however, then destroy the callout and
1280 * dissociate it from the workqueue ourselves.
1281 */
1282 dw->dw_state = DELAYED_WORK_CANCELLED;
1283 cancelled_p = true;
1284 SDT_PROBE2(sdt, linux, work, cancel, &dw->work, wq);
1285 if (!callout_stop(&dw->dw_callout))
1286 cancel_delayed_work_done(wq, dw);
1287 break;
1288 case DELAYED_WORK_RESCHEDULED:
1289 /*
1290 * If it is being rescheduled, the callout has
1291 * already fired. We must ask it to cancel.
1292 */
1293 dw->dw_state = DELAYED_WORK_CANCELLED;
1294 dw->dw_resched = -1;
1295 cancelled_p = true;
1296 SDT_PROBE2(sdt, linux, work, cancel, &dw->work, wq);
1297 break;
1298 case DELAYED_WORK_CANCELLED:
1299 /*
1300 * If it is being cancelled, the callout has
1301 * already fired. There is nothing more for us
1302 * to do. Someone else claims credit for
1303 * cancelling it.
1304 */
1305 cancelled_p = false;
1306 break;
1307 default:
1308 panic("invalid delayed work state: %d",
1309 dw->dw_state);
1310 }
1311 }
1312 mutex_exit(&wq->wq_lock);
1313
1314 return cancelled_p;
1315 }
1316
1317 /*
1318 * cancel_delayed_work_sync(dw)
1319 *
1320 * If work was scheduled or queued, remove it from the schedule or
1321 * queue and return true. If work was not scheduled or queued,
1322 * return false. Note that work may already be running; if it
1323 * hasn't been rescheduled or requeued, then cancel_delayed_work
1324 * will return false; either way, wait for it to complete.
1325 */
1326 bool
cancel_delayed_work_sync(struct delayed_work *dw)
1328 {
1329 struct workqueue_struct *wq;
1330 bool cancelled_p;
1331
1332 /* If there's no workqueue, nothing to cancel. */
1333 if ((wq = work_queue(&dw->work)) == NULL)
1334 return false;
1335
1336 mutex_enter(&wq->wq_lock);
1337 if (__predict_false(work_queue(&dw->work) != wq)) {
1338 cancelled_p = false;
1339 } else {
1340 switch (dw->dw_state) {
1341 case DELAYED_WORK_IDLE:
1342 /*
1343 * It is either on the queue or already running
1344 * or both.
1345 */
1346 if (work_claimed(&dw->work, wq)) {
1347 /* On the queue. Remove and release. */
1348 TAILQ_REMOVE(&wq->wq_dqueue, &dw->work,
1349 work_entry);
1350 SDT_PROBE2(sdt, linux, work, cancel,
1351 &dw->work, wq);
1352 release_work(&dw->work, wq);
1353 /* Can't dereference dw after this point. */
1354 cancelled_p = true;
1355 } else {
1356 /* Not on the queue, so didn't cancel. */
1357 cancelled_p = false;
1358 }
1359 /* If it's still running, wait for it to complete. */
1360 if (wq->wq_current_work == &dw->work)
1361 wait_for_current_work(&dw->work, wq);
1362 break;
1363 case DELAYED_WORK_SCHEDULED:
1364 /*
1365 * If it is scheduled, mark it cancelled and
1366 * try to stop the callout before it starts.
1367 *
1368 * If it's too late and the callout has already
1369 * begun to execute, we must wait for it to
1370 * complete. But we got in soon enough to ask
1371 * the callout not to run, so we successfully
1372 * cancelled it in that case.
1373 *
1374 * If we stopped the callout before it started,
1375 * then we must destroy the callout and
1376 * dissociate it from the workqueue ourselves.
1377 */
1378 dw->dw_state = DELAYED_WORK_CANCELLED;
1379 SDT_PROBE2(sdt, linux, work, cancel, &dw->work, wq);
1380 if (!callout_halt(&dw->dw_callout, &wq->wq_lock))
1381 cancel_delayed_work_done(wq, dw);
1382 cancelled_p = true;
1383 break;
1384 case DELAYED_WORK_RESCHEDULED:
1385 /*
1386 * If it is being rescheduled, the callout has
1387 * already fired. We must ask it to cancel and
1388 * wait for it to complete.
1389 */
1390 dw->dw_state = DELAYED_WORK_CANCELLED;
1391 dw->dw_resched = -1;
1392 SDT_PROBE2(sdt, linux, work, cancel, &dw->work, wq);
1393 (void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1394 cancelled_p = true;
1395 break;
1396 case DELAYED_WORK_CANCELLED:
1397 /*
1398 * If it is being cancelled, the callout has
1399 * already fired. We need only wait for it to
1400 * complete. Someone else, however, claims
1401 * credit for cancelling it.
1402 */
1403 (void)callout_halt(&dw->dw_callout, &wq->wq_lock);
1404 cancelled_p = false;
1405 break;
1406 default:
1407 panic("invalid delayed work state: %d",
1408 dw->dw_state);
1409 }
1410 }
1411 mutex_exit(&wq->wq_lock);
1412
1413 return cancelled_p;
1414 }
1415
1416 /*
1417 * Flush
1418 */
1419
1420 /*
1421 * flush_scheduled_work()
1422 *
1423 * Wait for all work queued on system_wq to complete. This does
1424 * not include delayed work.
1425 */
1426 void
flush_scheduled_work(void)
1428 {
1429
1430 flush_workqueue(system_wq);
1431 }
1432
1433 struct flush_work {
1434 kmutex_t fw_lock;
1435 kcondvar_t fw_cv;
1436 struct work_struct fw_work;
1437 bool fw_done;
1438 };
1439
1440 static void
flush_work_cb(struct work_struct *work)
1442 {
1443 struct flush_work *fw = container_of(work, struct flush_work, fw_work);
1444
1445 mutex_enter(&fw->fw_lock);
1446 fw->fw_done = true;
1447 cv_broadcast(&fw->fw_cv);
1448 mutex_exit(&fw->fw_lock);
1449 }
1450
1451 /*
1452 * flush_workqueue(wq)
1453 *
1454 * Wait for all work queued on wq to complete. This does not
1455 * include delayed work.
1456 */
1457 void
flush_workqueue(struct workqueue_struct *wq)
1459 {
1460 struct flush_work fw;
1461
1462 if (lwp_getspecific(workqueue_key) == wq) {
1463 SDT_PROBE1(sdt, linux, work, flush__self, wq);
1464 return;
1465 }
1466
1467 mutex_init(&fw.fw_lock, MUTEX_DEFAULT, IPL_VM);
1468 cv_init(&fw.fw_cv, "lxwqflsh");
1469 INIT_WORK(&fw.fw_work, &flush_work_cb);
1470 fw.fw_done = false;
1471
1472 SDT_PROBE1(sdt, linux, work, flush__start, wq);
1473 queue_work(wq, &fw.fw_work);
1474
1475 mutex_enter(&fw.fw_lock);
1476 while (!fw.fw_done)
1477 cv_wait(&fw.fw_cv, &fw.fw_lock);
1478 mutex_exit(&fw.fw_lock);
1479 SDT_PROBE1(sdt, linux, work, flush__done, wq);
1480
1481 KASSERT(fw.fw_done);
1482 /* no DESTROY_WORK */
1483 cv_destroy(&fw.fw_cv);
1484 mutex_destroy(&fw.fw_lock);
1485 }
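
/*
 * Illustrative sketch (not part of the original source; the pmf
 * suspend handler and sc_wq are hypothetical): flushing lets already
 * queued work finish without tearing the queue down, e.g. on suspend:
 *
 *	static bool
 *	example_suspend(device_t self, const pmf_qual_t *qual)
 *	{
 *		struct example_softc *sc = device_private(self);
 *
 *		flush_workqueue(sc->sc_wq);
 *		return true;
 *	}
 *
 * Note that flush_workqueue returns immediately when called from the
 * workqueue's own worker thread, and it does not wait for delayed
 * work whose callout has not yet queued it.
 */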
1486
1487 /*
1488 * drain_workqueue(wq)
1489 *
1490 * Repeatedly flush wq until there is no more work.
1491 */
1492 void
drain_workqueue(struct workqueue_struct *wq)
1494 {
1495 unsigned ntries = 0;
1496 bool done;
1497
1498 do {
1499 if (ntries++ == 10 || (ntries % 100) == 0)
			printf("linux workqueue %s"
			    ": still clogged after %u flushes\n",
			    wq->wq_name, ntries);
1503 flush_workqueue(wq);
1504 mutex_enter(&wq->wq_lock);
1505 done = wq->wq_current_work == NULL;
1506 done &= TAILQ_EMPTY(&wq->wq_queue);
1507 done &= TAILQ_EMPTY(&wq->wq_dqueue);
1508 mutex_exit(&wq->wq_lock);
1509 } while (!done);
1510 }
1511
1512 /*
1513 * flush_work(work)
1514 *
1515 * If work is queued or currently executing, wait for it to
1516 * complete.
1517 *
1518 * Return true if we waited to flush it, false if it was already
1519 * idle.
1520 */
1521 bool
flush_work(struct work_struct *work)
1523 {
1524 struct workqueue_struct *wq;
1525
1526 /* If there's no workqueue, nothing to flush. */
1527 if ((wq = work_queue(work)) == NULL)
1528 return false;
1529
1530 flush_workqueue(wq);
1531 return true;
1532 }
1533
1534 /*
1535 * flush_delayed_work(dw)
1536 *
1537 * If dw is scheduled to run after a delay, queue it immediately
1538 * instead. Then, if dw is queued or currently executing, wait
1539 * for it to complete.
1540 */
1541 bool
flush_delayed_work(struct delayed_work *dw)
1543 {
1544 struct workqueue_struct *wq;
1545 bool waited = false;
1546
1547 /* If there's no workqueue, nothing to flush. */
1548 if ((wq = work_queue(&dw->work)) == NULL)
1549 return false;
1550
1551 mutex_enter(&wq->wq_lock);
1552 if (__predict_false(work_queue(&dw->work) != wq)) {
1553 /*
1554 * Moved off the queue already (and possibly to another
1555 * queue, though that would be ill-advised), so it must
1556 * have completed, and we have nothing more to do.
1557 */
1558 waited = false;
1559 } else {
1560 switch (dw->dw_state) {
1561 case DELAYED_WORK_IDLE:
1562 /*
1563 * It has a workqueue assigned and the callout
1564 * is idle, so it must be in progress or on the
1565 * queue. In that case, we'll wait for it to
1566 * complete.
1567 */
1568 break;
1569 case DELAYED_WORK_SCHEDULED:
1570 case DELAYED_WORK_RESCHEDULED:
1571 case DELAYED_WORK_CANCELLED:
1572 /*
1573 * The callout is scheduled, and may have even
1574 * started. Mark it as scheduled so that if
1575 * the callout has fired it will queue the work
1576 * itself. Try to stop the callout -- if we
1577 * can, queue the work now; if we can't, wait
1578 * for the callout to complete, which entails
1579 * queueing it.
1580 */
1581 dw->dw_state = DELAYED_WORK_SCHEDULED;
1582 if (!callout_halt(&dw->dw_callout, &wq->wq_lock)) {
1583 /*
1584 * We stopped it before it ran. No
1585 * state change in the interim is
1586 * possible. Destroy the callout and
1587 * queue it ourselves.
1588 */
1589 KASSERT(dw->dw_state ==
1590 DELAYED_WORK_SCHEDULED);
1591 dw_callout_destroy(wq, dw);
1592 TAILQ_INSERT_TAIL(&wq->wq_dqueue, &dw->work,
1593 work_entry);
1594 cv_broadcast(&wq->wq_cv);
1595 SDT_PROBE2(sdt, linux, work, queue,
1596 &dw->work, wq);
1597 }
1598 break;
1599 default:
1600 panic("invalid delayed work state: %d", dw->dw_state);
1601 }
1602 /*
1603 * Waiting for the whole queue to flush is overkill,
1604 * but doesn't hurt.
1605 */
1606 mutex_exit(&wq->wq_lock);
1607 flush_workqueue(wq);
1608 mutex_enter(&wq->wq_lock);
1609 waited = true;
1610 }
1611 mutex_exit(&wq->wq_lock);
1612
1613 return waited;
1614 }
1615
1616 /*
1617 * delayed_work_pending(dw)
1618 *
1619 * True if dw is currently scheduled to execute, false if not.
1620 */
1621 bool
delayed_work_pending(const struct delayed_work *dw)
1623 {
1624
1625 return work_pending(&dw->work);
1626 }
1627
1628 /*
1629 * INIT_RCU_WORK(rw, fn)
1630 *
1631 * Initialize rw for use with a workqueue to call fn in a worker
1632 * thread after an RCU grace period. There is no corresponding
1633 * destruction operation.
1634 */
1635 void
INIT_RCU_WORK(struct rcu_work *rw, void (*fn)(struct work_struct *))
1637 {
1638
1639 INIT_WORK(&rw->work, fn);
1640 }
1641
1642 static void
queue_rcu_work_cb(struct rcu_head *r)
1644 {
1645 struct rcu_work *rw = container_of(r, struct rcu_work, rw_rcu);
1646 struct workqueue_struct *wq = work_queue(&rw->work);
1647
1648 mutex_enter(&wq->wq_lock);
1649 KASSERT(work_pending(&rw->work));
1650 KASSERT(work_queue(&rw->work) == wq);
1651 destroy_rcu_head(&rw->rw_rcu);
1652 TAILQ_REMOVE(&wq->wq_rcu, &rw->work, work_entry);
1653 TAILQ_INSERT_TAIL(&wq->wq_queue, &rw->work, work_entry);
1654 cv_broadcast(&wq->wq_cv);
1655 SDT_PROBE2(sdt, linux, work, queue, &rw->work, wq);
1656 mutex_exit(&wq->wq_lock);
1657 }
1658
1659 /*
1660 * queue_rcu_work(wq, rw)
1661 *
1662 * Schedule rw to run on wq after an RCU grace period.
1663 */
1664 void
queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rw)
1666 {
1667
1668 mutex_enter(&wq->wq_lock);
1669 if (acquire_work(&rw->work, wq)) {
1670 init_rcu_head(&rw->rw_rcu);
1671 SDT_PROBE2(sdt, linux, work, rcu, rw, wq);
1672 TAILQ_INSERT_TAIL(&wq->wq_rcu, &rw->work, work_entry);
1673 call_rcu(&rw->rw_rcu, &queue_rcu_work_cb);
1674 }
1675 mutex_exit(&wq->wq_lock);
1676 }
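
/*
 * Illustrative sketch (not part of the original source; example_node,
 * n_death, and example_reclaim are hypothetical): deferring a free
 * until after an RCU grace period, then doing it in thread context:
 *
 *	static void
 *	example_reclaim(struct work_struct *work)
 *	{
 *		struct example_node *n = container_of(work,
 *		    struct example_node, n_death.work);
 *
 *		kmem_free(n, sizeof(*n));
 *	}
 *
 *	INIT_RCU_WORK(&n->n_death, example_reclaim);
 *	queue_rcu_work(system_wq, &n->n_death);
 */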
1677