/*	$NetBSD: wqinput.c,v 1.8 2020/02/07 12:35:33 thorpej Exp $	*/

/*-
 * Copyright (c) 2017 Internet Initiative Japan Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <sys/queue.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <net/if.h>
#include <netinet/wqinput.h>

/* Maximum number of entries queued on a single worklist before dropping. */
#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN

/*
 * One deferred input request: a packet together with the two integer
 * arguments that are later handed to the input routine.
 */
struct wqinput_work {
	struct mbuf	*ww_mbuf;	/* packet to deliver */
	int		ww_off;		/* 2nd argument of the input routine */
	int		ww_proto;	/* 3rd argument of the input routine */
	struct wqinput_work *ww_next;	/* next entry on the worklist */
};

5858973905Sozaki-r struct wqinput_worklist {
5958973905Sozaki-r /*
6058973905Sozaki-r * XXX: TAILQ cannot be used because TAILQ_INIT memories the address
6158973905Sozaki-r * of percpu data while percpu(9) may move percpu data during bootup.
6258973905Sozaki-r */
6358973905Sozaki-r struct wqinput_work *wwl_head;
6458973905Sozaki-r struct wqinput_work *wwl_tail;
6558973905Sozaki-r unsigned int wwl_len;
66da99216aSmsaitoh uint64_t wwl_dropped;
6758973905Sozaki-r struct work wwl_work;
6858973905Sozaki-r bool wwl_wq_is_active;
6958973905Sozaki-r };
7058973905Sozaki-r
7158973905Sozaki-r struct wqinput {
7258973905Sozaki-r struct workqueue *wqi_wq;
7358973905Sozaki-r struct pool wqi_work_pool;
7458973905Sozaki-r struct percpu *wqi_worklists; /* struct wqinput_worklist */
7558973905Sozaki-r void (*wqi_input)(struct mbuf *, int, int);
7658973905Sozaki-r };
7758973905Sozaki-r
/* Forward declarations of file-local functions. */
static void wqinput_work(struct work *, void *);
static void wqinput_sysctl_setup(const char *, struct wqinput *);

8158973905Sozaki-r static void
wqinput_drops(void * p,void * arg,struct cpu_info * ci __unused)8258973905Sozaki-r wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
8358973905Sozaki-r {
8469653399Sozaki-r struct wqinput_worklist **const wwlp = p;
8569653399Sozaki-r struct wqinput_worklist *const wwl = *wwlp;
86da99216aSmsaitoh uint64_t *sum = arg;
8758973905Sozaki-r
8858973905Sozaki-r *sum += wwl->wwl_dropped;
8958973905Sozaki-r }
9058973905Sozaki-r
9158973905Sozaki-r static int
wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)9258973905Sozaki-r wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
9358973905Sozaki-r {
9458973905Sozaki-r struct sysctlnode node;
9558973905Sozaki-r struct wqinput *wqi;
96da99216aSmsaitoh uint64_t sum = 0;
9758973905Sozaki-r int error;
9858973905Sozaki-r
9958973905Sozaki-r node = *rnode;
10058973905Sozaki-r wqi = node.sysctl_data;
10158973905Sozaki-r
102*b578a8edSthorpej percpu_foreach_xcall(wqi->wqi_worklists, XC_HIGHPRI_IPL(IPL_SOFTNET),
103*b578a8edSthorpej wqinput_drops, &sum);
10458973905Sozaki-r
10558973905Sozaki-r node.sysctl_data = ∑
10658973905Sozaki-r error = sysctl_lookup(SYSCTLFN_CALL(&node));
10758973905Sozaki-r if (error != 0 || newp == NULL)
10858973905Sozaki-r return error;
10958973905Sozaki-r
11058973905Sozaki-r return 0;
11158973905Sozaki-r }
11258973905Sozaki-r
11358973905Sozaki-r static void
wqinput_sysctl_setup(const char * name,struct wqinput * wqi)11458973905Sozaki-r wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
11558973905Sozaki-r {
11658973905Sozaki-r const struct sysctlnode *cnode, *rnode;
11758973905Sozaki-r int error;
11858973905Sozaki-r
11958973905Sozaki-r error = sysctl_createv(NULL, 0, NULL, &rnode,
12058973905Sozaki-r CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
12158973905Sozaki-r SYSCTL_DESCR("workqueue-based pr_input controls"),
12258973905Sozaki-r NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
12358973905Sozaki-r if (error != 0)
12458973905Sozaki-r goto bad;
12558973905Sozaki-r
12658973905Sozaki-r error = sysctl_createv(NULL, 0, &rnode, &rnode,
12758973905Sozaki-r CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
12858973905Sozaki-r SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
12958973905Sozaki-r NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
13058973905Sozaki-r if (error != 0)
13158973905Sozaki-r goto bad;
13258973905Sozaki-r
13358973905Sozaki-r error = sysctl_createv(NULL, 0, &rnode, &rnode,
13458973905Sozaki-r CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
13558973905Sozaki-r SYSCTL_DESCR("wqinput input queue controls"),
13658973905Sozaki-r NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
13758973905Sozaki-r if (error != 0)
13858973905Sozaki-r goto bad;
13958973905Sozaki-r
14058973905Sozaki-r error = sysctl_createv(NULL, 0, &rnode, &cnode,
141da99216aSmsaitoh CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
14258973905Sozaki-r SYSCTL_DESCR("Total packets dropped due to full input queue"),
14358973905Sozaki-r wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
14458973905Sozaki-r if (error != 0)
14558973905Sozaki-r goto bad;
14658973905Sozaki-r
14758973905Sozaki-r return;
14858973905Sozaki-r bad:
14958973905Sozaki-r log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
15058973905Sozaki-r __func__, name);
15158973905Sozaki-r return;
15258973905Sozaki-r }
15358973905Sozaki-r
15469653399Sozaki-r static struct wqinput_worklist *
wqinput_percpu_getref(percpu_t * pc)15569653399Sozaki-r wqinput_percpu_getref(percpu_t *pc)
15669653399Sozaki-r {
15769653399Sozaki-r
15869653399Sozaki-r return *(struct wqinput_worklist **)percpu_getref(pc);
15969653399Sozaki-r }
16069653399Sozaki-r
16169653399Sozaki-r static void
wqinput_percpu_putref(percpu_t * pc)16269653399Sozaki-r wqinput_percpu_putref(percpu_t *pc)
16369653399Sozaki-r {
16469653399Sozaki-r
16569653399Sozaki-r percpu_putref(pc);
16669653399Sozaki-r }
16769653399Sozaki-r
16869653399Sozaki-r static void
wqinput_percpu_init_cpu(void * p,void * arg __unused,struct cpu_info * ci __unused)16969653399Sozaki-r wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
17069653399Sozaki-r {
17169653399Sozaki-r struct wqinput_worklist **wwlp = p;
17269653399Sozaki-r
17369653399Sozaki-r *wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
17469653399Sozaki-r }
17569653399Sozaki-r
17658973905Sozaki-r struct wqinput *
wqinput_create(const char * name,void (* func)(struct mbuf *,int,int))17758973905Sozaki-r wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
17858973905Sozaki-r {
17958973905Sozaki-r struct wqinput *wqi;
18058973905Sozaki-r int error;
18158973905Sozaki-r char namebuf[32];
18258973905Sozaki-r
18358973905Sozaki-r snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
18458973905Sozaki-r
18558973905Sozaki-r wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
18658973905Sozaki-r
18758973905Sozaki-r error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
18858973905Sozaki-r PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
18958973905Sozaki-r if (error != 0)
19058973905Sozaki-r panic("%s: workqueue_create failed (%d)\n", __func__, error);
19158973905Sozaki-r pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
1922c8d4f61Spara name, NULL, IPL_SOFTNET);
1934734f65cSriastradh wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
1944734f65cSriastradh wqinput_percpu_init_cpu, NULL, NULL);
19558973905Sozaki-r wqi->wqi_input = func;
19658973905Sozaki-r
19758973905Sozaki-r wqinput_sysctl_setup(name, wqi);
19858973905Sozaki-r
19958973905Sozaki-r return wqi;
20058973905Sozaki-r }
20158973905Sozaki-r
20258973905Sozaki-r static struct wqinput_work *
wqinput_work_get(struct wqinput_worklist * wwl)20358973905Sozaki-r wqinput_work_get(struct wqinput_worklist *wwl)
20458973905Sozaki-r {
20558973905Sozaki-r struct wqinput_work *work;
20658973905Sozaki-r
20758973905Sozaki-r /* Must be called at IPL_SOFTNET */
20858973905Sozaki-r
20958973905Sozaki-r work = wwl->wwl_head;
21058973905Sozaki-r if (work != NULL) {
21158973905Sozaki-r KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
21258973905Sozaki-r wwl->wwl_len--;
21358973905Sozaki-r wwl->wwl_head = work->ww_next;
21458973905Sozaki-r work->ww_next = NULL;
21558973905Sozaki-r
21658973905Sozaki-r if (wwl->wwl_head == NULL)
21758973905Sozaki-r wwl->wwl_tail = NULL;
21858973905Sozaki-r } else {
21958973905Sozaki-r KASSERT(wwl->wwl_len == 0);
22058973905Sozaki-r }
22158973905Sozaki-r
22258973905Sozaki-r return work;
22358973905Sozaki-r }
22458973905Sozaki-r
22558973905Sozaki-r static void
wqinput_work(struct work * wk,void * arg)22658973905Sozaki-r wqinput_work(struct work *wk, void *arg)
22758973905Sozaki-r {
22858973905Sozaki-r struct wqinput *wqi = arg;
22958973905Sozaki-r struct wqinput_work *work;
23058973905Sozaki-r struct wqinput_worklist *wwl;
23158973905Sozaki-r int s;
23258973905Sozaki-r
23358973905Sozaki-r /* Users expect to run at IPL_SOFTNET */
23458973905Sozaki-r s = splsoftnet();
23558973905Sozaki-r /* This also prevents LWP migrations between CPUs */
23669653399Sozaki-r wwl = wqinput_percpu_getref(wqi->wqi_worklists);
23758973905Sozaki-r
23858973905Sozaki-r /* We can allow enqueuing another work at this point */
23958973905Sozaki-r wwl->wwl_wq_is_active = false;
24058973905Sozaki-r
24158973905Sozaki-r while ((work = wqinput_work_get(wwl)) != NULL) {
24258973905Sozaki-r mutex_enter(softnet_lock);
243ef58cf2cSozaki-r KERNEL_LOCK_UNLESS_NET_MPSAFE();
24458973905Sozaki-r wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
245ef58cf2cSozaki-r KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
24658973905Sozaki-r mutex_exit(softnet_lock);
24758973905Sozaki-r
24858973905Sozaki-r pool_put(&wqi->wqi_work_pool, work);
24958973905Sozaki-r }
25058973905Sozaki-r
25169653399Sozaki-r wqinput_percpu_putref(wqi->wqi_worklists);
25258973905Sozaki-r splx(s);
25358973905Sozaki-r }
25458973905Sozaki-r
25558973905Sozaki-r static void
wqinput_work_put(struct wqinput_worklist * wwl,struct wqinput_work * work)25658973905Sozaki-r wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
25758973905Sozaki-r {
25858973905Sozaki-r
25958973905Sozaki-r if (wwl->wwl_tail != NULL) {
26058973905Sozaki-r wwl->wwl_tail->ww_next = work;
26158973905Sozaki-r } else {
26258973905Sozaki-r wwl->wwl_head = work;
26358973905Sozaki-r }
26458973905Sozaki-r wwl->wwl_tail = work;
26558973905Sozaki-r wwl->wwl_len++;
26658973905Sozaki-r }
26758973905Sozaki-r
26858973905Sozaki-r void
wqinput_input(struct wqinput * wqi,struct mbuf * m,int off,int proto)26958973905Sozaki-r wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
27058973905Sozaki-r {
27158973905Sozaki-r struct wqinput_work *work;
27258973905Sozaki-r struct wqinput_worklist *wwl;
27358973905Sozaki-r
27469653399Sozaki-r wwl = wqinput_percpu_getref(wqi->wqi_worklists);
27558973905Sozaki-r
27658973905Sozaki-r /* Prevent too much work and mbuf from being queued */
27758973905Sozaki-r if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
27858973905Sozaki-r wwl->wwl_dropped++;
27958973905Sozaki-r m_freem(m);
28058973905Sozaki-r goto out;
28158973905Sozaki-r }
28258973905Sozaki-r
28358973905Sozaki-r work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
284df67fa0aSozaki-r if (work == NULL) {
285df67fa0aSozaki-r wwl->wwl_dropped++;
286df67fa0aSozaki-r m_freem(m);
287df67fa0aSozaki-r goto out;
288df67fa0aSozaki-r }
28958973905Sozaki-r work->ww_mbuf = m;
29058973905Sozaki-r work->ww_off = off;
29158973905Sozaki-r work->ww_proto = proto;
29258973905Sozaki-r work->ww_next = NULL;
29358973905Sozaki-r
29458973905Sozaki-r wqinput_work_put(wwl, work);
29558973905Sozaki-r
29658973905Sozaki-r /* Avoid enqueuing another work when one is already enqueued */
29758973905Sozaki-r if (wwl->wwl_wq_is_active)
29858973905Sozaki-r goto out;
29958973905Sozaki-r wwl->wwl_wq_is_active = true;
30058973905Sozaki-r
30158973905Sozaki-r workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
30258973905Sozaki-r out:
30369653399Sozaki-r wqinput_percpu_putref(wqi->wqi_worklists);
30458973905Sozaki-r }
305