xref: /netbsd-src/sys/netinet/wqinput.c (revision b578a8edb05a42ec854bcb5ba6648341cd9ea956)
1 /*	$NetBSD: wqinput.c,v 1.8 2020/02/07 12:35:33 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifdef _KERNEL_OPT
30 #include "opt_net_mpsafe.h"
31 #endif
32 
33 #include <sys/param.h>
34 #include <sys/kmem.h>
35 #include <sys/mbuf.h>
36 #include <sys/protosw.h>
37 #include <sys/socketvar.h>
38 #include <sys/syslog.h>
39 #include <sys/workqueue.h>
40 #include <sys/atomic.h>
41 #include <sys/queue.h>
42 #include <sys/percpu.h>
43 #include <sys/sysctl.h>
44 #include <sys/xcall.h>
45 
46 #include <net/if.h>
47 #include <netinet/wqinput.h>
48 
49 #define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN
50 
/*
 * One deferred pr_input invocation: the packet plus the (off, proto)
 * arguments that will be passed to wqi_input from the workqueue.
 */
struct wqinput_work {
	struct mbuf	*ww_mbuf;	/* packet to deliver */
	int		ww_off;		/* "off" argument for wqi_input */
	int		ww_proto;	/* "proto" argument for wqi_input */
	struct wqinput_work *ww_next;	/* next item in the per-CPU list */
};
57 
/*
 * Per-CPU list of pending wqinput_work items, hand-rolled singly-linked
 * list with head/tail pointers.
 */
struct wqinput_worklist {
	/*
	 * XXX: TAILQ cannot be used because TAILQ_INIT memories the address
	 * of percpu data while percpu(9) may move percpu data during bootup.
	 */
	struct wqinput_work *wwl_head;	/* oldest pending item */
	struct wqinput_work *wwl_tail;	/* newest pending item */
	unsigned int	wwl_len;	/* list length, bounded by WQINPUT_LIST_MAXLEN */
	uint64_t	wwl_dropped;	/* packets dropped due to a full list */
	struct work	wwl_work;	/* workqueue handle for this CPU */
	bool		wwl_wq_is_active; /* true while wwl_work is enqueued */
};
70 
/*
 * A workqueue-based asynchronous protocol input handler; one instance
 * per protocol created by wqinput_create().
 */
struct wqinput {
	struct workqueue *wqi_wq;	/* per-CPU MP-safe workqueue */
	struct pool	wqi_work_pool;	/* pool of struct wqinput_work */
	struct percpu	*wqi_worklists; /* struct wqinput_worklist */
	void    	(*wqi_input)(struct mbuf *, int, int); /* real pr_input */
};
77 
78 static void wqinput_work(struct work *, void *);
79 static void wqinput_sysctl_setup(const char *, struct wqinput *);
80 
81 static void
wqinput_drops(void * p,void * arg,struct cpu_info * ci __unused)82 wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
83 {
84 	struct wqinput_worklist **const wwlp = p;
85 	struct wqinput_worklist *const wwl = *wwlp;
86 	uint64_t *sum = arg;
87 
88 	*sum += wwl->wwl_dropped;
89 }
90 
91 static int
wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)92 wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
93 {
94 	struct sysctlnode node;
95 	struct wqinput *wqi;
96 	uint64_t sum = 0;
97 	int error;
98 
99 	node = *rnode;
100 	wqi = node.sysctl_data;
101 
102 	percpu_foreach_xcall(wqi->wqi_worklists, XC_HIGHPRI_IPL(IPL_SOFTNET),
103 	    wqinput_drops, &sum);
104 
105 	node.sysctl_data = &sum;
106 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
107 	if (error != 0 || newp == NULL)
108 		return error;
109 
110 	return 0;
111 }
112 
113 static void
wqinput_sysctl_setup(const char * name,struct wqinput * wqi)114 wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
115 {
116 	const struct sysctlnode *cnode, *rnode;
117 	int error;
118 
119 	error = sysctl_createv(NULL, 0, NULL, &rnode,
120 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
121 	    SYSCTL_DESCR("workqueue-based pr_input controls"),
122 	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
123 	if (error != 0)
124 		goto bad;
125 
126 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
127 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
128 	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
129 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
130 	if (error != 0)
131 		goto bad;
132 
133 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
134 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
135 	    SYSCTL_DESCR("wqinput input queue controls"),
136 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
137 	if (error != 0)
138 		goto bad;
139 
140 	error = sysctl_createv(NULL, 0, &rnode, &cnode,
141 	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
142 	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
143 	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
144 	if (error != 0)
145 		goto bad;
146 
147 	return;
148 bad:
149 	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
150 	    __func__, name);
151 	return;
152 }
153 
154 static struct wqinput_worklist *
wqinput_percpu_getref(percpu_t * pc)155 wqinput_percpu_getref(percpu_t *pc)
156 {
157 
158 	return *(struct wqinput_worklist **)percpu_getref(pc);
159 }
160 
161 static void
wqinput_percpu_putref(percpu_t * pc)162 wqinput_percpu_putref(percpu_t *pc)
163 {
164 
165 	percpu_putref(pc);
166 }
167 
168 static void
wqinput_percpu_init_cpu(void * p,void * arg __unused,struct cpu_info * ci __unused)169 wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
170 {
171 	struct wqinput_worklist **wwlp = p;
172 
173 	*wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
174 }
175 
176 struct wqinput *
wqinput_create(const char * name,void (* func)(struct mbuf *,int,int))177 wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
178 {
179 	struct wqinput *wqi;
180 	int error;
181 	char namebuf[32];
182 
183 	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
184 
185 	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
186 
187 	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
188 	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
189 	if (error != 0)
190 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
191 	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
192 	    name, NULL, IPL_SOFTNET);
193 	wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
194 	    wqinput_percpu_init_cpu, NULL, NULL);
195 	wqi->wqi_input = func;
196 
197 	wqinput_sysctl_setup(name, wqi);
198 
199 	return wqi;
200 }
201 
202 static struct wqinput_work *
wqinput_work_get(struct wqinput_worklist * wwl)203 wqinput_work_get(struct wqinput_worklist *wwl)
204 {
205 	struct wqinput_work *work;
206 
207 	/* Must be called at IPL_SOFTNET */
208 
209 	work = wwl->wwl_head;
210 	if (work != NULL) {
211 		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
212 		wwl->wwl_len--;
213 		wwl->wwl_head = work->ww_next;
214 		work->ww_next = NULL;
215 
216 		if (wwl->wwl_head == NULL)
217 			wwl->wwl_tail = NULL;
218 	} else {
219 		KASSERT(wwl->wwl_len == 0);
220 	}
221 
222 	return work;
223 }
224 
225 static void
wqinput_work(struct work * wk,void * arg)226 wqinput_work(struct work *wk, void *arg)
227 {
228 	struct wqinput *wqi = arg;
229 	struct wqinput_work *work;
230 	struct wqinput_worklist *wwl;
231 	int s;
232 
233 	/* Users expect to run at IPL_SOFTNET */
234 	s = splsoftnet();
235 	/* This also prevents LWP migrations between CPUs */
236 	wwl = wqinput_percpu_getref(wqi->wqi_worklists);
237 
238 	/* We can allow enqueuing another work at this point */
239 	wwl->wwl_wq_is_active = false;
240 
241 	while ((work = wqinput_work_get(wwl)) != NULL) {
242 		mutex_enter(softnet_lock);
243 		KERNEL_LOCK_UNLESS_NET_MPSAFE();
244 		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
245 		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
246 		mutex_exit(softnet_lock);
247 
248 		pool_put(&wqi->wqi_work_pool, work);
249 	}
250 
251 	wqinput_percpu_putref(wqi->wqi_worklists);
252 	splx(s);
253 }
254 
255 static void
wqinput_work_put(struct wqinput_worklist * wwl,struct wqinput_work * work)256 wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
257 {
258 
259 	if (wwl->wwl_tail != NULL) {
260 		wwl->wwl_tail->ww_next = work;
261 	} else {
262 		wwl->wwl_head = work;
263 	}
264 	wwl->wwl_tail = work;
265 	wwl->wwl_len++;
266 }
267 
268 void
wqinput_input(struct wqinput * wqi,struct mbuf * m,int off,int proto)269 wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
270 {
271 	struct wqinput_work *work;
272 	struct wqinput_worklist *wwl;
273 
274 	wwl = wqinput_percpu_getref(wqi->wqi_worklists);
275 
276 	/* Prevent too much work and mbuf from being queued */
277 	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
278 		wwl->wwl_dropped++;
279 		m_freem(m);
280 		goto out;
281 	}
282 
283 	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
284 	if (work == NULL) {
285 		wwl->wwl_dropped++;
286 		m_freem(m);
287 		goto out;
288 	}
289 	work->ww_mbuf = m;
290 	work->ww_off = off;
291 	work->ww_proto = proto;
292 	work->ww_next = NULL;
293 
294 	wqinput_work_put(wwl, work);
295 
296 	/* Avoid enqueuing another work when one is already enqueued */
297 	if (wwl->wwl_wq_is_active)
298 		goto out;
299 	wwl->wwl_wq_is_active = true;
300 
301 	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
302 out:
303 	wqinput_percpu_putref(wqi->wqi_worklists);
304 }
305