xref: /netbsd-src/sys/netinet/wqinput.c (revision deb6f0161a9109e7de9b519dc8dfb9478668dcdd)
1 /*	$NetBSD: wqinput.c,v 1.5 2018/08/10 07:20:59 msaitoh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifdef _KERNEL_OPT
30 #include "opt_net_mpsafe.h"
31 #endif
32 
33 #include <sys/param.h>
34 #include <sys/kmem.h>
35 #include <sys/mbuf.h>
36 #include <sys/protosw.h>
37 #include <sys/socketvar.h>
38 #include <sys/syslog.h>
39 #include <sys/workqueue.h>
40 #include <sys/atomic.h>
41 #include <sys/queue.h>
42 #include <sys/percpu.h>
43 #include <sys/sysctl.h>
44 
45 #include <net/if.h>
46 #include <netinet/wqinput.h>
47 
48 #define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN	/* per-CPU worklist length at which wqinput_input() starts dropping packets */
49 
50 struct wqinput_work {	/* One deferred input call, linked on a per-CPU worklist. */
51 	struct mbuf	*ww_mbuf;	/* packet passed as the first argument of the input function */
52 	int		ww_off;		/* second argument of the input function */
53 	int		ww_proto;	/* third argument of the input function */
54 	struct wqinput_work *ww_next;	/* next entry in the singly linked list, or NULL */
55 };
56 
57 struct wqinput_worklist {	/* Per-CPU FIFO of pending struct wqinput_work plus its bookkeeping. */
58 	/*
59 	 * XXX: TAILQ cannot be used because TAILQ_INIT remembers the address
60 	 * of percpu data while percpu(9) may move percpu data during bootup.
61 	 */
62 	struct wqinput_work *wwl_head;	/* oldest entry; removed first by wqinput_work_get() */
63 	struct wqinput_work *wwl_tail;	/* newest entry; wqinput_work_put() appends after it */
64 	unsigned int	wwl_len;	/* number of entries currently linked */
65 	uint64_t	wwl_dropped;	/* packets freed by wqinput_input() instead of being queued */
66 	struct work	wwl_work;	/* work item handed to workqueue_enqueue() */
67 	bool		wwl_wq_is_active;	/* set when wwl_work is enqueued, cleared when wqinput_work() starts */
68 };
69 
70 struct wqinput {	/* State of one workqueue-based input path; built by wqinput_create(). */
71 	struct workqueue *wqi_wq;	/* workqueue whose callback is wqinput_work() */
72 	struct pool	wqi_work_pool;	/* pool of struct wqinput_work entries */
73 	struct percpu	*wqi_worklists; /* per-CPU struct wqinput_worklist */
74 	void    	(*wqi_input)(struct mbuf *, int, int);	/* input function invoked for every dequeued entry */
75 };
76 
77 static void wqinput_work(struct work *, void *);	/* workqueue callback, referenced by wqinput_create() */
78 static void wqinput_sysctl_setup(const char *, struct wqinput *);	/* creates the sysctl nodes for one wqinput */
79 
80 static void
81 wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
82 {
83 	struct wqinput_worklist *const wwl = p;
84 	uint64_t *sum = arg;
85 
86 	*sum += wwl->wwl_dropped;
87 }
88 
89 static int
90 wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
91 {
92 	struct sysctlnode node;
93 	struct wqinput *wqi;
94 	uint64_t sum = 0;
95 	int error;
96 
97 	node = *rnode;
98 	wqi = node.sysctl_data;
99 
100 	percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum);
101 
102 	node.sysctl_data = &sum;
103 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
104 	if (error != 0 || newp == NULL)
105 		return error;
106 
107 	return 0;
108 }
109 
110 static void
111 wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
112 {
113 	const struct sysctlnode *cnode, *rnode;
114 	int error;
115 
116 	error = sysctl_createv(NULL, 0, NULL, &rnode,
117 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
118 	    SYSCTL_DESCR("workqueue-based pr_input controls"),
119 	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
120 	if (error != 0)
121 		goto bad;
122 
123 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
124 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
125 	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
126 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
127 	if (error != 0)
128 		goto bad;
129 
130 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
131 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
132 	    SYSCTL_DESCR("wqinput input queue controls"),
133 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
134 	if (error != 0)
135 		goto bad;
136 
137 	error = sysctl_createv(NULL, 0, &rnode, &cnode,
138 	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
139 	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
140 	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
141 	if (error != 0)
142 		goto bad;
143 
144 	return;
145 bad:
146 	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
147 	    __func__, name);
148 	return;
149 }
150 
151 struct wqinput *
152 wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
153 {
154 	struct wqinput *wqi;
155 	int error;
156 	char namebuf[32];
157 
158 	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
159 
160 	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
161 
162 	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
163 	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
164 	if (error != 0)
165 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
166 	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
167 	    name, NULL, IPL_SOFTNET);
168 	wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist));
169 	wqi->wqi_input = func;
170 
171 	wqinput_sysctl_setup(name, wqi);
172 
173 	return wqi;
174 }
175 
176 static struct wqinput_work *
177 wqinput_work_get(struct wqinput_worklist *wwl)
178 {
179 	struct wqinput_work *work;
180 
181 	/* Must be called at IPL_SOFTNET */
182 
183 	work = wwl->wwl_head;
184 	if (work != NULL) {
185 		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
186 		wwl->wwl_len--;
187 		wwl->wwl_head = work->ww_next;
188 		work->ww_next = NULL;
189 
190 		if (wwl->wwl_head == NULL)
191 			wwl->wwl_tail = NULL;
192 	} else {
193 		KASSERT(wwl->wwl_len == 0);
194 	}
195 
196 	return work;
197 }
198 
199 static void
200 wqinput_work(struct work *wk, void *arg)
201 {
202 	struct wqinput *wqi = arg;
203 	struct wqinput_work *work;
204 	struct wqinput_worklist *wwl;
205 	int s;
206 
207 	/* Users expect to run at IPL_SOFTNET */
208 	s = splsoftnet();
209 	/* This also prevents LWP migrations between CPUs */
210 	wwl = percpu_getref(wqi->wqi_worklists);
211 
212 	/* We can allow enqueuing another work at this point */
213 	wwl->wwl_wq_is_active = false;
214 
215 	while ((work = wqinput_work_get(wwl)) != NULL) {
216 		mutex_enter(softnet_lock);
217 		KERNEL_LOCK_UNLESS_NET_MPSAFE();
218 		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
219 		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
220 		mutex_exit(softnet_lock);
221 
222 		pool_put(&wqi->wqi_work_pool, work);
223 	}
224 
225 	percpu_putref(wqi->wqi_worklists);
226 	splx(s);
227 }
228 
229 static void
230 wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
231 {
232 
233 	if (wwl->wwl_tail != NULL) {
234 		wwl->wwl_tail->ww_next = work;
235 	} else {
236 		wwl->wwl_head = work;
237 	}
238 	wwl->wwl_tail = work;
239 	wwl->wwl_len++;
240 }
241 
242 void
243 wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
244 {
245 	struct wqinput_work *work;
246 	struct wqinput_worklist *wwl;
247 
248 	wwl = percpu_getref(wqi->wqi_worklists);
249 
250 	/* Prevent too much work and mbuf from being queued */
251 	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
252 		wwl->wwl_dropped++;
253 		m_freem(m);
254 		goto out;
255 	}
256 
257 	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
258 	if (work == NULL) {
259 		wwl->wwl_dropped++;
260 		m_freem(m);
261 		goto out;
262 	}
263 	work->ww_mbuf = m;
264 	work->ww_off = off;
265 	work->ww_proto = proto;
266 	work->ww_next = NULL;
267 
268 	wqinput_work_put(wwl, work);
269 
270 	/* Avoid enqueuing another work when one is already enqueued */
271 	if (wwl->wwl_wq_is_active)
272 		goto out;
273 	wwl->wwl_wq_is_active = true;
274 
275 	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
276 out:
277 	percpu_putref(wqi->wqi_worklists);
278 }
279