xref: /netbsd-src/sys/netinet/wqinput.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /*	$NetBSD: wqinput.c,v 1.3 2017/06/02 19:10:19 para Exp $	*/
2 
3 /*-
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/kmem.h>
31 #include <sys/mbuf.h>
32 #include <sys/protosw.h>
33 #include <sys/socketvar.h>
34 #include <sys/syslog.h>
35 #include <sys/workqueue.h>
36 #include <sys/atomic.h>
37 #include <sys/queue.h>
38 #include <sys/percpu.h>
39 #include <sys/sysctl.h>
40 
41 #include <net/if.h>
42 #include <netinet/wqinput.h>
43 
/* Upper bound on the number of requests queued on one per-CPU worklist. */
#define	WQINPUT_LIST_MAXLEN	IFQ_MAXLEN
45 
/*
 * One deferred input request: a packet together with the two integer
 * arguments that are later handed to the protocol input function.
 * Requests are chained into a singly-linked list through ww_next.
 */
struct wqinput_work {
	struct mbuf		*ww_mbuf;	/* packet to deliver */
	int			 ww_off;	/* "off" given to wqinput_input */
	int			 ww_proto;	/* "proto" given to wqinput_input */
	struct wqinput_work	*ww_next;	/* next queued request or NULL */
};
52 
53 struct wqinput_worklist {
54 	/*
55 	 * XXX: TAILQ cannot be used because TAILQ_INIT memories the address
56 	 * of percpu data while percpu(9) may move percpu data during bootup.
57 	 */
58 	struct wqinput_work *wwl_head;
59 	struct wqinput_work *wwl_tail;
60 	unsigned int	wwl_len;
61 	unsigned long	wwl_dropped;
62 	struct work	wwl_work;
63 	bool		wwl_wq_is_active;
64 };
65 
66 struct wqinput {
67 	struct workqueue *wqi_wq;
68 	struct pool	wqi_work_pool;
69 	struct percpu	*wqi_worklists; /* struct wqinput_worklist */
70 	void    	(*wqi_input)(struct mbuf *, int, int);
71 };
72 
/* Forward declarations for functions referenced before their definition. */
static void	wqinput_work(struct work *wk, void *arg);
static void	wqinput_sysctl_setup(const char *name, struct wqinput *wqi);
75 
76 static void
77 wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
78 {
79 	struct wqinput_worklist *const wwl = p;
80 	int *sum = arg;
81 
82 	*sum += wwl->wwl_dropped;
83 }
84 
85 static int
86 wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
87 {
88 	struct sysctlnode node;
89 	struct wqinput *wqi;
90 	int sum = 0;
91 	int error;
92 
93 	node = *rnode;
94 	wqi = node.sysctl_data;
95 
96 	percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum);
97 
98 	node.sysctl_data = &sum;
99 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
100 	if (error != 0 || newp == NULL)
101 		return error;
102 
103 	return 0;
104 }
105 
106 static void
107 wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
108 {
109 	const struct sysctlnode *cnode, *rnode;
110 	int error;
111 
112 	error = sysctl_createv(NULL, 0, NULL, &rnode,
113 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
114 	    SYSCTL_DESCR("workqueue-based pr_input controls"),
115 	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
116 	if (error != 0)
117 		goto bad;
118 
119 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
120 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
121 	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
122 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
123 	if (error != 0)
124 		goto bad;
125 
126 	error = sysctl_createv(NULL, 0, &rnode, &rnode,
127 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
128 	    SYSCTL_DESCR("wqinput input queue controls"),
129 	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
130 	if (error != 0)
131 		goto bad;
132 
133 	error = sysctl_createv(NULL, 0, &rnode, &cnode,
134 	    CTLFLAG_PERMANENT, CTLTYPE_INT, "drops",
135 	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
136 	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
137 	if (error != 0)
138 		goto bad;
139 
140 	return;
141 bad:
142 	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
143 	    __func__, name);
144 	return;
145 }
146 
147 struct wqinput *
148 wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
149 {
150 	struct wqinput *wqi;
151 	int error;
152 	char namebuf[32];
153 
154 	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
155 
156 	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
157 
158 	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
159 	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
160 	if (error != 0)
161 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
162 	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
163 	    name, NULL, IPL_SOFTNET);
164 	wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist));
165 	wqi->wqi_input = func;
166 
167 	wqinput_sysctl_setup(name, wqi);
168 
169 	return wqi;
170 }
171 
172 static struct wqinput_work *
173 wqinput_work_get(struct wqinput_worklist *wwl)
174 {
175 	struct wqinput_work *work;
176 
177 	/* Must be called at IPL_SOFTNET */
178 
179 	work = wwl->wwl_head;
180 	if (work != NULL) {
181 		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
182 		wwl->wwl_len--;
183 		wwl->wwl_head = work->ww_next;
184 		work->ww_next = NULL;
185 
186 		if (wwl->wwl_head == NULL)
187 			wwl->wwl_tail = NULL;
188 	} else {
189 		KASSERT(wwl->wwl_len == 0);
190 	}
191 
192 	return work;
193 }
194 
195 static void
196 wqinput_work(struct work *wk, void *arg)
197 {
198 	struct wqinput *wqi = arg;
199 	struct wqinput_work *work;
200 	struct wqinput_worklist *wwl;
201 	int s;
202 
203 	/* Users expect to run at IPL_SOFTNET */
204 	s = splsoftnet();
205 	/* This also prevents LWP migrations between CPUs */
206 	wwl = percpu_getref(wqi->wqi_worklists);
207 
208 	/* We can allow enqueuing another work at this point */
209 	wwl->wwl_wq_is_active = false;
210 
211 	while ((work = wqinput_work_get(wwl)) != NULL) {
212 		mutex_enter(softnet_lock);
213 		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
214 		mutex_exit(softnet_lock);
215 
216 		pool_put(&wqi->wqi_work_pool, work);
217 	}
218 
219 	percpu_putref(wqi->wqi_worklists);
220 	splx(s);
221 }
222 
223 static void
224 wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
225 {
226 
227 	if (wwl->wwl_tail != NULL) {
228 		wwl->wwl_tail->ww_next = work;
229 	} else {
230 		wwl->wwl_head = work;
231 	}
232 	wwl->wwl_tail = work;
233 	wwl->wwl_len++;
234 }
235 
236 void
237 wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
238 {
239 	struct wqinput_work *work;
240 	struct wqinput_worklist *wwl;
241 
242 	wwl = percpu_getref(wqi->wqi_worklists);
243 
244 	/* Prevent too much work and mbuf from being queued */
245 	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
246 		wwl->wwl_dropped++;
247 		m_freem(m);
248 		goto out;
249 	}
250 
251 	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
252 	if (work == NULL) {
253 		wwl->wwl_dropped++;
254 		m_freem(m);
255 		goto out;
256 	}
257 	work->ww_mbuf = m;
258 	work->ww_off = off;
259 	work->ww_proto = proto;
260 	work->ww_next = NULL;
261 
262 	wqinput_work_put(wwl, work);
263 
264 	/* Avoid enqueuing another work when one is already enqueued */
265 	if (wwl->wwl_wq_is_active)
266 		goto out;
267 	wwl->wwl_wq_is_active = true;
268 
269 	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
270 out:
271 	percpu_putref(wqi->wqi_worklists);
272 }
273