xref: /netbsd-src/sys/rump/net/lib/libsockin/sockin.c (revision ba65fde2d7fefa7d39838fa5fa855e62bd606b5e)
1 /*	$NetBSD: sockin.c,v 1.26 2011/03/31 19:40:54 dyoung Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2009 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.26 2011/03/31 19:40:54 dyoung Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/poll.h>
39 #include <sys/protosw.h>
40 #include <sys/queue.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/time.h>
44 
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/radix.h>
48 
49 #include <netinet/in.h>
50 #include <netinet/in_systm.h>
51 #include <netinet/ip.h>
52 
53 #include <rump/rumpuser.h>
54 
55 #include "rump_private.h"
56 
/*
 * An inet communication domain which uses the socket interface.
 * Currently supports IPv4 UDP and TCP (see sockinsw below); it could
 * easily be extended to support IPv6 by adding more stuff to the
 * protosw.
 */
62 
/*
 * Announce the domain object through DOMAIN_DEFINE (macro defined outside
 * this file; presumably it declares/registers the domain -- confirm).
 * The object itself is initialized further down.
 */
DOMAIN_DEFINE(sockindomain);

/* Handlers referenced from the protosw table and the domain descriptor. */
static void	sockin_init(void);
static int	sockin_usrreq(struct socket *so, int req, struct mbuf *m,
			      struct mbuf *nam, struct mbuf *control,
			      struct lwp *l);
static int	sockin_ctloutput(int op, struct socket *so,
				 struct sockopt *sopt);
69 
70 const struct protosw sockinsw[] = {
71 {
72 	.pr_type = SOCK_DGRAM,
73 	.pr_domain = &sockindomain,
74 	.pr_protocol = IPPROTO_UDP,
75 	.pr_flags = PR_ATOMIC|PR_ADDR,
76 	.pr_usrreq = sockin_usrreq,
77 	.pr_ctloutput = sockin_ctloutput,
78 },
79 {
80 	.pr_type = SOCK_STREAM,
81 	.pr_domain = &sockindomain,
82 	.pr_protocol = IPPROTO_TCP,
83 	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
84 	.pr_usrreq = sockin_usrreq,
85 	.pr_ctloutput = sockin_ctloutput,
86 }};
87 
88 struct domain sockindomain = {
89 	.dom_family = PF_INET,
90 	.dom_name = "socket_inet",
91 	.dom_init = sockin_init,
92 	.dom_externalize = NULL,
93 	.dom_dispose = NULL,
94 	.dom_protosw = sockinsw,
95 	.dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
96 	.dom_rtattach = rt_inithead,
97 	.dom_rtoffset = 32,
98 	.dom_maxrtkey = sizeof(struct sockaddr_in),
99 	.dom_ifattach = NULL,
100 	.dom_ifdetach = NULL,
101 	.dom_ifqueues = { NULL },
102 	.dom_link = { NULL },
103 	.dom_mowner = MOWNER_INIT("",""),
104 	.dom_rtcache = { NULL },
105 	.dom_sockaddr_cmp = NULL
106 };
107 
/*
 * SO2S: fetch the descriptor that registersock() stashed in so_internal.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. &sock or p + 1) expands correctly, not just a plain identifier.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))

/* Buffer size used for soreserve() and the SO_SNDBUF/SO_RCVBUF requests. */
#define SOCKIN_SBSIZE 65536
110 
111 struct sockin_unit {
112 	struct socket *su_so;
113 
114 	LIST_ENTRY(sockin_unit) su_entries;
115 };
116 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
117 static kmutex_t su_mtx;
118 static bool rebuild;
119 static int nsock;
120 
121 /* XXX: for the bpf hack */
122 static struct ifnet sockin_if;
/*
 * Stub that belongs to the bpf hack: both arguments are ignored and the
 * call always reports success (0).
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{

	(void)ifp;
	(void)pswitch;
	return 0;
}
124 
125 static int
126 registersock(struct socket *so, int news)
127 {
128 	struct sockin_unit *su;
129 
130 	su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
131 	if (!su)
132 		return ENOMEM;
133 
134 	so->so_internal = (void *)(intptr_t)news;
135 	su->su_so = so;
136 
137 	mutex_enter(&su_mtx);
138 	LIST_INSERT_HEAD(&su_ent, su, su_entries);
139 	nsock++;
140 	rebuild = true;
141 	mutex_exit(&su_mtx);
142 
143 	return 0;
144 }
145 
146 static void
147 removesock(struct socket *so)
148 {
149 	struct sockin_unit *su_iter;
150 	int error;
151 
152 	mutex_enter(&su_mtx);
153 	LIST_FOREACH(su_iter, &su_ent, su_entries) {
154 		if (su_iter->su_so == so)
155 			break;
156 	}
157 	if (!su_iter)
158 		panic("no such socket");
159 
160 	LIST_REMOVE(su_iter, su_entries);
161 	nsock--;
162 	rebuild = true;
163 	mutex_exit(&su_mtx);
164 
165 	rumpuser_close(SO2S(su_iter->su_so), &error);
166 	kmem_free(su_iter, sizeof(*su_iter));
167 }
168 
169 static void
170 sockin_process(struct socket *so)
171 {
172 	struct sockaddr_in from;
173 	struct iovec io;
174 	struct msghdr rmsg;
175 	struct mbuf *m;
176 	ssize_t n;
177 	size_t plen;
178 	int error;
179 
180 	m = m_gethdr(M_WAIT, MT_DATA);
181 	if (so->so_proto->pr_type == SOCK_DGRAM) {
182 		plen = IP_MAXPACKET;
183 		MEXTMALLOC(m, plen, M_DONTWAIT);
184 	} else {
185 		plen = MCLBYTES;
186 		MCLGET(m, M_DONTWAIT);
187 	}
188 	if ((m->m_flags & M_EXT) == 0) {
189 		m_freem(m);
190 		return;
191 	}
192 
193 	memset(&rmsg, 0, sizeof(rmsg));
194 	io.iov_base = mtod(m, void *);
195 	io.iov_len = plen;
196 	rmsg.msg_iov = &io;
197 	rmsg.msg_iovlen = 1;
198 	rmsg.msg_name = (struct sockaddr *)&from;
199 	rmsg.msg_namelen = sizeof(from);
200 
201 	n = rumpuser_net_recvmsg(SO2S(so), &rmsg, 0, &error);
202 	if (n <= 0) {
203 		m_freem(m);
204 
205 		/* Treat a TCP socket a goner */
206 		if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
207 			mutex_enter(softnet_lock);
208 			soisdisconnected(so);
209 			mutex_exit(softnet_lock);
210 			removesock(so);
211 		}
212 		return;
213 	}
214 	m->m_len = m->m_pkthdr.len = n;
215 
216 	bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
217 
218 	mutex_enter(softnet_lock);
219 	if (so->so_proto->pr_type == SOCK_DGRAM) {
220 		if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
221 			m_freem(m);
222 		}
223 	} else {
224 		sbappendstream(&so->so_rcv, m);
225 	}
226 
227 	sorwakeup(so);
228 	mutex_exit(softnet_lock);
229 }
230 
231 static void
232 sockin_accept(struct socket *so)
233 {
234 	struct socket *nso;
235 	struct sockaddr_in sin;
236 	int news, error, slen;
237 
238 	slen = sizeof(sin);
239 	news = rumpuser_net_accept(SO2S(so), (struct sockaddr *)&sin,
240 	    &slen, &error);
241 	if (news == -1)
242 		return;
243 
244 	mutex_enter(softnet_lock);
245 	nso = sonewconn(so, SS_ISCONNECTED);
246 	if (nso == NULL)
247 		goto errout;
248 	if (registersock(nso, news) != 0)
249 		goto errout;
250 	mutex_exit(softnet_lock);
251 	return;
252 
253  errout:
254 	rumpuser_close(news, &error);
255 	if (nso)
256 		soclose(nso);
257 	mutex_exit(softnet_lock);
258 }
259 
260 #define POLLTIMEOUT 100	/* check for new entries every 100ms */
261 
262 /* XXX: doesn't handle socket (kernel) locking properly? */
263 static void
264 sockinworker(void *arg)
265 {
266 	struct pollfd *pfds = NULL, *npfds;
267 	struct sockin_unit *su_iter;
268 	struct socket *so;
269 	int cursock = 0, i, rv, error;
270 
271 	/*
272 	 * Loop reading requests.  Check for new sockets periodically
273 	 * (could be smarter, but I'm lazy).
274 	 */
275 	for (;;) {
276 		if (rebuild) {
277 			npfds = NULL;
278 			mutex_enter(&su_mtx);
279 			if (nsock)
280 				npfds = kmem_alloc(nsock * sizeof(*npfds),
281 				    KM_NOSLEEP);
282 			if (npfds || nsock == 0) {
283 				if (pfds)
284 					kmem_free(pfds, cursock*sizeof(*pfds));
285 				pfds = npfds;
286 				cursock = nsock;
287 				rebuild = false;
288 
289 				i = 0;
290 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
291 					pfds[i].fd = SO2S(su_iter->su_so);
292 					pfds[i].events = POLLIN;
293 					pfds[i].revents = 0;
294 					i++;
295 				}
296 				KASSERT(i == nsock);
297 			}
298 			mutex_exit(&su_mtx);
299 		}
300 
301 		/* find affected sockets & process */
302 		rv = rumpuser_poll(pfds, cursock, POLLTIMEOUT, &error);
303 		for (i = 0; i < cursock && rv > 0; i++) {
304 			if (pfds[i].revents & POLLIN) {
305 				mutex_enter(&su_mtx);
306 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
307 					if (SO2S(su_iter->su_so)==pfds[i].fd) {
308 						so = su_iter->su_so;
309 						mutex_exit(&su_mtx);
310 						if(so->so_options&SO_ACCEPTCONN)
311 							sockin_accept(so);
312 						else
313 							sockin_process(so);
314 						mutex_enter(&su_mtx);
315 						break;
316 					}
317 				}
318 				/* if we can't find it, just wing it */
319 				KASSERT(rebuild || su_iter);
320 				mutex_exit(&su_mtx);
321 				pfds[i].revents = 0;
322 				rv--;
323 				i = -1;
324 				continue;
325 			}
326 
327 			/* something else?  ignore */
328 			if (pfds[i].revents) {
329 				pfds[i].revents = 0;
330 				rv--;
331 			}
332 		}
333 		KASSERT(rv <= 0);
334 	}
335 
336 }
337 
338 static void
339 sockin_init(void)
340 {
341 	int rv;
342 
343 	if (rump_threads) {
344 		if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
345 		    NULL, NULL, "sockwork")) != 0)
346 			panic("sockin_init: could not create worker thread\n");
347 	} else {
348 		printf("sockin_init: no threads => no worker thread\n");
349 	}
350 	mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
351 	strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
352 	bpf_attach(&sockin_if, DLT_NULL, 0);
353 }
354 
355 static int
356 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
357 	struct mbuf *control, struct lwp *l)
358 {
359 	int error = 0, rv;
360 
361 	switch (req) {
362 	case PRU_ATTACH:
363 	{
364 		int news, dummy;
365 		int sbsize;
366 
367 		sosetlock(so);
368 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
369 			error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
370 			if (error)
371 				break;
372 		}
373 
374 		news = rumpuser_net_socket(PF_INET, so->so_proto->pr_type,
375 		    0, &error);
376 		if (news == -1)
377 			break;
378 
379 		/* for UDP sockets, make sure we can send&recv max */
380 		if (so->so_proto->pr_type == SOCK_DGRAM) {
381 			sbsize = SOCKIN_SBSIZE;
382 			rumpuser_net_setsockopt(news, SOL_SOCKET, SO_SNDBUF,
383 			    &sbsize, sizeof(sbsize), &error);
384 			sbsize = SOCKIN_SBSIZE;
385 			rumpuser_net_setsockopt(news, SOL_SOCKET, SO_RCVBUF,
386 			    &sbsize, sizeof(sbsize), &error);
387 		}
388 
389 		if ((error = registersock(so, news)) != 0)
390 			rumpuser_close(news, &dummy);
391 
392 		break;
393 	}
394 
395 	case PRU_ACCEPT:
396 		/* we do all the work in the worker thread */
397 		break;
398 
399 	case PRU_BIND:
400 		rumpuser_net_bind(SO2S(so), mtod(nam, const struct sockaddr *),
401 		    sizeof(struct sockaddr_in), &error);
402 		break;
403 
404 	case PRU_CONNECT:
405 		rv = rumpuser_net_connect(SO2S(so),
406 		    mtod(nam, struct sockaddr *), sizeof(struct sockaddr_in),
407 		    &error);
408 		if (rv == 0)
409 			soisconnected(so);
410 		break;
411 
412 	case PRU_LISTEN:
413 		rumpuser_net_listen(SO2S(so), so->so_qlimit, &error);
414 		break;
415 
416 	case PRU_SEND:
417 	{
418 		struct sockaddr *saddr;
419 		struct msghdr mhdr;
420 		size_t iov_max, i;
421 		struct iovec iov_buf[32], *iov;
422 		struct mbuf *m2;
423 		size_t tot;
424 		int s;
425 
426 		bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
427 
428 		memset(&mhdr, 0, sizeof(mhdr));
429 
430 		iov_max = 0;
431 		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
432 			iov_max++;
433 		}
434 
435 		if (iov_max <= __arraycount(iov_buf)) {
436 			iov = iov_buf;
437 		} else {
438 			iov = kmem_alloc(sizeof(struct iovec) * iov_max,
439 			    KM_SLEEP);
440 		}
441 
442 		tot = 0;
443 		for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
444 			iov[i].iov_base = m2->m_data;
445 			iov[i].iov_len = m2->m_len;
446 			tot += m2->m_len;
447 		}
448 		mhdr.msg_iov = iov;
449 		mhdr.msg_iovlen = i;
450 		s = SO2S(so);
451 
452 		if (nam != NULL) {
453 			saddr = mtod(nam, struct sockaddr *);
454 			mhdr.msg_name = saddr;
455 			mhdr.msg_namelen = saddr->sa_len;
456 		}
457 
458 		rumpuser_net_sendmsg(s, &mhdr, 0, &error);
459 
460 		if (iov != iov_buf)
461 			kmem_free(iov, sizeof(struct iovec) * iov_max);
462 
463 		m_freem(m);
464 		m_freem(control);
465 
466 		/* this assumes too many things to list.. buthey, testing */
467 		if (!rump_threads)
468 			sockin_process(so);
469 	}
470 		break;
471 
472 	case PRU_SHUTDOWN:
473 		removesock(so);
474 		break;
475 
476 	case PRU_SOCKADDR:
477 	case PRU_PEERADDR:
478 	{
479 		int slen = nam->m_len;
480 		enum rumpuser_getnametype which;
481 
482 		if (req == PRU_SOCKADDR)
483 			which = RUMPUSER_SOCKNAME;
484 		else
485 			which = RUMPUSER_PEERNAME;
486 		rumpuser_net_getname(SO2S(so),
487 		    mtod(nam, struct sockaddr *), &slen, which, &error);
488 		if (error == 0)
489 			nam->m_len = slen;
490 		break;
491 	}
492 
493 	case PRU_CONTROL:
494 		error = ENOTTY;
495 		break;
496 
497 	default:
498 		panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
499 	}
500 
501 	return error;
502 }
503 
504 static int
505 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
506 {
507 	int error;
508 
509 	rumpuser_net_setsockopt(SO2S(so), sopt->sopt_level,
510 	    sopt->sopt_name, sopt->sopt_data, sopt->sopt_size, &error);
511 	return error;
512 }
513